From 11838ae4f72af4cef7293375d6f80b002ddf6484 Mon Sep 17 00:00:00 2001
From: Wei Qi Lu <wei@siftstack.com>
Date: Thu, 26 Mar 2026 15:47:02 -0700
Subject: [PATCH 01/52] python(feat): add data import api

---
 .../low_level_wrappers/data_imports.py        | 212 ++++++++++++++
 python/lib/sift_client/client.py              |   7 +
 python/lib/sift_client/resources/__init__.py  |   4 +
 .../lib/sift_client/resources/data_imports.py | 239 ++++++++++++++++
 .../resources/sync_stubs/__init__.py          |   3 +
 .../resources/sync_stubs/__init__.pyi         | 146 ++++++++++
 .../lib/sift_client/sift_types/data_import.py | 269 ++++++++++++++++++
 python/lib/sift_client/util/util.py           |   4 +
 8 files changed, 884 insertions(+)
 create mode 100644 python/lib/sift_client/_internal/low_level_wrappers/data_imports.py
 create mode 100644 python/lib/sift_client/resources/data_imports.py
 create mode 100644 python/lib/sift_client/sift_types/data_import.py

diff --git a/python/lib/sift_client/_internal/low_level_wrappers/data_imports.py b/python/lib/sift_client/_internal/low_level_wrappers/data_imports.py
new file mode 100644
index 000000000..d83f42142
--- /dev/null
+++ b/python/lib/sift_client/_internal/low_level_wrappers/data_imports.py
@@ -0,0 +1,212 @@
+from __future__ import annotations
+
+import logging
+from typing import TYPE_CHECKING, cast
+
+from sift.data_imports.v2.data_imports_pb2 import (
+    CreateDataImportFromUploadRequest,
+    CreateDataImportFromUploadResponse,
+    CreateDataImportFromUrlRequest,
+    CreateDataImportFromUrlResponse,
+    DetectConfigRequest,
+    DetectConfigResponse,
+    GetDataImportRequest,
+    GetDataImportResponse,
+    ListDataImportsRequest,
+    ListDataImportsResponse,
+    RetryDataImportRequest,
+)
+from sift.data_imports.v2.data_imports_pb2_grpc import DataImportServiceStub
+
+from sift_client._internal.low_level_wrappers.base import LowLevelClientBase
+from sift_client._internal.util.executor import run_sync_function
+from sift_client.sift_types.data_import import CsvImportConfig, DataImport
+from sift_client.transport import WithGrpcClient, WithRestClient
+
+if TYPE_CHECKING:
+    from pathlib import Path
+
+    from sift.data_imports.v2.data_imports_pb2 import DataTypeKey
+
+    from sift_client.transport.grpc_transport import GrpcClient
+    from sift_client.transport.rest_transport import RestClient
+
+# Alias for the supported import config types (CSV only for now); widen to a Union as formats are added.
+ImportConfig = CsvImportConfig
+
+
+def _set_config_on_request(
+    request: CreateDataImportFromUploadRequest | CreateDataImportFromUrlRequest,
+    config: ImportConfig,
+) -> None:
+    """Copy ``config`` into the matching request field; raise TypeError if unsupported."""
+    if not isinstance(config, CsvImportConfig):
+        raise TypeError(f"Unsupported import config type: {type(config).__name__}")
+    # CsvImportConfig is the only type handled here; it maps onto ``csv_config``.
+    request.csv_config.CopyFrom(config._to_proto())
+
+
+logger = logging.getLogger(__name__)
+
+
+class DataImportsLowLevelClient(LowLevelClientBase, WithGrpcClient, WithRestClient):
+    """Low-level client for the DataImportService.
+
+    This class provides a thin wrapper around the autogenerated bindings for the DataImportsAPI.
+    """
+
+    def __init__(self, grpc_client: GrpcClient, rest_client: RestClient) -> None:
+        WithGrpcClient.__init__(self, grpc_client=grpc_client)  # transport for the RPC methods
+        WithRestClient.__init__(self, rest_client=rest_client)  # transport used by upload_file
+
+    async def create_from_upload(self, config: ImportConfig) -> tuple[str, str]:
+        """Register an upload-based data import with the service.
+
+        Args:
+            config: Import configuration to attach to the request.
+
+        Returns:
+            The pair ``(data_import_id, upload_url)`` from the response.
+        """
+        upload_request = CreateDataImportFromUploadRequest()
+        _set_config_on_request(upload_request, config)
+        stub = self._grpc_client.get_stub(DataImportServiceStub)
+        raw = await stub.CreateDataImportFromUpload(upload_request)
+        # cast() is a typing aid only; it performs no runtime conversion.
+        reply = cast("CreateDataImportFromUploadResponse", raw)
+        return reply.data_import_id, reply.upload_url
+
+    async def upload_file(self, upload_url: str, file_path: Path) -> None:
+        """POST the contents of a local file to ``upload_url``.
+
+        The blocking request is handed to ``run_sync_function`` so this
+        coroutine does not perform the HTTP call inline.
+
+        Args:
+            upload_url: Destination URL for the upload.
+            file_path: Local file whose bytes form the request body.
+        """
+        http = self._rest_client
+
+        def _post_file() -> None:
+            # The open handle itself is passed as ``data``; the file stays open for the call.
+            with open(file_path, "rb") as payload:
+                http.post(upload_url, data=payload).raise_for_status()
+
+        await run_sync_function(_post_file)
+
+    async def create_from_url(self, url: str, config: ImportConfig) -> str:
+        """Register a data import whose source is a remote URL.
+
+        Args:
+            url: Location of the data to import (HTTP or S3).
+            config: Import configuration to attach to the request.
+
+        Returns:
+            The ``data_import_id`` from the response.
+        """
+        url_request = CreateDataImportFromUrlRequest(url=url)
+        _set_config_on_request(url_request, config)
+        stub = self._grpc_client.get_stub(DataImportServiceStub)
+        raw = await stub.CreateDataImportFromUrl(url_request)
+        # cast() is a typing aid only; it performs no runtime conversion.
+        reply = cast("CreateDataImportFromUrlResponse", raw)
+        return reply.data_import_id
+
+    async def get(self, data_import_id: str) -> DataImport:
+        """Fetch a single data import via the GetDataImport RPC.
+
+        Args:
+            data_import_id: Identifier of the data import to fetch.
+
+        Returns:
+            A DataImport built from the response proto.
+        """
+        lookup = GetDataImportRequest(data_import_id=data_import_id)
+        stub = self._grpc_client.get_stub(DataImportServiceStub)
+        raw = await stub.GetDataImport(lookup)
+        return DataImport._from_proto(cast("GetDataImportResponse", raw).data_import)
+
+    async def list_(
+        self,
+        *,
+        page_size: int | None = None,
+        page_token: str | None = None,
+        query_filter: str = "",
+        order_by: str = "",
+    ) -> tuple[list[DataImport], str]:
+        """Fetch one page of data imports via the ListDataImports RPC.
+
+        Args:
+            page_size: Page size to request; left unset on the request when None.
+            page_token: Continuation token; left unset when None or empty.
+            query_filter: CEL filter string.
+            order_by: Ordering string (e.g. "created_date desc").
+
+        Returns:
+            A tuple ``(data_imports, next_page_token)`` for this page.
+        """
+        page_request = ListDataImportsRequest(filter=query_filter, order_by=order_by)
+        # Pagination fields stay unset unless given (non-None size, non-empty token).
+        if page_size is not None:
+            page_request.page_size = page_size
+        if page_token:
+            page_request.page_token = page_token
+
+        stub = self._grpc_client.get_stub(DataImportServiceStub)
+        raw = await stub.ListDataImports(page_request)
+        reply = cast("ListDataImportsResponse", raw)
+        # Convert each proto entry into a DataImport.
+        converted = [DataImport._from_proto(proto) for proto in reply.data_imports]
+        return converted, reply.next_page_token
+
+    async def list_all(
+        self,
+        *,
+        query_filter: str = "",
+        order_by: str = "",
+        max_results: int | None = None,
+    ) -> list[DataImport]:
+        """List data imports through the ``_handle_pagination`` helper.
+
+        Args:
+            query_filter: CEL filter string, forwarded to ``list_``.
+            order_by: Ordering string (e.g. "created_date desc").
+            max_results: Maximum total results, passed to the helper.
+
+        Returns:
+            A list of the matching DataImports.
+        """
+        list_kwargs = {"query_filter": query_filter, "order_by": order_by}
+        collected = await self._handle_pagination(
+            func=self.list_, kwargs=list_kwargs, max_results=max_results
+        )
+        return collected
+
+    async def retry(self, data_import_id: str) -> None:
+        """Retry a failed data import (only URL-based imports in a failed state qualify).
+
+        Args:
+            data_import_id: Identifier of the data import to retry.
+        """
+        retry_request = RetryDataImportRequest(data_import_id=data_import_id)
+        stub = self._grpc_client.get_stub(DataImportServiceStub)
+        # The RPC's reply is not needed, so it is discarded.
+        await stub.RetryDataImport(retry_request)
+
+    async def detect_config(
+        self, data: bytes, data_type_key: DataTypeKey.ValueType
+    ) -> DetectConfigResponse:
+        """Send sample bytes to the DetectConfig RPC and return its reply.
+
+        Args:
+            data: Sample of the file content (e.g. the first 64 KiB).
+            data_type_key: File type hint, sent as the request's ``type`` field.
+
+        Returns:
+            The unconverted DetectConfigResponse proto; turning it into a
+            sift_type is left to the caller (the resource API).
+        """
+        probe = DetectConfigRequest(data=data, type=data_type_key)
+        stub = self._grpc_client.get_stub(DataImportServiceStub)
+        return cast("DetectConfigResponse", await stub.DetectConfig(probe))
diff --git a/python/lib/sift_client/client.py b/python/lib/sift_client/client.py
index ed7aeba9a..95fd25b71 100644
--- a/python/lib/sift_client/client.py
+++ b/python/lib/sift_client/client.py
@@ -9,6 +9,8 @@
     ChannelsAPIAsync,
     DataExportAPI,
     DataExportAPIAsync,
+    DataImportAPI,
+    DataImportAPIAsync,
     FileAttachmentsAPI,
     FileAttachmentsAPIAsync,
     IngestionAPIAsync,
@@ -110,6 +112,9 @@ class SiftClient(
     data_export: DataExportAPI
     """Instance of the Data Export API for making synchronous requests."""
 
+    data_import: DataImportAPI
+    """Instance of the Data Import API for making synchronous requests."""
+
     async_: AsyncAPIs
     """Accessor for the asynchronous APIs. All asynchronous APIs are available as attributes on this accessor."""
 
@@ -159,6 +164,7 @@ def __init__(
         self.tags = TagsAPI(self)
         self.test_results = TestResultsAPI(self)
         self.data_export = DataExportAPI(self)
+        self.data_import = DataImportAPI(self)
 
         # Accessor for the asynchronous APIs
         self.async_ = AsyncAPIs(
@@ -175,6 +181,7 @@ def __init__(
             tags=TagsAPIAsync(self),
             test_results=TestResultsAPIAsync(self),
             data_export=DataExportAPIAsync(self),
+            data_import=DataImportAPIAsync(self),
         )
 
     @property
diff --git a/python/lib/sift_client/resources/__init__.py b/python/lib/sift_client/resources/__init__.py
index 78b3b4eba..2b7a4c55b 100644
--- a/python/lib/sift_client/resources/__init__.py
+++ b/python/lib/sift_client/resources/__init__.py
@@ -162,6 +162,7 @@ async def main():
 from sift_client.resources.runs import RunsAPIAsync
 from sift_client.resources.tags import TagsAPIAsync
 from sift_client.resources.test_results import TestResultsAPIAsync
+from sift_client.resources.data_imports import DataImportAPIAsync
 from sift_client.resources.exports import DataExportAPIAsync
 
 # ruff: noqa All imports needs to be imported before sync_stubs to avoid circular import
@@ -178,6 +179,7 @@ async def main():
     TestResultsAPI,
     FileAttachmentsAPI,
     DataExportAPI,
+    DataImportAPI,
 )
 
 import sys
@@ -215,4 +217,6 @@ async def main():
     "TracingConfig",
     "DataExportAPI",
     "DataExportAPIAsync",
+    "DataImportAPI",
+    "DataImportAPIAsync",
 ]
diff --git a/python/lib/sift_client/resources/data_imports.py b/python/lib/sift_client/resources/data_imports.py
new file mode 100644
index 000000000..8ec2a3706
--- /dev/null
+++ b/python/lib/sift_client/resources/data_imports.py
@@ -0,0 +1,239 @@
+from __future__ import annotations
+
+import asyncio
+import logging
+import time
+from pathlib import Path
+from typing import TYPE_CHECKING
+
+from sift.data_imports.v2.data_imports_pb2 import DATA_TYPE_KEY_CSV
+
+from sift_client._internal.low_level_wrappers.data_imports import DataImportsLowLevelClient
+from sift_client.resources._base import ResourceBase
+from sift_client.sift_types.data_import import (
+    CsvImportConfig,
+    DataImport,
+    DataImportStatus,
+)
+from sift_client.util import cel_utils as cel
+
+if TYPE_CHECKING:
+    from sift_client._internal.low_level_wrappers.data_imports import ImportConfig
+    from sift_client.client import SiftClient
+
+logger = logging.getLogger(__name__)
+
+_DETECT_CONFIG_SAMPLE_SIZE = 65_536  # 64 KiB
+
+
+class DataImportAPIAsync(ResourceBase):
+    """High-level API for importing data into Sift.
+
+    Supports importing data from local files or remote URLs. Returns a
+    `DataImport` object that can be polled for status.
+    """
+
+    def __init__(self, sift_client: SiftClient):
+        """Set up the API and its low-level data-import client.
+
+        Args:
+            sift_client: The Sift client whose transports are reused.
+        """
+        super().__init__(sift_client)
+        owner = self.client
+        self._low_level_client = DataImportsLowLevelClient(
+            grpc_client=owner.grpc_client, rest_client=owner.rest_client
+        )
+
+    async def import_from_path(
+        self,
+        *,
+        file_path: str | Path,
+        config: ImportConfig,
+    ) -> DataImport:
+        """Create a data import and upload a local file for it.
+
+        Requests an upload URL from the server, uploads the file to it,
+        then fetches the resulting :class:`DataImport`, which can be
+        polled for status via ``data_import.refresh()``.
+
+        Args:
+            file_path: Location of the local file to import.
+            config: Import configuration describing the file format and
+                column mapping.
+
+        Returns:
+            The :class:`DataImport` fetched after the upload finishes.
+
+        Raises:
+            FileNotFoundError: If ``file_path`` is not an existing file.
+        """
+        source = Path(file_path)
+        if not source.is_file():
+            raise FileNotFoundError(f"File not found: {file_path}")
+        low_level = self._low_level_client
+        # Step 1: register the import; the response includes an upload URL.
+        import_id, presigned_url = await low_level.create_from_upload(config)
+        logger.info("Created data import %s", import_id)
+        # Step 2: send the file to that URL.
+        await low_level.upload_file(presigned_url, source)
+        logger.info("Uploaded file to presigned URL for import %s", import_id)
+        # Step 3: fetch the import record and pass it through _apply_client_to_instance.
+        return self._apply_client_to_instance(await low_level.get(import_id))
+
+    async def import_from_url(
+        self,
+        *,
+        url: str,
+        config: ImportConfig,
+    ) -> DataImport:
+        """Create a data import whose source is a remote URL (HTTP or S3).
+
+        The returned :class:`DataImport` can be polled for status via
+        ``data_import.refresh()``.
+
+        Args:
+            url: Remote location of the data to import.
+            config: Import configuration describing the file format and
+                column mapping.
+
+        Returns:
+            The :class:`DataImport` fetched right after creation.
+        """
+        low_level = self._low_level_client
+        import_id = await low_level.create_from_url(url, config)
+        logger.info("Created URL-based data import %s", import_id)
+        # Fetch the new record, then pass it through _apply_client_to_instance.
+        return self._apply_client_to_instance(await low_level.get(import_id))
+
+    async def get(self, data_import_id: str) -> DataImport:
+        """Fetch a data import by its identifier.
+
+        Args:
+            data_import_id: Identifier of the data import to fetch.
+
+        Returns:
+            The matching DataImport.
+        """
+        fetched = await self._low_level_client.get(data_import_id)
+        return self._apply_client_to_instance(fetched)
+
+    async def list_(
+        self,
+        *,
+        data_import_ids: list[str] | None = None,
+        status: DataImportStatus | None = None,
+        filter_query: str | None = None,
+        order_by: str | None = None,
+        limit: int | None = None,
+    ) -> list[DataImport]:
+        """List data imports, optionally narrowed by the given criteria.
+
+        Args:
+            data_import_ids: Keep only imports whose ID is in this list.
+            status: Keep only imports with this status.
+            filter_query: Extra CEL filter, combined with the criteria above.
+            order_by: Ordering string (e.g. "created_date desc").
+            limit: Maximum number of imports to return. If None, returns all.
+
+        Returns:
+            The DataImport objects that match the combined filter.
+        """
+        # Each supplied criterion becomes one clause; cel.and_ combines them.
+        clauses: list[str] = []
+        if data_import_ids:
+            clauses += [cel.in_("data_import_id", data_import_ids)]
+        if status is not None:
+            clauses += [cel.equals("status", str(status.value))]
+        if filter_query:
+            clauses += [filter_query]
+        combined = cel.and_(*clauses)
+
+        # Falsy filter/ordering values are normalised to "" for the low-level call.
+        found = await self._low_level_client.list_all(
+            query_filter=combined or "", order_by=order_by or "", max_results=limit
+        )
+        return self._apply_client_to_instances(found)
+
+    async def retry(self, data_import: str | DataImport) -> None:
+        """Request a retry of a data import that has failed.
+
+        Only works for URL-based imports in a failed state.
+
+        Args:
+            data_import: Either a DataImport instance or a data_import_id string.
+        """
+        target_id = data_import
+        if isinstance(data_import, DataImport):
+            target_id = data_import._id_or_error
+        await self._low_level_client.retry(target_id)
+
+    async def detect_config(self, file_path: str | Path) -> CsvImportConfig:
+        """Ask the server to infer an import configuration for a file.
+
+        Reads the first bytes of the file, submits them to the DetectConfig
+        endpoint, and returns the configuration it reports. The result can
+        be inspected and adjusted before use with :meth:`import_from_path`.
+
+        Currently supports CSV files only.
+
+        Args:
+            file_path: Location of the file to analyze.
+
+        Returns:
+            The detected import config.
+
+        Raises:
+            FileNotFoundError: If ``file_path`` is not an existing file.
+            ValueError: If the response contains no CSV config.
+        """
+        source = Path(file_path)
+        if not source.is_file():
+            raise FileNotFoundError(f"File not found: {file_path}")
+
+        # Only the leading _DETECT_CONFIG_SAMPLE_SIZE bytes are sent for detection.
+        with source.open("rb") as handle:
+            sample = handle.read(_DETECT_CONFIG_SAMPLE_SIZE)
+        response = await self._low_level_client.detect_config(sample, DATA_TYPE_KEY_CSV)
+
+        # A response without ``csv_config`` carries nothing usable.
+        if not response.HasField("csv_config"):
+            raise ValueError("Server returned an empty DetectConfig response.")
+        return CsvImportConfig._from_proto(response.csv_config)
+
+    async def wait_until_complete(
+        self,
+        data_import: str | DataImport,
+        *,
+        polling_interval_secs: int = 5,
+        timeout_secs: int | None = None,
+    ) -> DataImport:
+        """Poll a data import until it reaches a terminal state (SUCCEEDED or FAILED).
+
+        Args:
+            data_import: The DataImport or data_import_id to wait for.
+            polling_interval_secs: Seconds between status polls. Defaults to 5s.
+            timeout_secs: Maximum seconds to wait. If None (default), polls indefinitely.
+
+        Returns:
+            The DataImport in its terminal state.
+
+        Raises:
+            TimeoutError: If the import is still incomplete after ``timeout_secs``.
+        """
+        data_import_id = (
+            data_import._id_or_error if isinstance(data_import, DataImport) else data_import
+        )
+        deadline = time.monotonic() + (float("inf") if timeout_secs is None else timeout_secs)
+        while True:
+            result = await self.get(data_import_id)
+            if result.is_complete:
+                return result
+            remaining = deadline - time.monotonic()
+            if remaining <= 0:
+                raise TimeoutError(
+                    f"Data import '{data_import_id}' did not complete "
+                    f"within {timeout_secs} seconds."
+                )
+            # Cap the sleep so the final poll lands at the deadline, not an interval past it.
+            await asyncio.sleep(min(polling_interval_secs, remaining))
diff --git a/python/lib/sift_client/resources/sync_stubs/__init__.py b/python/lib/sift_client/resources/sync_stubs/__init__.py
index acd73755e..982a028c6 100644
--- a/python/lib/sift_client/resources/sync_stubs/__init__.py
+++ b/python/lib/sift_client/resources/sync_stubs/__init__.py
@@ -8,6 +8,7 @@
     CalculatedChannelsAPIAsync,
     ChannelsAPIAsync,
     DataExportAPIAsync,
+    DataImportAPIAsync,
     FileAttachmentsAPIAsync,
     JobsAPIAsync,
     PingAPIAsync,
@@ -30,12 +31,14 @@
 TagsAPI = generate_sync_api(TagsAPIAsync, "TagsAPI")
 TestResultsAPI = generate_sync_api(TestResultsAPIAsync, "TestResultsAPI")
 DataExportAPI = generate_sync_api(DataExportAPIAsync, "DataExportAPI")
+DataImportAPI = generate_sync_api(DataImportAPIAsync, "DataImportAPI")
 
 __all__ = [
     "AssetsAPI",
     "CalculatedChannelsAPI",
     "ChannelsAPI",
     "DataExportAPI",
+    "DataImportAPI",
     "FileAttachmentsAPI",
     "JobsAPI",
     "PingAPI",
diff --git a/python/lib/sift_client/resources/sync_stubs/__init__.pyi b/python/lib/sift_client/resources/sync_stubs/__init__.pyi
index fe87809cd..a96efe70c 100644
--- a/python/lib/sift_client/resources/sync_stubs/__init__.pyi
+++ b/python/lib/sift_client/resources/sync_stubs/__init__.pyi
@@ -13,6 +13,9 @@ if TYPE_CHECKING:
     import pandas as pd
     import pyarrow as pa
 
+    from sift_client._internal.low_level_wrappers.data_imports import (
+        ImportConfig,
+    )
     from sift_client.client import SiftClient
     from sift_client.sift_types.asset import Asset, AssetUpdate
     from sift_client.sift_types.calculated_channel import (
@@ -21,6 +24,7 @@ if TYPE_CHECKING:
         CalculatedChannelUpdate,
     )
     from sift_client.sift_types.channel import Channel
+    from sift_client.sift_types.data_import import CsvImportConfig, DataImport, DataImportStatus
     from sift_client.sift_types.export import ExportOutputFormat
     from sift_client.sift_types.file_attachment import (
         FileAttachment,
@@ -621,6 +625,148 @@ class DataExportAPI:
         """
         ...
 
+class DataImportAPI:
+    """Sync counterpart to `DataImportAPIAsync`.
+
+    High-level API for importing data into Sift.
+
+    Supports importing data from local files or remote URLs. Returns a
+    `DataImport` object that can be polled for status.
+    """
+
+    def __init__(self, sift_client: SiftClient):
+        """Initialize the DataImportAPI.
+
+        Args:
+            sift_client: The Sift client to use.
+        """
+        ...
+
+    def _run(self, coro): ...
+    def detect_config(self, file_path: str | Path) -> CsvImportConfig:
+        """Auto-detect import configuration from a file.
+
+        Reads a sample of the file, sends it to the server's DetectConfig
+        endpoint, and returns the detected configuration. You can inspect
+        and modify the result before passing it to :meth:`import_from_path`.
+
+        Currently supports CSV files only.
+
+        Args:
+            file_path: Path to the file to analyze.
+
+        Returns:
+            The detected import config.
+
+        Raises:
+            FileNotFoundError: If the file does not exist.
+            ValueError: If detection returns no config.
+        """
+        ...
+
+    def get(self, data_import_id: str) -> DataImport:
+        """Get a data import by ID.
+
+        Args:
+            data_import_id: The ID of the data import.
+
+        Returns:
+            The DataImport.
+        """
+        ...
+
+    def import_from_path(self, *, file_path: str | Path, config: ImportConfig) -> DataImport:
+        """Import data from a local file.
+
+        Creates a data import on the server and uploads the file to the
+        returned presigned URL. Returns a :class:`DataImport` that can be
+        polled for status via ``data_import.refresh()``.
+
+        Args:
+            file_path: Path to the local file to import.
+            config: Import configuration describing the file format and column
+                mapping.
+
+        Returns:
+            A :class:`DataImport` representing the import operation.
+
+        Raises:
+            FileNotFoundError: If the file does not exist.
+        """
+        ...
+
+    def import_from_url(self, *, url: str, config: ImportConfig) -> DataImport:
+        """Import data from a remote URL (HTTP or S3).
+
+        Returns a :class:`DataImport` that can be polled for status via
+        ``data_import.refresh()``.
+
+        Args:
+            url: The URL to import from.
+            config: Import configuration describing the file format and column
+                mapping.
+
+        Returns:
+            A :class:`DataImport` representing the import operation.
+        """
+        ...
+
+    def list_(
+        self,
+        *,
+        data_import_ids: list[str] | None = None,
+        status: DataImportStatus | None = None,
+        filter_query: str | None = None,
+        order_by: str | None = None,
+        limit: int | None = None,
+    ) -> list[DataImport]:
+        """List data imports with optional filtering.
+
+        Args:
+            data_import_ids: Filter to imports with any of these IDs.
+            status: Filter to imports with this status.
+            filter_query: Explicit CEL filter string.
+            order_by: Ordering string (e.g. "created_date desc").
+            limit: Maximum number of imports to return. If None, returns all.
+
+        Returns:
+            A list of DataImport objects matching the filter criteria.
+        """
+        ...
+
+    def retry(self, data_import: str | DataImport) -> None:
+        """Retry a failed data import.
+
+        Only works for URL-based imports in a failed state.
+
+        Args:
+            data_import: The DataImport or data_import_id to retry.
+        """
+        ...
+
+    def wait_until_complete(
+        self,
+        data_import: str | DataImport,
+        *,
+        polling_interval_secs: int = 5,
+        timeout_secs: int | None = None,
+    ) -> DataImport:
+        """Wait until a data import reaches a terminal state (SUCCEEDED or FAILED).
+
+        Args:
+            data_import: The DataImport or data_import_id to wait for.
+            polling_interval_secs: Seconds between status polls. Defaults to 5s.
+            timeout_secs: Maximum seconds to wait. If None, polls indefinitely.
+                Defaults to None (indefinite).
+
+        Returns:
+            The DataImport in its terminal state.
+
+        Raises:
+            TimeoutError: If the import has not completed within ``timeout_secs``.
+        """
+        ...
+
 class FileAttachmentsAPI:
     """Sync counterpart to `FileAttachmentsAPIAsync`.
 
diff --git a/python/lib/sift_client/sift_types/data_import.py b/python/lib/sift_client/sift_types/data_import.py
new file mode 100644
index 000000000..fc0bf119a
--- /dev/null
+++ b/python/lib/sift_client/sift_types/data_import.py
@@ -0,0 +1,269 @@
+from __future__ import annotations
+
+from datetime import datetime  # noqa: TC003
+from enum import Enum
+from typing import TYPE_CHECKING
+
+from pydantic import BaseModel, ConfigDict
+from sift.common.type.v1.channel_config_pb2 import ChannelConfig as ChannelConfigProto
+from sift.data_imports.v2.data_imports_pb2 import CsvConfig as CsvConfigProto
+from sift.data_imports.v2.data_imports_pb2 import CsvTimeColumn as CsvTimeColumnProto
+from sift.data_imports.v2.data_imports_pb2 import DataImport as DataImportProto
+from sift.data_imports.v2.data_imports_pb2 import DataImportStatus as DataImportStatusProto
+from sift.data_imports.v2.data_imports_pb2 import TimeFormat as TimeFormatProto
+
+from sift_client._internal.util.timestamp import to_pb_timestamp
+from sift_client.sift_types._base import BaseType
+from sift_client.sift_types.channel import ChannelDataType
+
+if TYPE_CHECKING:
+    from sift_client.client import SiftClient
+
+
+# ---------------------------------------------------------------------------
+# Enums
+# ---------------------------------------------------------------------------
+
+
+class TimeFormat(Enum):
+    """Supported time formats for data import columns."""
+
+    RELATIVE_NANOSECONDS = TimeFormatProto.TIME_FORMAT_RELATIVE_NANOSECONDS
+    RELATIVE_MICROSECONDS = TimeFormatProto.TIME_FORMAT_RELATIVE_MICROSECONDS
+    RELATIVE_MILLISECONDS = TimeFormatProto.TIME_FORMAT_RELATIVE_MILLISECONDS
+    RELATIVE_SECONDS = TimeFormatProto.TIME_FORMAT_RELATIVE_SECONDS
+    RELATIVE_MINUTES = TimeFormatProto.TIME_FORMAT_RELATIVE_MINUTES
+    RELATIVE_HOURS = TimeFormatProto.TIME_FORMAT_RELATIVE_HOURS
+    ABSOLUTE_RFC3339 = TimeFormatProto.TIME_FORMAT_ABSOLUTE_RFC3339
+    ABSOLUTE_DATETIME = TimeFormatProto.TIME_FORMAT_ABSOLUTE_DATETIME
+    ABSOLUTE_UNIX_SECONDS = TimeFormatProto.TIME_FORMAT_ABSOLUTE_UNIX_SECONDS
+    ABSOLUTE_UNIX_MILLISECONDS = TimeFormatProto.TIME_FORMAT_ABSOLUTE_UNIX_MILLISECONDS
+    ABSOLUTE_UNIX_MICROSECONDS = TimeFormatProto.TIME_FORMAT_ABSOLUTE_UNIX_MICROSECONDS
+    ABSOLUTE_UNIX_NANOSECONDS = TimeFormatProto.TIME_FORMAT_ABSOLUTE_UNIX_NANOSECONDS
+
+
+class DataImportStatus(Enum):
+    """Status of a data import."""
+
+    PENDING = DataImportStatusProto.DATA_IMPORT_STATUS_PENDING
+    IN_PROGRESS = DataImportStatusProto.DATA_IMPORT_STATUS_IN_PROGRESS
+    SUCCEEDED = DataImportStatusProto.DATA_IMPORT_STATUS_SUCCEEDED
+    FAILED = DataImportStatusProto.DATA_IMPORT_STATUS_FAILED
+
+
+# ---------------------------------------------------------------------------
+# CSV config types
+# ---------------------------------------------------------------------------
+
+
+class CsvTimeColumn(BaseModel):
+    """Time column configuration for CSV imports.
+
+    Attributes:
+        column: The 1-indexed column number of the time column.
+        format: The time format used in this column.
+        relative_start_time: Required when using a relative time format.
+    """
+
+    model_config = ConfigDict(frozen=True)
+
+    column: int
+    format: TimeFormat
+    relative_start_time: datetime | None = None
+
+    def _to_proto(self) -> CsvTimeColumnProto:
+        proto = CsvTimeColumnProto(
+            column_number=self.column,
+            format=self.format.value,
+        )
+        if self.relative_start_time is not None:
+            proto.relative_start_time.CopyFrom(to_pb_timestamp(self.relative_start_time))
+        return proto
+
+
+class CsvDataColumn(BaseModel):
+    """A data column definition for CSV imports.
+
+    Attributes:
+        column: The 1-indexed column number.
+        name: Channel name.
+        data_type: The data type of the channel values.
+        units: Optional units string.
+        description: Optional channel description.
+    """
+
+    model_config = ConfigDict(frozen=True)
+
+    column: int
+    name: str
+    data_type: ChannelDataType
+    units: str = ""
+    description: str = ""
+
+
+class CsvImportConfig(BaseModel):
+    """Configuration for importing a CSV file.
+
+    Attributes:
+        asset_name: Name of the asset to import data into.
+        run_name: Name for the run. Ignored if ``run_id`` is set.
+        run_id: ID of an existing run to append data to.
+        first_data_row: The first row containing data (1-indexed). Defaults to 2 to skip a header row.
+        time_column: Time column configuration.
+        data_columns: List of data column definitions.
+    """
+
+    model_config = ConfigDict(frozen=True)
+
+    asset_name: str
+    run_name: str | None = None
+    run_id: str | None = None
+    first_data_row: int = 2
+    time_column: CsvTimeColumn
+    data_columns: list[CsvDataColumn]
+
+    def _to_proto(self) -> CsvConfigProto:
+        return CsvConfigProto(
+            asset_name=self.asset_name,
+            run_name=self.run_name or "",
+            run_id=self.run_id or "",
+            first_data_row=self.first_data_row,
+            time_column=self.time_column._to_proto(),
+            data_columns={
+                dc.column: ChannelConfigProto(
+                    name=dc.name,
+                    data_type=dc.data_type.value,
+                    units=dc.units,
+                    description=dc.description,
+                )
+                for dc in self.data_columns
+            },
+        )
+
+    @classmethod
+    def _from_proto(cls, proto: CsvConfigProto) -> CsvImportConfig:
+        """Create from a proto CsvConfig (e.g. from DetectConfig response)."""
+        time_column = CsvTimeColumn(
+            column=proto.time_column.column_number,
+            format=TimeFormat(proto.time_column.format),
+        )
+        data_columns = [
+            CsvDataColumn(
+                column=col_num,
+                name=ch_cfg.name,
+                data_type=ChannelDataType(ch_cfg.data_type),
+                units=ch_cfg.units,
+                description=ch_cfg.description,
+            )
+            for col_num, ch_cfg in proto.data_columns.items()
+        ]
+        return cls(
+            asset_name=proto.asset_name,
+            run_name=proto.run_name or None,
+            run_id=proto.run_id or None,
+            first_data_row=proto.first_data_row or 2,
+            time_column=time_column,
+            data_columns=data_columns,
+        )
+
+
+# ---------------------------------------------------------------------------
+# DataImport resource type
+# ---------------------------------------------------------------------------
+
+
+class DataImport(BaseType[DataImportProto, "DataImport"]):
+    """A data import in the Sift system.
+
+    Represents the status and metadata of an import operation. Use
+    ``client.data_import.upload()`` to create one, or ``client.data_import.get()``
+    to retrieve an existing import by ID.
+    """
+
+    # Required fields
+    status: DataImportStatus
+    created_date: datetime
+    modified_date: datetime
+
+    # Optional fields
+    error_message: str | None
+    source_url: str | None
+    run_id: str | None
+    report_id: str | None
+    asset_id: str | None
+    data_start_time: datetime | None
+    data_stop_time: datetime | None
+
+    # Config used for this import
+    csv_config: CsvImportConfig | None
+
+    @classmethod
+    def _from_proto(
+        cls, proto: DataImportProto, sift_client: SiftClient | None = None
+    ) -> DataImport:
+        from datetime import timezone
+
+        return cls(
+            proto=proto,
+            id_=proto.data_import_id,
+            status=DataImportStatus(proto.status),
+            error_message=proto.error_message or None,
+            created_date=proto.created_date.ToDatetime(tzinfo=timezone.utc),
+            modified_date=proto.modified_date.ToDatetime(tzinfo=timezone.utc),
+            source_url=proto.source_url or None,
+            run_id=proto.run_id if proto.HasField("_run_id") else None,
+            report_id=proto.report_id if proto.HasField("_report_id") else None,
+            asset_id=proto.asset_id if proto.HasField("_asset_id") else None,
+            data_start_time=(
+                proto.data_start_time.ToDatetime(tzinfo=timezone.utc)
+                if proto.HasField("_data_start_time")
+                else None
+            ),
+            data_stop_time=(
+                proto.data_stop_time.ToDatetime(tzinfo=timezone.utc)
+                if proto.HasField("_data_stop_time")
+                else None
+            ),
+            csv_config=(
+                CsvImportConfig._from_proto(proto.csv_config)
+                if proto.HasField("csv_config")
+                else None
+            ),
+            _client=sift_client,
+        )
+
+    @property
+    def is_pending(self) -> bool:
+        """Return True if the import is pending."""
+        return self.status == DataImportStatus.PENDING
+
+    @property
+    def is_in_progress(self) -> bool:
+        """Return True if the import is in progress."""
+        return self.status == DataImportStatus.IN_PROGRESS
+
+    @property
+    def is_succeeded(self) -> bool:
+        """Return True if the import succeeded."""
+        return self.status == DataImportStatus.SUCCEEDED
+
+    @property
+    def is_failed(self) -> bool:
+        """Return True if the import failed."""
+        return self.status == DataImportStatus.FAILED
+
+    @property
+    def is_complete(self) -> bool:
+        """Return True if the import reached a terminal state (succeeded or failed)."""
+        return self.status in (DataImportStatus.SUCCEEDED, DataImportStatus.FAILED)
+
+    def refresh(self) -> DataImport:
+        """Refresh this import with the latest data from the API."""
+        updated = self.client.data_import.get(self._id_or_error)
+        self._update(updated)
+        return self
+
+    def retry(self) -> None:
+        """Retry this import. Only works for URL-based imports in a failed state."""
+        self.client.data_import.retry(self._id_or_error)
+        self.refresh()
diff --git a/python/lib/sift_client/util/util.py b/python/lib/sift_client/util/util.py
index e82a8ccfe..98719cfdd 100644
--- a/python/lib/sift_client/util/util.py
+++ b/python/lib/sift_client/util/util.py
@@ -8,6 +8,7 @@
         CalculatedChannelsAPIAsync,
         ChannelsAPIAsync,
         DataExportAPIAsync,
+        DataImportAPIAsync,
         FileAttachmentsAPIAsync,
         IngestionAPIAsync,
         JobsAPIAsync,
@@ -62,6 +63,9 @@ class AsyncAPIs(NamedTuple):
     data_export: DataExportAPIAsync
     """Instance of the Data Export API for making asynchronous requests."""
 
+    data_import: DataImportAPIAsync
+    """Instance of the Data Import API for making asynchronous requests."""
+
 
 def count_non_none(*args: Any) -> int:
     """Count the number of non-none arguments."""

From 7224b79c6a7ed16fda0f717ff23fa9f5f3df2c75 Mon Sep 17 00:00:00 2001
From: Wei Qi Lu <wei@siftstack.com>
Date: Fri, 27 Mar 2026 08:46:42 -0700
Subject: [PATCH 02/52] add detect config data types

---
 .../lib/sift_client/resources/data_imports.py | 70 ++++++++++++++++---
 .../resources/sync_stubs/__init__.pyi         | 36 ++++++++--
 .../lib/sift_client/sift_types/data_import.py | 28 ++++++++
 3 files changed, 116 insertions(+), 18 deletions(-)

diff --git a/python/lib/sift_client/resources/data_imports.py b/python/lib/sift_client/resources/data_imports.py
index 8ec2a3706..bc2ac9cdf 100644
--- a/python/lib/sift_client/resources/data_imports.py
+++ b/python/lib/sift_client/resources/data_imports.py
@@ -6,11 +6,10 @@
 from pathlib import Path
 from typing import TYPE_CHECKING
 
-from sift.data_imports.v2.data_imports_pb2 import DATA_TYPE_KEY_CSV
-
 from sift_client._internal.low_level_wrappers.data_imports import DataImportsLowLevelClient
 from sift_client.resources._base import ResourceBase
 from sift_client.sift_types.data_import import (
+    EXTENSION_TO_DATA_TYPE_KEY,
     CsvImportConfig,
     DataImport,
     DataImportStatus,
@@ -49,7 +48,10 @@ async def import_from_path(
         self,
         *,
         file_path: str | Path,
-        config: ImportConfig,
+        config: ImportConfig | None = None,
+        asset_name: str | None = None,
+        run_name: str | None = None,
+        run_id: str | None = None,
     ) -> DataImport:
         """Import data from a local file.
 
@@ -57,21 +59,47 @@ async def import_from_path(
         returned presigned URL. Returns a :class:`DataImport` that can be
         polled for status via ``data_import.refresh()``.
 
+        When ``config`` is omitted the file format is auto-detected via
+        :meth:`detect_config` and a :class:`CsvImportConfig` is built using
+        the provided ``asset_name`` and optional ``run_name`` / ``run_id``.
+
         Args:
             file_path: Path to the local file to import.
             config: Import configuration describing the file format and column
-                mapping.
+                mapping. When provided, ``asset_name``, ``run_name``, and
+                ``run_id`` are ignored.
+            asset_name: Name of the asset to import into. Required when
+                ``config`` is not provided.
+            run_name: Optional run name. Only used when ``config`` is not
+                provided.
+            run_id: Optional existing run ID. Only used when ``config`` is not
+                provided.
 
         Returns:
             A :class:`DataImport` representing the import operation.
 
         Raises:
             FileNotFoundError: If the file does not exist.
+            ValueError: If neither ``config`` nor ``asset_name`` is provided.
         """
         path = Path(file_path)
         if not path.is_file():
             raise FileNotFoundError(f"File not found: {file_path}")
 
+        if config is None:
+            if asset_name is None:
+                raise ValueError(
+                    "Either 'config' or 'asset_name' must be provided."
+                )
+            detected = await self.detect_config(file_path)
+            config = detected.model_copy(
+                update={
+                    "asset_name": asset_name,
+                    "run_name": run_name,
+                    "run_id": run_id,
+                }
+            )
+
         data_import_id, upload_url = await self._low_level_client.create_from_upload(config)
         logger.info("Created data import %s", data_import_id)
 
@@ -168,14 +196,15 @@ async def retry(self, data_import: str | DataImport) -> None:
         )
         await self._low_level_client.retry(data_import_id)
 
-    async def detect_config(self, file_path: str | Path) -> CsvImportConfig:
+    async def detect_config(self, file_path: str | Path) -> ImportConfig:
         """Auto-detect import configuration from a file.
 
         Reads a sample of the file, sends it to the server's DetectConfig
-        endpoint, and returns the detected configuration. You can inspect
-        and modify the result before passing it to :meth:`import_from_path`.
+        endpoint, and returns the detected configuration. The file format
+        is inferred from the file extension. You can inspect and modify the
+        result before passing it to :meth:`import_from_path`.
 
-        Currently supports CSV files only.
+        Supported extensions: .csv, .parquet, .tdms, .ch10, .ch11, .h5, .hdf5
 
         Args:
             file_path: Path to the file to analyze.
@@ -185,19 +214,38 @@ async def detect_config(self, file_path: str | Path) -> CsvImportConfig:
 
         Raises:
             FileNotFoundError: If the file does not exist.
-            ValueError: If detection returns no config.
+            ValueError: If the file extension is unsupported or detection
+                returns no config.
         """
         path = Path(file_path)
         if not path.is_file():
             raise FileNotFoundError(f"File not found: {file_path}")
 
+        ext = path.suffix.lower()
+        data_type_key = EXTENSION_TO_DATA_TYPE_KEY.get(ext)
+        if data_type_key is None:
+            raise ValueError(
+                f"Unsupported file extension '{ext}'. "
+                f"Supported: {', '.join(sorted(EXTENSION_TO_DATA_TYPE_KEY))}"
+            )
+
         with open(path, "rb") as f:
             sample = f.read(_DETECT_CONFIG_SAMPLE_SIZE)
 
-        response = await self._low_level_client.detect_config(sample, DATA_TYPE_KEY_CSV)
+        response = await self._low_level_client.detect_config(sample, data_type_key.value)
 
         if response.HasField("csv_config"):
-            return CsvImportConfig._from_proto(response.csv_config)
+            config = CsvImportConfig._from_proto(response.csv_config)
+            # The server's DetectConfig may include the time column in
+            # data_columns, but CreateDataImportFromUpload rejects that
+            # overlap. Filter it out so the config is import-ready.
+            time_col = config.time_column.column
+            filtered = [dc for dc in config.data_columns if dc.column != time_col]
+            if len(filtered) != len(config.data_columns):
+                config = config.model_copy(update={"data_columns": filtered})
+            return config
+
+        # TODO: Add parquet_config and hdf5_config once their config types are added.
 
         raise ValueError("Server returned an empty DetectConfig response.")
 
diff --git a/python/lib/sift_client/resources/sync_stubs/__init__.pyi b/python/lib/sift_client/resources/sync_stubs/__init__.pyi
index a96efe70c..ce8d04cf5 100644
--- a/python/lib/sift_client/resources/sync_stubs/__init__.pyi
+++ b/python/lib/sift_client/resources/sync_stubs/__init__.pyi
@@ -643,14 +643,15 @@ class DataImportAPI:
         ...
 
     def _run(self, coro): ...
-    def detect_config(self, file_path: str | Path) -> CsvImportConfig:
+    def detect_config(self, file_path: str | Path) -> ImportConfig:
         """Auto-detect import configuration from a file.
 
         Reads a sample of the file, sends it to the server's DetectConfig
-        endpoint, and returns the detected configuration. You can inspect
-        and modify the result before passing it to :meth:`import_from_path`.
+        endpoint, and returns the detected configuration. The file format
+        is inferred from the file extension. You can inspect and modify the
+        result before passing it to :meth:`import_from_path`.
 
-        Currently supports CSV files only.
+        Supported extensions: .csv, .parquet, .tdms, .ch10, .ch11, .h5, .hdf5
 
         Args:
             file_path: Path to the file to analyze.
@@ -660,7 +661,8 @@ class DataImportAPI:
 
         Raises:
             FileNotFoundError: If the file does not exist.
-            ValueError: If detection returns no config.
+            ValueError: If the file extension is unsupported or detection
+                returns no config.
         """
         ...
 
@@ -675,23 +677,43 @@ class DataImportAPI:
         """
         ...
 
-    def import_from_path(self, *, file_path: str | Path, config: ImportConfig) -> DataImport:
+    def import_from_path(
+        self,
+        *,
+        file_path: str | Path,
+        config: ImportConfig | None = None,
+        asset_name: str | None = None,
+        run_name: str | None = None,
+        run_id: str | None = None,
+    ) -> DataImport:
         """Import data from a local file.
 
         Creates a data import on the server and uploads the file to the
         returned presigned URL. Returns a :class:`DataImport` that can be
         polled for status via ``data_import.refresh()``.
 
+        When ``config`` is omitted the file format is auto-detected via
+        :meth:`detect_config` and a :class:`CsvImportConfig` is built using
+        the provided ``asset_name`` and optional ``run_name`` / ``run_id``.
+
         Args:
             file_path: Path to the local file to import.
             config: Import configuration describing the file format and column
-                mapping.
+                mapping. When provided, ``asset_name``, ``run_name``, and
+                ``run_id`` are ignored.
+            asset_name: Name of the asset to import into. Required when
+                ``config`` is not provided.
+            run_name: Optional run name. Only used when ``config`` is not
+                provided.
+            run_id: Optional existing run ID. Only used when ``config`` is not
+                provided.
 
         Returns:
             A :class:`DataImport` representing the import operation.
 
         Raises:
             FileNotFoundError: If the file does not exist.
+            ValueError: If neither ``config`` nor ``asset_name`` is provided.
         """
         ...
 
diff --git a/python/lib/sift_client/sift_types/data_import.py b/python/lib/sift_client/sift_types/data_import.py
index fc0bf119a..3ced4e9f6 100644
--- a/python/lib/sift_client/sift_types/data_import.py
+++ b/python/lib/sift_client/sift_types/data_import.py
@@ -6,6 +6,13 @@
 
 from pydantic import BaseModel, ConfigDict
 from sift.common.type.v1.channel_config_pb2 import ChannelConfig as ChannelConfigProto
+from sift.data_imports.v2.data_imports_pb2 import (
+    DATA_TYPE_KEY_CH10,
+    DATA_TYPE_KEY_CSV,
+    DATA_TYPE_KEY_HDF5,
+    DATA_TYPE_KEY_PARQUET_FLATDATASET,
+    DATA_TYPE_KEY_TDMS,
+)
 from sift.data_imports.v2.data_imports_pb2 import CsvConfig as CsvConfigProto
 from sift.data_imports.v2.data_imports_pb2 import CsvTimeColumn as CsvTimeColumnProto
 from sift.data_imports.v2.data_imports_pb2 import DataImport as DataImportProto
@@ -51,6 +58,27 @@ class DataImportStatus(Enum):
     FAILED = DataImportStatusProto.DATA_IMPORT_STATUS_FAILED
 
 
+class DataTypeKey(Enum):
+    """Supported file types for data import detection."""
+
+    CSV = DATA_TYPE_KEY_CSV
+    PARQUET = DATA_TYPE_KEY_PARQUET_FLATDATASET
+    TDMS = DATA_TYPE_KEY_TDMS
+    CH10 = DATA_TYPE_KEY_CH10
+    HDF5 = DATA_TYPE_KEY_HDF5
+
+
+EXTENSION_TO_DATA_TYPE_KEY: dict[str, DataTypeKey] = {
+    ".csv": DataTypeKey.CSV,
+    ".parquet": DataTypeKey.PARQUET,
+    ".tdms": DataTypeKey.TDMS,
+    ".ch10": DataTypeKey.CH10,
+    ".ch11": DataTypeKey.CH10,
+    ".h5": DataTypeKey.HDF5,
+    ".hdf5": DataTypeKey.HDF5,
+}
+
+
 # ---------------------------------------------------------------------------
 # CSV config types
 # ---------------------------------------------------------------------------

From d27b0703c047517c2418e91bdbb7eb708d6afec3 Mon Sep 17 00:00:00 2001
From: Wei Qi Lu <wei@siftstack.com>
Date: Wed, 1 Apr 2026 14:52:24 -0700
Subject: [PATCH 03/52] added relative time validation, refactored the import
 process

---
 .../lib/sift_client/resources/data_imports.py | 35 ++++++++++++-------
 .../resources/sync_stubs/__init__.pyi         |  2 +-
 .../lib/sift_client/sift_types/data_import.py | 12 +++++--
 3 files changed, 34 insertions(+), 15 deletions(-)

diff --git a/python/lib/sift_client/resources/data_imports.py b/python/lib/sift_client/resources/data_imports.py
index bc2ac9cdf..a03024188 100644
--- a/python/lib/sift_client/resources/data_imports.py
+++ b/python/lib/sift_client/resources/data_imports.py
@@ -7,6 +7,7 @@
 from typing import TYPE_CHECKING
 
 from sift_client._internal.low_level_wrappers.data_imports import DataImportsLowLevelClient
+from sift_client._internal.util.executor import run_sync_function
 from sift_client.resources._base import ResourceBase
 from sift_client.sift_types.data_import import (
     EXTENSION_TO_DATA_TYPE_KEY,
@@ -25,6 +26,16 @@
 _DETECT_CONFIG_SAMPLE_SIZE = 65_536  # 64 KiB
 
 
+def _validate_config(config: ImportConfig) -> None:
+    """Validate an import config before sending it to the server."""
+    if isinstance(config, CsvImportConfig):
+        tc = config.time_column
+        if tc.format.name.startswith("RELATIVE_") and tc.relative_start_time is None:
+            raise ValueError(
+                f"'relative_start_time' is required when using a relative time format ({tc.format.name})."
+            )
+
+
 class DataImportAPIAsync(ResourceBase):
     """High-level API for importing data into Sift.
 
@@ -55,9 +66,8 @@ async def import_from_path(
     ) -> DataImport:
         """Import data from a local file.
 
-        Creates a data import on the server and uploads the file to the
-        returned presigned URL. Returns a :class:`DataImport` that can be
-        polled for status via ``data_import.refresh()``.
+        Creates a data import on the server, uploads the file, and waits
+        for the import to complete. Returns the completed :class:`DataImport`.
 
         When ``config`` is omitted the file format is auto-detected via
         :meth:`detect_config` and a :class:`CsvImportConfig` is built using
@@ -88,9 +98,7 @@ async def import_from_path(
 
         if config is None:
             if asset_name is None:
-                raise ValueError(
-                    "Either 'config' or 'asset_name' must be provided."
-                )
+                raise ValueError("Either 'config' or 'asset_name' must be provided.")
             detected = await self.detect_config(file_path)
             config = detected.model_copy(
                 update={
@@ -100,14 +108,14 @@ async def import_from_path(
                 }
             )
 
+        _validate_config(config)
         data_import_id, upload_url = await self._low_level_client.create_from_upload(config)
         logger.info("Created data import %s", data_import_id)
 
         await self._low_level_client.upload_file(upload_url, path)
         logger.info("Uploaded file to presigned URL for import %s", data_import_id)
 
-        data_import = await self._low_level_client.get(data_import_id)
-        return self._apply_client_to_instance(data_import)
+        return await self.wait_until_complete(data_import_id)
 
     async def import_from_url(
         self,
@@ -128,11 +136,11 @@ async def import_from_url(
         Returns:
             A :class:`DataImport` representing the import operation.
         """
+        _validate_config(config)
         data_import_id = await self._low_level_client.create_from_url(url, config)
         logger.info("Created URL-based data import %s", data_import_id)
 
-        data_import = await self._low_level_client.get(data_import_id)
-        return self._apply_client_to_instance(data_import)
+        return await self.wait_until_complete(data_import_id)
 
     async def get(self, data_import_id: str) -> DataImport:
         """Get a data import by ID.
@@ -229,8 +237,11 @@ async def detect_config(self, file_path: str | Path) -> ImportConfig:
                 f"Supported: {', '.join(sorted(EXTENSION_TO_DATA_TYPE_KEY))}"
             )
 
-        with open(path, "rb") as f:
-            sample = f.read(_DETECT_CONFIG_SAMPLE_SIZE)
+        def _read_sample() -> bytes:
+            with open(path, "rb") as f:
+                return f.read(_DETECT_CONFIG_SAMPLE_SIZE)
+
+        sample = await run_sync_function(_read_sample)
 
         response = await self._low_level_client.detect_config(sample, data_type_key.value)
 
diff --git a/python/lib/sift_client/resources/sync_stubs/__init__.pyi b/python/lib/sift_client/resources/sync_stubs/__init__.pyi
index ce8d04cf5..e9153aafc 100644
--- a/python/lib/sift_client/resources/sync_stubs/__init__.pyi
+++ b/python/lib/sift_client/resources/sync_stubs/__init__.pyi
@@ -24,7 +24,7 @@ if TYPE_CHECKING:
         CalculatedChannelUpdate,
     )
     from sift_client.sift_types.channel import Channel
-    from sift_client.sift_types.data_import import CsvImportConfig, DataImport, DataImportStatus
+    from sift_client.sift_types.data_import import DataImport, DataImportStatus
     from sift_client.sift_types.export import ExportOutputFormat
     from sift_client.sift_types.file_attachment import (
         FileAttachment,
diff --git a/python/lib/sift_client/sift_types/data_import.py b/python/lib/sift_client/sift_types/data_import.py
index 3ced4e9f6..2d6242276 100644
--- a/python/lib/sift_client/sift_types/data_import.py
+++ b/python/lib/sift_client/sift_types/data_import.py
@@ -171,9 +171,17 @@ def _to_proto(self) -> CsvConfigProto:
     @classmethod
     def _from_proto(cls, proto: CsvConfigProto) -> CsvImportConfig:
         """Create from a proto CsvConfig (e.g. from DetectConfig response)."""
+        relative_start_time = None
+        if proto.time_column.HasField("relative_start_time"):
+            from datetime import timezone
+
+            relative_start_time = proto.time_column.relative_start_time.ToDatetime(
+                tzinfo=timezone.utc
+            )
         time_column = CsvTimeColumn(
             column=proto.time_column.column_number,
             format=TimeFormat(proto.time_column.format),
+            relative_start_time=relative_start_time,
         )
         data_columns = [
             CsvDataColumn(
@@ -204,8 +212,8 @@ class DataImport(BaseType[DataImportProto, "DataImport"]):
     """A data import in the Sift system.
 
     Represents the status and metadata of an import operation. Use
-    ``client.data_import.upload()`` to create one, or ``client.data_import.get()``
-    to retrieve an existing import by ID.
+    ``client.data_import.import_from_path()`` to create one, or
+    ``client.data_import.get()`` to retrieve an existing import by ID.
     """
 
     # Required fields

From 77dbf868929a821dcab2c55540d4dc890f1aa307 Mon Sep 17 00:00:00 2001
From: Wei Qi Lu <wei@siftstack.com>
Date: Wed, 1 Apr 2026 15:28:20 -0700
Subject: [PATCH 04/52] added progress spinner for polling

---
 .../lib/sift_client/resources/data_imports.py | 59 +++++++++++++++----
 1 file changed, 47 insertions(+), 12 deletions(-)

diff --git a/python/lib/sift_client/resources/data_imports.py b/python/lib/sift_client/resources/data_imports.py
index a03024188..39fa13c2f 100644
--- a/python/lib/sift_client/resources/data_imports.py
+++ b/python/lib/sift_client/resources/data_imports.py
@@ -6,6 +6,9 @@
 from pathlib import Path
 from typing import TYPE_CHECKING
 
+from alive_progress import alive_bar  # type: ignore[import-untyped]
+
+import sift_client as _sift_client_module
 from sift_client._internal.low_level_wrappers.data_imports import DataImportsLowLevelClient
 from sift_client._internal.util.executor import run_sync_function
 from sift_client.resources._base import ResourceBase
@@ -63,6 +66,7 @@ async def import_from_path(
         asset_name: str | None = None,
         run_name: str | None = None,
         run_id: str | None = None,
+        show_progress: bool | None = None,
     ) -> DataImport:
         """Import data from a local file.
 
@@ -84,6 +88,9 @@ async def import_from_path(
                 provided.
             run_id: Optional existing run ID. Only used when ``config`` is not
                 provided.
+            show_progress: If True, display a progress spinner while waiting
+                for the import to complete. Defaults to True for sync, False
+                for async.
 
         Returns:
             A :class:`DataImport` representing the import operation.
@@ -115,13 +122,14 @@ async def import_from_path(
         await self._low_level_client.upload_file(upload_url, path)
         logger.info("Uploaded file to presigned URL for import %s", data_import_id)
 
-        return await self.wait_until_complete(data_import_id)
+        return await self.wait_until_complete(data_import_id, show_progress=show_progress)
 
     async def import_from_url(
         self,
         *,
         url: str,
         config: ImportConfig,
+        show_progress: bool | None = None,
     ) -> DataImport:
         """Import data from a remote URL (HTTP or S3).
 
@@ -132,6 +140,9 @@ async def import_from_url(
             url: The URL to import from.
             config: Import configuration describing the file format and column
                 mapping.
+            show_progress: If True, display a progress spinner while waiting
+                for the import to complete. Defaults to True for sync, False
+                for async.
 
         Returns:
             A :class:`DataImport` representing the import operation.
@@ -140,7 +151,7 @@ async def import_from_url(
         data_import_id = await self._low_level_client.create_from_url(url, config)
         logger.info("Created URL-based data import %s", data_import_id)
 
-        return await self.wait_until_complete(data_import_id)
+        return await self.wait_until_complete(data_import_id, show_progress=show_progress)
 
     async def get(self, data_import_id: str) -> DataImport:
         """Get a data import by ID.
@@ -266,6 +277,7 @@ async def wait_until_complete(
         *,
         polling_interval_secs: int = 5,
         timeout_secs: int | None = None,
+        show_progress: bool | None = None,
     ) -> DataImport:
         """Wait until a data import reaches a terminal state.
 
@@ -277,6 +289,10 @@ async def wait_until_complete(
             polling_interval_secs: Seconds between status polls. Defaults to 5s.
             timeout_secs: Maximum seconds to wait. If None, polls indefinitely.
                 Defaults to None (indefinite).
+            show_progress: If True, display an animated progress spinner alongside
+                the import status while polling. Defaults to True for sync, False
+                for async. Use ``sift_client.config.show_progress = False`` to disable
+                globally for sync.
 
         Returns:
             The DataImport in its terminal state.
@@ -284,15 +300,34 @@ async def wait_until_complete(
         data_import_id = (
             data_import._id_or_error if isinstance(data_import, DataImport) else data_import
         )
+        if show_progress is None:
+            global_setting = _sift_client_module.config.show_progress
+            if global_setting is not None:
+                show_progress = global_setting
+            elif getattr(self, "_is_sync", False):
+                show_progress = True
+            else:
+                show_progress = False
 
         start = time.monotonic()
-        while True:
-            result = await self.get(data_import_id)
-            if result.is_complete:
-                return result
-            if timeout_secs is not None and (time.monotonic() - start) >= timeout_secs:
-                raise TimeoutError(
-                    f"Data import '{data_import_id}' did not complete "
-                    f"within {timeout_secs} seconds."
-                )
-            await asyncio.sleep(polling_interval_secs)
+        with alive_bar(
+            title=f"Data Import ID {data_import_id}: polling",
+            bar=None,
+            spinner_length=7,
+            spinner="dots_waves",
+            monitor=False,
+            stats=False,
+            disable=not show_progress,
+        ) as bar:
+            while True:
+                result = await self.get(data_import_id)
+                bar.title(f"Data Import ID {data_import_id}: {result.status.name}")
+                bar()
+                if result.is_complete:
+                    return result
+                if timeout_secs is not None and (time.monotonic() - start) >= timeout_secs:
+                    raise TimeoutError(
+                        f"Data import '{data_import_id}' did not complete "
+                        f"within {timeout_secs} seconds."
+                    )
+                await asyncio.sleep(polling_interval_secs)

From bd5e9f81e5062ed2a885d4057cdac5773cafe596 Mon Sep 17 00:00:00 2001
From: Wei Qi Lu <wei@siftstack.com>
Date: Wed, 1 Apr 2026 16:38:50 -0700
Subject: [PATCH 05/52] missing run defaults to filename

---
 .../lib/sift_client/resources/data_imports.py | 30 ++++++++++++++-----
 1 file changed, 23 insertions(+), 7 deletions(-)

diff --git a/python/lib/sift_client/resources/data_imports.py b/python/lib/sift_client/resources/data_imports.py
index 39fa13c2f..f940e38b7 100644
--- a/python/lib/sift_client/resources/data_imports.py
+++ b/python/lib/sift_client/resources/data_imports.py
@@ -26,8 +26,6 @@
 
 logger = logging.getLogger(__name__)
 
-_DETECT_CONFIG_SAMPLE_SIZE = 65_536  # 64 KiB
-
 
 def _validate_config(config: ImportConfig) -> None:
     """Validate an import config before sending it to the server."""
@@ -66,6 +64,8 @@ async def import_from_path(
         asset_name: str | None = None,
         run_name: str | None = None,
         run_id: str | None = None,
+        polling_interval_secs: int = 5,
+        timeout_secs: int | None = None,
         show_progress: bool | None = None,
     ) -> DataImport:
         """Import data from a local file.
@@ -88,6 +88,8 @@ async def import_from_path(
                 provided.
             run_id: Optional existing run ID. Only used when ``config`` is not
                 provided.
+            polling_interval_secs: Seconds between status polls. Defaults to 5s.
+            timeout_secs: Maximum seconds to wait. If None, polls indefinitely.
             show_progress: If True, display a progress spinner while waiting
                 for the import to complete. Defaults to True for sync, False
                 for async.
@@ -110,7 +112,7 @@ async def import_from_path(
             config = detected.model_copy(
                 update={
                     "asset_name": asset_name,
-                    "run_name": run_name,
+                    "run_name": run_name if run_name or run_id else path.name,
                     "run_id": run_id,
                 }
             )
@@ -122,13 +124,20 @@ async def import_from_path(
         await self._low_level_client.upload_file(upload_url, path)
         logger.info("Uploaded file to presigned URL for import %s", data_import_id)
 
-        return await self.wait_until_complete(data_import_id, show_progress=show_progress)
+        return await self.wait_until_complete(
+            data_import_id,
+            polling_interval_secs=polling_interval_secs,
+            timeout_secs=timeout_secs,
+            show_progress=show_progress,
+        )
 
     async def import_from_url(
         self,
         *,
         url: str,
         config: ImportConfig,
+        polling_interval_secs: int = 5,
+        timeout_secs: int | None = None,
         show_progress: bool | None = None,
     ) -> DataImport:
         """Import data from a remote URL (HTTP or S3).
@@ -140,6 +149,8 @@ async def import_from_url(
             url: The URL to import from.
             config: Import configuration describing the file format and column
                 mapping.
+            polling_interval_secs: Seconds between status polls. Defaults to 5s.
+            timeout_secs: Maximum seconds to wait. If None, polls indefinitely.
             show_progress: If True, display a progress spinner while waiting
                 for the import to complete. Defaults to True for sync, False
                 for async.
@@ -151,7 +162,12 @@ async def import_from_url(
         data_import_id = await self._low_level_client.create_from_url(url, config)
         logger.info("Created URL-based data import %s", data_import_id)
 
-        return await self.wait_until_complete(data_import_id, show_progress=show_progress)
+        return await self.wait_until_complete(
+            data_import_id,
+            polling_interval_secs=polling_interval_secs,
+            timeout_secs=timeout_secs,
+            show_progress=show_progress,
+        )
 
     async def get(self, data_import_id: str) -> DataImport:
         """Get a data import by ID.
@@ -250,7 +266,7 @@ async def detect_config(self, file_path: str | Path) -> ImportConfig:
 
         def _read_sample() -> bytes:
             with open(path, "rb") as f:
-                return f.read(_DETECT_CONFIG_SAMPLE_SIZE)
+                return f.read(65_536)  # 64 KiB
 
         sample = await run_sync_function(_read_sample)
 
@@ -267,7 +283,7 @@ def _read_sample() -> bytes:
                 config = config.model_copy(update={"data_columns": filtered})
             return config
 
-        # TODO: Add parquet_config and hdf5_config once their config types are added.
+        # TODO: Add other file format configs
 
         raise ValueError("Server returned an empty DetectConfig response.")
 

From 41f9c082a3eef7e48d7ad950ab560313cb8bdb0b Mon Sep 17 00:00:00 2001
From: Wei Qi Lu <wei@siftstack.com>
Date: Thu, 2 Apr 2026 13:44:02 -0700
Subject: [PATCH 06/52] added relative time format validation in the model

---
 python/lib/sift_client/resources/data_imports.py | 15 +--------------
 python/lib/sift_client/sift_types/data_import.py | 10 +++++++++-
 2 files changed, 10 insertions(+), 15 deletions(-)

diff --git a/python/lib/sift_client/resources/data_imports.py b/python/lib/sift_client/resources/data_imports.py
index f940e38b7..3ed12e50d 100644
--- a/python/lib/sift_client/resources/data_imports.py
+++ b/python/lib/sift_client/resources/data_imports.py
@@ -27,16 +27,6 @@
 logger = logging.getLogger(__name__)
 
 
-def _validate_config(config: ImportConfig) -> None:
-    """Validate an import config before sending it to the server."""
-    if isinstance(config, CsvImportConfig):
-        tc = config.time_column
-        if tc.format.name.startswith("RELATIVE_") and tc.relative_start_time is None:
-            raise ValueError(
-                f"'relative_start_time' is required when using a relative time format ({tc.format.name})."
-            )
-
-
 class DataImportAPIAsync(ResourceBase):
     """High-level API for importing data into Sift.
 
@@ -112,12 +102,10 @@ async def import_from_path(
             config = detected.model_copy(
                 update={
                     "asset_name": asset_name,
-                    "run_name": run_name if run_name or run_id else path.name,
+                    "run_name": run_name if run_name or run_id else path.stem,
                     "run_id": run_id,
                 }
             )
-
-        _validate_config(config)
         data_import_id, upload_url = await self._low_level_client.create_from_upload(config)
         logger.info("Created data import %s", data_import_id)
 
@@ -158,7 +146,6 @@ async def import_from_url(
         Returns:
             A :class:`DataImport` representing the import operation.
         """
-        _validate_config(config)
         data_import_id = await self._low_level_client.create_from_url(url, config)
         logger.info("Created URL-based data import %s", data_import_id)
 
diff --git a/python/lib/sift_client/sift_types/data_import.py b/python/lib/sift_client/sift_types/data_import.py
index 2d6242276..8f6cc0212 100644
--- a/python/lib/sift_client/sift_types/data_import.py
+++ b/python/lib/sift_client/sift_types/data_import.py
@@ -4,7 +4,7 @@
 from enum import Enum
 from typing import TYPE_CHECKING
 
-from pydantic import BaseModel, ConfigDict
+from pydantic import BaseModel, ConfigDict, model_validator
 from sift.common.type.v1.channel_config_pb2 import ChannelConfig as ChannelConfigProto
 from sift.data_imports.v2.data_imports_pb2 import (
     DATA_TYPE_KEY_CH10,
@@ -108,6 +108,14 @@ def _to_proto(self) -> CsvTimeColumnProto:
             proto.relative_start_time.CopyFrom(to_pb_timestamp(self.relative_start_time))
         return proto
 
+    @model_validator(mode="after")
+    def _check_relative_start_time(self) -> CsvTimeColumn:
+        if self.format.name.startswith("RELATIVE_") and self.relative_start_time is None:
+            raise ValueError(
+                f"'relative_start_time' is required when using a relative time format ({self.format.name})."
+            )
+        return self
+
 
 class CsvDataColumn(BaseModel):
     """A data column definition for CSV imports.

From e76d2d2017523ee731ffda9c4f785e9d056808bc Mon Sep 17 00:00:00 2001
From: Wei Qi Lu <wei@siftstack.com>
Date: Thu, 2 Apr 2026 14:01:02 -0700
Subject: [PATCH 07/52] updated post request to include file name for
 downstream file attachment

---
 .../_internal/low_level_wrappers/data_imports.py          | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/python/lib/sift_client/_internal/low_level_wrappers/data_imports.py b/python/lib/sift_client/_internal/low_level_wrappers/data_imports.py
index d83f42142..2e0a3bb50 100644
--- a/python/lib/sift_client/_internal/low_level_wrappers/data_imports.py
+++ b/python/lib/sift_client/_internal/low_level_wrappers/data_imports.py
@@ -90,7 +90,13 @@ async def upload_file(self, upload_url: str, file_path: Path) -> None:
 
         def _do_upload() -> None:
             with open(file_path, "rb") as f:
-                response = rest_client.post(upload_url, data=f)
+                response = rest_client.post(
+                    upload_url,
+                    data=f,
+                    headers={
+                        "Content-Disposition": f'attachment; filename="{file_path.name}"'
+                    },  # Preserve original filename for server-side storage.
+                )
                 response.raise_for_status()
 
         await run_sync_function(_do_upload)

From 4cb5ebd93a07675817694d3b7da8004239e2b1f3 Mon Sep 17 00:00:00 2001
From: Wei Qi Lu <wei@siftstack.com>
Date: Thu, 2 Apr 2026 15:39:16 -0700
Subject: [PATCH 08/52] added parquet data type, refactor to use util

---
 .../low_level_wrappers/data_imports.py        | 36 ++-----------
 python/lib/sift_client/_internal/util/file.py | 26 ++++++++++
 .../lib/sift_client/resources/data_imports.py | 50 +++++++++++++------
 .../lib/sift_client/sift_types/data_import.py | 21 ++------
 4 files changed, 66 insertions(+), 67 deletions(-)

diff --git a/python/lib/sift_client/_internal/low_level_wrappers/data_imports.py b/python/lib/sift_client/_internal/low_level_wrappers/data_imports.py
index 2e0a3bb50..5cd1b91e5 100644
--- a/python/lib/sift_client/_internal/low_level_wrappers/data_imports.py
+++ b/python/lib/sift_client/_internal/low_level_wrappers/data_imports.py
@@ -19,17 +19,13 @@
 from sift.data_imports.v2.data_imports_pb2_grpc import DataImportServiceStub
 
 from sift_client._internal.low_level_wrappers.base import LowLevelClientBase
-from sift_client._internal.util.executor import run_sync_function
 from sift_client.sift_types.data_import import CsvImportConfig, DataImport
-from sift_client.transport import WithGrpcClient, WithRestClient
+from sift_client.transport import WithGrpcClient
 
 if TYPE_CHECKING:
-    from pathlib import Path
-
     from sift.data_imports.v2.data_imports_pb2 import DataTypeKey
 
     from sift_client.transport.grpc_transport import GrpcClient
-    from sift_client.transport.rest_transport import RestClient
 
 # Union of all supported config types. Extend this as new formats are added.
 ImportConfig = CsvImportConfig
@@ -49,15 +45,14 @@ def _set_config_on_request(
 logger = logging.getLogger(__name__)
 
 
-class DataImportsLowLevelClient(LowLevelClientBase, WithGrpcClient, WithRestClient):
+class DataImportsLowLevelClient(LowLevelClientBase, WithGrpcClient):
     """Low-level client for the DataImportService.
 
     This class provides a thin wrapper around the autogenerated bindings for the DataImportsAPI.
     """
 
-    def __init__(self, grpc_client: GrpcClient, rest_client: RestClient):
+    def __init__(self, grpc_client: GrpcClient):
         WithGrpcClient.__init__(self, grpc_client=grpc_client)
-        WithRestClient.__init__(self, rest_client=rest_client)
 
     async def create_from_upload(self, config: ImportConfig) -> tuple[str, str]:
         """Create a data import and get back a presigned upload URL.
@@ -76,31 +71,6 @@ async def create_from_upload(self, config: ImportConfig) -> tuple[str, str]:
         response = cast("CreateDataImportFromUploadResponse", response)
         return response.data_import_id, response.upload_url
 
-    async def upload_file(self, upload_url: str, file_path: Path) -> None:
-        """Upload a file to a presigned URL.
-
-        Runs the synchronous HTTP POST in a thread pool to avoid blocking
-        the event loop.
-
-        Args:
-            upload_url: The presigned URL to upload to.
-            file_path: Path to the file to upload.
-        """
-        rest_client = self._rest_client
-
-        def _do_upload() -> None:
-            with open(file_path, "rb") as f:
-                response = rest_client.post(
-                    upload_url,
-                    data=f,
-                    headers={
-                        "Content-Disposition": f'attachment; filename="{file_path.name}"'
-                    },  # Preserve original filename for server-side storage.
-                )
-                response.raise_for_status()
-
-        await run_sync_function(_do_upload)
-
     async def create_from_url(self, url: str, config: ImportConfig) -> str:
         """Create a data import from a remote URL.
 
diff --git a/python/lib/sift_client/_internal/util/file.py b/python/lib/sift_client/_internal/util/file.py
index 518bce847..0e977ced6 100644
--- a/python/lib/sift_client/_internal/util/file.py
+++ b/python/lib/sift_client/_internal/util/file.py
@@ -14,6 +14,32 @@
     from sift_client.transport.rest_transport import RestClient
 
 
+def upload_file(
+    signed_url: str,
+    file_path: Path,
+    *,
+    rest_client: RestClient,
+) -> None:
+    """Upload a file to a presigned URL.
+
+    Args:
+        signed_url: The presigned URL to upload to.
+        file_path: Path to the file to upload.
+        rest_client: The SDK rest client to use for the upload.
+
+    Raises:
+        ValueError: If the upload request fails.
+    """
+    with open(file_path, "rb") as f:
+        response = rest_client.post(
+            signed_url,
+            data=f,
+            headers={"Content-Disposition": f'attachment; filename="{file_path.name}"'},
+        )
+        if not response.ok:
+            raise ValueError(f"Upload failed ({response.status_code}): {response.text}")
+
+
 def download_file(
     signed_url: str,
     output_path: Path,
diff --git a/python/lib/sift_client/resources/data_imports.py b/python/lib/sift_client/resources/data_imports.py
index 3ed12e50d..b76091707 100644
--- a/python/lib/sift_client/resources/data_imports.py
+++ b/python/lib/sift_client/resources/data_imports.py
@@ -11,12 +11,14 @@
 import sift_client as _sift_client_module
 from sift_client._internal.low_level_wrappers.data_imports import DataImportsLowLevelClient
 from sift_client._internal.util.executor import run_sync_function
+from sift_client._internal.util.file import upload_file
 from sift_client.resources._base import ResourceBase
 from sift_client.sift_types.data_import import (
     EXTENSION_TO_DATA_TYPE_KEY,
     CsvImportConfig,
     DataImport,
     DataImportStatus,
+    DataTypeKey,
 )
 from sift_client.util import cel_utils as cel
 
@@ -43,7 +45,6 @@ def __init__(self, sift_client: SiftClient):
         super().__init__(sift_client)
         self._low_level_client = DataImportsLowLevelClient(
             grpc_client=self.client.grpc_client,
-            rest_client=self.client.rest_client,
         )
 
     async def import_from_path(
@@ -51,6 +52,7 @@ async def import_from_path(
         *,
         file_path: str | Path,
         config: ImportConfig | None = None,
+        data_type: DataTypeKey | None = None,
         asset_name: str | None = None,
         run_name: str | None = None,
         run_id: str | None = None,
@@ -70,8 +72,11 @@ async def import_from_path(
         Args:
             file_path: Path to the local file to import.
             config: Import configuration describing the file format and column
-                mapping. When provided, ``asset_name``, ``run_name``, and
-                ``run_id`` are ignored.
+                mapping. When provided, ``asset_name``, ``run_name``,
+                ``run_id``, and ``data_type`` are ignored.
+            data_type: Explicit data type key. Required for formats like
+                Parquet where the extension alone is ambiguous. Only used
+                when ``config`` is not provided.
             asset_name: Name of the asset to import into. Required when
                 ``config`` is not provided.
             run_name: Optional run name. Only used when ``config`` is not
@@ -98,7 +103,7 @@ async def import_from_path(
         if config is None:
             if asset_name is None:
                 raise ValueError("Either 'config' or 'asset_name' must be provided.")
-            detected = await self.detect_config(file_path)
+            detected = await self.detect_config(file_path, data_type=data_type)
             config = detected.model_copy(
                 update={
                     "asset_name": asset_name,
@@ -109,7 +114,9 @@ async def import_from_path(
         data_import_id, upload_url = await self._low_level_client.create_from_upload(config)
         logger.info("Created data import %s", data_import_id)
 
-        await self._low_level_client.upload_file(upload_url, path)
+        await run_sync_function(
+            lambda: upload_file(upload_url, path, rest_client=self.client.rest_client)
+        )
         logger.info("Uploaded file to presigned URL for import %s", data_import_id)
 
         return await self.wait_until_complete(
@@ -218,18 +225,25 @@ async def retry(self, data_import: str | DataImport) -> None:
         )
         await self._low_level_client.retry(data_import_id)
 
-    async def detect_config(self, file_path: str | Path) -> ImportConfig:
+    async def detect_config(
+        self,
+        file_path: str | Path,
+        data_type: DataTypeKey | None = None,
+    ) -> ImportConfig:
         """Auto-detect import configuration from a file.
 
         Reads a sample of the file, sends it to the server's DetectConfig
         endpoint, and returns the detected configuration. The file format
-        is inferred from the file extension. You can inspect and modify the
-        result before passing it to :meth:`import_from_path`.
+        is inferred from the file extension when ``data_type`` is not
+        provided.
 
-        Supported extensions: .csv, .parquet, .tdms, .ch10, .ch11, .h5, .hdf5
+        For file types with multiple layouts (e.g. Parquet), ``data_type``
+        must be specified explicitly.
 
         Args:
             file_path: Path to the file to analyze.
+            data_type: Explicit data type key. Required for formats like
+                Parquet where the extension alone is ambiguous.
 
         Returns:
             The detected import config.
@@ -243,13 +257,17 @@ async def detect_config(self, file_path: str | Path) -> ImportConfig:
         if not path.is_file():
             raise FileNotFoundError(f"File not found: {file_path}")
 
-        ext = path.suffix.lower()
-        data_type_key = EXTENSION_TO_DATA_TYPE_KEY.get(ext)
-        if data_type_key is None:
-            raise ValueError(
-                f"Unsupported file extension '{ext}'. "
-                f"Supported: {', '.join(sorted(EXTENSION_TO_DATA_TYPE_KEY))}"
-            )
+        if data_type is not None:
+            data_type_key = data_type
+        else:
+            ext = path.suffix.lower()
+            data_type_key = EXTENSION_TO_DATA_TYPE_KEY.get(ext)
+            if data_type_key is None:
+                raise ValueError(
+                    f"Unsupported file extension '{ext}'. "
+                    f"Supported: {', '.join(sorted(EXTENSION_TO_DATA_TYPE_KEY))}. "
+                    f"For other formats (e.g. Parquet), pass 'data_type' explicitly."
+                )
 
         def _read_sample() -> bytes:
             with open(path, "rb") as f:
diff --git a/python/lib/sift_client/sift_types/data_import.py b/python/lib/sift_client/sift_types/data_import.py
index 8f6cc0212..c51d1a7ec 100644
--- a/python/lib/sift_client/sift_types/data_import.py
+++ b/python/lib/sift_client/sift_types/data_import.py
@@ -11,6 +11,7 @@
     DATA_TYPE_KEY_CSV,
     DATA_TYPE_KEY_HDF5,
     DATA_TYPE_KEY_PARQUET_FLATDATASET,
+    DATA_TYPE_KEY_PARQUET_SINGLE_CHANNEL_PER_ROW,
     DATA_TYPE_KEY_TDMS,
 )
 from sift.data_imports.v2.data_imports_pb2 import CsvConfig as CsvConfigProto
@@ -27,11 +28,6 @@
     from sift_client.client import SiftClient
 
 
-# ---------------------------------------------------------------------------
-# Enums
-# ---------------------------------------------------------------------------
-
-
 class TimeFormat(Enum):
     """Supported time formats for data import columns."""
 
@@ -62,7 +58,8 @@ class DataTypeKey(Enum):
     """Supported file types for data import detection."""
 
     CSV = DATA_TYPE_KEY_CSV
-    PARQUET = DATA_TYPE_KEY_PARQUET_FLATDATASET
+    PARQUET_FLATDATASET = DATA_TYPE_KEY_PARQUET_FLATDATASET
+    PARQUET_SINGLE_CHANNEL_PER_ROW = DATA_TYPE_KEY_PARQUET_SINGLE_CHANNEL_PER_ROW
     TDMS = DATA_TYPE_KEY_TDMS
     CH10 = DATA_TYPE_KEY_CH10
     HDF5 = DATA_TYPE_KEY_HDF5
@@ -70,20 +67,13 @@ class DataTypeKey(Enum):
 
 EXTENSION_TO_DATA_TYPE_KEY: dict[str, DataTypeKey] = {
     ".csv": DataTypeKey.CSV,
-    ".parquet": DataTypeKey.PARQUET,
     ".tdms": DataTypeKey.TDMS,
     ".ch10": DataTypeKey.CH10,
-    ".ch11": DataTypeKey.CH10,
     ".h5": DataTypeKey.HDF5,
     ".hdf5": DataTypeKey.HDF5,
 }
 
 
-# ---------------------------------------------------------------------------
-# CSV config types
-# ---------------------------------------------------------------------------
-
-
 class CsvTimeColumn(BaseModel):
     """Time column configuration for CSV imports.
 
@@ -211,11 +201,6 @@ def _from_proto(cls, proto: CsvConfigProto) -> CsvImportConfig:
         )
 
 
-# ---------------------------------------------------------------------------
-# DataImport resource type
-# ---------------------------------------------------------------------------
-
-
 class DataImport(BaseType[DataImportProto, "DataImport"]):
     """A data import in the Sift system.
 

From 01f5831be866e0a07c61d4860590a6a12a8b4fe3 Mon Sep 17 00:00:00 2001
From: Wei Qi Lu <wei@siftstack.com>
Date: Fri, 3 Apr 2026 13:34:47 -0700
Subject: [PATCH 09/52] remove import_from_url

---
 .../low_level_wrappers/data_imports.py        | 22 +--------
 python/lib/sift_client/_internal/util/file.py |  6 ++-
 .../lib/sift_client/resources/data_imports.py | 47 +------------------
 .../lib/sift_client/sift_types/data_import.py |  4 +-
 4 files changed, 9 insertions(+), 70 deletions(-)

diff --git a/python/lib/sift_client/_internal/low_level_wrappers/data_imports.py b/python/lib/sift_client/_internal/low_level_wrappers/data_imports.py
index 5cd1b91e5..581407fb2 100644
--- a/python/lib/sift_client/_internal/low_level_wrappers/data_imports.py
+++ b/python/lib/sift_client/_internal/low_level_wrappers/data_imports.py
@@ -6,8 +6,6 @@
 from sift.data_imports.v2.data_imports_pb2 import (
     CreateDataImportFromUploadRequest,
     CreateDataImportFromUploadResponse,
-    CreateDataImportFromUrlRequest,
-    CreateDataImportFromUrlResponse,
     DetectConfigRequest,
     DetectConfigResponse,
     GetDataImportRequest,
@@ -32,7 +30,7 @@
 
 
 def _set_config_on_request(
-    request: CreateDataImportFromUploadRequest | CreateDataImportFromUrlRequest,
+    request: CreateDataImportFromUploadRequest,
     config: ImportConfig,
 ) -> None:
     """Set the appropriate config field on a proto request based on the config type."""
@@ -71,24 +69,6 @@ async def create_from_upload(self, config: ImportConfig) -> tuple[str, str]:
         response = cast("CreateDataImportFromUploadResponse", response)
         return response.data_import_id, response.upload_url
 
-    async def create_from_url(self, url: str, config: ImportConfig) -> str:
-        """Create a data import from a remote URL.
-
-        Args:
-            url: The URL to import from (HTTP or S3).
-            config: The import configuration.
-
-        Returns:
-            The data_import_id.
-        """
-        request = CreateDataImportFromUrlRequest(url=url)
-        _set_config_on_request(request, config)
-        response = await self._grpc_client.get_stub(DataImportServiceStub).CreateDataImportFromUrl(
-            request
-        )
-        response = cast("CreateDataImportFromUrlResponse", response)
-        return response.data_import_id
-
     async def get(self, data_import_id: str) -> DataImport:
         """Get a data import by ID.
 
diff --git a/python/lib/sift_client/_internal/util/file.py b/python/lib/sift_client/_internal/util/file.py
index 0e977ced6..e39003581 100644
--- a/python/lib/sift_client/_internal/util/file.py
+++ b/python/lib/sift_client/_internal/util/file.py
@@ -19,7 +19,7 @@ def upload_file(
     file_path: Path,
     *,
     rest_client: RestClient,
-) -> None:
+) -> dict:
     """Upload a file to a presigned URL.
 
     Args:
@@ -27,6 +27,9 @@ def upload_file(
         file_path: Path to the file to upload.
         rest_client: The SDK rest client to use for the upload.
 
+    Returns:
+        The parsed JSON response from the server.
+
     Raises:
         ValueError: If the upload request fails.
     """
@@ -38,6 +41,7 @@ def upload_file(
         )
         if not response.ok:
             raise ValueError(f"Upload failed ({response.status_code}): {response.text}")
+        return response.json()
 
 
 def download_file(
diff --git a/python/lib/sift_client/resources/data_imports.py b/python/lib/sift_client/resources/data_imports.py
index b76091707..85fa5ff87 100644
--- a/python/lib/sift_client/resources/data_imports.py
+++ b/python/lib/sift_client/resources/data_imports.py
@@ -1,7 +1,6 @@
 from __future__ import annotations
 
 import asyncio
-import logging
 import time
 from pathlib import Path
 from typing import TYPE_CHECKING
@@ -26,13 +25,11 @@
     from sift_client._internal.low_level_wrappers.data_imports import ImportConfig
     from sift_client.client import SiftClient
 
-logger = logging.getLogger(__name__)
-
 
 class DataImportAPIAsync(ResourceBase):
     """High-level API for importing data into Sift.
 
-    Supports importing data from local files or remote URLs. Returns a
+    Supports importing data from local files. Returns a
     `DataImport` object that can be polled for status.
     """
 
@@ -112,49 +109,11 @@ async def import_from_path(
                 }
             )
         data_import_id, upload_url = await self._low_level_client.create_from_upload(config)
-        logger.info("Created data import %s", data_import_id)
 
         await run_sync_function(
             lambda: upload_file(upload_url, path, rest_client=self.client.rest_client)
         )
-        logger.info("Uploaded file to presigned URL for import %s", data_import_id)
-
-        return await self.wait_until_complete(
-            data_import_id,
-            polling_interval_secs=polling_interval_secs,
-            timeout_secs=timeout_secs,
-            show_progress=show_progress,
-        )
-
-    async def import_from_url(
-        self,
-        *,
-        url: str,
-        config: ImportConfig,
-        polling_interval_secs: int = 5,
-        timeout_secs: int | None = None,
-        show_progress: bool | None = None,
-    ) -> DataImport:
-        """Import data from a remote URL (HTTP or S3).
-
-        Returns a :class:`DataImport` that can be polled for status via
-        ``data_import.refresh()``.
-
-        Args:
-            url: The URL to import from.
-            config: Import configuration describing the file format and column
-                mapping.
-            polling_interval_secs: Seconds between status polls. Defaults to 5s.
-            timeout_secs: Maximum seconds to wait. If None, polls indefinitely.
-            show_progress: If True, display a progress spinner while waiting
-                for the import to complete. Defaults to True for sync, False
-                for async.
-
-        Returns:
-            A :class:`DataImport` representing the import operation.
-        """
-        data_import_id = await self._low_level_client.create_from_url(url, config)
-        logger.info("Created URL-based data import %s", data_import_id)
+        # job_id = response["job_id"]
 
         return await self.wait_until_complete(
             data_import_id,
@@ -215,8 +174,6 @@ async def list_(
     async def retry(self, data_import: str | DataImport) -> None:
         """Retry a failed data import.
 
-        Only works for URL-based imports in a failed state.
-
         Args:
             data_import: The DataImport or data_import_id to retry.
         """
diff --git a/python/lib/sift_client/sift_types/data_import.py b/python/lib/sift_client/sift_types/data_import.py
index c51d1a7ec..b372498b8 100644
--- a/python/lib/sift_client/sift_types/data_import.py
+++ b/python/lib/sift_client/sift_types/data_import.py
@@ -216,7 +216,6 @@ class DataImport(BaseType[DataImportProto, "DataImport"]):
 
     # Optional fields
     error_message: str | None
-    source_url: str | None
     run_id: str | None
     report_id: str | None
     asset_id: str | None
@@ -239,7 +238,6 @@ def _from_proto(
             error_message=proto.error_message or None,
             created_date=proto.created_date.ToDatetime(tzinfo=timezone.utc),
             modified_date=proto.modified_date.ToDatetime(tzinfo=timezone.utc),
-            source_url=proto.source_url or None,
             run_id=proto.run_id if proto.HasField("_run_id") else None,
             report_id=proto.report_id if proto.HasField("_report_id") else None,
             asset_id=proto.asset_id if proto.HasField("_asset_id") else None,
@@ -293,6 +291,6 @@ def refresh(self) -> DataImport:
         return self
 
     def retry(self) -> None:
-        """Retry this import. Only works for URL-based imports in a failed state."""
+        """Retry a failed import."""
         self.client.data_import.retry(self._id_or_error)
         self.refresh()

From 269a8b5f075d58fdb7f058156ed7247341406cf2 Mon Sep 17 00:00:00 2001
From: Wei Qi Lu <wei@siftstack.com>
Date: Fri, 3 Apr 2026 14:06:45 -0700
Subject: [PATCH 10/52] convert imports to use jobs

---
 .../low_level_wrappers/data_imports.py        |  95 +--------
 .../lib/sift_client/resources/data_imports.py | 182 +++---------------
 .../resources/sync_stubs/__init__.pyi         | 123 +++---------
 .../lib/sift_client/sift_types/data_import.py | 111 -----------
 4 files changed, 48 insertions(+), 463 deletions(-)

diff --git a/python/lib/sift_client/_internal/low_level_wrappers/data_imports.py b/python/lib/sift_client/_internal/low_level_wrappers/data_imports.py
index 581407fb2..279c46d38 100644
--- a/python/lib/sift_client/_internal/low_level_wrappers/data_imports.py
+++ b/python/lib/sift_client/_internal/low_level_wrappers/data_imports.py
@@ -1,6 +1,5 @@
 from __future__ import annotations
 
-import logging
 from typing import TYPE_CHECKING, cast
 
 from sift.data_imports.v2.data_imports_pb2 import (
@@ -8,16 +7,11 @@
     CreateDataImportFromUploadResponse,
     DetectConfigRequest,
     DetectConfigResponse,
-    GetDataImportRequest,
-    GetDataImportResponse,
-    ListDataImportsRequest,
-    ListDataImportsResponse,
-    RetryDataImportRequest,
 )
 from sift.data_imports.v2.data_imports_pb2_grpc import DataImportServiceStub
 
 from sift_client._internal.low_level_wrappers.base import LowLevelClientBase
-from sift_client.sift_types.data_import import CsvImportConfig, DataImport
+from sift_client.sift_types.data_import import CsvImportConfig
 from sift_client.transport import WithGrpcClient
 
 if TYPE_CHECKING:
@@ -40,9 +34,6 @@ def _set_config_on_request(
         raise TypeError(f"Unsupported import config type: {type(config).__name__}")
 
 
-logger = logging.getLogger(__name__)
-
-
 class DataImportsLowLevelClient(LowLevelClientBase, WithGrpcClient):
     """Low-level client for the DataImportService.
 
@@ -69,87 +60,6 @@ async def create_from_upload(self, config: ImportConfig) -> tuple[str, str]:
         response = cast("CreateDataImportFromUploadResponse", response)
         return response.data_import_id, response.upload_url
 
-    async def get(self, data_import_id: str) -> DataImport:
-        """Get a data import by ID.
-
-        Args:
-            data_import_id: The ID of the data import.
-
-        Returns:
-            The DataImport.
-        """
-        request = GetDataImportRequest(data_import_id=data_import_id)
-        response = await self._grpc_client.get_stub(DataImportServiceStub).GetDataImport(request)
-        response = cast("GetDataImportResponse", response)
-        return DataImport._from_proto(response.data_import)
-
-    async def list_(
-        self,
-        *,
-        page_size: int | None = None,
-        page_token: str | None = None,
-        query_filter: str = "",
-        order_by: str = "",
-    ) -> tuple[list[DataImport], str]:
-        """List data imports with optional filtering and pagination.
-
-        Args:
-            page_size: Maximum number of results per page.
-            page_token: Token for the next page of results.
-            query_filter: CEL filter string.
-            order_by: Ordering string (e.g. "created_date desc").
-
-        Returns:
-            A tuple of (list of DataImports, next_page_token).
-        """
-        request = ListDataImportsRequest(
-            filter=query_filter,
-            order_by=order_by,
-        )
-        if page_size is not None:
-            request.page_size = page_size
-        if page_token:
-            request.page_token = page_token
-
-        response = await self._grpc_client.get_stub(DataImportServiceStub).ListDataImports(request)
-        response = cast("ListDataImportsResponse", response)
-        data_imports = [DataImport._from_proto(di) for di in response.data_imports]
-        return data_imports, response.next_page_token
-
-    async def list_all(
-        self,
-        *,
-        query_filter: str = "",
-        order_by: str = "",
-        max_results: int | None = None,
-    ) -> list[DataImport]:
-        """List all data imports, handling pagination automatically.
-
-        Args:
-            query_filter: CEL filter string.
-            order_by: Ordering string (e.g. "created_date desc").
-            max_results: Maximum total results to return.
-
-        Returns:
-            A list of all matching DataImports.
-        """
-        return await self._handle_pagination(
-            func=self.list_,
-            kwargs={"query_filter": query_filter, "order_by": order_by},
-            max_results=max_results,
-        )
-
-    async def retry(self, data_import_id: str) -> None:
-        """Retry a failed data import.
-
-        Only works for URL-based imports in a failed state.
-
-        Args:
-            data_import_id: The ID of the data import to retry.
-        """
-        request = RetryDataImportRequest(data_import_id=data_import_id)
-        await self._grpc_client.get_stub(DataImportServiceStub).RetryDataImport(request)
-
     async def detect_config(
         self, data: bytes, data_type_key: DataTypeKey.ValueType
     ) -> DetectConfigResponse:
@@ -160,8 +70,7 @@ async def detect_config(
             data_type_key: The file type hint.
 
         Returns:
-            The raw DetectConfigResponse proto. The caller (resource API)
-            is responsible for converting to a sift_type.
+            The raw DetectConfigResponse proto.
         """
         request = DetectConfigRequest(data=data, type=data_type_key)
         response = await self._grpc_client.get_stub(DataImportServiceStub).DetectConfig(request)
diff --git a/python/lib/sift_client/resources/data_imports.py b/python/lib/sift_client/resources/data_imports.py
index 85fa5ff87..03f18320f 100644
--- a/python/lib/sift_client/resources/data_imports.py
+++ b/python/lib/sift_client/resources/data_imports.py
@@ -1,13 +1,8 @@
 from __future__ import annotations
 
-import asyncio
-import time
 from pathlib import Path
 from typing import TYPE_CHECKING
 
-from alive_progress import alive_bar  # type: ignore[import-untyped]
-
-import sift_client as _sift_client_module
 from sift_client._internal.low_level_wrappers.data_imports import DataImportsLowLevelClient
 from sift_client._internal.util.executor import run_sync_function
 from sift_client._internal.util.file import upload_file
@@ -15,23 +10,17 @@
 from sift_client.sift_types.data_import import (
     EXTENSION_TO_DATA_TYPE_KEY,
     CsvImportConfig,
-    DataImport,
-    DataImportStatus,
     DataTypeKey,
 )
-from sift_client.util import cel_utils as cel
 
 if TYPE_CHECKING:
     from sift_client._internal.low_level_wrappers.data_imports import ImportConfig
     from sift_client.client import SiftClient
+    from sift_client.sift_types.job import Job
 
 
 class DataImportAPIAsync(ResourceBase):
-    """High-level API for importing data into Sift.
-
-    Supports importing data from local files. Returns a
-    `DataImport` object that can be polled for status.
-    """
+    """High-level API for importing data into Sift."""
 
     def __init__(self, sift_client: SiftClient):
         """Initialize the DataImportAPI.
@@ -46,21 +35,19 @@ def __init__(self, sift_client: SiftClient):
 
     async def import_from_path(
         self,
-        *,
         file_path: str | Path,
+        *,
         config: ImportConfig | None = None,
         data_type: DataTypeKey | None = None,
         asset_name: str | None = None,
         run_name: str | None = None,
         run_id: str | None = None,
-        polling_interval_secs: int = 5,
-        timeout_secs: int | None = None,
-        show_progress: bool | None = None,
-    ) -> DataImport:
+    ) -> Job:
         """Import data from a local file.
 
-        Creates a data import on the server, uploads the file, and waits
-        for the import to complete. Returns the completed :class:`DataImport`.
+        Creates a data import on the server, uploads the file, and returns
+        a :class:`Job` handle. Use ``job.wait_until_complete()`` to poll
+        for completion.
 
         When ``config`` is omitted the file format is auto-detected via
         :meth:`detect_config` and a :class:`CsvImportConfig` is built using
@@ -80,14 +67,9 @@ async def import_from_path(
                 provided.
             run_id: Optional existing run ID. Only used when ``config`` is not
                 provided.
-            polling_interval_secs: Seconds between status polls. Defaults to 5s.
-            timeout_secs: Maximum seconds to wait. If None, polls indefinitely.
-            show_progress: If True, display a progress spinner while waiting
-                for the import to complete. Defaults to True for sync, False
-                for async.
 
         Returns:
-            A :class:`DataImport` representing the import operation.
+            A :class:`Job` handle for the pending import.
 
         Raises:
             FileNotFoundError: If the file does not exist.
@@ -108,79 +90,14 @@ async def import_from_path(
                     "run_id": run_id,
                 }
             )
-        data_import_id, upload_url = await self._low_level_client.create_from_upload(config)
+        _, upload_url = await self._low_level_client.create_from_upload(config)
 
-        await run_sync_function(
+        response = await run_sync_function(
             lambda: upload_file(upload_url, path, rest_client=self.client.rest_client)
         )
-        # job_id = response["job_id"]
-
-        return await self.wait_until_complete(
-            data_import_id,
-            polling_interval_secs=polling_interval_secs,
-            timeout_secs=timeout_secs,
-            show_progress=show_progress,
-        )
-
-    async def get(self, data_import_id: str) -> DataImport:
-        """Get a data import by ID.
-
-        Args:
-            data_import_id: The ID of the data import.
-
-        Returns:
-            The DataImport.
-        """
-        data_import = await self._low_level_client.get(data_import_id)
-        return self._apply_client_to_instance(data_import)
-
-    async def list_(
-        self,
-        *,
-        data_import_ids: list[str] | None = None,
-        status: DataImportStatus | None = None,
-        filter_query: str | None = None,
-        order_by: str | None = None,
-        limit: int | None = None,
-    ) -> list[DataImport]:
-        """List data imports with optional filtering.
-
-        Args:
-            data_import_ids: Filter to imports with any of these IDs.
-            status: Filter to imports with this status.
-            filter_query: Explicit CEL filter string.
-            order_by: Ordering string (e.g. "created_date desc").
-            limit: Maximum number of imports to return. If None, returns all.
-
-        Returns:
-            A list of DataImport objects matching the filter criteria.
-        """
-        filter_parts = []
-        if data_import_ids:
-            filter_parts.append(cel.in_("data_import_id", data_import_ids))
-        if status is not None:
-            filter_parts.append(cel.equals("status", str(status.value)))
-        if filter_query:
-            filter_parts.append(filter_query)
-        query_filter = cel.and_(*filter_parts)
-
-        data_imports = await self._low_level_client.list_all(
-            query_filter=query_filter or "",
-            order_by=order_by or "",
-            max_results=limit,
-        )
-        return self._apply_client_to_instances(data_imports)
-
-    async def retry(self, data_import: str | DataImport) -> None:
-        """Retry a failed data import.
+        job_id = response["job_id"]
 
-        Args:
-            data_import: The DataImport or data_import_id to retry.
-        """
-        data_import_id = (
-            data_import._id_or_error if isinstance(data_import, DataImport) else data_import
-        )
-        await self._low_level_client.retry(data_import_id)
+        return await self.client.async_.jobs.get(job_id=job_id)
 
     async def detect_config(
         self,
@@ -214,16 +131,22 @@ async def detect_config(
         if not path.is_file():
             raise FileNotFoundError(f"File not found: {file_path}")
 
-        if data_type is not None:
+        ext = path.suffix.lower()
+        if ext in (".parquet", ".pqt"):
+            if data_type is None:
+                raise ValueError(
+                    "Parquet files require 'data_type' to be specified. "
+                    "Use DataTypeKey.PARQUET_FLATDATASET or DataTypeKey.PARQUET_SINGLE_CHANNEL_PER_ROW."
+                )
+            data_type_key = data_type
+        elif data_type is not None:
             data_type_key = data_type
         else:
-            ext = path.suffix.lower()
             data_type_key = EXTENSION_TO_DATA_TYPE_KEY.get(ext)
             if data_type_key is None:
                 raise ValueError(
                     f"Unsupported file extension '{ext}'. "
-                    f"Supported: {', '.join(sorted(EXTENSION_TO_DATA_TYPE_KEY))}. "
-                    f"For other formats (e.g. Parquet), pass 'data_type' explicitly."
+                    f"Supported: {', '.join(sorted(EXTENSION_TO_DATA_TYPE_KEY))}"
                 )
 
         def _read_sample() -> bytes:
@@ -248,64 +171,3 @@ def _read_sample() -> bytes:
         # TODO: Add other file format configs
 
         raise ValueError("Server returned an empty DetectConfig response.")
-
-    async def wait_until_complete(
-        self,
-        data_import: str | DataImport,
-        *,
-        polling_interval_secs: int = 5,
-        timeout_secs: int | None = None,
-        show_progress: bool | None = None,
-    ) -> DataImport:
-        """Wait until a data import reaches a terminal state.
-
-        Polls the import status at the given interval until the import is
-        SUCCEEDED or FAILED, returning the completed DataImport.
-
-        Args:
-            data_import: The DataImport or data_import_id to wait for.
-            polling_interval_secs: Seconds between status polls. Defaults to 5s.
-            timeout_secs: Maximum seconds to wait. If None, polls indefinitely.
-                Defaults to None (indefinite).
-            show_progress: If True, display an animated progress spinner alongside
-                the import status while polling. Defaults to True for sync, False
-                for async. Use ``sift_client.config.show_progress = False`` to disable
-                globally for sync.
-
-        Returns:
-            The DataImport in its terminal state.
-        """
-        data_import_id = (
-            data_import._id_or_error if isinstance(data_import, DataImport) else data_import
-        )
-        if show_progress is None:
-            global_setting = _sift_client_module.config.show_progress
-            if global_setting is not None:
-                show_progress = global_setting
-            elif getattr(self, "_is_sync", False):
-                show_progress = True
-            else:
-                show_progress = False
-
-        start = time.monotonic()
-        with alive_bar(
-            title=f"Data Import ID {data_import_id}: polling",
-            bar=None,
-            spinner_length=7,
-            spinner="dots_waves",
-            monitor=False,
-            stats=False,
-            disable=not show_progress,
-        ) as bar:
-            while True:
-                result = await self.get(data_import_id)
-                bar.title(f"Data Import ID {data_import_id}: {result.status.name}")
-                bar()
-                if result.is_complete:
-                    return result
-                if timeout_secs is not None and (time.monotonic() - start) >= timeout_secs:
-                    raise TimeoutError(
-                        f"Data import '{data_import_id}' did not complete "
-                        f"within {timeout_secs} seconds."
-                    )
-                await asyncio.sleep(polling_interval_secs)
diff --git a/python/lib/sift_client/resources/sync_stubs/__init__.pyi b/python/lib/sift_client/resources/sync_stubs/__init__.pyi
index e9153aafc..4c38611ad 100644
--- a/python/lib/sift_client/resources/sync_stubs/__init__.pyi
+++ b/python/lib/sift_client/resources/sync_stubs/__init__.pyi
@@ -24,7 +24,9 @@ if TYPE_CHECKING:
         CalculatedChannelUpdate,
     )
     from sift_client.sift_types.channel import Channel
-    from sift_client.sift_types.data_import import DataImport, DataImportStatus
+    from sift_client.sift_types.data_import import (
+        DataTypeKey,
+    )
     from sift_client.sift_types.export import ExportOutputFormat
     from sift_client.sift_types.file_attachment import (
         FileAttachment,
@@ -629,9 +631,6 @@ class DataImportAPI:
     """Sync counterpart to `DataImportAPIAsync`.
 
     High-level API for importing data into Sift.
-
-    Supports importing data from local files or remote URLs. Returns a
-    `DataImport` object that can be polled for status.
     """
 
     def __init__(self, sift_client: SiftClient):
@@ -643,18 +642,23 @@ class DataImportAPI:
         ...
 
     def _run(self, coro): ...
-    def detect_config(self, file_path: str | Path) -> ImportConfig:
+    def detect_config(
+        self, file_path: str | Path, data_type: DataTypeKey | None = None
+    ) -> ImportConfig:
         """Auto-detect import configuration from a file.
 
         Reads a sample of the file, sends it to the server's DetectConfig
         endpoint, and returns the detected configuration. The file format
-        is inferred from the file extension. You can inspect and modify the
-        result before passing it to :meth:`import_from_path`.
+        is inferred from the file extension when ``data_type`` is not
+        provided.
 
-        Supported extensions: .csv, .parquet, .tdms, .ch10, .ch11, .h5, .hdf5
+        For file types with multiple layouts (e.g. Parquet), ``data_type``
+        must be specified explicitly.
 
         Args:
             file_path: Path to the file to analyze.
+            data_type: Explicit data type key. Required for formats like
+                Parquet where the extension alone is ambiguous.
 
         Returns:
             The detected import config.
@@ -666,31 +670,21 @@ class DataImportAPI:
         """
         ...
 
-    def get(self, data_import_id: str) -> DataImport:
-        """Get a data import by ID.
-
-        Args:
-            data_import_id: The ID of the data import.
-
-        Returns:
-            The DataImport.
-        """
-        ...
-
     def import_from_path(
         self,
-        *,
         file_path: str | Path,
+        *,
         config: ImportConfig | None = None,
+        data_type: DataTypeKey | None = None,
         asset_name: str | None = None,
         run_name: str | None = None,
         run_id: str | None = None,
-    ) -> DataImport:
+    ) -> Job:
         """Import data from a local file.
 
-        Creates a data import on the server and uploads the file to the
-        returned presigned URL. Returns a :class:`DataImport` that can be
-        polled for status via ``data_import.refresh()``.
+        Creates a data import on the server, uploads the file, and returns
+        a :class:`Job` handle. Use ``job.wait_until_complete()`` to poll
+        for completion.
 
         When ``config`` is omitted the file format is auto-detected via
         :meth:`detect_config` and a :class:`CsvImportConfig` is built using
@@ -699,8 +693,11 @@ class DataImportAPI:
         Args:
             file_path: Path to the local file to import.
             config: Import configuration describing the file format and column
-                mapping. When provided, ``asset_name``, ``run_name``, and
-                ``run_id`` are ignored.
+                mapping. When provided, ``asset_name``, ``run_name``,
+                ``run_id``, and ``data_type`` are ignored.
+            data_type: Explicit data type key. Required for formats like
+                Parquet where the extension alone is ambiguous. Only used
+                when ``config`` is not provided.
             asset_name: Name of the asset to import into. Required when
                 ``config`` is not provided.
             run_name: Optional run name. Only used when ``config`` is not
@@ -709,7 +706,7 @@ class DataImportAPI:
                 provided.
 
         Returns:
-            A :class:`DataImport` representing the import operation.
+            A :class:`Job` handle for the pending import.
 
         Raises:
             FileNotFoundError: If the file does not exist.
@@ -717,78 +714,6 @@ class DataImportAPI:
         """
         ...
 
-    def import_from_url(self, *, url: str, config: ImportConfig) -> DataImport:
-        """Import data from a remote URL (HTTP or S3).
-
-        Returns a :class:`DataImport` that can be polled for status via
-        ``data_import.refresh()``.
-
-        Args:
-            url: The URL to import from.
-            config: Import configuration describing the file format and column
-                mapping.
-
-        Returns:
-            A :class:`DataImport` representing the import operation.
-        """
-        ...
-
-    def list_(
-        self,
-        *,
-        data_import_ids: list[str] | None = None,
-        status: DataImportStatus | None = None,
-        filter_query: str | None = None,
-        order_by: str | None = None,
-        limit: int | None = None,
-    ) -> list[DataImport]:
-        """List data imports with optional filtering.
-
-        Args:
-            data_import_ids: Filter to imports with any of these IDs.
-            status: Filter to imports with this status.
-            filter_query: Explicit CEL filter string.
-            order_by: Ordering string (e.g. "created_date desc").
-            limit: Maximum number of imports to return. If None, returns all.
-
-        Returns:
-            A list of DataImport objects matching the filter criteria.
-        """
-        ...
-
-    def retry(self, data_import: str | DataImport) -> None:
-        """Retry a failed data import.
-
-        Only works for URL-based imports in a failed state.
-
-        Args:
-            data_import: The DataImport or data_import_id to retry.
-        """
-        ...
-
-    def wait_until_complete(
-        self,
-        data_import: str | DataImport,
-        *,
-        polling_interval_secs: int = 5,
-        timeout_secs: int | None = None,
-    ) -> DataImport:
-        """Wait until a data import reaches a terminal state.
-
-        Polls the import status at the given interval until the import is
-        SUCCEEDED or FAILED, returning the completed DataImport.
-
-        Args:
-            data_import: The DataImport or data_import_id to wait for.
-            polling_interval_secs: Seconds between status polls. Defaults to 5s.
-            timeout_secs: Maximum seconds to wait. If None, polls indefinitely.
-                Defaults to None (indefinite).
-
-        Returns:
-            The DataImport in its terminal state.
-        """
-        ...
-
 class FileAttachmentsAPI:
     """Sync counterpart to `FileAttachmentsAPIAsync`.
 
diff --git a/python/lib/sift_client/sift_types/data_import.py b/python/lib/sift_client/sift_types/data_import.py
index b372498b8..b238ffffe 100644
--- a/python/lib/sift_client/sift_types/data_import.py
+++ b/python/lib/sift_client/sift_types/data_import.py
@@ -2,7 +2,6 @@
 
 from datetime import datetime  # noqa: TC003
 from enum import Enum
-from typing import TYPE_CHECKING
 
 from pydantic import BaseModel, ConfigDict, model_validator
 from sift.common.type.v1.channel_config_pb2 import ChannelConfig as ChannelConfigProto
@@ -16,17 +15,11 @@
 )
 from sift.data_imports.v2.data_imports_pb2 import CsvConfig as CsvConfigProto
 from sift.data_imports.v2.data_imports_pb2 import CsvTimeColumn as CsvTimeColumnProto
-from sift.data_imports.v2.data_imports_pb2 import DataImport as DataImportProto
-from sift.data_imports.v2.data_imports_pb2 import DataImportStatus as DataImportStatusProto
 from sift.data_imports.v2.data_imports_pb2 import TimeFormat as TimeFormatProto
 
 from sift_client._internal.util.timestamp import to_pb_timestamp
-from sift_client.sift_types._base import BaseType
 from sift_client.sift_types.channel import ChannelDataType
 
-if TYPE_CHECKING:
-    from sift_client.client import SiftClient
-
 
 class TimeFormat(Enum):
     """Supported time formats for data import columns."""
@@ -45,15 +38,6 @@ class TimeFormat(Enum):
     ABSOLUTE_UNIX_NANOSECONDS = TimeFormatProto.TIME_FORMAT_ABSOLUTE_UNIX_NANOSECONDS
 
 
-class DataImportStatus(Enum):
-    """Status of a data import."""
-
-    PENDING = DataImportStatusProto.DATA_IMPORT_STATUS_PENDING
-    IN_PROGRESS = DataImportStatusProto.DATA_IMPORT_STATUS_IN_PROGRESS
-    SUCCEEDED = DataImportStatusProto.DATA_IMPORT_STATUS_SUCCEEDED
-    FAILED = DataImportStatusProto.DATA_IMPORT_STATUS_FAILED
-
-
 class DataTypeKey(Enum):
     """Supported file types for data import detection."""
 
@@ -199,98 +183,3 @@ def _from_proto(cls, proto: CsvConfigProto) -> CsvImportConfig:
             time_column=time_column,
             data_columns=data_columns,
         )
-
-
-class DataImport(BaseType[DataImportProto, "DataImport"]):
-    """A data import in the Sift system.
-
-    Represents the status and metadata of an import operation. Use
-    ``client.data_import.import_from_path()`` to create one, or
-    ``client.data_import.get()`` to retrieve an existing import by ID.
-    """
-
-    # Required fields
-    status: DataImportStatus
-    created_date: datetime
-    modified_date: datetime
-
-    # Optional fields
-    error_message: str | None
-    run_id: str | None
-    report_id: str | None
-    asset_id: str | None
-    data_start_time: datetime | None
-    data_stop_time: datetime | None
-
-    # Config used for this import
-    csv_config: CsvImportConfig | None
-
-    @classmethod
-    def _from_proto(
-        cls, proto: DataImportProto, sift_client: SiftClient | None = None
-    ) -> DataImport:
-        from datetime import timezone
-
-        return cls(
-            proto=proto,
-            id_=proto.data_import_id,
-            status=DataImportStatus(proto.status),
-            error_message=proto.error_message or None,
-            created_date=proto.created_date.ToDatetime(tzinfo=timezone.utc),
-            modified_date=proto.modified_date.ToDatetime(tzinfo=timezone.utc),
-            run_id=proto.run_id if proto.HasField("_run_id") else None,
-            report_id=proto.report_id if proto.HasField("_report_id") else None,
-            asset_id=proto.asset_id if proto.HasField("_asset_id") else None,
-            data_start_time=(
-                proto.data_start_time.ToDatetime(tzinfo=timezone.utc)
-                if proto.HasField("_data_start_time")
-                else None
-            ),
-            data_stop_time=(
-                proto.data_stop_time.ToDatetime(tzinfo=timezone.utc)
-                if proto.HasField("_data_stop_time")
-                else None
-            ),
-            csv_config=(
-                CsvImportConfig._from_proto(proto.csv_config)
-                if proto.HasField("csv_config")
-                else None
-            ),
-            _client=sift_client,
-        )
-
-    @property
-    def is_pending(self) -> bool:
-        """Return True if the import is pending."""
-        return self.status == DataImportStatus.PENDING
-
-    @property
-    def is_in_progress(self) -> bool:
-        """Return True if the import is in progress."""
-        return self.status == DataImportStatus.IN_PROGRESS
-
-    @property
-    def is_succeeded(self) -> bool:
-        """Return True if the import succeeded."""
-        return self.status == DataImportStatus.SUCCEEDED
-
-    @property
-    def is_failed(self) -> bool:
-        """Return True if the import failed."""
-        return self.status == DataImportStatus.FAILED
-
-    @property
-    def is_complete(self) -> bool:
-        """Return True if the import reached a terminal state (succeeded or failed)."""
-        return self.status in (DataImportStatus.SUCCEEDED, DataImportStatus.FAILED)
-
-    def refresh(self) -> DataImport:
-        """Refresh this import with the latest data from the API."""
-        updated = self.client.data_import.get(self._id_or_error)
-        self._update(updated)
-        return self
-
-    def retry(self) -> None:
-        """Retry a failed import."""
-        self.client.data_import.retry(self._id_or_error)
-        self.refresh()

From 54014eac08d256988b10e517717051333e90fa21 Mon Sep 17 00:00:00 2001
From: Wei Qi Lu <wei@siftstack.com>
Date: Mon, 6 Apr 2026 11:25:09 -0700
Subject: [PATCH 11/52] fix sync/async behavior when polling directly

---
 .../lib/sift_client/resources/data_imports.py | 33 +++++++++++++++----
 .../resources/sync_stubs/__init__.pyi         | 14 +++++---
 2 files changed, 36 insertions(+), 11 deletions(-)

diff --git a/python/lib/sift_client/resources/data_imports.py b/python/lib/sift_client/resources/data_imports.py
index 03f18320f..779620be2 100644
--- a/python/lib/sift_client/resources/data_imports.py
+++ b/python/lib/sift_client/resources/data_imports.py
@@ -3,6 +3,7 @@
 from pathlib import Path
 from typing import TYPE_CHECKING
 
+import sift_client as _sift_client_module
 from sift_client._internal.low_level_wrappers.data_imports import DataImportsLowLevelClient
 from sift_client._internal.util.executor import run_sync_function
 from sift_client._internal.util.file import upload_file
@@ -42,12 +43,14 @@ async def import_from_path(
         asset_name: str | None = None,
         run_name: str | None = None,
         run_id: str | None = None,
+        polling_interval_secs: int = 5,
+        timeout_secs: int | None = None,
+        show_progress: bool | None = None,
     ) -> Job:
         """Import data from a local file.
 
-        Creates a data import on the server, uploads the file, and returns
-        a :class:`Job` handle. Use ``job.wait_until_complete()`` to poll
-        for completion.
+        Creates a data import on the server, uploads the file, and waits
+        for the import to complete.
 
         When ``config`` is omitted the file format is auto-detected via
         :meth:`detect_config` and a :class:`CsvImportConfig` is built using
@@ -67,9 +70,13 @@ async def import_from_path(
                 provided.
             run_id: Optional existing run ID. Only used when ``config`` is not
                 provided.
+            polling_interval_secs: Seconds between status polls. Defaults to 5s.
+            timeout_secs: Maximum seconds to wait. If None, polls indefinitely.
+            show_progress: If True, display a progress spinner while waiting.
+                Defaults to True for sync, False for async.
 
         Returns:
-            A :class:`Job` handle for the pending import.
+            The completed :class:`Job`.
 
         Raises:
             FileNotFoundError: If the file does not exist.
@@ -95,9 +102,21 @@ async def import_from_path(
         response = await run_sync_function(
             lambda: upload_file(upload_url, path, rest_client=self.client.rest_client)
         )
-        job_id = response["job_id"]
-
-        return await self.client.async_.jobs.get(job_id=job_id)
+        job_id = response["jobId"]
+
+        if show_progress is None:
+            global_setting = _sift_client_module.config.show_progress
+            if global_setting is not None:
+                show_progress = global_setting
+            else:
+                show_progress = getattr(self, "_is_sync", False)
+
+        return await self.client.async_.jobs.wait_until_complete(
+            job_id,
+            polling_interval_secs=polling_interval_secs,
+            timeout_secs=timeout_secs,
+            show_progress=show_progress,
+        )
 
     async def detect_config(
         self,
diff --git a/python/lib/sift_client/resources/sync_stubs/__init__.pyi b/python/lib/sift_client/resources/sync_stubs/__init__.pyi
index 4c38611ad..a4a85d18e 100644
--- a/python/lib/sift_client/resources/sync_stubs/__init__.pyi
+++ b/python/lib/sift_client/resources/sync_stubs/__init__.pyi
@@ -679,12 +679,14 @@ class DataImportAPI:
         asset_name: str | None = None,
         run_name: str | None = None,
         run_id: str | None = None,
+        polling_interval_secs: int = 5,
+        timeout_secs: int | None = None,
+        show_progress: bool | None = None,
     ) -> Job:
         """Import data from a local file.
 
-        Creates a data import on the server, uploads the file, and returns
-        a :class:`Job` handle. Use ``job.wait_until_complete()`` to poll
-        for completion.
+        Creates a data import on the server, uploads the file, and waits
+        for the import to complete.
 
         When ``config`` is omitted the file format is auto-detected via
         :meth:`detect_config` and a :class:`CsvImportConfig` is built using
@@ -704,9 +706,13 @@ class DataImportAPI:
                 provided.
             run_id: Optional existing run ID. Only used when ``config`` is not
                 provided.
+            polling_interval_secs: Seconds between status polls. Defaults to 5s.
+            timeout_secs: Maximum seconds to wait. If None, polls indefinitely.
+            show_progress: If True, display a progress spinner while waiting.
+                Defaults to True for sync, False for async.
 
         Returns:
-            A :class:`Job` handle for the pending import.
+            The completed :class:`Job`.
 
         Raises:
             FileNotFoundError: If the file does not exist.

From fe5060447eb8a7a027ee5933827511b98548fe6c Mon Sep 17 00:00:00 2001
From: Wei Qi Lu <wei@siftstack.com>
Date: Tue, 7 Apr 2026 13:25:55 -0700
Subject: [PATCH 12/52] add parquet import support

---
 .../low_level_wrappers/data_imports.py        |  14 +-
 python/lib/sift_client/_internal/util/file.py |  26 ++
 .../lib/sift_client/resources/data_imports.py | 139 ++++++--
 .../resources/sync_stubs/__init__.pyi         |  23 +-
 .../lib/sift_client/sift_types/data_import.py | 334 ++++++++++++++++++
 5 files changed, 491 insertions(+), 45 deletions(-)

diff --git a/python/lib/sift_client/_internal/low_level_wrappers/data_imports.py b/python/lib/sift_client/_internal/low_level_wrappers/data_imports.py
index 279c46d38..c64521963 100644
--- a/python/lib/sift_client/_internal/low_level_wrappers/data_imports.py
+++ b/python/lib/sift_client/_internal/low_level_wrappers/data_imports.py
@@ -11,7 +11,11 @@
 from sift.data_imports.v2.data_imports_pb2_grpc import DataImportServiceStub
 
 from sift_client._internal.low_level_wrappers.base import LowLevelClientBase
-from sift_client.sift_types.data_import import CsvImportConfig
+from sift_client.sift_types.data_import import (
+    CsvImportConfig,
+    ParquetFlatDatasetImportConfig,
+    ParquetSingleChannelPerRowImportConfig,
+)
 from sift_client.transport import WithGrpcClient
 
 if TYPE_CHECKING:
@@ -20,7 +24,9 @@
     from sift_client.transport.grpc_transport import GrpcClient
 
 # Union of all supported config types. Extend this as new formats are added.
-ImportConfig = CsvImportConfig
+ImportConfig = (
+    CsvImportConfig | ParquetFlatDatasetImportConfig | ParquetSingleChannelPerRowImportConfig
+)
 
 
 def _set_config_on_request(
@@ -30,6 +36,10 @@ def _set_config_on_request(
     """Set the appropriate config field on a proto request based on the config type."""
     if isinstance(config, CsvImportConfig):
         request.csv_config.CopyFrom(config._to_proto())
+    elif isinstance(
+        config, (ParquetFlatDatasetImportConfig, ParquetSingleChannelPerRowImportConfig)
+    ):
+        request.parquet_config.CopyFrom(config._to_proto())
     else:
         raise TypeError(f"Unsupported import config type: {type(config).__name__}")
 
diff --git a/python/lib/sift_client/_internal/util/file.py b/python/lib/sift_client/_internal/util/file.py
index e39003581..4f3a5dead 100644
--- a/python/lib/sift_client/_internal/util/file.py
+++ b/python/lib/sift_client/_internal/util/file.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
 
+import os
+import struct
 import warnings
 import zipfile
 from typing import TYPE_CHECKING
@@ -112,3 +114,42 @@ def extract_zip(zip_path: Path, output_dir: Path, *, delete_zip: bool = True) ->
         except OSError:
             warnings.warn(f"Failed to delete zip file '{zip_path}'", SiftWarning, stacklevel=2)
     return [output_dir / name for name in names if not name.endswith("/")]
+
+
+def extract_parquet_footer(path: Path) -> tuple[bytes, int]:
+    """Extract the Parquet footer bytes and compute the footer offset.
+
+    The last 8 bytes of the file are read as a 4-byte little-endian footer
+    length followed by the 4-byte magic ``PAR1``; the footer itself is the
+    ``footer length`` bytes immediately preceding that 8-byte tail.
+
+    Args:
+        path: Path to the Parquet file.
+
+    Returns:
+        A tuple of (footer_bytes, footer_offset), where ``footer_offset`` is
+        the byte position in the file at which the footer begins.
+
+    Raises:
+        ValueError: If the file is not a valid Parquet file: it is too short
+            to hold the 8-byte tail, the trailing magic is not ``PAR1``, or
+            the declared footer length does not fit inside the file.
+    """
+    tail_size = 8  # 4-byte little-endian footer length + 4-byte magic
+    with open(path, "rb") as f:
+        file_size = f.seek(0, os.SEEK_END)
+        # Check sizes up front: a negative seek past the start of the file
+        # raises OSError, not the documented ValueError.
+        if file_size < tail_size:
+            raise ValueError(f"Invalid Parquet file: {path} is too short to contain a footer")
+        f.seek(-tail_size, os.SEEK_END)
+        footer_len, magic = struct.unpack("<I4s", f.read(tail_size))
+        if magic != b"PAR1":
+            raise ValueError(f"Invalid Parquet file: missing magic bytes in {path}")
+        if footer_len > file_size - tail_size:
+            raise ValueError(
+                f"Invalid Parquet file: footer length {footer_len} exceeds file size in {path}"
+            )
+        f.seek(-(footer_len + tail_size), os.SEEK_END)
+        footer_bytes = f.read(footer_len)
+    return footer_bytes, file_size - len(footer_bytes) - tail_size
diff --git a/python/lib/sift_client/resources/data_imports.py b/python/lib/sift_client/resources/data_imports.py
index 779620be2..05a83243d 100644
--- a/python/lib/sift_client/resources/data_imports.py
+++ b/python/lib/sift_client/resources/data_imports.py
@@ -6,12 +6,16 @@
 import sift_client as _sift_client_module
 from sift_client._internal.low_level_wrappers.data_imports import DataImportsLowLevelClient
 from sift_client._internal.util.executor import run_sync_function
-from sift_client._internal.util.file import upload_file
+from sift_client._internal.util.file import extract_parquet_footer, upload_file
 from sift_client.resources._base import ResourceBase
+from sift_client.sift_types.channel import ChannelDataType
 from sift_client.sift_types.data_import import (
     EXTENSION_TO_DATA_TYPE_KEY,
     CsvImportConfig,
     DataTypeKey,
+    ParquetFlatDatasetImportConfig,
+    ParquetSingleChannelPerRowImportConfig,
+    ParquetTimeColumn,
 )
 
 if TYPE_CHECKING:
@@ -37,10 +41,10 @@ def __init__(self, sift_client: SiftClient):
     async def import_from_path(
         self,
         file_path: str | Path,
+        asset_name: str,
         *,
         config: ImportConfig | None = None,
         data_type: DataTypeKey | None = None,
-        asset_name: str | None = None,
         run_name: str | None = None,
         run_id: str | None = None,
         polling_interval_secs: int = 5,
@@ -53,23 +57,23 @@ async def import_from_path(
         for the import to complete.
 
         When ``config`` is omitted the file format is auto-detected via
-        :meth:`detect_config` and a :class:`CsvImportConfig` is built using
-        the provided ``asset_name`` and optional ``run_name`` / ``run_id``.
+        :meth:`detect_config`. The ``asset_name`` is always applied to
+        the config. If neither ``run_name`` nor ``run_id`` is provided
+        (and none is set on the config), ``run_name`` defaults to the
+        filename.
 
         Args:
             file_path: Path to the local file to import.
+            asset_name: Name of the asset to import data into.
             config: Import configuration describing the file format and column
-                mapping. When provided, ``asset_name``, ``run_name``,
-                ``run_id``, and ``data_type`` are ignored.
+                mapping. When provided, ``data_type`` is ignored.
             data_type: Explicit data type key. Required for formats like
                 Parquet where the extension alone is ambiguous. Only used
                 when ``config`` is not provided.
-            asset_name: Name of the asset to import into. Required when
-                ``config`` is not provided.
-            run_name: Optional run name. Only used when ``config`` is not
-                provided.
-            run_id: Optional existing run ID. Only used when ``config`` is not
-                provided.
+            run_name: Run name to use. Overrides any value on the config.
+                Defaults to the filename if neither ``run_name`` nor
+                ``run_id`` is set.
+            run_id: Existing run ID to use. Overrides any value on the config.
             polling_interval_secs: Seconds between status polls. Defaults to 5s.
             timeout_secs: Maximum seconds to wait. If None, polls indefinitely.
             show_progress: If True, display a progress spinner while waiting.
@@ -80,23 +84,37 @@ async def import_from_path(
 
         Raises:
             FileNotFoundError: If the file does not exist.
-            ValueError: If neither ``config`` nor ``asset_name`` is provided.
         """
         path = Path(file_path)
         if not path.is_file():
             raise FileNotFoundError(f"File not found: {file_path}")
 
         if config is None:
-            if asset_name is None:
-                raise ValueError("Either 'config' or 'asset_name' must be provided.")
-            detected = await self.detect_config(file_path, data_type=data_type)
-            config = detected.model_copy(
-                update={
-                    "asset_name": asset_name,
-                    "run_name": run_name if run_name or run_id else path.stem,
-                    "run_id": run_id,
-                }
-            )
+            config = await self.detect_config(file_path, data_type=data_type)
+
+        updates: dict = {"asset_name": asset_name}
+        if run_name is not None:
+            updates["run_name"] = run_name
+        elif run_id is not None:
+            updates["run_id"] = run_id
+        elif not getattr(config, "run_name", None) and not getattr(config, "run_id", None):
+            updates["run_name"] = path.name
+        config = config.model_copy(update=updates)
+
+        if isinstance(
+            config, (ParquetFlatDatasetImportConfig, ParquetSingleChannelPerRowImportConfig)
+        ):
+            if config.footer_offset == 0 and config.footer_length == 0:
+                footer_bytes, footer_offset = await run_sync_function(
+                    lambda: extract_parquet_footer(path)
+                )
+                config = config.model_copy(
+                    update={
+                        "footer_offset": footer_offset,
+                        "footer_length": len(footer_bytes),
+                    }
+                )
+
         _, upload_url = await self._low_level_client.create_from_upload(config)
 
         response = await run_sync_function(
@@ -168,25 +186,84 @@ async def detect_config(
                     f"Supported: {', '.join(sorted(EXTENSION_TO_DATA_TYPE_KEY))}"
                 )
 
-        def _read_sample() -> bytes:
-            with open(path, "rb") as f:
-                return f.read(65_536)  # 64 KiB
+        is_parquet = data_type_key in (
+            DataTypeKey.PARQUET_FLATDATASET,
+            DataTypeKey.PARQUET_SINGLE_CHANNEL_PER_ROW,
+        )
+
+        footer_offset = 0
+        footer_length = 0
+
+        if is_parquet:
+            footer_bytes, footer_offset = await run_sync_function(
+                lambda: extract_parquet_footer(path)
+            )
+            sample = footer_bytes
+            footer_length = len(footer_bytes)
+        else:
 
-        sample = await run_sync_function(_read_sample)
+            def _read_sample() -> bytes:
+                with open(path, "rb") as f:
+                    return f.read(65_536)  # 64 KiB
+
+            sample = await run_sync_function(_read_sample)
 
         response = await self._low_level_client.detect_config(sample, data_type_key.value)
 
         if response.HasField("csv_config"):
             config = CsvImportConfig._from_proto(response.csv_config)
-            # The server's DetectConfig may include the time column in
-            # data_columns, but CreateDataImportFromUpload rejects that
-            # overlap. Filter it out so the config is import-ready.
+            # Filter out the time column from data_columns to avoid overlap.
             time_col = config.time_column.column
             filtered = [dc for dc in config.data_columns if dc.column != time_col]
             if len(filtered) != len(config.data_columns):
                 config = config.model_copy(update={"data_columns": filtered})
             return config
 
-        # TODO: Add other file format configs
+        if response.HasField("parquet_config"):
+            proto = response.parquet_config
+            if proto.HasField("flat_dataset"):
+                config = ParquetFlatDatasetImportConfig._from_proto(
+                    proto, footer_offset=footer_offset, footer_length=footer_length
+                )
+                # Filter out the time column from data_columns to avoid overlap.
+                time_path = config.time_column.path
+                if time_path:
+                    filtered = [dc for dc in config.data_columns if dc.path != time_path]
+                    if len(filtered) != len(config.data_columns):
+                        config = config.model_copy(update={"data_columns": filtered})
+                else:
+                    # The backend only detects arrow timestamp types. Fall back to
+                    # looking for an integer column whose name contains "time",
+                    # preferring columns that start with "time".
+                    _integer_types = {
+                        ChannelDataType.INT_32,
+                        ChannelDataType.INT_64,
+                        ChannelDataType.UINT_32,
+                        ChannelDataType.UINT_64,
+                    }
+                    match = None
+                    for dc in config.data_columns:
+                        if dc.data_type in _integer_types and dc.name.lower().startswith("time"):
+                            match = dc
+                            break
+                    if match is None:
+                        for dc in config.data_columns:
+                            if dc.data_type in _integer_types and "time" in dc.name.lower():
+                                match = dc
+                                break
+                    if match is not None:
+                        config = config.model_copy(
+                            update={
+                                "time_column": ParquetTimeColumn(path=match.path),
+                                "data_columns": [
+                                    c for c in config.data_columns if c.path != match.path
+                                ],
+                            }
+                        )
+                return config
+            elif proto.HasField("single_channel_per_row"):
+                return ParquetSingleChannelPerRowImportConfig._from_proto(
+                    proto, footer_offset=footer_offset, footer_length=footer_length
+                )
 
         raise ValueError("Server returned an empty DetectConfig response.")
diff --git a/python/lib/sift_client/resources/sync_stubs/__init__.pyi b/python/lib/sift_client/resources/sync_stubs/__init__.pyi
index a4a85d18e..97f70bf9e 100644
--- a/python/lib/sift_client/resources/sync_stubs/__init__.pyi
+++ b/python/lib/sift_client/resources/sync_stubs/__init__.pyi
@@ -673,10 +673,10 @@ class DataImportAPI:
     def import_from_path(
         self,
         file_path: str | Path,
+        asset_name: str,
         *,
         config: ImportConfig | None = None,
         data_type: DataTypeKey | None = None,
-        asset_name: str | None = None,
         run_name: str | None = None,
         run_id: str | None = None,
         polling_interval_secs: int = 5,
@@ -689,23 +689,23 @@ class DataImportAPI:
         for the import to complete.
 
         When ``config`` is omitted the file format is auto-detected via
-        :meth:`detect_config` and a :class:`CsvImportConfig` is built using
-        the provided ``asset_name`` and optional ``run_name`` / ``run_id``.
+        :meth:`detect_config`. The ``asset_name`` is always applied to
+        the config. If neither ``run_name`` nor ``run_id`` is provided
+        (and none is set on the config), ``run_name`` defaults to the
+        filename.
 
         Args:
             file_path: Path to the local file to import.
+            asset_name: Name of the asset to import data into.
             config: Import configuration describing the file format and column
-                mapping. When provided, ``asset_name``, ``run_name``,
-                ``run_id``, and ``data_type`` are ignored.
+                mapping. When provided, ``data_type`` is ignored.
             data_type: Explicit data type key. Required for formats like
                 Parquet where the extension alone is ambiguous. Only used
                 when ``config`` is not provided.
-            asset_name: Name of the asset to import into. Required when
-                ``config`` is not provided.
-            run_name: Optional run name. Only used when ``config`` is not
-                provided.
-            run_id: Optional existing run ID. Only used when ``config`` is not
-                provided.
+            run_name: Run name to use. Overrides any value on the config.
+                Defaults to the filename if neither ``run_name`` nor
+                ``run_id`` is set.
+            run_id: Existing run ID to use. Overrides any value on the config.
             polling_interval_secs: Seconds between status polls. Defaults to 5s.
             timeout_secs: Maximum seconds to wait. If None, polls indefinitely.
             show_progress: If True, display a progress spinner while waiting.
@@ -716,7 +716,6 @@ class DataImportAPI:
 
         Raises:
             FileNotFoundError: If the file does not exist.
-            ValueError: If neither ``config`` nor ``asset_name`` is provided.
         """
         ...
 
diff --git a/python/lib/sift_client/sift_types/data_import.py b/python/lib/sift_client/sift_types/data_import.py
index b238ffffe..e50d0b123 100644
--- a/python/lib/sift_client/sift_types/data_import.py
+++ b/python/lib/sift_client/sift_types/data_import.py
@@ -12,9 +12,28 @@
     DATA_TYPE_KEY_PARQUET_FLATDATASET,
     DATA_TYPE_KEY_PARQUET_SINGLE_CHANNEL_PER_ROW,
     DATA_TYPE_KEY_TDMS,
+    PARQUET_COMPLEX_TYPES_IMPORT_MODE_BOTH,
+    PARQUET_COMPLEX_TYPES_IMPORT_MODE_BYTES,
+    PARQUET_COMPLEX_TYPES_IMPORT_MODE_IGNORE,
+    PARQUET_COMPLEX_TYPES_IMPORT_MODE_STRING,
 )
 from sift.data_imports.v2.data_imports_pb2 import CsvConfig as CsvConfigProto
 from sift.data_imports.v2.data_imports_pb2 import CsvTimeColumn as CsvTimeColumnProto
+from sift.data_imports.v2.data_imports_pb2 import ParquetConfig as ParquetConfigProto
+from sift.data_imports.v2.data_imports_pb2 import ParquetDataColumn as ParquetDataColumnProto
+from sift.data_imports.v2.data_imports_pb2 import (
+    ParquetFlatDatasetConfig as ParquetFlatDatasetConfigProto,
+)
+from sift.data_imports.v2.data_imports_pb2 import (
+    ParquetSingleChannelPerRowConfig as ParquetSingleChannelPerRowConfigProto,
+)
+from sift.data_imports.v2.data_imports_pb2 import (
+    ParquetSingleChannelPerRowMultiChannelConfig as ParquetSingleChannelPerRowMultiChannelConfigProto,
+)
+from sift.data_imports.v2.data_imports_pb2 import (
+    ParquetSingleChannelPerRowSingleChannelConfig as ParquetSingleChannelPerRowSingleChannelConfigProto,
+)
+from sift.data_imports.v2.data_imports_pb2 import ParquetTimeColumn as ParquetTimeColumnProto
 from sift.data_imports.v2.data_imports_pb2 import TimeFormat as TimeFormatProto
 
 from sift_client._internal.util.timestamp import to_pb_timestamp
@@ -183,3 +202,318 @@ def _from_proto(cls, proto: CsvConfigProto) -> CsvImportConfig:
             time_column=time_column,
             data_columns=data_columns,
         )
+
+
+class ParquetComplexTypesImportMode(Enum):
+    """Controls how complex Parquet types (maps, lists, structs) are imported."""
+
+    IGNORE = PARQUET_COMPLEX_TYPES_IMPORT_MODE_IGNORE
+    BOTH = PARQUET_COMPLEX_TYPES_IMPORT_MODE_BOTH
+    STRING = PARQUET_COMPLEX_TYPES_IMPORT_MODE_STRING
+    BYTES = PARQUET_COMPLEX_TYPES_IMPORT_MODE_BYTES
+
+
+class ParquetTimeColumn(BaseModel):
+    """Time column configuration for Parquet imports.
+
+    Attributes:
+        path: The column path in the Parquet schema (e.g. ``"timestamp"``).
+        format: The time format used in this column.
+        relative_start_time: Required when using a relative time format.
+    """
+
+    model_config = ConfigDict(frozen=True)
+
+    path: str
+    format: TimeFormat = TimeFormat.ABSOLUTE_UNIX_NANOSECONDS
+    relative_start_time: datetime | None = None
+
+    def _to_proto(self) -> ParquetTimeColumnProto:
+        if not self.path:
+            raise ValueError("ParquetTimeColumn.path must be set before importing.")
+        proto = ParquetTimeColumnProto(
+            path=self.path,
+            format=self.format.value,
+        )
+        if self.relative_start_time is not None:
+            proto.relative_start_time.CopyFrom(to_pb_timestamp(self.relative_start_time))
+        return proto
+
+    @classmethod
+    def _from_proto(cls, proto: ParquetTimeColumnProto) -> ParquetTimeColumn:
+        relative_start_time = None
+        if proto.HasField("relative_start_time"):
+            from datetime import timezone
+
+            relative_start_time = proto.relative_start_time.ToDatetime(tzinfo=timezone.utc)
+
+        fmt = TimeFormat(proto.format) if proto.format else TimeFormat.ABSOLUTE_UNIX_NANOSECONDS
+        return cls(
+            path=proto.path or "",
+            format=fmt,
+            relative_start_time=relative_start_time,
+        )
+
+    @model_validator(mode="after")
+    def _check_relative_start_time(self) -> ParquetTimeColumn:
+        if self.format.name.startswith("RELATIVE_") and self.relative_start_time is None:
+            raise ValueError(
+                f"'relative_start_time' is required when using a relative time format ({self.format.name})."
+            )
+        return self
+
+
+class ParquetDataColumn(BaseModel):
+    """A data column definition for Parquet flat dataset imports.
+
+    Attributes:
+        path: The column path in the Parquet schema.
+        name: Channel name.
+        data_type: The data type of the channel values.
+        units: Optional units string.
+        description: Optional channel description.
+    """
+
+    model_config = ConfigDict(frozen=True)
+
+    path: str
+    name: str
+    data_type: ChannelDataType
+    units: str = ""
+    description: str = ""
+
+
+class ParquetFlatDatasetImportConfig(BaseModel):
+    """Configuration for importing a Parquet file with a flat dataset layout.
+
+    Each column in the file maps to a separate channel.
+
+    Attributes:
+        asset_name: Name of the asset to import data into.
+        run_name: Name for the run. Ignored if ``run_id`` is set.
+        run_id: ID of an existing run to append data to.
+        time_column: Time column configuration.
+        data_columns: List of data column definitions.
+        footer_offset: Byte offset where the Parquet footer begins. Populated
+            automatically when using :meth:`~DataImportAPIAsync.detect_config`.
+        footer_length: Length of the Parquet footer in bytes. Populated
+            automatically when using :meth:`~DataImportAPIAsync.detect_config`.
+        complex_types_import_mode: How to handle complex Parquet types.
+    """
+
+    model_config = ConfigDict(frozen=True)
+
+    asset_name: str
+    run_name: str | None = None
+    run_id: str | None = None
+    time_column: ParquetTimeColumn
+    data_columns: list[ParquetDataColumn]
+    footer_offset: int = 0
+    footer_length: int = 0
+    complex_types_import_mode: ParquetComplexTypesImportMode = ParquetComplexTypesImportMode.IGNORE
+
+    def _to_proto(self) -> ParquetConfigProto:
+        flat_dataset = ParquetFlatDatasetConfigProto(
+            time_column=self.time_column._to_proto(),
+            data_columns=[
+                ParquetDataColumnProto(
+                    path=dc.path,
+                    channel_config=ChannelConfigProto(
+                        name=dc.name,
+                        data_type=dc.data_type.value,
+                        units=dc.units,
+                        description=dc.description,
+                    ),
+                )
+                for dc in self.data_columns
+            ],
+        )
+        return ParquetConfigProto(
+            asset_name=self.asset_name,
+            run_name=self.run_name or "",
+            run_id=self.run_id or "",
+            flat_dataset=flat_dataset,
+            footer_offset=self.footer_offset,
+            footer_length=self.footer_length,
+            complex_types_import_mode=self.complex_types_import_mode.value,
+        )
+
+    @classmethod
+    def _from_proto(
+        cls,
+        proto: ParquetConfigProto,
+        footer_offset: int = 0,
+        footer_length: int = 0,
+    ) -> ParquetFlatDatasetImportConfig:
+        """Create from a proto ParquetConfig with a flat_dataset config."""
+        fd = proto.flat_dataset
+        time_column = ParquetTimeColumn._from_proto(fd.time_column)
+        data_columns = [
+            ParquetDataColumn(
+                path=dc.path,
+                name=dc.channel_config.name,
+                data_type=ChannelDataType(dc.channel_config.data_type),
+                units=dc.channel_config.units,
+                description=dc.channel_config.description,
+            )
+            for dc in fd.data_columns
+        ]
+        mode = proto.complex_types_import_mode
+        return cls(
+            asset_name=proto.asset_name,
+            run_name=proto.run_name or None,
+            run_id=proto.run_id or None,
+            time_column=time_column,
+            data_columns=data_columns,
+            footer_offset=footer_offset or proto.footer_offset,
+            footer_length=footer_length or proto.footer_length,
+            complex_types_import_mode=ParquetComplexTypesImportMode(mode)
+            if mode
+            else ParquetComplexTypesImportMode.IGNORE,
+        )
+
+
+class ParquetSingleChannelConfig(BaseModel):
+    """Configuration for a single-channel Parquet single-channel-per-row import.
+
+    Attributes:
+        data_path: The column path containing channel data.
+        name: Channel name.
+        data_type: The data type of the channel values.
+        units: Optional units string.
+        description: Optional channel description.
+    """
+
+    model_config = ConfigDict(frozen=True)
+
+    data_path: str
+    name: str
+    data_type: ChannelDataType
+    units: str = ""
+    description: str = ""
+
+
+class ParquetMultiChannelConfig(BaseModel):
+    """Configuration for a multi-channel Parquet single-channel-per-row import.
+
+    Attributes:
+        name_path: The column path that identifies the channel name per row.
+        data_path: The column path containing channel data.
+    """
+
+    model_config = ConfigDict(frozen=True)
+
+    name_path: str
+    data_path: str
+
+
+class ParquetSingleChannelPerRowImportConfig(BaseModel):
+    """Configuration for importing a Parquet file where each row represents
+    a single channel's data point.
+
+    Exactly one of ``single_channel`` or ``multi_channel`` must be set.
+
+    Attributes:
+        asset_name: Name of the asset to import data into.
+        run_name: Name for the run. Ignored if ``run_id`` is set.
+        run_id: ID of an existing run to append data to.
+        time_column: Time column configuration.
+        single_channel: Set when the entire file contains data for one channel.
+        multi_channel: Set when each row identifies its channel via a name column.
+        footer_offset: Byte offset where the Parquet footer begins. Populated
+            automatically when using :meth:`~DataImportAPIAsync.detect_config`.
+        footer_length: Length of the Parquet footer in bytes. Populated
+            automatically when using :meth:`~DataImportAPIAsync.detect_config`.
+        complex_types_import_mode: How to handle complex Parquet types.
+    """
+
+    model_config = ConfigDict(frozen=True)
+
+    asset_name: str
+    run_name: str | None = None
+    run_id: str | None = None
+    time_column: ParquetTimeColumn
+    single_channel: ParquetSingleChannelConfig | None = None
+    multi_channel: ParquetMultiChannelConfig | None = None
+    footer_offset: int = 0
+    footer_length: int = 0
+    complex_types_import_mode: ParquetComplexTypesImportMode = ParquetComplexTypesImportMode.IGNORE
+
+    def _to_proto(self) -> ParquetConfigProto:
+        scpr = ParquetSingleChannelPerRowConfigProto(
+            time_column=self.time_column._to_proto(),
+        )
+        if self.single_channel is not None:
+            sc = self.single_channel
+            scpr.single_channel.CopyFrom(
+                ParquetSingleChannelPerRowSingleChannelConfigProto(
+                    data_path=sc.data_path,
+                    channel=ChannelConfigProto(
+                        name=sc.name,
+                        data_type=sc.data_type.value,
+                        units=sc.units,
+                        description=sc.description,
+                    ),
+                )
+            )
+        elif self.multi_channel is not None:
+            scpr.multi_channel.CopyFrom(
+                ParquetSingleChannelPerRowMultiChannelConfigProto(
+                    name_path=self.multi_channel.name_path,
+                    data_path=self.multi_channel.data_path,
+                )
+            )
+        return ParquetConfigProto(
+            asset_name=self.asset_name,
+            run_name=self.run_name or "",
+            run_id=self.run_id or "",
+            single_channel_per_row=scpr,
+            footer_offset=self.footer_offset,
+            footer_length=self.footer_length,
+            complex_types_import_mode=self.complex_types_import_mode.value,
+        )
+
+    @classmethod
+    def _from_proto(
+        cls,
+        proto: ParquetConfigProto,
+        footer_offset: int = 0,
+        footer_length: int = 0,
+    ) -> ParquetSingleChannelPerRowImportConfig:
+        """Create from a proto ParquetConfig with a single_channel_per_row config."""
+        scpr = proto.single_channel_per_row
+
+        time_column = ParquetTimeColumn._from_proto(scpr.time_column)
+
+        single_channel = None
+        multi_channel = None
+        if scpr.HasField("single_channel"):
+            sc = scpr.single_channel
+            single_channel = ParquetSingleChannelConfig(
+                data_path=sc.data_path,
+                name=sc.channel.name,
+                data_type=ChannelDataType(sc.channel.data_type),
+                units=sc.channel.units,
+                description=sc.channel.description,
+            )
+        elif scpr.HasField("multi_channel"):
+            mc = scpr.multi_channel
+            multi_channel = ParquetMultiChannelConfig(
+                name_path=mc.name_path,
+                data_path=mc.data_path,
+            )
+
+        mode = proto.complex_types_import_mode
+        return cls(
+            asset_name=proto.asset_name,
+            run_name=proto.run_name or None,
+            run_id=proto.run_id or None,
+            time_column=time_column,
+            single_channel=single_channel,
+            multi_channel=multi_channel,
+            footer_offset=footer_offset or proto.footer_offset,
+            footer_length=footer_length or proto.footer_length,
+            complex_types_import_mode=ParquetComplexTypesImportMode(mode)
+            if mode
+            else ParquetComplexTypesImportMode.IGNORE,
+        )

From f0c186ba316ce2347d273de9ccc7b5bf076da754 Mon Sep 17 00:00:00 2001
From: Wei Qi Lu <wei@siftstack.com>
Date: Tue, 7 Apr 2026 13:50:48 -0700
Subject: [PATCH 13/52] unfrozen config model refactor

---
 .../lib/sift_client/resources/data_imports.py | 43 +++++++------------
 .../lib/sift_client/sift_types/data_import.py | 20 +--------
 2 files changed, 16 insertions(+), 47 deletions(-)

diff --git a/python/lib/sift_client/resources/data_imports.py b/python/lib/sift_client/resources/data_imports.py
index 05a83243d..b7ed18140 100644
--- a/python/lib/sift_client/resources/data_imports.py
+++ b/python/lib/sift_client/resources/data_imports.py
@@ -92,14 +92,13 @@ async def import_from_path(
         if config is None:
             config = await self.detect_config(file_path, data_type=data_type)
 
-        updates: dict = {"asset_name": asset_name}
-        if run_name is not None:
-            updates["run_name"] = run_name
-        elif run_id is not None:
-            updates["run_id"] = run_id
-        elif not getattr(config, "run_name", None) and not getattr(config, "run_id", None):
-            updates["run_name"] = path.name
-        config = config.model_copy(update=updates)
+        config.asset_name = asset_name
+        if run_id is not None:
+            config.run_id = run_id
+        elif run_name is not None:
+            config.run_name = run_name
+        elif not config.run_name and not config.run_id:
+            config.run_name = path.name
 
         if isinstance(
             config, (ParquetFlatDatasetImportConfig, ParquetSingleChannelPerRowImportConfig)
@@ -108,12 +107,8 @@ async def import_from_path(
                 footer_bytes, footer_offset = await run_sync_function(
                     lambda: extract_parquet_footer(path)
                 )
-                config = config.model_copy(
-                    update={
-                        "footer_offset": footer_offset,
-                        "footer_length": len(footer_bytes),
-                    }
-                )
+                config.footer_offset = footer_offset
+                config.footer_length = len(footer_bytes)
 
         _, upload_url = await self._low_level_client.create_from_upload(config)
 
@@ -214,9 +209,7 @@ def _read_sample() -> bytes:
             config = CsvImportConfig._from_proto(response.csv_config)
             # Filter out the time column from data_columns to avoid overlap.
             time_col = config.time_column.column
-            filtered = [dc for dc in config.data_columns if dc.column != time_col]
-            if len(filtered) != len(config.data_columns):
-                config = config.model_copy(update={"data_columns": filtered})
+            config.data_columns = [dc for dc in config.data_columns if dc.column != time_col]
             return config
 
         if response.HasField("parquet_config"):
@@ -228,9 +221,7 @@ def _read_sample() -> bytes:
                 # Filter out the time column from data_columns to avoid overlap.
                 time_path = config.time_column.path
                 if time_path:
-                    filtered = [dc for dc in config.data_columns if dc.path != time_path]
-                    if len(filtered) != len(config.data_columns):
-                        config = config.model_copy(update={"data_columns": filtered})
+                    config.data_columns = [dc for dc in config.data_columns if dc.path != time_path]
                 else:
                     # The backend only detects arrow timestamp types. Fall back to
                     # looking for an integer column whose name contains "time",
@@ -252,14 +243,10 @@ def _read_sample() -> bytes:
                                 match = dc
                                 break
                     if match is not None:
-                        config = config.model_copy(
-                            update={
-                                "time_column": ParquetTimeColumn(path=match.path),
-                                "data_columns": [
-                                    c for c in config.data_columns if c.path != match.path
-                                ],
-                            }
-                        )
+                        config.time_column = ParquetTimeColumn(path=match.path)
+                        config.data_columns = [
+                            c for c in config.data_columns if c.path != match.path
+                        ]
                 return config
             elif proto.HasField("single_channel_per_row"):
                 return ParquetSingleChannelPerRowImportConfig._from_proto(
diff --git a/python/lib/sift_client/sift_types/data_import.py b/python/lib/sift_client/sift_types/data_import.py
index e50d0b123..03e8416f9 100644
--- a/python/lib/sift_client/sift_types/data_import.py
+++ b/python/lib/sift_client/sift_types/data_import.py
@@ -3,7 +3,7 @@
 from datetime import datetime  # noqa: TC003
 from enum import Enum
 
-from pydantic import BaseModel, ConfigDict, model_validator
+from pydantic import BaseModel, model_validator
 from sift.common.type.v1.channel_config_pb2 import ChannelConfig as ChannelConfigProto
 from sift.data_imports.v2.data_imports_pb2 import (
     DATA_TYPE_KEY_CH10,
@@ -86,8 +86,6 @@ class CsvTimeColumn(BaseModel):
         relative_start_time: Required when using a relative time format.
     """
 
-    model_config = ConfigDict(frozen=True)
-
     column: int
     format: TimeFormat
     relative_start_time: datetime | None = None
@@ -121,8 +119,6 @@ class CsvDataColumn(BaseModel):
         description: Optional channel description.
     """
 
-    model_config = ConfigDict(frozen=True)
-
     column: int
     name: str
     data_type: ChannelDataType
@@ -142,8 +138,6 @@ class CsvImportConfig(BaseModel):
         data_columns: List of data column definitions.
     """
 
-    model_config = ConfigDict(frozen=True)
-
     asset_name: str
     run_name: str | None = None
     run_id: str | None = None
@@ -222,8 +216,6 @@ class ParquetTimeColumn(BaseModel):
         relative_start_time: Required when using a relative time format.
     """
 
-    model_config = ConfigDict(frozen=True)
-
     path: str
     format: TimeFormat = TimeFormat.ABSOLUTE_UNIX_NANOSECONDS
     relative_start_time: datetime | None = None
@@ -274,8 +266,6 @@ class ParquetDataColumn(BaseModel):
         description: Optional channel description.
     """
 
-    model_config = ConfigDict(frozen=True)
-
     path: str
     name: str
     data_type: ChannelDataType
@@ -301,8 +291,6 @@ class ParquetFlatDatasetImportConfig(BaseModel):
         complex_types_import_mode: How to handle complex Parquet types.
     """
 
-    model_config = ConfigDict(frozen=True)
-
     asset_name: str
     run_name: str | None = None
     run_id: str | None = None
@@ -384,8 +372,6 @@ class ParquetSingleChannelConfig(BaseModel):
         description: Optional channel description.
     """
 
-    model_config = ConfigDict(frozen=True)
-
     data_path: str
     name: str
     data_type: ChannelDataType
@@ -401,8 +387,6 @@ class ParquetMultiChannelConfig(BaseModel):
         data_path: The column path containing channel data.
     """
 
-    model_config = ConfigDict(frozen=True)
-
     name_path: str
     data_path: str
 
@@ -427,8 +411,6 @@ class ParquetSingleChannelPerRowImportConfig(BaseModel):
         complex_types_import_mode: How to handle complex Parquet types.
     """
 
-    model_config = ConfigDict(frozen=True)
-
     asset_name: str
     run_name: str | None = None
     run_id: str | None = None

From 593f3dc602646c7f9e2aa9f144f07e655dd2e651 Mon Sep 17 00:00:00 2001
From: Wei Qi Lu <wei@siftstack.com>
Date: Tue, 7 Apr 2026 13:59:23 -0700
Subject: [PATCH 14/52] mypy fix

---
 .../lib/sift_client/resources/data_imports.py | 34 +++++++++++--------
 1 file changed, 19 insertions(+), 15 deletions(-)

diff --git a/python/lib/sift_client/resources/data_imports.py b/python/lib/sift_client/resources/data_imports.py
index b7ed18140..d24ac0082 100644
--- a/python/lib/sift_client/resources/data_imports.py
+++ b/python/lib/sift_client/resources/data_imports.py
@@ -174,12 +174,12 @@ async def detect_config(
         elif data_type is not None:
             data_type_key = data_type
         else:
-            data_type_key = EXTENSION_TO_DATA_TYPE_KEY.get(ext)
-            if data_type_key is None:
+            if ext not in EXTENSION_TO_DATA_TYPE_KEY:
                 raise ValueError(
                     f"Unsupported file extension '{ext}'. "
                     f"Supported: {', '.join(sorted(EXTENSION_TO_DATA_TYPE_KEY))}"
                 )
+            data_type_key = EXTENSION_TO_DATA_TYPE_KEY[ext]
 
         is_parquet = data_type_key in (
             DataTypeKey.PARQUET_FLATDATASET,
@@ -206,22 +206,26 @@ def _read_sample() -> bytes:
         response = await self._low_level_client.detect_config(sample, data_type_key.value)
 
         if response.HasField("csv_config"):
-            config = CsvImportConfig._from_proto(response.csv_config)
+            csv_config = CsvImportConfig._from_proto(response.csv_config)
             # Filter out the time column from data_columns to avoid overlap.
-            time_col = config.time_column.column
-            config.data_columns = [dc for dc in config.data_columns if dc.column != time_col]
-            return config
+            time_col = csv_config.time_column.column
+            csv_config.data_columns = [
+                dc for dc in csv_config.data_columns if dc.column != time_col
+            ]
+            return csv_config
 
         if response.HasField("parquet_config"):
             proto = response.parquet_config
             if proto.HasField("flat_dataset"):
-                config = ParquetFlatDatasetImportConfig._from_proto(
+                parquet_config = ParquetFlatDatasetImportConfig._from_proto(
                     proto, footer_offset=footer_offset, footer_length=footer_length
                 )
                 # Filter out the time column from data_columns to avoid overlap.
-                time_path = config.time_column.path
+                time_path = parquet_config.time_column.path
                 if time_path:
-                    config.data_columns = [dc for dc in config.data_columns if dc.path != time_path]
+                    parquet_config.data_columns = [
+                        dc for dc in parquet_config.data_columns if dc.path != time_path
+                    ]
                 else:
                     # The backend only detects arrow timestamp types. Fall back to
                     # looking for an integer column whose name contains "time",
@@ -233,21 +237,21 @@ def _read_sample() -> bytes:
                         ChannelDataType.UINT_64,
                     }
                     match = None
-                    for dc in config.data_columns:
+                    for dc in parquet_config.data_columns:
                         if dc.data_type in _integer_types and dc.name.lower().startswith("time"):
                             match = dc
                             break
                     if match is None:
-                        for dc in config.data_columns:
+                        for dc in parquet_config.data_columns:
                             if dc.data_type in _integer_types and "time" in dc.name.lower():
                                 match = dc
                                 break
                     if match is not None:
-                        config.time_column = ParquetTimeColumn(path=match.path)
-                        config.data_columns = [
-                            c for c in config.data_columns if c.path != match.path
+                        parquet_config.time_column = ParquetTimeColumn(path=match.path)
+                        parquet_config.data_columns = [
+                            c for c in parquet_config.data_columns if c.path != match.path
                         ]
-                return config
+                return parquet_config
             elif proto.HasField("single_channel_per_row"):
                 return ParquetSingleChannelPerRowImportConfig._from_proto(
                     proto, footer_offset=footer_offset, footer_length=footer_length

From 8725f594639e2076d834629226a30f4c2ebf12ef Mon Sep 17 00:00:00 2001
From: Wei Qi Lu <wei@siftstack.com>
Date: Tue, 7 Apr 2026 14:14:29 -0700
Subject: [PATCH 15/52] small refactor moving configs to sift_types

---
 .../_internal/low_level_wrappers/data_imports.py         | 6 +-----
 python/lib/sift_client/resources/data_imports.py         | 2 +-
 python/lib/sift_client/sift_types/data_import.py         | 9 +++++++++
 3 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/python/lib/sift_client/_internal/low_level_wrappers/data_imports.py b/python/lib/sift_client/_internal/low_level_wrappers/data_imports.py
index c64521963..e7afb9a76 100644
--- a/python/lib/sift_client/_internal/low_level_wrappers/data_imports.py
+++ b/python/lib/sift_client/_internal/low_level_wrappers/data_imports.py
@@ -13,6 +13,7 @@
 from sift_client._internal.low_level_wrappers.base import LowLevelClientBase
 from sift_client.sift_types.data_import import (
     CsvImportConfig,
+    ImportConfig,
     ParquetFlatDatasetImportConfig,
     ParquetSingleChannelPerRowImportConfig,
 )
@@ -23,11 +24,6 @@
 
     from sift_client.transport.grpc_transport import GrpcClient
 
-# Union of all supported config types. Extend this as new formats are added.
-ImportConfig = (
-    CsvImportConfig | ParquetFlatDatasetImportConfig | ParquetSingleChannelPerRowImportConfig
-)
-
 
 def _set_config_on_request(
     request: CreateDataImportFromUploadRequest,
diff --git a/python/lib/sift_client/resources/data_imports.py b/python/lib/sift_client/resources/data_imports.py
index d24ac0082..e6ae7eb21 100644
--- a/python/lib/sift_client/resources/data_imports.py
+++ b/python/lib/sift_client/resources/data_imports.py
@@ -13,13 +13,13 @@
     EXTENSION_TO_DATA_TYPE_KEY,
     CsvImportConfig,
     DataTypeKey,
+    ImportConfig,
     ParquetFlatDatasetImportConfig,
     ParquetSingleChannelPerRowImportConfig,
     ParquetTimeColumn,
 )
 
 if TYPE_CHECKING:
-    from sift_client._internal.low_level_wrappers.data_imports import ImportConfig
     from sift_client.client import SiftClient
     from sift_client.sift_types.job import Job
 
diff --git a/python/lib/sift_client/sift_types/data_import.py b/python/lib/sift_client/sift_types/data_import.py
index 03e8416f9..a0027df7a 100644
--- a/python/lib/sift_client/sift_types/data_import.py
+++ b/python/lib/sift_client/sift_types/data_import.py
@@ -2,6 +2,7 @@
 
 from datetime import datetime  # noqa: TC003
 from enum import Enum
+from typing import Union
 
 from pydantic import BaseModel, model_validator
 from sift.common.type.v1.channel_config_pb2 import ChannelConfig as ChannelConfigProto
@@ -499,3 +500,11 @@ def _from_proto(
             if mode
             else ParquetComplexTypesImportMode.IGNORE,
         )
+
+
+# Note: Using Union instead of | syntax for Python 3.9 compatibility at module level.
+# While `from __future__ import annotations` allows | in type hints (they're strings),
+# module-level type aliases are evaluated at runtime and require Union in Python <3.10.
+ImportConfig = Union[
+    CsvImportConfig, ParquetFlatDatasetImportConfig, ParquetSingleChannelPerRowImportConfig
+]

From fd925eb6692c0d6dac215f20a6d5682b270c5f0d Mon Sep 17 00:00:00 2001
From: Wei Qi Lu <wei@siftstack.com>
Date: Tue, 7 Apr 2026 14:37:14 -0700
Subject: [PATCH 16/52] add a helper function to get a specific data_column

---
 .../lib/sift_client/sift_types/data_import.py | 34 +++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/python/lib/sift_client/sift_types/data_import.py b/python/lib/sift_client/sift_types/data_import.py
index a0027df7a..2b241330d 100644
--- a/python/lib/sift_client/sift_types/data_import.py
+++ b/python/lib/sift_client/sift_types/data_import.py
@@ -146,6 +146,23 @@ class CsvImportConfig(BaseModel):
     time_column: CsvTimeColumn
     data_columns: list[CsvDataColumn]
 
+    def get_column(self, name: str) -> CsvDataColumn:
+        """Look up a data column by name.
+
+        Args:
+            name: The channel name to search for.
+
+        Returns:
+            The matching data column.
+
+        Raises:
+            KeyError: If no column with the given name exists.
+        """
+        for dc in self.data_columns:
+            if dc.name == name:
+                return dc
+        raise KeyError(f"No data column named '{name}'")
+
     def _to_proto(self) -> CsvConfigProto:
         return CsvConfigProto(
             asset_name=self.asset_name,
@@ -301,6 +318,23 @@ class ParquetFlatDatasetImportConfig(BaseModel):
     footer_length: int = 0
     complex_types_import_mode: ParquetComplexTypesImportMode = ParquetComplexTypesImportMode.IGNORE
 
+    def get_column(self, name: str) -> ParquetDataColumn:
+        """Look up a data column by name.
+
+        Args:
+            name: The channel name to search for.
+
+        Returns:
+            The matching data column.
+
+        Raises:
+            KeyError: If no column with the given name exists.
+        """
+        for dc in self.data_columns:
+            if dc.name == name:
+                return dc
+        raise KeyError(f"No data column named '{name}'")
+
     def _to_proto(self) -> ParquetConfigProto:
         flat_dataset = ParquetFlatDatasetConfigProto(
             time_column=self.time_column._to_proto(),

From 85e87be40d613b4c488d1328d80506b4d3e822c4 Mon Sep 17 00:00:00 2001
From: Wei Qi Lu <wei@siftstack.com>
Date: Tue, 7 Apr 2026 14:43:55 -0700
Subject: [PATCH 17/52] add unit tests

---
 .../_tests/resources/test_data_imports.py     | 192 ++++++++++++++++++
 1 file changed, 192 insertions(+)
 create mode 100644 python/lib/sift_client/_tests/resources/test_data_imports.py

diff --git a/python/lib/sift_client/_tests/resources/test_data_imports.py b/python/lib/sift_client/_tests/resources/test_data_imports.py
new file mode 100644
index 000000000..9c9185dc8
--- /dev/null
+++ b/python/lib/sift_client/_tests/resources/test_data_imports.py
@@ -0,0 +1,192 @@
+"""Unit tests for data import config models and helpers."""
+
+from datetime import datetime, timezone
+
+import pytest
+
+from sift_client.sift_types.channel import ChannelDataType
+from sift_client.sift_types.data_import import (
+    EXTENSION_TO_DATA_TYPE_KEY,
+    CsvDataColumn,
+    CsvImportConfig,
+    CsvTimeColumn,
+    DataTypeKey,
+    ParquetDataColumn,
+    ParquetFlatDatasetImportConfig,
+    ParquetTimeColumn,
+    TimeFormat,
+)
+
+
+@pytest.fixture
+def csv_config():
+    return CsvImportConfig(
+        asset_name="test_asset",
+        run_name="test_run",
+        time_column=CsvTimeColumn(
+            column=1,
+            format=TimeFormat.ABSOLUTE_RFC3339,
+        ),
+        data_columns=[
+            CsvDataColumn(column=2, name="cpu_util", data_type=ChannelDataType.DOUBLE),
+            CsvDataColumn(column=3, name="status_flags", data_type=ChannelDataType.INT_32),
+            CsvDataColumn(column=4, name="temperature", data_type=ChannelDataType.FLOAT),
+        ],
+    )
+
+
+@pytest.fixture
+def parquet_config():
+    return ParquetFlatDatasetImportConfig(
+        asset_name="test_asset",
+        run_name="test_run",
+        time_column=ParquetTimeColumn(path="timestamp"),
+        data_columns=[
+            ParquetDataColumn(path="cpu_util", name="cpu_util", data_type=ChannelDataType.DOUBLE),
+            ParquetDataColumn(
+                path="status_flags", name="status_flags", data_type=ChannelDataType.INT_32
+            ),
+            ParquetDataColumn(
+                path="temperature", name="temperature", data_type=ChannelDataType.FLOAT
+            ),
+        ],
+    )
+
+
+class TestCsvConfigMutability:
+    def test_mutate_asset_name(self, csv_config):
+        csv_config.asset_name = "new_asset"
+        assert csv_config.asset_name == "new_asset"
+
+    def test_mutate_run_name(self, csv_config):
+        csv_config.run_name = "new_run"
+        assert csv_config.run_name == "new_run"
+
+    def test_mutate_column_data_type(self, csv_config):
+        csv_config.data_columns[1].data_type = ChannelDataType.STRING
+        assert csv_config.data_columns[1].data_type == ChannelDataType.STRING
+
+    def test_mutate_column_name(self, csv_config):
+        csv_config.data_columns[0].name = "cpu_utilization"
+        assert csv_config.data_columns[0].name == "cpu_utilization"
+
+    def test_append_column(self, csv_config):
+        csv_config.data_columns.append(
+            CsvDataColumn(column=5, name="pressure", data_type=ChannelDataType.DOUBLE)
+        )
+        assert len(csv_config.data_columns) == 4
+        assert csv_config.data_columns[-1].name == "pressure"
+
+    def test_remove_column(self, csv_config):
+        csv_config.data_columns = [
+            dc for dc in csv_config.data_columns if dc.name != "status_flags"
+        ]
+        assert len(csv_config.data_columns) == 2
+        assert all(dc.name != "status_flags" for dc in csv_config.data_columns)
+
+
+class TestParquetConfigMutability:
+    def test_mutate_asset_name(self, parquet_config):
+        parquet_config.asset_name = "new_asset"
+        assert parquet_config.asset_name == "new_asset"
+
+    def test_mutate_column_data_type(self, parquet_config):
+        parquet_config.data_columns[1].data_type = ChannelDataType.STRING
+        assert parquet_config.data_columns[1].data_type == ChannelDataType.STRING
+
+    def test_append_column(self, parquet_config):
+        parquet_config.data_columns.append(
+            ParquetDataColumn(path="pressure", name="pressure", data_type=ChannelDataType.DOUBLE)
+        )
+        assert len(parquet_config.data_columns) == 4
+
+
+class TestGetColumn:
+    def test_csv_get_column(self, csv_config):
+        col = csv_config.get_column("cpu_util")
+        assert col.name == "cpu_util"
+        assert col.data_type == ChannelDataType.DOUBLE
+
+    def test_csv_get_column_not_found(self, csv_config):
+        with pytest.raises(KeyError, match="nonexistent"):
+            csv_config.get_column("nonexistent")
+
+    def test_csv_get_column_mutate(self, csv_config):
+        csv_config.get_column("status_flags").data_type = ChannelDataType.STRING
+        assert csv_config.data_columns[1].data_type == ChannelDataType.STRING
+
+    def test_parquet_get_column(self, parquet_config):
+        col = parquet_config.get_column("temperature")
+        assert col.name == "temperature"
+        assert col.data_type == ChannelDataType.FLOAT
+
+    def test_parquet_get_column_not_found(self, parquet_config):
+        with pytest.raises(KeyError, match="nonexistent"):
+            parquet_config.get_column("nonexistent")
+
+    def test_parquet_get_column_mutate(self, parquet_config):
+        parquet_config.get_column("cpu_util").name = "cpu_utilization"
+        assert parquet_config.data_columns[0].name == "cpu_utilization"
+
+
+class TestTimeColumnValidation:
+    def test_csv_relative_time_requires_start_time(self):
+        with pytest.raises(ValueError, match="relative_start_time"):
+            CsvTimeColumn(
+                column=1,
+                format=TimeFormat.RELATIVE_NANOSECONDS,
+            )
+
+    def test_csv_relative_time_with_start_time(self):
+        col = CsvTimeColumn(
+            column=1,
+            format=TimeFormat.RELATIVE_NANOSECONDS,
+            relative_start_time=datetime(2026, 1, 1, tzinfo=timezone.utc),
+        )
+        assert col.relative_start_time is not None
+
+    def test_parquet_relative_time_requires_start_time(self):
+        with pytest.raises(ValueError, match="relative_start_time"):
+            ParquetTimeColumn(
+                path="timestamp",
+                format=TimeFormat.RELATIVE_SECONDS,
+            )
+
+    def test_parquet_relative_time_with_start_time(self):
+        col = ParquetTimeColumn(
+            path="timestamp",
+            format=TimeFormat.RELATIVE_SECONDS,
+            relative_start_time=datetime(2026, 1, 1, tzinfo=timezone.utc),
+        )
+        assert col.relative_start_time is not None
+
+    def test_absolute_time_does_not_require_start_time(self):
+        col = CsvTimeColumn(column=1, format=TimeFormat.ABSOLUTE_RFC3339)
+        assert col.relative_start_time is None
+
+
+class TestDataTypeKey:
+    def test_csv_extension(self):
+        assert EXTENSION_TO_DATA_TYPE_KEY[".csv"] == DataTypeKey.CSV
+
+    def test_parquet_not_in_extension_map(self):
+        assert ".parquet" not in EXTENSION_TO_DATA_TYPE_KEY
+
+    def test_hdf5_extensions(self):
+        assert EXTENSION_TO_DATA_TYPE_KEY[".h5"] == DataTypeKey.HDF5
+        assert EXTENSION_TO_DATA_TYPE_KEY[".hdf5"] == DataTypeKey.HDF5
+
+
+class TestRunPrecedence:
+    def test_run_id_ignored_when_none(self, csv_config):
+        csv_config.run_id = None
+        csv_config.run_name = "my_run"
+        proto = csv_config._to_proto()
+        assert proto.run_name == "my_run"
+        assert proto.run_id == ""
+
+    def test_run_id_set(self, csv_config):
+        csv_config.run_id = "run_123"
+        csv_config.run_name = "ignored"
+        proto = csv_config._to_proto()
+        assert proto.run_id == "run_123"

From d669cf238017c42108dd30d6cfb3a5401fd058df Mon Sep 17 00:00:00 2001
From: Wei Qi Lu <wei@siftstack.com>
Date: Tue, 7 Apr 2026 15:23:28 -0700
Subject: [PATCH 18/52] added client side validation for detect_config

---
 .../lib/sift_client/resources/data_imports.py   | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/python/lib/sift_client/resources/data_imports.py b/python/lib/sift_client/resources/data_imports.py
index e6ae7eb21..852a2924f 100644
--- a/python/lib/sift_client/resources/data_imports.py
+++ b/python/lib/sift_client/resources/data_imports.py
@@ -212,6 +212,8 @@ def _read_sample() -> bytes:
             csv_config.data_columns = [
                 dc for dc in csv_config.data_columns if dc.column != time_col
             ]
+            if not csv_config.data_columns:
+                raise ValueError(f"No data columns detected in '{path.name}'.")
             return csv_config
 
         if response.HasField("parquet_config"):
@@ -228,8 +230,7 @@ def _read_sample() -> bytes:
                     ]
                 else:
                     # The backend only detects arrow timestamp types. Fall back to
-                    # looking for an integer column whose name contains "time",
-                    # preferring columns that start with "time".
+                    # an integer column whose name starts with "time".
                     _integer_types = {
                         ChannelDataType.INT_32,
                         ChannelDataType.INT_64,
@@ -241,16 +242,18 @@ def _read_sample() -> bytes:
                         if dc.data_type in _integer_types and dc.name.lower().startswith("time"):
                             match = dc
                             break
-                    if match is None:
-                        for dc in parquet_config.data_columns:
-                            if dc.data_type in _integer_types and "time" in dc.name.lower():
-                                match = dc
-                                break
                     if match is not None:
                         parquet_config.time_column = ParquetTimeColumn(path=match.path)
                         parquet_config.data_columns = [
                             c for c in parquet_config.data_columns if c.path != match.path
                         ]
+                if not parquet_config.time_column.path:
+                    raise ValueError(
+                        f"No time column detected in '{path.name}'. "
+                        "Set the time column manually on the config before importing."
+                    )
+                if not parquet_config.data_columns:
+                    raise ValueError(f"No data columns detected in '{path.name}'.")
                 return parquet_config
             elif proto.HasField("single_channel_per_row"):
                 return ParquetSingleChannelPerRowImportConfig._from_proto(

From bac1b525b93aeb100db9fe6bd795828a489bdbef Mon Sep 17 00:00:00 2001
From: Wei Qi Lu <wei@siftstack.com>
Date: Tue, 7 Apr 2026 15:28:11 -0700
Subject: [PATCH 19/52] add validation tests

---
 .../_tests/resources/test_data_imports.py     | 61 +++++++++++++++++++
 1 file changed, 61 insertions(+)

diff --git a/python/lib/sift_client/_tests/resources/test_data_imports.py b/python/lib/sift_client/_tests/resources/test_data_imports.py
index 9c9185dc8..5d1995272 100644
--- a/python/lib/sift_client/_tests/resources/test_data_imports.py
+++ b/python/lib/sift_client/_tests/resources/test_data_imports.py
@@ -177,6 +177,67 @@ def test_hdf5_extensions(self):
         assert EXTENSION_TO_DATA_TYPE_KEY[".hdf5"] == DataTypeKey.HDF5
 
 
+class TestDetectConfigValidation:
+    """Tests for validation checks applied after detect_config."""
+
+    def test_csv_no_data_columns_raises(self):
+        """If all columns are filtered out, detect_config should raise."""
+        config = CsvImportConfig(
+            asset_name="",
+            time_column=CsvTimeColumn(column=1, format=TimeFormat.ABSOLUTE_RFC3339),
+            data_columns=[],
+        )
+        assert not config.data_columns
+
+    def test_parquet_empty_time_column_path(self):
+        """An empty time column path indicates detection failed."""
+        config = ParquetFlatDatasetImportConfig(
+            asset_name="",
+            time_column=ParquetTimeColumn(path=""),
+            data_columns=[
+                ParquetDataColumn(
+                    path="cpu_util", name="cpu_util", data_type=ChannelDataType.DOUBLE
+                ),
+            ],
+        )
+        assert not config.time_column.path
+
+    def test_parquet_no_data_columns(self):
+        """A config with no data columns indicates detection found nothing useful."""
+        config = ParquetFlatDatasetImportConfig(
+            asset_name="",
+            time_column=ParquetTimeColumn(path="timestamp"),
+            data_columns=[],
+        )
+        assert not config.data_columns
+
+    def test_parquet_integer_time_column_fallback(self):
+        """An integer column starting with 'time' should be usable as the time column."""
+        config = ParquetFlatDatasetImportConfig(
+            asset_name="",
+            time_column=ParquetTimeColumn(path=""),
+            data_columns=[
+                ParquetDataColumn(path="time_ns", name="time_ns", data_type=ChannelDataType.INT_64),
+                ParquetDataColumn(
+                    path="cpu_util", name="cpu_util", data_type=ChannelDataType.DOUBLE
+                ),
+            ],
+        )
+        _integer_types = {
+            ChannelDataType.INT_32,
+            ChannelDataType.INT_64,
+            ChannelDataType.UINT_32,
+            ChannelDataType.UINT_64,
+        }
+        match = None
+        for dc in config.data_columns:
+            if dc.data_type in _integer_types and dc.name.lower().startswith("time"):
+                match = dc
+                break
+        assert match is not None
+        assert match.path == "time_ns"
+
+
 class TestRunPrecedence:
     def test_run_id_ignored_when_none(self, csv_config):
         csv_config.run_id = None

From 09f48e9c36163c3ca97485959343412c325e3fbb Mon Sep 17 00:00:00 2001
From: Wei Qi Lu <wei@siftstack.com>
Date: Tue, 7 Apr 2026 16:50:42 -0700
Subject: [PATCH 20/52] update asset_name handling

---
 .../lib/sift_client/resources/data_imports.py   | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/python/lib/sift_client/resources/data_imports.py b/python/lib/sift_client/resources/data_imports.py
index 852a2924f..4fbd26aa8 100644
--- a/python/lib/sift_client/resources/data_imports.py
+++ b/python/lib/sift_client/resources/data_imports.py
@@ -41,8 +41,8 @@ def __init__(self, sift_client: SiftClient):
     async def import_from_path(
         self,
         file_path: str | Path,
-        asset_name: str,
         *,
+        asset_name: str | None = None,
         config: ImportConfig | None = None,
         data_type: DataTypeKey | None = None,
         run_name: str | None = None,
@@ -57,14 +57,16 @@ async def import_from_path(
         for the import to complete.
 
         When ``config`` is omitted the file format is auto-detected via
-        :meth:`detect_config`. The ``asset_name`` is always applied to
-        the config. If neither ``run_name`` nor ``run_id`` is provided
+        :meth:`detect_config`. When ``asset_name`` is provided it overrides
+        the config value; otherwise the config's ``asset_name`` is used.
+        If neither ``run_name`` nor ``run_id`` is provided
         (and none is set on the config), ``run_name`` defaults to the
         filename.
 
         Args:
             file_path: Path to the local file to import.
-            asset_name: Name of the asset to import data into.
+            asset_name: Name of the asset to import data into. Optional
+                when ``config`` already has ``asset_name`` set.
             config: Import configuration describing the file format and column
                 mapping. When provided, ``data_type`` is ignored.
             data_type: Explicit data type key. Required for formats like
@@ -92,7 +94,10 @@ async def import_from_path(
         if config is None:
             config = await self.detect_config(file_path, data_type=data_type)
 
-        config.asset_name = asset_name
+        if asset_name is not None:
+            config.asset_name = asset_name
+        elif not config.asset_name:
+            raise ValueError("'asset_name' is required when not set on the config.")
         if run_id is not None:
             config.run_id = run_id
         elif run_name is not None:
@@ -199,7 +204,7 @@ async def detect_config(
 
             def _read_sample() -> bytes:
                 with open(path, "rb") as f:
-                    return f.read(65_536)  # 64 KiB
+                    return f.read(1048576)  # 1MiB
 
             sample = await run_sync_function(_read_sample)
 

From 3b1e28f9c209e16b8833daf5a7f543fc2bd3d524 Mon Sep 17 00:00:00 2001
From: Wei Qi Lu <wei@siftstack.com>
Date: Tue, 7 Apr 2026 17:08:40 -0700
Subject: [PATCH 21/52] update sync stubs

---
 .../sift_client/resources/sync_stubs/__init__.pyi  | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/python/lib/sift_client/resources/sync_stubs/__init__.pyi b/python/lib/sift_client/resources/sync_stubs/__init__.pyi
index 97f70bf9e..d93810c68 100644
--- a/python/lib/sift_client/resources/sync_stubs/__init__.pyi
+++ b/python/lib/sift_client/resources/sync_stubs/__init__.pyi
@@ -13,9 +13,6 @@ if TYPE_CHECKING:
     import pandas as pd
     import pyarrow as pa
 
-    from sift_client._internal.low_level_wrappers.data_imports import (
-        ImportConfig,
-    )
     from sift_client.client import SiftClient
     from sift_client.sift_types.asset import Asset, AssetUpdate
     from sift_client.sift_types.calculated_channel import (
@@ -26,6 +23,7 @@ if TYPE_CHECKING:
     from sift_client.sift_types.channel import Channel
     from sift_client.sift_types.data_import import (
         DataTypeKey,
+        ImportConfig,
     )
     from sift_client.sift_types.export import ExportOutputFormat
     from sift_client.sift_types.file_attachment import (
@@ -673,8 +671,8 @@ class DataImportAPI:
     def import_from_path(
         self,
         file_path: str | Path,
-        asset_name: str,
         *,
+        asset_name: str | None = None,
         config: ImportConfig | None = None,
         data_type: DataTypeKey | None = None,
         run_name: str | None = None,
@@ -689,14 +687,16 @@ class DataImportAPI:
         for the import to complete.
 
         When ``config`` is omitted the file format is auto-detected via
-        :meth:`detect_config`. The ``asset_name`` is always applied to
-        the config. If neither ``run_name`` nor ``run_id`` is provided
+        :meth:`detect_config`. When ``asset_name`` is provided it overrides
+        the config value; otherwise the config's ``asset_name`` is used.
+        If neither ``run_name`` nor ``run_id`` is provided
         (and none is set on the config), ``run_name`` defaults to the
         filename.
 
         Args:
             file_path: Path to the local file to import.
-            asset_name: Name of the asset to import data into.
+            asset_name: Name of the asset to import data into. Optional
+                when ``config`` already has ``asset_name`` set.
             config: Import configuration describing the file format and column
                 mapping. When provided, ``data_type`` is ignored.
             data_type: Explicit data type key. Required for formats like

From 1ae1e95f8e15c100ad0ed2760b81092ac8082204 Mon Sep 17 00:00:00 2001
From: Wei Qi Lu <wei@siftstack.com>
Date: Wed, 8 Apr 2026 10:23:31 -0700
Subject: [PATCH 22/52] add ch10, hdf5, and tdms configs

---
 .../low_level_wrappers/data_imports.py        |   9 ++
 .../lib/sift_client/resources/data_imports.py |  16 +-
 .../resources/sync_stubs/__init__.pyi         |   9 +-
 .../lib/sift_client/sift_types/data_import.py | 149 +++++++++++++++++-
 4 files changed, 179 insertions(+), 4 deletions(-)

diff --git a/python/lib/sift_client/_internal/low_level_wrappers/data_imports.py b/python/lib/sift_client/_internal/low_level_wrappers/data_imports.py
index e7afb9a76..b88b9cfab 100644
--- a/python/lib/sift_client/_internal/low_level_wrappers/data_imports.py
+++ b/python/lib/sift_client/_internal/low_level_wrappers/data_imports.py
@@ -12,10 +12,13 @@
 
 from sift_client._internal.low_level_wrappers.base import LowLevelClientBase
 from sift_client.sift_types.data_import import (
+    Ch10ImportConfig,
     CsvImportConfig,
+    Hdf5ImportConfig,
     ImportConfig,
     ParquetFlatDatasetImportConfig,
     ParquetSingleChannelPerRowImportConfig,
+    TdmsImportConfig,
 )
 from sift_client.transport import WithGrpcClient
 
@@ -36,6 +39,12 @@ def _set_config_on_request(
         config, (ParquetFlatDatasetImportConfig, ParquetSingleChannelPerRowImportConfig)
     ):
         request.parquet_config.CopyFrom(config._to_proto())
+    elif isinstance(config, Ch10ImportConfig):
+        request.ch10_config.CopyFrom(config._to_proto())
+    elif isinstance(config, TdmsImportConfig):
+        request.tdms_config.CopyFrom(config._to_proto())
+    elif isinstance(config, Hdf5ImportConfig):
+        request.hdf5_config.CopyFrom(config._to_proto())
     else:
         raise TypeError(f"Unsupported import config type: {type(config).__name__}")
 
diff --git a/python/lib/sift_client/resources/data_imports.py b/python/lib/sift_client/resources/data_imports.py
index 4fbd26aa8..444d11a01 100644
--- a/python/lib/sift_client/resources/data_imports.py
+++ b/python/lib/sift_client/resources/data_imports.py
@@ -57,7 +57,9 @@ async def import_from_path(
         for the import to complete.
 
         When ``config`` is omitted the file format is auto-detected via
-        :meth:`detect_config`. When ``asset_name`` is provided it overrides
+        :meth:`detect_config` (CSV and Parquet only). For other formats
+        (TDMS, HDF5, CH10), ``config`` must be provided.
+        When ``asset_name`` is provided it overrides
         the config value; otherwise the config's ``asset_name`` is used.
         If neither ``run_name`` nor ``run_id`` is provided
         (and none is set on the config), ``run_name`` defaults to the
@@ -98,11 +100,16 @@ async def import_from_path(
             config.asset_name = asset_name
         elif not config.asset_name:
             raise ValueError("'asset_name' is required when not set on the config.")
+        has_run_id = hasattr(config, "run_id")
         if run_id is not None:
+            if not has_run_id:
+                raise ValueError(
+                    f"'run_id' is not supported for {type(config).__name__}. Use 'run_name' instead."
+                )
             config.run_id = run_id
         elif run_name is not None:
             config.run_name = run_name
-        elif not config.run_name and not config.run_id:
+        elif not config.run_name and not getattr(config, "run_id", None):
             config.run_name = path.name
 
         if isinstance(
@@ -148,6 +155,11 @@ async def detect_config(
         is inferred from the file extension when ``data_type`` is not
         provided.
 
+        Only CSV and Parquet files are currently supported for auto-detection.
+        For other formats (TDMS, HDF5, CH10), create the config manually
+        using :class:`TdmsImportConfig`, :class:`Hdf5ImportConfig`, or
+        :class:`Ch10ImportConfig`.
+
         For file types with multiple layouts (e.g. Parquet), ``data_type``
         must be specified explicitly.
 
diff --git a/python/lib/sift_client/resources/sync_stubs/__init__.pyi b/python/lib/sift_client/resources/sync_stubs/__init__.pyi
index d93810c68..0fc09dfb5 100644
--- a/python/lib/sift_client/resources/sync_stubs/__init__.pyi
+++ b/python/lib/sift_client/resources/sync_stubs/__init__.pyi
@@ -650,6 +650,11 @@ class DataImportAPI:
         is inferred from the file extension when ``data_type`` is not
         provided.
 
+        Only CSV and Parquet files are currently supported for auto-detection.
+        For other formats (TDMS, HDF5, CH10), create the config manually
+        using :class:`TdmsImportConfig`, :class:`Hdf5ImportConfig`, or
+        :class:`Ch10ImportConfig`.
+
         For file types with multiple layouts (e.g. Parquet), ``data_type``
         must be specified explicitly.
 
@@ -687,7 +692,9 @@ class DataImportAPI:
         for the import to complete.
 
         When ``config`` is omitted the file format is auto-detected via
-        :meth:`detect_config`. When ``asset_name`` is provided it overrides
+        :meth:`detect_config` (CSV and Parquet only). For other formats
+        (TDMS, HDF5, CH10), ``config`` must be provided.
+        When ``asset_name`` is provided it overrides
         the config value; otherwise the config's ``asset_name`` is used.
         If neither ``run_name`` nor ``run_id`` is provided
         (and none is set on the config), ``run_name`` defaults to the
diff --git a/python/lib/sift_client/sift_types/data_import.py b/python/lib/sift_client/sift_types/data_import.py
index 2b241330d..d46b5cdc8 100644
--- a/python/lib/sift_client/sift_types/data_import.py
+++ b/python/lib/sift_client/sift_types/data_import.py
@@ -18,8 +18,11 @@
     PARQUET_COMPLEX_TYPES_IMPORT_MODE_IGNORE,
     PARQUET_COMPLEX_TYPES_IMPORT_MODE_STRING,
 )
+from sift.data_imports.v2.data_imports_pb2 import Ch10Config as Ch10ConfigProto
 from sift.data_imports.v2.data_imports_pb2 import CsvConfig as CsvConfigProto
 from sift.data_imports.v2.data_imports_pb2 import CsvTimeColumn as CsvTimeColumnProto
+from sift.data_imports.v2.data_imports_pb2 import Hdf5Config as Hdf5ConfigProto
+from sift.data_imports.v2.data_imports_pb2 import Hdf5DataConfig as Hdf5DataConfigProto
 from sift.data_imports.v2.data_imports_pb2 import ParquetConfig as ParquetConfigProto
 from sift.data_imports.v2.data_imports_pb2 import ParquetDataColumn as ParquetDataColumnProto
 from sift.data_imports.v2.data_imports_pb2 import (
@@ -35,6 +38,7 @@
     ParquetSingleChannelPerRowSingleChannelConfig as ParquetSingleChannelPerRowSingleChannelConfigProto,
 )
 from sift.data_imports.v2.data_imports_pb2 import ParquetTimeColumn as ParquetTimeColumnProto
+from sift.data_imports.v2.data_imports_pb2 import TDMSConfig as TDMSConfigProto
 from sift.data_imports.v2.data_imports_pb2 import TimeFormat as TimeFormatProto
 
 from sift_client._internal.util.timestamp import to_pb_timestamp
@@ -536,9 +540,152 @@ def _from_proto(
         )
 
 
+class Ch10ImportConfig(BaseModel):
+    """Configuration for importing a CH10 file.
+
+    Attributes:
+        asset_name: Name of the asset to import data into.
+        run_name: Name for the run.
+        scale_values: Whether to apply EU (engineering unit) scaling to channel values.
+    """
+
+    asset_name: str
+    run_name: str | None = None
+    scale_values: bool = False
+
+    def _to_proto(self) -> Ch10ConfigProto:
+        return Ch10ConfigProto(
+            asset_name=self.asset_name,
+            run_name=self.run_name or "",
+            scale_values=self.scale_values,
+        )
+
+
+class TdmsImportConfig(BaseModel):
+    """Configuration for importing a TDMS file.
+
+    Attributes:
+        asset_name: Name of the asset to import data into.
+        run_name: Name for the run. Ignored if ``run_id`` is set.
+        run_id: ID of an existing run to append data to.
+        start_time_override: Override the ``wf_start_time`` metadata field for all channels.
+            Useful when waveform channels have ``wf_increment`` but no ``wf_start_time``.
+        file_size: The file size in bytes. Required if the file has truncated chunks.
+    """
+
+    asset_name: str
+    run_name: str | None = None
+    run_id: str | None = None
+    start_time_override: datetime | None = None
+    file_size: int | None = None
+
+    def _to_proto(self) -> TDMSConfigProto:
+        proto = TDMSConfigProto(
+            asset_name=self.asset_name,
+            run_name=self.run_name or "",
+            run_id=self.run_id or "",
+        )
+        if self.start_time_override is not None:
+            proto.start_time_override.CopyFrom(to_pb_timestamp(self.start_time_override))
+        if self.file_size is not None:
+            proto.file_size = self.file_size
+        return proto
+
+
+class Hdf5DataColumn(BaseModel):
+    """A dataset mapping for HDF5 imports.
+
+    Each entry maps a time/value dataset pair to a channel.
+
+    Attributes:
+        time_dataset: HDF5 path to the time dataset.
+        time_index: Column index within the time dataset. Defaults to 0.
+        value_dataset: HDF5 path to the value dataset.
+        value_index: Column index within the value dataset. Defaults to 0.
+        name: Channel name.
+        data_type: The data type of the channel values.
+        units: Optional units string.
+        description: Optional channel description.
+        time_field: For compound dataset types, the field name to use for time.
+        value_field: For compound dataset types, the field name to use for value.
+    """
+
+    time_dataset: str
+    time_index: int = 0
+    value_dataset: str
+    value_index: int = 0
+    name: str
+    data_type: ChannelDataType
+    units: str = ""
+    description: str = ""
+    time_field: str | None = None
+    value_field: str | None = None
+
+
+class Hdf5ImportConfig(BaseModel):
+    """Configuration for importing an HDF5 file.
+
+    Attributes:
+        asset_name: Name of the asset to import data into.
+        run_name: Name for the run. Ignored if ``run_id`` is set.
+        run_id: ID of an existing run to append data to.
+        data: List of dataset mappings, each pairing a time and value dataset to a channel.
+        time_format: The time format used across all time datasets.
+        relative_start_time: Required when using a relative time format.
+    """
+
+    asset_name: str
+    run_name: str | None = None
+    run_id: str | None = None
+    data: list[Hdf5DataColumn]
+    time_format: TimeFormat
+    relative_start_time: datetime | None = None
+
+    @model_validator(mode="after")
+    def _check_relative_start_time(self) -> Hdf5ImportConfig:
+        if self.time_format.name.startswith("RELATIVE_") and self.relative_start_time is None:
+            raise ValueError(
+                f"'relative_start_time' is required when using a relative time format ({self.time_format.name})."
+            )
+        return self
+
+    def _to_proto(self) -> Hdf5ConfigProto:
+        proto = Hdf5ConfigProto(
+            asset_name=self.asset_name,
+            run_name=self.run_name or "",
+            run_id=self.run_id or "",
+            time_format=self.time_format.value,
+            data=[
+                Hdf5DataConfigProto(
+                    time_dataset=d.time_dataset,
+                    time_index=d.time_index,
+                    value_dataset=d.value_dataset,
+                    value_index=d.value_index,
+                    channel_config=ChannelConfigProto(
+                        name=d.name,
+                        data_type=d.data_type.value,
+                        units=d.units,
+                        description=d.description,
+                    ),
+                    time_field=d.time_field,
+                    value_field=d.value_field,
+                )
+                for d in self.data
+            ],
+        )
+        if self.relative_start_time is not None:
+            proto.relative_start_time.CopyFrom(to_pb_timestamp(self.relative_start_time))
+        return proto
+
+
 # Note: Using Union instead of | syntax for Python 3.9 compatibility at module level.
 # While `from __future__ import annotations` allows | in type hints (they're strings),
 # module-level type aliases are evaluated at runtime and require Union in Python <3.10.
 ImportConfig = Union[
-    CsvImportConfig, ParquetFlatDatasetImportConfig, ParquetSingleChannelPerRowImportConfig
+    CsvImportConfig,
+    ParquetFlatDatasetImportConfig,
+    ParquetSingleChannelPerRowImportConfig,
+    Ch10ImportConfig,
+    TdmsImportConfig,
+    Hdf5ImportConfig,
 ]

From 3ee74a4fa38bafccc43ea3d0af21c8e5e149f26b Mon Sep 17 00:00:00 2001
From: Wei Qi Lu <wei@siftstack.com>
Date: Wed, 8 Apr 2026 10:31:24 -0700
Subject: [PATCH 23/52] mypy fix

---
 python/lib/sift_client/resources/data_imports.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/python/lib/sift_client/resources/data_imports.py b/python/lib/sift_client/resources/data_imports.py
index 444d11a01..03fbce1a3 100644
--- a/python/lib/sift_client/resources/data_imports.py
+++ b/python/lib/sift_client/resources/data_imports.py
@@ -11,6 +11,7 @@
 from sift_client.sift_types.channel import ChannelDataType
 from sift_client.sift_types.data_import import (
     EXTENSION_TO_DATA_TYPE_KEY,
+    Ch10ImportConfig,
     CsvImportConfig,
     DataTypeKey,
     ImportConfig,
@@ -100,16 +101,15 @@ async def import_from_path(
             config.asset_name = asset_name
         elif not config.asset_name:
             raise ValueError("'asset_name' is required when not set on the config.")
-        has_run_id = hasattr(config, "run_id")
         if run_id is not None:
-            if not has_run_id:
+            if isinstance(config, Ch10ImportConfig):
                 raise ValueError(
-                    f"'run_id' is not supported for {type(config).__name__}. Use 'run_name' instead."
+                    "'run_id' is not supported for Ch10ImportConfig. Use 'run_name' instead."
                 )
             config.run_id = run_id
         elif run_name is not None:
             config.run_name = run_name
-        elif not config.run_name and not getattr(config, "run_id", None):
+        elif not config.run_name and (isinstance(config, Ch10ImportConfig) or not config.run_id):
             config.run_name = path.name
 
         if isinstance(

From ffdb06f375fd8b1fc37d41249a7f2400f8a840d4 Mon Sep 17 00:00:00 2001
From: Wei Qi Lu <wei@siftstack.com>
Date: Wed, 8 Apr 2026 10:39:34 -0700
Subject: [PATCH 24/52] additional file format tests

---
 .../_tests/resources/test_data_imports.py     | 176 ++++++++++++++++++
 1 file changed, 176 insertions(+)

diff --git a/python/lib/sift_client/_tests/resources/test_data_imports.py b/python/lib/sift_client/_tests/resources/test_data_imports.py
index 5d1995272..05bc09f88 100644
--- a/python/lib/sift_client/_tests/resources/test_data_imports.py
+++ b/python/lib/sift_client/_tests/resources/test_data_imports.py
@@ -7,13 +7,17 @@
 from sift_client.sift_types.channel import ChannelDataType
 from sift_client.sift_types.data_import import (
     EXTENSION_TO_DATA_TYPE_KEY,
+    Ch10ImportConfig,
     CsvDataColumn,
     CsvImportConfig,
     CsvTimeColumn,
     DataTypeKey,
+    Hdf5DataColumn,
+    Hdf5ImportConfig,
     ParquetDataColumn,
     ParquetFlatDatasetImportConfig,
     ParquetTimeColumn,
+    TdmsImportConfig,
     TimeFormat,
 )
 
@@ -251,3 +255,175 @@ def test_run_id_set(self, csv_config):
         csv_config.run_name = "ignored"
         proto = csv_config._to_proto()
         assert proto.run_id == "run_123"
+
+
+class TestCh10Config:
+    def test_to_proto(self):
+        config = Ch10ImportConfig(asset_name="my_asset", run_name="run1", scale_values=True)
+        proto = config._to_proto()
+        assert proto.asset_name == "my_asset"
+        assert proto.run_name == "run1"
+        assert proto.scale_values is True
+
+    def test_to_proto_defaults(self):
+        config = Ch10ImportConfig(asset_name="my_asset")
+        proto = config._to_proto()
+        assert proto.run_name == ""
+        assert proto.scale_values is False
+
+    def test_no_run_id_field(self):
+        config = Ch10ImportConfig(asset_name="my_asset")
+        assert not hasattr(config, "run_id")
+
+
+class TestTdmsConfig:
+    def test_to_proto(self):
+        config = TdmsImportConfig(
+            asset_name="my_asset",
+            run_name="run1",
+            run_id="run_123",
+            start_time_override=datetime(2026, 1, 1, tzinfo=timezone.utc),
+            file_size=12345,
+        )
+        proto = config._to_proto()
+        assert proto.asset_name == "my_asset"
+        assert proto.run_id == "run_123"
+        assert proto.file_size == 12345
+        assert proto.HasField("start_time_override")
+
+    def test_to_proto_optional_fields_unset(self):
+        config = TdmsImportConfig(asset_name="my_asset", run_name="run1")
+        proto = config._to_proto()
+        assert proto.run_name == "run1"
+        assert proto.run_id == ""
+        assert not proto.HasField("start_time_override")
+        assert proto.file_size == 0
+
+    def test_run_id_takes_precedence(self):
+        config = TdmsImportConfig(asset_name="a", run_name="ignored", run_id="run_123")
+        proto = config._to_proto()
+        assert proto.run_id == "run_123"
+
+
+class TestHdf5Config:
+    def test_to_proto(self):
+        config = Hdf5ImportConfig(
+            asset_name="my_asset",
+            run_name="run1",
+            time_format=TimeFormat.ABSOLUTE_UNIX_NANOSECONDS,
+            data=[
+                Hdf5DataColumn(
+                    time_dataset="/time",
+                    value_dataset="/voltage",
+                    name="voltage",
+                    data_type=ChannelDataType.DOUBLE,
+                    units="V",
+                    description="Voltage reading",
+                ),
+            ],
+        )
+        proto = config._to_proto()
+        assert proto.asset_name == "my_asset"
+        assert len(proto.data) == 1
+        assert proto.data[0].time_dataset == "/time"
+        assert proto.data[0].value_dataset == "/voltage"
+        assert proto.data[0].channel_config.name == "voltage"
+        assert proto.data[0].channel_config.units == "V"
+        assert proto.data[0].channel_config.description == "Voltage reading"
+
+    def test_to_proto_compound_fields(self):
+        config = Hdf5ImportConfig(
+            asset_name="my_asset",
+            time_format=TimeFormat.ABSOLUTE_UNIX_NANOSECONDS,
+            data=[
+                Hdf5DataColumn(
+                    time_dataset="/data",
+                    value_dataset="/data",
+                    name="current",
+                    data_type=ChannelDataType.FLOAT,
+                    time_field="ts",
+                    value_field="val",
+                ),
+            ],
+        )
+        proto = config._to_proto()
+        assert proto.data[0].time_field == "ts"
+        assert proto.data[0].value_field == "val"
+
+    def test_to_proto_compound_fields_unset(self):
+        config = Hdf5ImportConfig(
+            asset_name="my_asset",
+            time_format=TimeFormat.ABSOLUTE_UNIX_NANOSECONDS,
+            data=[
+                Hdf5DataColumn(
+                    time_dataset="/time",
+                    value_dataset="/voltage",
+                    name="voltage",
+                    data_type=ChannelDataType.DOUBLE,
+                ),
+            ],
+        )
+        proto = config._to_proto()
+        assert not proto.data[0].HasField("time_field")
+        assert not proto.data[0].HasField("value_field")
+
+    def test_to_proto_multiple_datasets(self):
+        config = Hdf5ImportConfig(
+            asset_name="my_asset",
+            time_format=TimeFormat.ABSOLUTE_UNIX_NANOSECONDS,
+            data=[
+                Hdf5DataColumn(
+                    time_dataset="/time",
+                    value_dataset="/voltage",
+                    name="voltage",
+                    data_type=ChannelDataType.DOUBLE,
+                ),
+                Hdf5DataColumn(
+                    time_dataset="/time",
+                    value_dataset="/current",
+                    value_index=1,
+                    name="current",
+                    data_type=ChannelDataType.FLOAT,
+                ),
+            ],
+        )
+        proto = config._to_proto()
+        assert len(proto.data) == 2
+        assert proto.data[1].value_dataset == "/current"
+        assert proto.data[1].value_index == 1
+
+    def test_relative_time_requires_start_time(self):
+        with pytest.raises(ValueError, match="relative_start_time"):
+            Hdf5ImportConfig(
+                asset_name="my_asset",
+                time_format=TimeFormat.RELATIVE_SECONDS,
+                data=[],
+            )
+
+    def test_relative_time_with_start_time(self):
+        config = Hdf5ImportConfig(
+            asset_name="my_asset",
+            time_format=TimeFormat.RELATIVE_SECONDS,
+            relative_start_time=datetime(2026, 1, 1, tzinfo=timezone.utc),
+            data=[],
+        )
+        proto = config._to_proto()
+        assert proto.HasField("relative_start_time")
+
+    def test_absolute_time_no_start_time_required(self):
+        config = Hdf5ImportConfig(
+            asset_name="my_asset",
+            time_format=TimeFormat.ABSOLUTE_UNIX_NANOSECONDS,
+            data=[],
+        )
+        assert config.relative_start_time is None
+        proto = config._to_proto()
+        assert not proto.HasField("relative_start_time")
+
+
+class TestExtensionMap:
+    def test_tdms_extension(self):
+        assert EXTENSION_TO_DATA_TYPE_KEY[".tdms"] == DataTypeKey.TDMS
+
+    def test_ch10_extension(self):
+        assert EXTENSION_TO_DATA_TYPE_KEY[".ch10"] == DataTypeKey.CH10

From b0559b0f30beeadde13db9d6cb9be03b6901b434 Mon Sep 17 00:00:00 2001
From: Wei Qi Lu <wei@siftstack.com>
Date: Wed, 8 Apr 2026 11:18:40 -0700
Subject: [PATCH 25/52] added documentation for csv json metadata

---
 python/lib/sift_client/resources/data_imports.py         | 8 ++++++++
 python/lib/sift_client/resources/sync_stubs/__init__.pyi | 8 ++++++++
 2 files changed, 16 insertions(+)

diff --git a/python/lib/sift_client/resources/data_imports.py b/python/lib/sift_client/resources/data_imports.py
index 03fbce1a3..14b211282 100644
--- a/python/lib/sift_client/resources/data_imports.py
+++ b/python/lib/sift_client/resources/data_imports.py
@@ -160,6 +160,14 @@ async def detect_config(
         using :class:`TdmsImportConfig`, :class:`Hdf5ImportConfig`, or
         :class:`Ch10ImportConfig`.
 
+        For CSV files, the server can parse an optional JSON metadata row
+        that auto-populates channel names, units, descriptions, data types,
+        and enum definitions. Each cell in the row is a JSON object
+        describing that column. When present, ``first_data_row`` in the
+        returned config will be set to the row after the metadata row.
+        Note that enum type definitions are applied server-side during
+        import but are not included in the returned config.
+
         For file types with multiple layouts (e.g. Parquet), ``data_type``
         must be specified explicitly.
 
diff --git a/python/lib/sift_client/resources/sync_stubs/__init__.pyi b/python/lib/sift_client/resources/sync_stubs/__init__.pyi
index 0fc09dfb5..814c72c83 100644
--- a/python/lib/sift_client/resources/sync_stubs/__init__.pyi
+++ b/python/lib/sift_client/resources/sync_stubs/__init__.pyi
@@ -655,6 +655,14 @@ class DataImportAPI:
         using :class:`TdmsImportConfig`, :class:`Hdf5ImportConfig`, or
         :class:`Ch10ImportConfig`.
 
+        For CSV files, the server can parse an optional JSON metadata row
+        that auto-populates channel names, units, descriptions, data types,
+        and enum definitions. Each cell in the row is a JSON object
+        describing that column. When present, ``first_data_row`` in the
+        returned config will be set to the row after the metadata row.
+        Note that enum type definitions are applied server-side during
+        import but are not included in the returned config.
+
         For file types with multiple layouts (e.g. Parquet), ``data_type``
         must be specified explicitly.
 

From d66d8e08eca2b21be2b6b8cb2efd058859969adc Mon Sep 17 00:00:00 2001
From: Wei Qi Lu <wei@siftstack.com>
Date: Wed, 8 Apr 2026 14:00:46 -0700
Subject: [PATCH 26/52] updated docs and split import and polling

---
 .../lib/sift_client/resources/data_imports.py | 35 +++++--------------
 .../resources/sync_stubs/__init__.pyi         | 20 ++++-------
 .../lib/sift_client/sift_types/data_import.py |  8 ++---
 3 files changed, 19 insertions(+), 44 deletions(-)

diff --git a/python/lib/sift_client/resources/data_imports.py b/python/lib/sift_client/resources/data_imports.py
index 14b211282..7873e5dd8 100644
--- a/python/lib/sift_client/resources/data_imports.py
+++ b/python/lib/sift_client/resources/data_imports.py
@@ -3,7 +3,6 @@
 from pathlib import Path
 from typing import TYPE_CHECKING
 
-import sift_client as _sift_client_module
 from sift_client._internal.low_level_wrappers.data_imports import DataImportsLowLevelClient
 from sift_client._internal.util.executor import run_sync_function
 from sift_client._internal.util.file import extract_parquet_footer, upload_file
@@ -48,17 +47,15 @@ async def import_from_path(
         data_type: DataTypeKey | None = None,
         run_name: str | None = None,
         run_id: str | None = None,
-        polling_interval_secs: int = 5,
-        timeout_secs: int | None = None,
-        show_progress: bool | None = None,
     ) -> Job:
         """Import data from a local file.
 
-        Creates a data import on the server, uploads the file, and waits
-        for the import to complete.
+        Creates a data import on the server, uploads the file, and returns
+        a ``Job`` handle. Use ``job.wait_until_complete()`` to poll for
+        completion if needed.
 
         When ``config`` is omitted the file format is auto-detected via
-        :meth:`detect_config` (CSV and Parquet only). For other formats
+        ``detect_config`` (CSV and Parquet only). For other formats
         (TDMS, HDF5, CH10), ``config`` must be provided.
         When ``asset_name`` is provided it overrides
         the config value; otherwise the config's ``asset_name`` is used.
@@ -79,13 +76,9 @@ async def import_from_path(
                 Defaults to the filename if neither ``run_name`` nor
                 ``run_id`` is set.
             run_id: Existing run ID to use. Overrides any value on the config.
-            polling_interval_secs: Seconds between status polls. Defaults to 5s.
-            timeout_secs: Maximum seconds to wait. If None, polls indefinitely.
-            show_progress: If True, display a progress spinner while waiting.
-                Defaults to True for sync, False for async.
 
         Returns:
-            The completed :class:`Job`.
+            A ``Job`` handle for the pending import.
 
         Raises:
             FileNotFoundError: If the file does not exist.
@@ -129,19 +122,7 @@ async def import_from_path(
         )
         job_id = response["jobId"]
 
-        if show_progress is None:
-            global_setting = _sift_client_module.config.show_progress
-            if global_setting is not None:
-                show_progress = global_setting
-            else:
-                show_progress = getattr(self, "_is_sync", False)
-
-        return await self.client.async_.jobs.wait_until_complete(
-            job_id,
-            polling_interval_secs=polling_interval_secs,
-            timeout_secs=timeout_secs,
-            show_progress=show_progress,
-        )
+        return await self.client.async_.jobs.get(job_id=job_id)
 
     async def detect_config(
         self,
@@ -157,8 +138,8 @@ async def detect_config(
 
         Only CSV and Parquet files are currently supported for auto-detection.
         For other formats (TDMS, HDF5, CH10), create the config manually
-        using :class:`TdmsImportConfig`, :class:`Hdf5ImportConfig`, or
-        :class:`Ch10ImportConfig`.
+        using ``TdmsImportConfig``, ``Hdf5ImportConfig``, or
+        ``Ch10ImportConfig``.
 
         For CSV files, the server can parse an optional JSON metadata row
         that auto-populates channel names, units, descriptions, data types,
diff --git a/python/lib/sift_client/resources/sync_stubs/__init__.pyi b/python/lib/sift_client/resources/sync_stubs/__init__.pyi
index 814c72c83..6d19a903d 100644
--- a/python/lib/sift_client/resources/sync_stubs/__init__.pyi
+++ b/python/lib/sift_client/resources/sync_stubs/__init__.pyi
@@ -652,8 +652,8 @@ class DataImportAPI:
 
         Only CSV and Parquet files are currently supported for auto-detection.
         For other formats (TDMS, HDF5, CH10), create the config manually
-        using :class:`TdmsImportConfig`, :class:`Hdf5ImportConfig`, or
-        :class:`Ch10ImportConfig`.
+        using ``TdmsImportConfig``, ``Hdf5ImportConfig``, or
+        ``Ch10ImportConfig``.
 
         For CSV files, the server can parse an optional JSON metadata row
         that auto-populates channel names, units, descriptions, data types,
@@ -690,17 +690,15 @@ class DataImportAPI:
         data_type: DataTypeKey | None = None,
         run_name: str | None = None,
         run_id: str | None = None,
-        polling_interval_secs: int = 5,
-        timeout_secs: int | None = None,
-        show_progress: bool | None = None,
     ) -> Job:
         """Import data from a local file.
 
-        Creates a data import on the server, uploads the file, and waits
-        for the import to complete.
+        Creates a data import on the server, uploads the file, and returns
+        a ``Job`` handle. Use ``job.wait_until_complete()`` to poll for
+        completion if needed.
 
         When ``config`` is omitted the file format is auto-detected via
-        :meth:`detect_config` (CSV and Parquet only). For other formats
+        ``detect_config`` (CSV and Parquet only). For other formats
         (TDMS, HDF5, CH10), ``config`` must be provided.
         When ``asset_name`` is provided it overrides
         the config value; otherwise the config's ``asset_name`` is used.
@@ -721,13 +719,9 @@ class DataImportAPI:
                 Defaults to the filename if neither ``run_name`` nor
                 ``run_id`` is set.
             run_id: Existing run ID to use. Overrides any value on the config.
-            polling_interval_secs: Seconds between status polls. Defaults to 5s.
-            timeout_secs: Maximum seconds to wait. If None, polls indefinitely.
-            show_progress: If True, display a progress spinner while waiting.
-                Defaults to True for sync, False for async.
 
         Returns:
-            The completed :class:`Job`.
+            A ``Job`` handle for the pending import.
 
         Raises:
             FileNotFoundError: If the file does not exist.
diff --git a/python/lib/sift_client/sift_types/data_import.py b/python/lib/sift_client/sift_types/data_import.py
index d46b5cdc8..9c5f60931 100644
--- a/python/lib/sift_client/sift_types/data_import.py
+++ b/python/lib/sift_client/sift_types/data_import.py
@@ -307,9 +307,9 @@ class ParquetFlatDatasetImportConfig(BaseModel):
         time_column: Time column configuration.
         data_columns: List of data column definitions.
         footer_offset: Byte offset where the Parquet footer begins. Populated
-            automatically when using :meth:`~DataImportAPIAsync.detect_config`.
+            automatically when using ``detect_config``.
         footer_length: Length of the Parquet footer in bytes. Populated
-            automatically when using :meth:`~DataImportAPIAsync.detect_config`.
+            automatically when using ``detect_config``.
         complex_types_import_mode: How to handle complex Parquet types.
     """
 
@@ -444,9 +444,9 @@ class ParquetSingleChannelPerRowImportConfig(BaseModel):
         single_channel: Set when the entire file contains data for one channel.
         multi_channel: Set when each row identifies its channel via a name column.
         footer_offset: Byte offset where the Parquet footer begins. Populated
-            automatically when using :meth:`~DataImportAPIAsync.detect_config`.
+            automatically when using ``detect_config``.
         footer_length: Length of the Parquet footer in bytes. Populated
-            automatically when using :meth:`~DataImportAPIAsync.detect_config`.
+            automatically when using ``detect_config``.
         complex_types_import_mode: How to handle complex Parquet types.
     """
 

From e4ae07d92f9f23e9f5590474c8c8ce3f28b0271e Mon Sep 17 00:00:00 2001
From: Wei Qi Lu <wei@siftstack.com>
Date: Wed, 8 Apr 2026 15:00:54 -0700
Subject: [PATCH 27/52] add upload_file progress spinner and refactor global
 show_progress resolution into a util

---
 python/lib/sift_client/_internal/util/file.py | 42 +++++++++++++++----
 .../lib/sift_client/resources/data_imports.py | 25 +++++++++--
 python/lib/sift_client/resources/jobs.py      | 19 ++-------
 .../resources/sync_stubs/__init__.pyi         |  9 +++-
 4 files changed, 64 insertions(+), 31 deletions(-)

diff --git a/python/lib/sift_client/_internal/util/file.py b/python/lib/sift_client/_internal/util/file.py
index 4f3a5dead..1d93f44c6 100644
--- a/python/lib/sift_client/_internal/util/file.py
+++ b/python/lib/sift_client/_internal/util/file.py
@@ -8,6 +8,7 @@
 
 from alive_progress import alive_bar  # type: ignore[import-untyped]
 
+import sift_client as _sift_client_module
 from sift_client.errors import SiftWarning
 
 if TYPE_CHECKING:
@@ -16,11 +17,24 @@
     from sift_client.transport.rest_transport import RestClient
 
 
+def resolve_show_progress(*, is_sync: bool) -> bool:
+    """Resolve the show_progress setting from the global config.
+
+    Returns the global ``sift_client.config.show_progress`` value when set,
+    otherwise defaults to ``is_sync``.
+    """
+    global_setting = _sift_client_module.config.show_progress
+    if global_setting is not None:
+        return global_setting
+    return is_sync
+
+
 def upload_file(
     signed_url: str,
     file_path: Path,
     *,
     rest_client: RestClient,
+    show_progress: bool = False,
 ) -> dict:
     """Upload a file to a presigned URL.
 
@@ -28,6 +42,7 @@ def upload_file(
         signed_url: The presigned URL to upload to.
         file_path: Path to the file to upload.
         rest_client: The SDK rest client to use for the upload.
+        show_progress: If True, display a progress spinner during upload.
 
     Returns:
         The parsed JSON response from the server.
@@ -35,15 +50,24 @@ def upload_file(
     Raises:
         ValueError: If the upload request fails.
     """
-    with open(file_path, "rb") as f:
-        response = rest_client.post(
-            signed_url,
-            data=f,
-            headers={"Content-Disposition": f'attachment; filename="{file_path.name}"'},
-        )
-        if not response.ok:
-            raise ValueError(f"Upload failed ({response.status_code}): {response.text}")
-        return response.json()
+    with alive_bar(
+        title=f"Upload [{file_path.name}]",
+        bar=None,
+        spinner="dots_waves",
+        spinner_length=7,
+        monitor=False,
+        stats=False,
+        disable=not show_progress,
+    ):
+        with open(file_path, "rb") as f:
+            response = rest_client.post(
+                signed_url,
+                data=f,
+                headers={"Content-Disposition": f'attachment; filename="{file_path.name}"'},
+            )
+            if not response.ok:
+                raise ValueError(f"Upload failed ({response.status_code}): {response.text}")
+            return response.json()
 
 
 def download_file(
diff --git a/python/lib/sift_client/resources/data_imports.py b/python/lib/sift_client/resources/data_imports.py
index 7873e5dd8..73977670f 100644
--- a/python/lib/sift_client/resources/data_imports.py
+++ b/python/lib/sift_client/resources/data_imports.py
@@ -5,7 +5,11 @@
 
 from sift_client._internal.low_level_wrappers.data_imports import DataImportsLowLevelClient
 from sift_client._internal.util.executor import run_sync_function
-from sift_client._internal.util.file import extract_parquet_footer, upload_file
+from sift_client._internal.util.file import (
+    extract_parquet_footer,
+    resolve_show_progress,
+    upload_file,
+)
 from sift_client.resources._base import ResourceBase
 from sift_client.sift_types.channel import ChannelDataType
 from sift_client.sift_types.data_import import (
@@ -47,12 +51,15 @@ async def import_from_path(
         data_type: DataTypeKey | None = None,
         run_name: str | None = None,
         run_id: str | None = None,
+        show_progress: bool | None = None,
     ) -> Job:
         """Import data from a local file.
 
         Creates a data import on the server, uploads the file, and returns
-        a ``Job`` handle. Use ``job.wait_until_complete()`` to poll for
-        completion if needed.
+        a ``Job`` handle after uploading the file. The import processes
+        server-side and typically completes shortly after upload. Use
+        ``job.wait_until_complete()`` only if you need to confirm
+        completion before proceeding.
 
         When ``config`` is omitted the file format is auto-detected via
         ``detect_config`` (CSV and Parquet only). For other formats
@@ -76,6 +83,8 @@ async def import_from_path(
                 Defaults to the filename if neither ``run_name`` nor
                 ``run_id`` is set.
             run_id: Existing run ID to use. Overrides any value on the config.
+            show_progress: If True, display a progress spinner during upload.
+                Defaults to True for sync, False for async.
 
         Returns:
             A ``Job`` handle for the pending import.
@@ -115,10 +124,18 @@ async def import_from_path(
                 config.footer_offset = footer_offset
                 config.footer_length = len(footer_bytes)
 
+        if show_progress is None:
+            show_progress = resolve_show_progress(is_sync=getattr(self, "_is_sync", False))
+
         _, upload_url = await self._low_level_client.create_from_upload(config)
 
         response = await run_sync_function(
-            lambda: upload_file(upload_url, path, rest_client=self.client.rest_client)
+            lambda: upload_file(
+                upload_url,
+                path,
+                rest_client=self.client.rest_client,
+                show_progress=show_progress,
+            )
         )
         job_id = response["jobId"]
 
diff --git a/python/lib/sift_client/resources/jobs.py b/python/lib/sift_client/resources/jobs.py
index 6ddaec6ca..5e2bbdf9b 100644
--- a/python/lib/sift_client/resources/jobs.py
+++ b/python/lib/sift_client/resources/jobs.py
@@ -9,10 +9,9 @@
 
 from alive_progress import alive_bar  # type: ignore[import-untyped]
 
-import sift_client as _sift_client_module
 from sift_client._internal.low_level_wrappers.jobs import JobsLowLevelClient
 from sift_client._internal.util.executor import run_sync_function
-from sift_client._internal.util.file import download_file, extract_zip
+from sift_client._internal.util.file import download_file, extract_zip, resolve_show_progress
 from sift_client.resources._base import ResourceBase
 from sift_client.sift_types.job import DataExportStatusDetails, Job, JobStatus, JobType
 from sift_client.util import cel_utils as cel
@@ -194,13 +193,7 @@ async def wait_until_complete(
         """
         job_id = job._id_or_error if isinstance(job, Job) else job
         if show_progress is None:
-            global_setting = _sift_client_module.config.show_progress
-            if global_setting is not None:
-                show_progress = global_setting
-            elif getattr(self, "_is_sync", False):
-                show_progress = True
-            else:
-                show_progress = False
+            show_progress = resolve_show_progress(is_sync=getattr(self, "_is_sync", False))
 
         start = time.monotonic()
         with alive_bar(
@@ -263,13 +256,7 @@ async def wait_and_download(
         """
         job_id = job._id_or_error if isinstance(job, Job) else job
         if show_progress is None:
-            global_setting = _sift_client_module.config.show_progress
-            if global_setting is not None:
-                show_progress = global_setting
-            elif getattr(self, "_is_sync", False):
-                show_progress = True
-            else:
-                show_progress = False
+            show_progress = resolve_show_progress(is_sync=getattr(self, "_is_sync", False))
 
         completed_job = await self.wait_until_complete(
             job=job_id,
diff --git a/python/lib/sift_client/resources/sync_stubs/__init__.pyi b/python/lib/sift_client/resources/sync_stubs/__init__.pyi
index 6d19a903d..a94e782d5 100644
--- a/python/lib/sift_client/resources/sync_stubs/__init__.pyi
+++ b/python/lib/sift_client/resources/sync_stubs/__init__.pyi
@@ -690,12 +690,15 @@ class DataImportAPI:
         data_type: DataTypeKey | None = None,
         run_name: str | None = None,
         run_id: str | None = None,
+        show_progress: bool | None = None,
     ) -> Job:
         """Import data from a local file.
 
         Creates a data import on the server, uploads the file, and returns
-        a ``Job`` handle. Use ``job.wait_until_complete()`` to poll for
-        completion if needed.
+        a ``Job`` handle after uploading the file. The import processes
+        server-side and typically completes shortly after upload. Use
+        ``job.wait_until_complete()`` only if you need to confirm
+        completion before proceeding.
 
         When ``config`` is omitted the file format is auto-detected via
         ``detect_config`` (CSV and Parquet only). For other formats
@@ -719,6 +722,8 @@ class DataImportAPI:
                 Defaults to the filename if neither ``run_name`` nor
                 ``run_id`` is set.
             run_id: Existing run ID to use. Overrides any value on the config.
+            show_progress: If True, display a progress spinner during upload.
+                Defaults to True for sync, False for async.
 
         Returns:
             A ``Job`` handle for the pending import.

From 107eaa20e26713b2df7a480dbad42aadaff997d1 Mon Sep 17 00:00:00 2001
From: Wei Qi Lu <wei@siftstack.com>
Date: Wed, 8 Apr 2026 15:13:09 -0700
Subject: [PATCH 28/52] add error handling for missing job_id in upload response

---
 python/lib/sift_client/resources/data_imports.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/python/lib/sift_client/resources/data_imports.py b/python/lib/sift_client/resources/data_imports.py
index 73977670f..5c759d891 100644
--- a/python/lib/sift_client/resources/data_imports.py
+++ b/python/lib/sift_client/resources/data_imports.py
@@ -137,7 +137,9 @@ async def import_from_path(
                 show_progress=show_progress,
             )
         )
-        job_id = response["jobId"]
+        job_id = response.get("jobId")
+        if not job_id:
+            raise ValueError("Upload succeeded but server response did not include a job ID.")
 
         return await self.client.async_.jobs.get(job_id=job_id)
 

From 9fe594cac936f8205d927b98a08bf68c7d61b59a Mon Sep 17 00:00:00 2001
From: Wei Qi Lu <wei@siftstack.com>
Date: Wed, 8 Apr 2026 15:25:29 -0700
Subject: [PATCH 29/52] refactor to use run/asset objects

---
 .../lib/sift_client/resources/data_imports.py | 39 ++++++++++---------
 .../resources/sync_stubs/__init__.pyi         | 23 ++++++-----
 2 files changed, 32 insertions(+), 30 deletions(-)

diff --git a/python/lib/sift_client/resources/data_imports.py b/python/lib/sift_client/resources/data_imports.py
index 5c759d891..56b1eea57 100644
--- a/python/lib/sift_client/resources/data_imports.py
+++ b/python/lib/sift_client/resources/data_imports.py
@@ -11,6 +11,7 @@
     upload_file,
 )
 from sift_client.resources._base import ResourceBase
+from sift_client.sift_types.asset import Asset
 from sift_client.sift_types.channel import ChannelDataType
 from sift_client.sift_types.data_import import (
     EXTENSION_TO_DATA_TYPE_KEY,
@@ -22,6 +23,7 @@
     ParquetSingleChannelPerRowImportConfig,
     ParquetTimeColumn,
 )
+from sift_client.sift_types.run import Run
 
 if TYPE_CHECKING:
     from sift_client.client import SiftClient
@@ -46,11 +48,11 @@ async def import_from_path(
         self,
         file_path: str | Path,
         *,
-        asset_name: str | None = None,
+        asset: Asset | str | None = None,
         config: ImportConfig | None = None,
         data_type: DataTypeKey | None = None,
+        run: Run | str | None = None,
         run_name: str | None = None,
-        run_id: str | None = None,
         show_progress: bool | None = None,
     ) -> Job:
         """Import data from a local file.
@@ -64,25 +66,24 @@ async def import_from_path(
         When ``config`` is omitted the file format is auto-detected via
         ``detect_config`` (CSV and Parquet only). For other formats
         (TDMS, HDF5, CH10), ``config`` must be provided.
-        When ``asset_name`` is provided it overrides
-        the config value; otherwise the config's ``asset_name`` is used.
-        If neither ``run_name`` nor ``run_id`` is provided
-        (and none is set on the config), ``run_name`` defaults to the
-        filename.
+        When ``asset`` is provided it overrides the config value;
+        otherwise the config's ``asset_name`` is used.
+        If neither ``run`` nor ``run_name`` is provided (and none is
+        set on the config), ``run_name`` defaults to the filename.
 
         Args:
             file_path: Path to the local file to import.
-            asset_name: Name of the asset to import data into. Optional
+            asset: Asset object or asset name to import data into. Optional
                 when ``config`` already has ``asset_name`` set.
             config: Import configuration describing the file format and column
                 mapping. When provided, ``data_type`` is ignored.
             data_type: Explicit data type key. Required for formats like
                 Parquet where the extension alone is ambiguous. Only used
                 when ``config`` is not provided.
-            run_name: Run name to use. Overrides any value on the config.
-                Defaults to the filename if neither ``run_name`` nor
-                ``run_id`` is set.
-            run_id: Existing run ID to use. Overrides any value on the config.
+            run: ``Run`` object or run ID string to import into an existing
+                run. Mutually exclusive with ``run_name``.
+            run_name: Name for a new run. Defaults to the filename if
+                neither ``run`` nor ``run_name`` is set.
             show_progress: If True, display a progress spinner during upload.
                 Defaults to True for sync, False for async.
 
@@ -99,16 +100,18 @@ async def import_from_path(
         if config is None:
             config = await self.detect_config(file_path, data_type=data_type)
 
-        if asset_name is not None:
-            config.asset_name = asset_name
+        if asset is not None:
+            config.asset_name = asset.name if isinstance(asset, Asset) else asset
         elif not config.asset_name:
-            raise ValueError("'asset_name' is required when not set on the config.")
-        if run_id is not None:
+            raise ValueError("'asset' is required when not set on the config.")
+        if run is not None and run_name is not None:
+            raise ValueError("'run' and 'run_name' are mutually exclusive.")
+        if run is not None:
             if isinstance(config, Ch10ImportConfig):
                 raise ValueError(
-                    "'run_id' is not supported for Ch10ImportConfig. Use 'run_name' instead."
+                    "'run' is not supported for Ch10ImportConfig. Use 'run_name' instead."
                 )
-            config.run_id = run_id
+            config.run_id = run._id_or_error if isinstance(run, Run) else run
         elif run_name is not None:
             config.run_name = run_name
         elif not config.run_name and (isinstance(config, Ch10ImportConfig) or not config.run_id):
diff --git a/python/lib/sift_client/resources/sync_stubs/__init__.pyi b/python/lib/sift_client/resources/sync_stubs/__init__.pyi
index a94e782d5..61ffa9f54 100644
--- a/python/lib/sift_client/resources/sync_stubs/__init__.pyi
+++ b/python/lib/sift_client/resources/sync_stubs/__init__.pyi
@@ -685,11 +685,11 @@ class DataImportAPI:
         self,
         file_path: str | Path,
         *,
-        asset_name: str | None = None,
+        asset: Asset | str | None = None,
         config: ImportConfig | None = None,
         data_type: DataTypeKey | None = None,
+        run: Run | str | None = None,
         run_name: str | None = None,
-        run_id: str | None = None,
         show_progress: bool | None = None,
     ) -> Job:
         """Import data from a local file.
@@ -703,25 +703,24 @@ class DataImportAPI:
         When ``config`` is omitted the file format is auto-detected via
         ``detect_config`` (CSV and Parquet only). For other formats
         (TDMS, HDF5, CH10), ``config`` must be provided.
-        When ``asset_name`` is provided it overrides
-        the config value; otherwise the config's ``asset_name`` is used.
-        If neither ``run_name`` nor ``run_id`` is provided
-        (and none is set on the config), ``run_name`` defaults to the
-        filename.
+        When ``asset`` is provided it overrides the config value;
+        otherwise the config's ``asset_name`` is used.
+        If neither ``run`` nor ``run_name`` is provided (and none is
+        set on the config), ``run_name`` defaults to the filename.
 
         Args:
             file_path: Path to the local file to import.
-            asset_name: Name of the asset to import data into. Optional
+            asset: Asset object or asset name to import data into. Optional
                 when ``config`` already has ``asset_name`` set.
             config: Import configuration describing the file format and column
                 mapping. When provided, ``data_type`` is ignored.
             data_type: Explicit data type key. Required for formats like
                 Parquet where the extension alone is ambiguous. Only used
                 when ``config`` is not provided.
-            run_name: Run name to use. Overrides any value on the config.
-                Defaults to the filename if neither ``run_name`` nor
-                ``run_id`` is set.
-            run_id: Existing run ID to use. Overrides any value on the config.
+            run: ``Run`` object or run ID string to import into an existing
+                run. Mutually exclusive with ``run_name``.
+            run_name: Name for a new run. Defaults to the filename if
+                neither ``run`` nor ``run_name`` is set.
             show_progress: If True, display a progress spinner during upload.
                 Defaults to True for sync, False for async.
 

From d3377d1e404eea2e585051a72ed7276020ae6017 Mon Sep 17 00:00:00 2001
From: Wei Qi Lu <wei@siftstack.com>
Date: Wed, 8 Apr 2026 15:42:11 -0700
Subject: [PATCH 30/52] refactor file format config handling into private
 helpers and update error message

---
 .../lib/sift_client/resources/data_imports.py | 168 ++++++++++--------
 1 file changed, 94 insertions(+), 74 deletions(-)

diff --git a/python/lib/sift_client/resources/data_imports.py b/python/lib/sift_client/resources/data_imports.py
index 56b1eea57..46ef5cdb8 100644
--- a/python/lib/sift_client/resources/data_imports.py
+++ b/python/lib/sift_client/resources/data_imports.py
@@ -120,12 +120,7 @@ async def import_from_path(
         if isinstance(
             config, (ParquetFlatDatasetImportConfig, ParquetSingleChannelPerRowImportConfig)
         ):
-            if config.footer_offset == 0 and config.footer_length == 0:
-                footer_bytes, footer_offset = await run_sync_function(
-                    lambda: extract_parquet_footer(path)
-                )
-                config.footer_offset = footer_offset
-                config.footer_length = len(footer_bytes)
+            await _prepare_parquet_config(config, path)
 
         if show_progress is None:
             show_progress = resolve_show_progress(is_sync=getattr(self, "_is_sync", False))
@@ -191,23 +186,7 @@ async def detect_config(
         if not path.is_file():
             raise FileNotFoundError(f"File not found: {file_path}")
 
-        ext = path.suffix.lower()
-        if ext in (".parquet", ".pqt"):
-            if data_type is None:
-                raise ValueError(
-                    "Parquet files require 'data_type' to be specified. "
-                    "Use DataTypeKey.PARQUET_FLATDATASET or DataTypeKey.PARQUET_SINGLE_CHANNEL_PER_ROW."
-                )
-            data_type_key = data_type
-        elif data_type is not None:
-            data_type_key = data_type
-        else:
-            if ext not in EXTENSION_TO_DATA_TYPE_KEY:
-                raise ValueError(
-                    f"Unsupported file extension '{ext}'. "
-                    f"Supported: {', '.join(sorted(EXTENSION_TO_DATA_TYPE_KEY))}"
-                )
-            data_type_key = EXTENSION_TO_DATA_TYPE_KEY[ext]
+        data_type_key = _resolve_data_type_key(path.suffix.lower(), data_type)
 
         is_parquet = data_type_key in (
             DataTypeKey.PARQUET_FLATDATASET,
@@ -234,58 +213,99 @@ def _read_sample() -> bytes:
         response = await self._low_level_client.detect_config(sample, data_type_key.value)
 
         if response.HasField("csv_config"):
-            csv_config = CsvImportConfig._from_proto(response.csv_config)
-            # Filter out the time column from data_columns to avoid overlap.
-            time_col = csv_config.time_column.column
-            csv_config.data_columns = [
-                dc for dc in csv_config.data_columns if dc.column != time_col
-            ]
-            if not csv_config.data_columns:
-                raise ValueError(f"No data columns detected in '{path.name}'.")
-            return csv_config
+            return _parse_csv_detect_response(response.csv_config, path.name)
 
         if response.HasField("parquet_config"):
-            proto = response.parquet_config
-            if proto.HasField("flat_dataset"):
-                parquet_config = ParquetFlatDatasetImportConfig._from_proto(
-                    proto, footer_offset=footer_offset, footer_length=footer_length
-                )
-                # Filter out the time column from data_columns to avoid overlap.
-                time_path = parquet_config.time_column.path
-                if time_path:
-                    parquet_config.data_columns = [
-                        dc for dc in parquet_config.data_columns if dc.path != time_path
-                    ]
-                else:
-                    # The backend only detects arrow timestamp types. Fall back to
-                    # an integer column whose name starts with "time".
-                    _integer_types = {
-                        ChannelDataType.INT_32,
-                        ChannelDataType.INT_64,
-                        ChannelDataType.UINT_32,
-                        ChannelDataType.UINT_64,
-                    }
-                    match = None
-                    for dc in parquet_config.data_columns:
-                        if dc.data_type in _integer_types and dc.name.lower().startswith("time"):
-                            match = dc
-                            break
-                    if match is not None:
-                        parquet_config.time_column = ParquetTimeColumn(path=match.path)
-                        parquet_config.data_columns = [
-                            c for c in parquet_config.data_columns if c.path != match.path
-                        ]
-                if not parquet_config.time_column.path:
-                    raise ValueError(
-                        f"No time column detected in '{path.name}'. "
-                        "Set the time column manually on the config before importing."
-                    )
-                if not parquet_config.data_columns:
-                    raise ValueError(f"No data columns detected in '{path.name}'.")
-                return parquet_config
-            elif proto.HasField("single_channel_per_row"):
-                return ParquetSingleChannelPerRowImportConfig._from_proto(
-                    proto, footer_offset=footer_offset, footer_length=footer_length
-                )
+            return _parse_parquet_detect_response(
+                response.parquet_config, path.name, footer_offset, footer_length
+            )
 
         raise ValueError("Server returned an empty DetectConfig response.")
+
+
+def _resolve_data_type_key(ext: str, data_type: DataTypeKey | None) -> DataTypeKey:
+    """Resolve the data type key from file extension and explicit override."""
+    if ext in (".parquet", ".pqt"):
+        if data_type is None:
+            raise ValueError(
+                "Parquet files require 'data_type' to be specified. "
+                "Use DataTypeKey.PARQUET_FLATDATASET or DataTypeKey.PARQUET_SINGLE_CHANNEL_PER_ROW."
+            )
+        return data_type
+    if data_type is not None:
+        return data_type
+    if ext not in EXTENSION_TO_DATA_TYPE_KEY:
+        raise ValueError(
+            f"Unsupported file extension '{ext}'. "
+            f"Supported: {', '.join(sorted(EXTENSION_TO_DATA_TYPE_KEY))}. "
+            "You can also specify 'data_type' explicitly using a DataTypeKey value."
+        )
+    return EXTENSION_TO_DATA_TYPE_KEY[ext]
+
+
+def _parse_csv_detect_response(proto, filename: str) -> CsvImportConfig:
+    """Parse a CSV DetectConfig response into a config."""
+    csv_config = CsvImportConfig._from_proto(proto)
+    time_col = csv_config.time_column.column
+    csv_config.data_columns = [dc for dc in csv_config.data_columns if dc.column != time_col]
+    if not csv_config.data_columns:
+        raise ValueError(f"No data columns detected in '{filename}'.")
+    return csv_config
+
+
+def _parse_parquet_detect_response(
+    proto, filename: str, footer_offset: int, footer_length: int
+) -> ParquetFlatDatasetImportConfig | ParquetSingleChannelPerRowImportConfig:
+    """Parse a Parquet DetectConfig response into a config."""
+    if proto.HasField("flat_dataset"):
+        parquet_config = ParquetFlatDatasetImportConfig._from_proto(
+            proto, footer_offset=footer_offset, footer_length=footer_length
+        )
+        time_path = parquet_config.time_column.path
+        if time_path:
+            parquet_config.data_columns = [
+                dc for dc in parquet_config.data_columns if dc.path != time_path
+            ]
+        else:
+            # The backend only detects arrow timestamp types. Fall back to
+            # an integer column whose name starts with "time".
+            _integer_types = {
+                ChannelDataType.INT_32,
+                ChannelDataType.INT_64,
+                ChannelDataType.UINT_32,
+                ChannelDataType.UINT_64,
+            }
+            match = None
+            for dc in parquet_config.data_columns:
+                if dc.data_type in _integer_types and dc.name.lower().startswith("time"):
+                    match = dc
+                    break
+            if match is not None:
+                parquet_config.time_column = ParquetTimeColumn(path=match.path)
+                parquet_config.data_columns = [
+                    c for c in parquet_config.data_columns if c.path != match.path
+                ]
+        if not parquet_config.time_column.path:
+            raise ValueError(
+                f"No time column detected in '{filename}'. "
+                "Set the time column manually on the config before importing."
+            )
+        if not parquet_config.data_columns:
+            raise ValueError(f"No data columns detected in '{filename}'.")
+        return parquet_config
+    elif proto.HasField("single_channel_per_row"):
+        return ParquetSingleChannelPerRowImportConfig._from_proto(
+            proto, footer_offset=footer_offset, footer_length=footer_length
+        )
+    raise ValueError(f"Unsupported parquet layout in DetectConfig response for '{filename}'.")
+
+
+async def _prepare_parquet_config(
+    config: ParquetFlatDatasetImportConfig | ParquetSingleChannelPerRowImportConfig,
+    path: Path,
+) -> None:
+    """Populate parquet footer fields on the config if not already set."""
+    if config.footer_offset == 0 and config.footer_length == 0:
+        footer_bytes, footer_offset = await run_sync_function(lambda: extract_parquet_footer(path))
+        config.footer_offset = footer_offset
+        config.footer_length = len(footer_bytes)

From 07007f955c2ee6ba80df1f6c1ae94f5ddbc04a67 Mon Sep 17 00:00:00 2001
From: Wei Qi Lu <wei@siftstack.com>
Date: Thu, 9 Apr 2026 11:32:56 -0700
Subject: [PATCH 31/52] refactor time column and config classes to use
 inheritance

---
 .../_tests/resources/test_data_imports.py     |   4 +-
 .../lib/sift_client/sift_types/data_import.py | 103 +++++++-----------
 2 files changed, 43 insertions(+), 64 deletions(-)

diff --git a/python/lib/sift_client/_tests/resources/test_data_imports.py b/python/lib/sift_client/_tests/resources/test_data_imports.py
index 05bc09f88..65d8e5785 100644
--- a/python/lib/sift_client/_tests/resources/test_data_imports.py
+++ b/python/lib/sift_client/_tests/resources/test_data_imports.py
@@ -271,9 +271,9 @@ def test_to_proto_defaults(self):
         assert proto.run_name == ""
         assert proto.scale_values is False
 
-    def test_no_run_id_field(self):
+    def test_run_id_inherited_but_unused(self):
         config = Ch10ImportConfig(asset_name="my_asset")
-        assert not hasattr(config, "run_id")
+        assert config.run_id is None
 
 
 class TestTdmsConfig:
diff --git a/python/lib/sift_client/sift_types/data_import.py b/python/lib/sift_client/sift_types/data_import.py
index 9c5f60931..6963715af 100644
--- a/python/lib/sift_client/sift_types/data_import.py
+++ b/python/lib/sift_client/sift_types/data_import.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+from abc import ABC
 from datetime import datetime  # noqa: TC003
 from enum import Enum
 from typing import Union
@@ -82,7 +83,27 @@ class DataTypeKey(Enum):
 }
 
 
-class CsvTimeColumn(BaseModel):
+class TimeColumnBase(BaseModel, ABC):
+    """Base class for time column configurations.
+
+    Attributes:
+        format: The time format used in this column.
+        relative_start_time: Required when using a relative time format.
+    """
+
+    format: TimeFormat
+    relative_start_time: datetime | None = None
+
+    @model_validator(mode="after")
+    def _check_relative_start_time(self) -> TimeColumnBase:
+        if self.format.name.startswith("RELATIVE_") and self.relative_start_time is None:
+            raise ValueError(
+                f"'relative_start_time' is required when using a relative time format ({self.format.name})."
+            )
+        return self
+
+
+class CsvTimeColumn(TimeColumnBase):
     """Time column configuration for CSV imports.
 
     Attributes:
@@ -92,8 +113,6 @@ class CsvTimeColumn(BaseModel):
     """
 
     column: int
-    format: TimeFormat
-    relative_start_time: datetime | None = None
 
     def _to_proto(self) -> CsvTimeColumnProto:
         proto = CsvTimeColumnProto(
@@ -104,14 +123,6 @@ def _to_proto(self) -> CsvTimeColumnProto:
             proto.relative_start_time.CopyFrom(to_pb_timestamp(self.relative_start_time))
         return proto
 
-    @model_validator(mode="after")
-    def _check_relative_start_time(self) -> CsvTimeColumn:
-        if self.format.name.startswith("RELATIVE_") and self.relative_start_time is None:
-            raise ValueError(
-                f"'relative_start_time' is required when using a relative time format ({self.format.name})."
-            )
-        return self
-
 
 class CsvDataColumn(BaseModel):
     """A data column definition for CSV imports.
@@ -131,21 +142,29 @@ class CsvDataColumn(BaseModel):
     description: str = ""
 
 
-class CsvImportConfig(BaseModel):
-    """Configuration for importing a CSV file.
+class ImportConfigBase(BaseModel, ABC):
+    """Base class for all import configurations.
 
     Attributes:
         asset_name: Name of the asset to import data into.
         run_name: Name for the run. Ignored if ``run_id`` is set.
         run_id: ID of an existing run to append data to.
-        first_data_row: The first row containing data (1-indexed). Defaults to 2 to skip a header row.
-        time_column: Time column configuration.
-        data_columns: List of data column definitions.
     """
 
     asset_name: str
     run_name: str | None = None
     run_id: str | None = None
+
+
+class CsvImportConfig(ImportConfigBase):
+    """Configuration for importing a CSV file.
+
+    Attributes:
+        first_data_row: The first row containing data (1-indexed). Defaults to 2 to skip a header row.
+        time_column: Time column configuration.
+        data_columns: List of data column definitions.
+    """
+
     first_data_row: int = 2
     time_column: CsvTimeColumn
     data_columns: list[CsvDataColumn]
@@ -229,7 +248,7 @@ class ParquetComplexTypesImportMode(Enum):
     BYTES = PARQUET_COMPLEX_TYPES_IMPORT_MODE_BYTES
 
 
-class ParquetTimeColumn(BaseModel):
+class ParquetTimeColumn(TimeColumnBase):
     """Time column configuration for Parquet imports.
 
     Attributes:
@@ -240,7 +259,6 @@ class ParquetTimeColumn(BaseModel):
 
     path: str
     format: TimeFormat = TimeFormat.ABSOLUTE_UNIX_NANOSECONDS
-    relative_start_time: datetime | None = None
 
     def _to_proto(self) -> ParquetTimeColumnProto:
         if not self.path:
@@ -268,14 +286,6 @@ def _from_proto(cls, proto: ParquetTimeColumnProto) -> ParquetTimeColumn:
             relative_start_time=relative_start_time,
         )
 
-    @model_validator(mode="after")
-    def _check_relative_start_time(self) -> ParquetTimeColumn:
-        if self.format.name.startswith("RELATIVE_") and self.relative_start_time is None:
-            raise ValueError(
-                f"'relative_start_time' is required when using a relative time format ({self.format.name})."
-            )
-        return self
-
 
 class ParquetDataColumn(BaseModel):
     """A data column definition for Parquet flat dataset imports.
@@ -295,15 +305,12 @@ class ParquetDataColumn(BaseModel):
     description: str = ""
 
 
-class ParquetFlatDatasetImportConfig(BaseModel):
+class ParquetFlatDatasetImportConfig(ImportConfigBase):
     """Configuration for importing a Parquet file with a flat dataset layout.
 
     Each column in the file maps to a separate channel.
 
     Attributes:
-        asset_name: Name of the asset to import data into.
-        run_name: Name for the run. Ignored if ``run_id`` is set.
-        run_id: ID of an existing run to append data to.
         time_column: Time column configuration.
         data_columns: List of data column definitions.
         footer_offset: Byte offset where the Parquet footer begins. Populated
@@ -313,9 +320,6 @@ class ParquetFlatDatasetImportConfig(BaseModel):
         complex_types_import_mode: How to handle complex Parquet types.
     """
 
-    asset_name: str
-    run_name: str | None = None
-    run_id: str | None = None
     time_column: ParquetTimeColumn
     data_columns: list[ParquetDataColumn]
     footer_offset: int = 0
@@ -430,16 +434,13 @@ class ParquetMultiChannelConfig(BaseModel):
     data_path: str
 
 
-class ParquetSingleChannelPerRowImportConfig(BaseModel):
+class ParquetSingleChannelPerRowImportConfig(ImportConfigBase):
     """Configuration for importing a Parquet file where each row represents
     a single channel's data point.
 
     Exactly one of ``single_channel`` or ``multi_channel`` must be set.
 
     Attributes:
-        asset_name: Name of the asset to import data into.
-        run_name: Name for the run. Ignored if ``run_id`` is set.
-        run_id: ID of an existing run to append data to.
         time_column: Time column configuration.
         single_channel: Set when the entire file contains data for one channel.
         multi_channel: Set when each row identifies its channel via a name column.
@@ -450,9 +451,6 @@ class ParquetSingleChannelPerRowImportConfig(BaseModel):
         complex_types_import_mode: How to handle complex Parquet types.
     """
 
-    asset_name: str
-    run_name: str | None = None
-    run_id: str | None = None
     time_column: ParquetTimeColumn
     single_channel: ParquetSingleChannelConfig | None = None
     multi_channel: ParquetMultiChannelConfig | None = None
@@ -540,17 +538,13 @@ def _from_proto(
         )
 
 
-class Ch10ImportConfig(BaseModel):
+class Ch10ImportConfig(ImportConfigBase):
     """Configuration for importing a CH10 file.
 
     Attributes:
-        asset_name: Name of the asset to import data into.
-        run_name: Name for the run.
         scale_values: Whether to apply EU (engineering unit) scaling to channel values.
     """
 
-    asset_name: str
-    run_name: str | None = None
     scale_values: bool = False
 
     def _to_proto(self) -> Ch10ConfigProto:
@@ -561,21 +555,15 @@ def _to_proto(self) -> Ch10ConfigProto:
         )
 
 
-class TdmsImportConfig(BaseModel):
+class TdmsImportConfig(ImportConfigBase):
     """Configuration for importing a TDMS file.
 
     Attributes:
-        asset_name: Name of the asset to import data into.
-        run_name: Name for the run. Ignored if ``run_id`` is set.
-        run_id: ID of an existing run to append data to.
         start_time_override: Override the ``wf_start_time`` metadata field for all channels.
             Useful when waveform channels have ``wf_increment`` but no ``wf_start_time``.
         file_size: The file size in bytes. Required if the file has truncated chunks.
     """
 
-    asset_name: str
-    run_name: str | None = None
-    run_id: str | None = None
     start_time_override: datetime | None = None
     file_size: int | None = None
 
@@ -622,21 +610,15 @@ class Hdf5DataColumn(BaseModel):
     value_field: str | None = None
 
 
-class Hdf5ImportConfig(BaseModel):
+class Hdf5ImportConfig(ImportConfigBase):
     """Configuration for importing an HDF5 file.
 
     Attributes:
-        asset_name: Name of the asset to import data into.
-        run_name: Name for the run. Ignored if ``run_id`` is set.
-        run_id: ID of an existing run to append data to.
         data: List of dataset mappings, each pairing a time and value dataset to a channel.
         time_format: The time format used across all time datasets.
         relative_start_time: Required when using a relative time format.
     """
 
-    asset_name: str
-    run_name: str | None = None
-    run_id: str | None = None
     data: list[Hdf5DataColumn]
     time_format: TimeFormat
     relative_start_time: datetime | None = None
@@ -678,9 +660,6 @@ def _to_proto(self) -> Hdf5ConfigProto:
         return proto
 
 
-# Note: Using Union instead of | syntax for Python 3.9 compatibility at module level.
-# While `from __future__ import annotations` allows | in type hints (they're strings),
-# module-level type aliases are evaluated at runtime and require Union in Python <3.10.
 ImportConfig = Union[
     CsvImportConfig,
     ParquetFlatDatasetImportConfig,

From fca83316001d2d0acd93dcb0ab131bcca8af35b3 Mon Sep 17 00:00:00 2001
From: Wei Qi Lu <wei@siftstack.com>
Date: Thu, 9 Apr 2026 11:50:42 -0700
Subject: [PATCH 32/52] document detect_config workflow in import_from_path

---
 .../lib/sift_client/resources/data_imports.py | 31 ++++++++++++++++++-
 .../resources/sync_stubs/__init__.pyi         | 31 ++++++++++++++++++-
 2 files changed, 60 insertions(+), 2 deletions(-)

diff --git a/python/lib/sift_client/resources/data_imports.py b/python/lib/sift_client/resources/data_imports.py
index 46ef5cdb8..174cfc452 100644
--- a/python/lib/sift_client/resources/data_imports.py
+++ b/python/lib/sift_client/resources/data_imports.py
@@ -71,12 +71,41 @@ async def import_from_path(
         If neither ``run`` nor ``run_name`` is provided (and none is
         set on the config), ``run_name`` defaults to the filename.
 
+        Examples:
+            Import a CSV file with auto-detected config:
+
+                job = client.data_imports.import_from_path(
+                    "data.csv",
+                    asset=my_asset,
+                )
+
+            Auto-detect config, inspect and patch before importing:
+
+                config = client.data_imports.detect_config("data.csv")
+
+                # Fix a column data type
+                config.get_column("temperature").data_type = ChannelDataType.FLOAT
+
+                # Remove an unwanted column
+                config.data_columns = [
+                    dc for dc in config.data_columns if dc.name != "internal_id"
+                ]
+
+                job = client.data_imports.import_from_path(
+                    "data.csv",
+                    asset=my_asset,
+                    config=config,
+                )
+
         Args:
             file_path: Path to the local file to import.
             asset: Asset object or asset name to import data into. Optional
                 when ``config`` already has ``asset_name`` set.
             config: Import configuration describing the file format and column
-                mapping. When provided, ``data_type`` is ignored.
+                mapping. When provided, ``data_type`` is ignored. If omitted,
+                the config is auto-detected via ``detect_config``. You can
+                call ``detect_config`` yourself to inspect and modify the
+                config before passing it here.
             data_type: Explicit data type key. Required for formats like
                 Parquet where the extension alone is ambiguous. Only used
                 when ``config`` is not provided.
diff --git a/python/lib/sift_client/resources/sync_stubs/__init__.pyi b/python/lib/sift_client/resources/sync_stubs/__init__.pyi
index 61ffa9f54..cc1b6556e 100644
--- a/python/lib/sift_client/resources/sync_stubs/__init__.pyi
+++ b/python/lib/sift_client/resources/sync_stubs/__init__.pyi
@@ -708,12 +708,41 @@ class DataImportAPI:
         If neither ``run`` nor ``run_name`` is provided (and none is
         set on the config), ``run_name`` defaults to the filename.
 
+        Examples:
+            Import a CSV file with auto-detected config:
+
+                job = client.data_imports.import_from_path(
+                    "data.csv",
+                    asset=my_asset,
+                )
+
+            Auto-detect config, inspect and patch before importing:
+
+                config = client.data_imports.detect_config("data.csv")
+
+                # Fix a column data type
+                config.get_column("temperature").data_type = ChannelDataType.FLOAT
+
+                # Remove an unwanted column
+                config.data_columns = [
+                    dc for dc in config.data_columns if dc.name != "internal_id"
+                ]
+
+                job = client.data_imports.import_from_path(
+                    "data.csv",
+                    asset=my_asset,
+                    config=config,
+                )
+
         Args:
             file_path: Path to the local file to import.
             asset: Asset object or asset name to import data into. Optional
                 when ``config`` already has ``asset_name`` set.
             config: Import configuration describing the file format and column
-                mapping. When provided, ``data_type`` is ignored.
+                mapping. When provided, ``data_type`` is ignored. If omitted,
+                the config is auto-detected via ``detect_config``. You can
+                call ``detect_config`` yourself to inspect and modify the
+                config before passing it here.
             data_type: Explicit data type key. Required for formats like
                 Parquet where the extension alone is ambiguous. Only used
                 when ``config`` is not provided.

From deb35902d4967e08215905e608e7d2528bc091f8 Mon Sep 17 00:00:00 2001
From: Wei Qi Lu <wei@siftstack.com>
Date: Thu, 9 Apr 2026 12:10:34 -0700
Subject: [PATCH 33/52] replace attribute-mutation tests with proto round-trip
 and _resolve_data_type_key tests

---
 .../_tests/resources/test_data_imports.py     | 234 +++++++-----------
 1 file changed, 88 insertions(+), 146 deletions(-)

diff --git a/python/lib/sift_client/_tests/resources/test_data_imports.py b/python/lib/sift_client/_tests/resources/test_data_imports.py
index 65d8e5785..ff99efe3c 100644
--- a/python/lib/sift_client/_tests/resources/test_data_imports.py
+++ b/python/lib/sift_client/_tests/resources/test_data_imports.py
@@ -4,9 +4,9 @@
 
 import pytest
 
+from sift_client.resources.data_imports import _resolve_data_type_key
 from sift_client.sift_types.channel import ChannelDataType
 from sift_client.sift_types.data_import import (
-    EXTENSION_TO_DATA_TYPE_KEY,
     Ch10ImportConfig,
     CsvDataColumn,
     CsvImportConfig,
@@ -16,6 +16,7 @@
     Hdf5ImportConfig,
     ParquetDataColumn,
     ParquetFlatDatasetImportConfig,
+    ParquetSingleChannelPerRowImportConfig,
     ParquetTimeColumn,
     TdmsImportConfig,
     TimeFormat,
@@ -57,54 +58,6 @@ def parquet_config():
     )
 
 
-class TestCsvConfigMutability:
-    def test_mutate_asset_name(self, csv_config):
-        csv_config.asset_name = "new_asset"
-        assert csv_config.asset_name == "new_asset"
-
-    def test_mutate_run_name(self, csv_config):
-        csv_config.run_name = "new_run"
-        assert csv_config.run_name == "new_run"
-
-    def test_mutate_column_data_type(self, csv_config):
-        csv_config.data_columns[1].data_type = ChannelDataType.STRING
-        assert csv_config.data_columns[1].data_type == ChannelDataType.STRING
-
-    def test_mutate_column_name(self, csv_config):
-        csv_config.data_columns[0].name = "cpu_utilization"
-        assert csv_config.data_columns[0].name == "cpu_utilization"
-
-    def test_append_column(self, csv_config):
-        csv_config.data_columns.append(
-            CsvDataColumn(column=5, name="pressure", data_type=ChannelDataType.DOUBLE)
-        )
-        assert len(csv_config.data_columns) == 4
-        assert csv_config.data_columns[-1].name == "pressure"
-
-    def test_remove_column(self, csv_config):
-        csv_config.data_columns = [
-            dc for dc in csv_config.data_columns if dc.name != "status_flags"
-        ]
-        assert len(csv_config.data_columns) == 2
-        assert all(dc.name != "status_flags" for dc in csv_config.data_columns)
-
-
-class TestParquetConfigMutability:
-    def test_mutate_asset_name(self, parquet_config):
-        parquet_config.asset_name = "new_asset"
-        assert parquet_config.asset_name == "new_asset"
-
-    def test_mutate_column_data_type(self, parquet_config):
-        parquet_config.data_columns[1].data_type = ChannelDataType.STRING
-        assert parquet_config.data_columns[1].data_type == ChannelDataType.STRING
-
-    def test_append_column(self, parquet_config):
-        parquet_config.data_columns.append(
-            ParquetDataColumn(path="pressure", name="pressure", data_type=ChannelDataType.DOUBLE)
-        )
-        assert len(parquet_config.data_columns) == 4
-
-
 class TestGetColumn:
     def test_csv_get_column(self, csv_config):
         col = csv_config.get_column("cpu_util")
@@ -169,94 +122,6 @@ def test_absolute_time_does_not_require_start_time(self):
         assert col.relative_start_time is None
 
 
-class TestDataTypeKey:
-    def test_csv_extension(self):
-        assert EXTENSION_TO_DATA_TYPE_KEY[".csv"] == DataTypeKey.CSV
-
-    def test_parquet_not_in_extension_map(self):
-        assert ".parquet" not in EXTENSION_TO_DATA_TYPE_KEY
-
-    def test_hdf5_extensions(self):
-        assert EXTENSION_TO_DATA_TYPE_KEY[".h5"] == DataTypeKey.HDF5
-        assert EXTENSION_TO_DATA_TYPE_KEY[".hdf5"] == DataTypeKey.HDF5
-
-
-class TestDetectConfigValidation:
-    """Tests for validation checks applied after detect_config."""
-
-    def test_csv_no_data_columns_raises(self):
-        """If all columns are filtered out, detect_config should raise."""
-        config = CsvImportConfig(
-            asset_name="",
-            time_column=CsvTimeColumn(column=1, format=TimeFormat.ABSOLUTE_RFC3339),
-            data_columns=[],
-        )
-        assert not config.data_columns
-
-    def test_parquet_empty_time_column_path(self):
-        """An empty time column path indicates detection failed."""
-        config = ParquetFlatDatasetImportConfig(
-            asset_name="",
-            time_column=ParquetTimeColumn(path=""),
-            data_columns=[
-                ParquetDataColumn(
-                    path="cpu_util", name="cpu_util", data_type=ChannelDataType.DOUBLE
-                ),
-            ],
-        )
-        assert not config.time_column.path
-
-    def test_parquet_no_data_columns(self):
-        """A config with no data columns indicates detection found nothing useful."""
-        config = ParquetFlatDatasetImportConfig(
-            asset_name="",
-            time_column=ParquetTimeColumn(path="timestamp"),
-            data_columns=[],
-        )
-        assert not config.data_columns
-
-    def test_parquet_integer_time_column_fallback(self):
-        """An integer column starting with 'time' should be usable as the time column."""
-        config = ParquetFlatDatasetImportConfig(
-            asset_name="",
-            time_column=ParquetTimeColumn(path=""),
-            data_columns=[
-                ParquetDataColumn(path="time_ns", name="time_ns", data_type=ChannelDataType.INT_64),
-                ParquetDataColumn(
-                    path="cpu_util", name="cpu_util", data_type=ChannelDataType.DOUBLE
-                ),
-            ],
-        )
-        _integer_types = {
-            ChannelDataType.INT_32,
-            ChannelDataType.INT_64,
-            ChannelDataType.UINT_32,
-            ChannelDataType.UINT_64,
-        }
-        match = None
-        for dc in config.data_columns:
-            if dc.data_type in _integer_types and dc.name.lower().startswith("time"):
-                match = dc
-                break
-        assert match is not None
-        assert match.path == "time_ns"
-
-
-class TestRunPrecedence:
-    def test_run_id_ignored_when_none(self, csv_config):
-        csv_config.run_id = None
-        csv_config.run_name = "my_run"
-        proto = csv_config._to_proto()
-        assert proto.run_name == "my_run"
-        assert proto.run_id == ""
-
-    def test_run_id_set(self, csv_config):
-        csv_config.run_id = "run_123"
-        csv_config.run_name = "ignored"
-        proto = csv_config._to_proto()
-        assert proto.run_id == "run_123"
-
-
 class TestCh10Config:
     def test_to_proto(self):
         config = Ch10ImportConfig(asset_name="my_asset", run_name="run1", scale_values=True)
@@ -271,10 +136,6 @@ def test_to_proto_defaults(self):
         assert proto.run_name == ""
         assert proto.scale_values is False
 
-    def test_run_id_inherited_but_unused(self):
-        config = Ch10ImportConfig(asset_name="my_asset")
-        assert config.run_id is None
-
 
 class TestTdmsConfig:
     def test_to_proto(self):
@@ -421,9 +282,90 @@ def test_absolute_time_no_start_time_required(self):
         assert not proto.HasField("relative_start_time")
 
 
-class TestExtensionMap:
-    def test_tdms_extension(self):
-        assert EXTENSION_TO_DATA_TYPE_KEY[".tdms"] == DataTypeKey.TDMS
+class TestCsvToProto:
+    def test_to_proto(self, csv_config):
+        proto = csv_config._to_proto()
+        assert proto.asset_name == "test_asset"
+        assert proto.run_name == "test_run"
+        assert proto.first_data_row == 2
+        assert proto.time_column.column_number == 1
+        assert len(proto.data_columns) == 3
+        assert proto.data_columns[2].name == "cpu_util"
+
+    def test_from_proto_round_trip(self, csv_config):
+        proto = csv_config._to_proto()
+        restored = CsvImportConfig._from_proto(proto)
+        assert restored.asset_name == csv_config.asset_name
+        assert restored.run_name == csv_config.run_name
+        assert restored.first_data_row == csv_config.first_data_row
+        assert restored.time_column.column == csv_config.time_column.column
+        assert len(restored.data_columns) == len(csv_config.data_columns)
+
+
+class TestParquetToProto:
+    def test_flat_dataset_to_proto(self, parquet_config):
+        proto = parquet_config._to_proto()
+        assert proto.asset_name == "test_asset"
+        assert proto.HasField("flat_dataset")
+        assert proto.flat_dataset.time_column.path == "timestamp"
+        assert len(proto.flat_dataset.data_columns) == 3
+
+    def test_flat_dataset_from_proto_round_trip(self, parquet_config):
+        proto = parquet_config._to_proto()
+        restored = ParquetFlatDatasetImportConfig._from_proto(proto)
+        assert restored.asset_name == parquet_config.asset_name
+        assert restored.time_column.path == parquet_config.time_column.path
+        assert len(restored.data_columns) == len(parquet_config.data_columns)
+        for orig, rest in zip(parquet_config.data_columns, restored.data_columns):
+            assert orig.name == rest.name
+            assert orig.data_type == rest.data_type
+
+    def test_single_channel_per_row_from_proto_round_trip(self):
+        from sift_client.sift_types.data_import import ParquetSingleChannelConfig
+
+        config = ParquetSingleChannelPerRowImportConfig(
+            asset_name="a",
+            time_column=ParquetTimeColumn(path="ts"),
+            single_channel=ParquetSingleChannelConfig(
+                data_path="value",
+                name="voltage",
+                data_type=ChannelDataType.DOUBLE,
+            ),
+        )
+        proto = config._to_proto()
+        restored = ParquetSingleChannelPerRowImportConfig._from_proto(proto)
+        assert restored.single_channel is not None
+        assert restored.single_channel.name == "voltage"
+        assert restored.single_channel.data_type == ChannelDataType.DOUBLE
+
+
+class TestParquetTimeColumnToProto:
+    def test_empty_path_raises(self):
+        col = ParquetTimeColumn(path="")
+        with pytest.raises(ValueError, match="path must be set"):
+            col._to_proto()
+
+
+class TestResolveDataTypeKey:
+    def test_parquet_requires_data_type(self):
+        with pytest.raises(ValueError, match="data_type"):
+            _resolve_data_type_key(".parquet", None)
+
+    def test_parquet_with_explicit_data_type(self):
+        result = _resolve_data_type_key(".parquet", DataTypeKey.PARQUET_FLATDATASET)
+        assert result == DataTypeKey.PARQUET_FLATDATASET
+
+    def test_pqt_requires_data_type(self):
+        with pytest.raises(ValueError, match="data_type"):
+            _resolve_data_type_key(".pqt", None)
+
+    def test_known_extension_uses_map(self):
+        assert _resolve_data_type_key(".csv", None) == DataTypeKey.CSV
+
+    def test_explicit_data_type_overrides_extension(self):
+        result = _resolve_data_type_key(".csv", DataTypeKey.TDMS)
+        assert result == DataTypeKey.TDMS
 
-    def test_ch10_extension(self):
-        assert EXTENSION_TO_DATA_TYPE_KEY[".ch10"] == DataTypeKey.CH10
+    def test_unknown_extension_raises(self):
+        with pytest.raises(ValueError, match="Unsupported file extension"):
+            _resolve_data_type_key(".xyz", None)

From 7aa2da478e7e93fa3abf237e9b652dd19c155626 Mon Sep 17 00:00:00 2001
From: Wei Qi Lu <wei@siftstack.com>
Date: Thu, 9 Apr 2026 12:21:09 -0700
Subject: [PATCH 34/52] document CSV JSON metadata row in detect_config

---
 .../lib/sift_client/resources/data_imports.py | 25 +++++++++++++------
 .../resources/sync_stubs/__init__.pyi         | 25 +++++++++++++------
 2 files changed, 36 insertions(+), 14 deletions(-)

diff --git a/python/lib/sift_client/resources/data_imports.py b/python/lib/sift_client/resources/data_imports.py
index 174cfc452..84dcd9147 100644
--- a/python/lib/sift_client/resources/data_imports.py
+++ b/python/lib/sift_client/resources/data_imports.py
@@ -187,13 +187,24 @@ async def detect_config(
         using ``TdmsImportConfig``, ``Hdf5ImportConfig``, or
         ``Ch10ImportConfig``.
 
-        For CSV files, the server can parse an optional JSON metadata row
-        that auto-populates channel names, units, descriptions, data types,
-        and enum definitions. Each cell in the row is a JSON object
-        describing that column. When present, ``first_data_row`` in the
-        returned config will be set to the row after the metadata row.
-        Note that enum type definitions are applied server-side during
-        import but are not included in the returned config.
+        For CSV files, the server scans the first two rows for an optional
+        JSON metadata row. Row 1 is checked first; row 2 is checked only
+        if row 1 is not valid metadata. A row qualifies as metadata when
+        every cell contains valid JSON that describes either a time column
+        or a data column. When present, ``first_data_row`` in the returned
+        config is set to the row after the metadata row.
+
+        Each data column cell is a JSON ``ChannelConfig``::
+
+            {"name": "speed", "units": "m/s", "dataType": "CHANNEL_DATA_TYPE_DOUBLE"}
+
+        The time column cell is a JSON ``CsvTimeColumn``::
+
+            {"format": "TIME_FORMAT_ABSOLUTE_RFC3339"}
+
+        Enum type definitions and bit field elements can also be specified
+        in the metadata row; they are applied server-side during import
+        but are not included in the returned config.
 
         For file types with multiple layouts (e.g. Parquet), ``data_type``
         must be specified explicitly.
diff --git a/python/lib/sift_client/resources/sync_stubs/__init__.pyi b/python/lib/sift_client/resources/sync_stubs/__init__.pyi
index cc1b6556e..3565fed0c 100644
--- a/python/lib/sift_client/resources/sync_stubs/__init__.pyi
+++ b/python/lib/sift_client/resources/sync_stubs/__init__.pyi
@@ -655,13 +655,24 @@ class DataImportAPI:
         using ``TdmsImportConfig``, ``Hdf5ImportConfig``, or
         ``Ch10ImportConfig``.
 
-        For CSV files, the server can parse an optional JSON metadata row
-        that auto-populates channel names, units, descriptions, data types,
-        and enum definitions. Each cell in the row is a JSON object
-        describing that column. When present, ``first_data_row`` in the
-        returned config will be set to the row after the metadata row.
-        Note that enum type definitions are applied server-side during
-        import but are not included in the returned config.
+        For CSV files, the server scans the first two rows for an optional
+        JSON metadata row. Row 1 is checked first; row 2 is checked only
+        if row 1 is not valid metadata. A row qualifies as metadata when
+        every cell contains valid JSON that describes either a time column
+        or a data column. When present, ``first_data_row`` in the returned
+        config is set to the row after the metadata row.
+
+        Each data column cell is a JSON ``ChannelConfig``::
+
+            {"name": "speed", "units": "m/s", "dataType": "CHANNEL_DATA_TYPE_DOUBLE"}
+
+        The time column cell is a JSON ``CsvTimeColumn``::
+
+            {"format": "TIME_FORMAT_ABSOLUTE_RFC3339"}
+
+        Enum type definitions and bit field elements can also be specified
+        in the metadata row; they are applied server-side during import
+        but are not included in the returned config.
 
         For file types with multiple layouts (e.g. Parquet), ``data_type``
         must be specified explicitly.

From f59336df44fc6ab034c31d104da95a66721732cb Mon Sep 17 00:00:00 2001
From: Wei Qi Lu <wei@siftstack.com>
Date: Thu, 9 Apr 2026 12:36:01 -0700
Subject: [PATCH 35/52] replace get_column() with __getitem__ on import configs

---
 .../_tests/resources/test_data_imports.py     | 26 +++++++++----------
 .../lib/sift_client/resources/data_imports.py |  2 +-
 .../resources/sync_stubs/__init__.pyi         |  2 +-
 .../lib/sift_client/sift_types/data_import.py | 26 ++++++-------------
 4 files changed, 23 insertions(+), 33 deletions(-)

diff --git a/python/lib/sift_client/_tests/resources/test_data_imports.py b/python/lib/sift_client/_tests/resources/test_data_imports.py
index ff99efe3c..de82148c9 100644
--- a/python/lib/sift_client/_tests/resources/test_data_imports.py
+++ b/python/lib/sift_client/_tests/resources/test_data_imports.py
@@ -58,31 +58,31 @@ def parquet_config():
     )
 
 
-class TestGetColumn:
-    def test_csv_get_column(self, csv_config):
-        col = csv_config.get_column("cpu_util")
+class TestGetItem:
+    def test_csv_getitem(self, csv_config):
+        col = csv_config["cpu_util"]
         assert col.name == "cpu_util"
         assert col.data_type == ChannelDataType.DOUBLE
 
-    def test_csv_get_column_not_found(self, csv_config):
+    def test_csv_getitem_not_found(self, csv_config):
         with pytest.raises(KeyError, match="nonexistent"):
-            csv_config.get_column("nonexistent")
+            csv_config["nonexistent"]
 
-    def test_csv_get_column_mutate(self, csv_config):
-        csv_config.get_column("status_flags").data_type = ChannelDataType.STRING
+    def test_csv_getitem_mutate(self, csv_config):
+        csv_config["status_flags"].data_type = ChannelDataType.STRING
         assert csv_config.data_columns[1].data_type == ChannelDataType.STRING
 
-    def test_parquet_get_column(self, parquet_config):
-        col = parquet_config.get_column("temperature")
+    def test_parquet_getitem(self, parquet_config):
+        col = parquet_config["temperature"]
         assert col.name == "temperature"
         assert col.data_type == ChannelDataType.FLOAT
 
-    def test_parquet_get_column_not_found(self, parquet_config):
+    def test_parquet_getitem_not_found(self, parquet_config):
         with pytest.raises(KeyError, match="nonexistent"):
-            parquet_config.get_column("nonexistent")
+            parquet_config["nonexistent"]
 
-    def test_parquet_get_column_mutate(self, parquet_config):
-        parquet_config.get_column("cpu_util").name = "cpu_utilization"
+    def test_parquet_getitem_mutate(self, parquet_config):
+        parquet_config["cpu_util"].name = "cpu_utilization"
         assert parquet_config.data_columns[0].name == "cpu_utilization"
 
 
diff --git a/python/lib/sift_client/resources/data_imports.py b/python/lib/sift_client/resources/data_imports.py
index 84dcd9147..b2868970c 100644
--- a/python/lib/sift_client/resources/data_imports.py
+++ b/python/lib/sift_client/resources/data_imports.py
@@ -84,7 +84,7 @@ async def import_from_path(
                 config = client.data_imports.detect_config("data.csv")
 
                 # Fix a column data type
-                config.get_column("temperature").data_type = ChannelDataType.FLOAT
+                config["temperature"].data_type = ChannelDataType.FLOAT
 
                 # Remove an unwanted column
                 config.data_columns = [
diff --git a/python/lib/sift_client/resources/sync_stubs/__init__.pyi b/python/lib/sift_client/resources/sync_stubs/__init__.pyi
index 3565fed0c..3da4323ab 100644
--- a/python/lib/sift_client/resources/sync_stubs/__init__.pyi
+++ b/python/lib/sift_client/resources/sync_stubs/__init__.pyi
@@ -732,7 +732,7 @@ class DataImportAPI:
                 config = client.data_imports.detect_config("data.csv")
 
                 # Fix a column data type
-                config.get_column("temperature").data_type = ChannelDataType.FLOAT
+                config["temperature"].data_type = ChannelDataType.FLOAT
 
                 # Remove an unwanted column
                 config.data_columns = [
diff --git a/python/lib/sift_client/sift_types/data_import.py b/python/lib/sift_client/sift_types/data_import.py
index 6963715af..1df48b97f 100644
--- a/python/lib/sift_client/sift_types/data_import.py
+++ b/python/lib/sift_client/sift_types/data_import.py
@@ -169,17 +169,12 @@ class CsvImportConfig(ImportConfigBase):
     time_column: CsvTimeColumn
     data_columns: list[CsvDataColumn]
 
-    def get_column(self, name: str) -> CsvDataColumn:
-        """Look up a data column by name.
+    def __getitem__(self, name: str) -> CsvDataColumn:
+        """Look up a data column by channel name.
 
-        Args:
-            name: The channel name to search for.
+        Example::
 
-        Returns:
-            The matching data column.
-
-        Raises:
-            KeyError: If no column with the given name exists.
+            config["temperature"].data_type = ChannelDataType.FLOAT
         """
         for dc in self.data_columns:
             if dc.name == name:
@@ -326,17 +321,12 @@ class ParquetFlatDatasetImportConfig(ImportConfigBase):
     footer_length: int = 0
     complex_types_import_mode: ParquetComplexTypesImportMode = ParquetComplexTypesImportMode.IGNORE
 
-    def get_column(self, name: str) -> ParquetDataColumn:
-        """Look up a data column by name.
-
-        Args:
-            name: The channel name to search for.
+    def __getitem__(self, name: str) -> ParquetDataColumn:
+        """Look up a data column by channel name.
 
-        Returns:
-            The matching data column.
+        Example::
 
-        Raises:
-            KeyError: If no column with the given name exists.
+            config["temperature"].data_type = ChannelDataType.FLOAT
         """
         for dc in self.data_columns:
             if dc.name == name:

From 0328d27e5b1072fbaf4f8014802cbb696d318652 Mon Sep 17 00:00:00 2001
From: Wei Qi Lu <wei@siftstack.com>
Date: Thu, 9 Apr 2026 12:41:39 -0700
Subject: [PATCH 36/52] updated detect_config error messages

---
 python/lib/sift_client/resources/data_imports.py       | 10 +++++++---
 .../lib/sift_client/resources/sync_stubs/__init__.pyi  |  4 ++--
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/python/lib/sift_client/resources/data_imports.py b/python/lib/sift_client/resources/data_imports.py
index b2868970c..7653aea55 100644
--- a/python/lib/sift_client/resources/data_imports.py
+++ b/python/lib/sift_client/resources/data_imports.py
@@ -219,8 +219,8 @@ async def detect_config(
 
         Raises:
             FileNotFoundError: If the file does not exist.
-            ValueError: If the file extension is unsupported or detection
-                returns no config.
+            ValueError: If the file extension is unsupported or no
+                supported configuration could be detected.
         """
         path = Path(file_path)
         if not path.is_file():
@@ -260,7 +260,11 @@ def _read_sample() -> bytes:
                 response.parquet_config, path.name, footer_offset, footer_length
             )
 
-        raise ValueError("Server returned an empty DetectConfig response.")
+        raise ValueError(
+            f"No supported configuration detected for '{path.name}'. "
+            "Auto-detection supports CSV and Parquet files. "
+            "For other formats, provide a config manually."
+        )
 
 
 def _resolve_data_type_key(ext: str, data_type: DataTypeKey | None) -> DataTypeKey:
diff --git a/python/lib/sift_client/resources/sync_stubs/__init__.pyi b/python/lib/sift_client/resources/sync_stubs/__init__.pyi
index 3da4323ab..7b9282198 100644
--- a/python/lib/sift_client/resources/sync_stubs/__init__.pyi
+++ b/python/lib/sift_client/resources/sync_stubs/__init__.pyi
@@ -687,8 +687,8 @@ class DataImportAPI:
 
         Raises:
             FileNotFoundError: If the file does not exist.
-            ValueError: If the file extension is unsupported or detection
-                returns no config.
+            ValueError: If the file extension is unsupported or no
+                supported configuration could be detected.
         """
         ...
 

From 930c5567da14c2de18cf93671a2b3cd42ff9dc88 Mon Sep 17 00:00:00 2001
From: Wei Qi Lu <wei@siftstack.com>
Date: Thu, 9 Apr 2026 12:53:20 -0700
Subject: [PATCH 37/52] move data column validation from detect_config to
 _to_proto

---
 python/lib/sift_client/resources/data_imports.py | 13 ++-----------
 python/lib/sift_client/sift_types/data_import.py |  4 ++++
 2 files changed, 6 insertions(+), 11 deletions(-)

diff --git a/python/lib/sift_client/resources/data_imports.py b/python/lib/sift_client/resources/data_imports.py
index 7653aea55..041bae429 100644
--- a/python/lib/sift_client/resources/data_imports.py
+++ b/python/lib/sift_client/resources/data_imports.py
@@ -253,7 +253,7 @@ def _read_sample() -> bytes:
         response = await self._low_level_client.detect_config(sample, data_type_key.value)
 
         if response.HasField("csv_config"):
-            return _parse_csv_detect_response(response.csv_config, path.name)
+            return _parse_csv_detect_response(response.csv_config)
 
         if response.HasField("parquet_config"):
             return _parse_parquet_detect_response(
@@ -287,13 +287,11 @@ def _resolve_data_type_key(ext: str, data_type: DataTypeKey | None) -> DataTypeK
     return EXTENSION_TO_DATA_TYPE_KEY[ext]
 
 
-def _parse_csv_detect_response(proto, filename: str) -> CsvImportConfig:
+def _parse_csv_detect_response(proto) -> CsvImportConfig:
     """Parse a CSV DetectConfig response into a config."""
     csv_config = CsvImportConfig._from_proto(proto)
     time_col = csv_config.time_column.column
     csv_config.data_columns = [dc for dc in csv_config.data_columns if dc.column != time_col]
-    if not csv_config.data_columns:
-        raise ValueError(f"No data columns detected in '{filename}'.")
     return csv_config
 
 
@@ -329,13 +327,6 @@ def _parse_parquet_detect_response(
                 parquet_config.data_columns = [
                     c for c in parquet_config.data_columns if c.path != match.path
                 ]
-        if not parquet_config.time_column.path:
-            raise ValueError(
-                f"No time column detected in '{filename}'. "
-                "Set the time column manually on the config before importing."
-            )
-        if not parquet_config.data_columns:
-            raise ValueError(f"No data columns detected in '{filename}'.")
         return parquet_config
     elif proto.HasField("single_channel_per_row"):
         return ParquetSingleChannelPerRowImportConfig._from_proto(
diff --git a/python/lib/sift_client/sift_types/data_import.py b/python/lib/sift_client/sift_types/data_import.py
index 1df48b97f..6e75376c6 100644
--- a/python/lib/sift_client/sift_types/data_import.py
+++ b/python/lib/sift_client/sift_types/data_import.py
@@ -182,6 +182,8 @@ def __getitem__(self, name: str) -> CsvDataColumn:
         raise KeyError(f"No data column named '{name}'")
 
     def _to_proto(self) -> CsvConfigProto:
+        if not self.data_columns:
+            raise ValueError("Config has no data columns. Add at least one before importing.")
         return CsvConfigProto(
             asset_name=self.asset_name,
             run_name=self.run_name or "",
@@ -334,6 +336,8 @@ def __getitem__(self, name: str) -> ParquetDataColumn:
         raise KeyError(f"No data column named '{name}'")
 
     def _to_proto(self) -> ParquetConfigProto:
+        if not self.data_columns:
+            raise ValueError("Config has no data columns. Add at least one before importing.")
         flat_dataset = ParquetFlatDatasetConfigProto(
             time_column=self.time_column._to_proto(),
             data_columns=[

From c610c888c629c4b73d408afc207d93ae1a35117c Mon Sep 17 00:00:00 2001
From: Wei Qi Lu <wei@siftstack.com>
Date: Thu, 9 Apr 2026 13:25:07 -0700
Subject: [PATCH 38/52] updated error types to be more accurate

---
 python/lib/sift_client/_internal/util/file.py    | 3 +--
 python/lib/sift_client/resources/data_imports.py | 2 +-
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/python/lib/sift_client/_internal/util/file.py b/python/lib/sift_client/_internal/util/file.py
index 1d93f44c6..dc2cc1999 100644
--- a/python/lib/sift_client/_internal/util/file.py
+++ b/python/lib/sift_client/_internal/util/file.py
@@ -65,8 +65,7 @@ def upload_file(
                 data=f,
                 headers={"Content-Disposition": f'attachment; filename="{file_path.name}"'},
             )
-            if not response.ok:
-                raise ValueError(f"Upload failed ({response.status_code}): {response.text}")
+            response.raise_for_status()
             return response.json()
 
 
diff --git a/python/lib/sift_client/resources/data_imports.py b/python/lib/sift_client/resources/data_imports.py
index 041bae429..49b63c381 100644
--- a/python/lib/sift_client/resources/data_imports.py
+++ b/python/lib/sift_client/resources/data_imports.py
@@ -166,7 +166,7 @@ async def import_from_path(
         )
         job_id = response.get("jobId")
         if not job_id:
-            raise ValueError("Upload succeeded but server response did not include a job ID.")
+            raise RuntimeError("Upload succeeded but server response did not include a job ID.")
 
         return await self.client.async_.jobs.get(job_id=job_id)
 

From 66d567f5b3a3c6eb4e9e769f590e851c8972dbba Mon Sep 17 00:00:00 2001
From: Wei Qi Lu <wei@siftstack.com>
Date: Thu, 9 Apr 2026 13:55:52 -0700
Subject: [PATCH 39/52] add import_data method to Run for importing files into
 existing runs

---
 python/lib/sift_client/sift_types/run.py | 37 ++++++++++++++++++++++++
 1 file changed, 37 insertions(+)

diff --git a/python/lib/sift_client/sift_types/run.py b/python/lib/sift_client/sift_types/run.py
index acfb59a92..22d22c556 100644
--- a/python/lib/sift_client/sift_types/run.py
+++ b/python/lib/sift_client/sift_types/run.py
@@ -19,10 +19,14 @@
 from sift_client.util.metadata import metadata_dict_to_proto, metadata_proto_to_dict
 
 if TYPE_CHECKING:
+    from pathlib import Path
+
     from sift_stream_bindings import RunFormPy
 
     from sift_client.client import SiftClient
     from sift_client.sift_types.asset import Asset
+    from sift_client.sift_types.data_import import DataTypeKey, ImportConfig
+    from sift_client.sift_types.job import Job
 
 
 class Run(BaseType[RunProto, "Run"], FileAttachmentsMixin):
@@ -127,6 +131,39 @@ def stop(self) -> Run:
         self._update(updated_run)
         return self
 
+    def import_data(
+        self,
+        file_path: str | Path,
+        *,
+        asset: Asset | str | None = None,
+        config: ImportConfig | None = None,
+        data_type: DataTypeKey | None = None,
+        show_progress: bool | None = None,
+    ) -> Job:
+        """Import data from a file into this run.
+
+        Convenience method that calls ``client.data_imports.import_from_path``
+        with this run pre-filled.
+
+        Args:
+            file_path: Path to the local file to import.
+            asset: Asset object or asset name to import data into.
+            config: Import configuration. Auto-detected if omitted.
+            data_type: Explicit data type key for ambiguous formats.
+            show_progress: Display a progress spinner during upload.
+
+        Returns:
+            A ``Job`` handle for the pending import.
+        """
+        return self.client.data_import.import_from_path(
+            file_path,
+            asset=asset,
+            config=config,
+            data_type=data_type,
+            run=self,
+            show_progress=show_progress,
+        )
+
 
 class RunBase(ModelCreateUpdateBase):
     """Base class for Run create and update models with shared fields and validation."""

From 462a36d412eaa887113c4466f029d82126b94ad6 Mon Sep 17 00:00:00 2001
From: Wei Qi Lu <wei@siftstack.com>
Date: Thu, 9 Apr 2026 14:19:12 -0700
Subject: [PATCH 40/52] add get_run to data import API and get_import_run to
 Job for resolving runs from imports

---
 .../low_level_wrappers/data_imports.py        | 15 ++++++++++++
 .../lib/sift_client/resources/data_imports.py | 23 +++++++++++++++++++
 .../resources/sync_stubs/__init__.pyi         | 19 +++++++++++++++
 python/lib/sift_client/sift_types/job.py      | 17 ++++++++++++++
 4 files changed, 74 insertions(+)

diff --git a/python/lib/sift_client/_internal/low_level_wrappers/data_imports.py b/python/lib/sift_client/_internal/low_level_wrappers/data_imports.py
index b88b9cfab..34315ac6b 100644
--- a/python/lib/sift_client/_internal/low_level_wrappers/data_imports.py
+++ b/python/lib/sift_client/_internal/low_level_wrappers/data_imports.py
@@ -7,6 +7,8 @@
     CreateDataImportFromUploadResponse,
     DetectConfigRequest,
     DetectConfigResponse,
+    GetDataImportRequest,
+    GetDataImportResponse,
 )
 from sift.data_imports.v2.data_imports_pb2_grpc import DataImportServiceStub
 
@@ -75,6 +77,19 @@ async def create_from_upload(self, config: ImportConfig) -> tuple[str, str]:
         response = cast("CreateDataImportFromUploadResponse", response)
         return response.data_import_id, response.upload_url
 
+    async def get(self, data_import_id: str) -> GetDataImportResponse:
+        """Get a data import by ID.
+
+        Args:
+            data_import_id: The ID of the data import.
+
+        Returns:
+            The GetDataImportResponse proto.
+        """
+        request = GetDataImportRequest(data_import_id=data_import_id)
+        response = await self._grpc_client.get_stub(DataImportServiceStub).GetDataImport(request)
+        return cast("GetDataImportResponse", response)
+
     async def detect_config(
         self, data: bytes, data_type_key: DataTypeKey.ValueType
     ) -> DetectConfigResponse:
diff --git a/python/lib/sift_client/resources/data_imports.py b/python/lib/sift_client/resources/data_imports.py
index 49b63c381..ef532eccc 100644
--- a/python/lib/sift_client/resources/data_imports.py
+++ b/python/lib/sift_client/resources/data_imports.py
@@ -170,6 +170,29 @@ async def import_from_path(
 
         return await self.client.async_.jobs.get(job_id=job_id)
 
+    async def get_run(self, data_import_id: str) -> Run:
+        """Get the run associated with a data import.
+
+        The ``data_import_id`` is available on the job returned by
+        ``import_from_path`` via ``job.job_details.data_import_id``.
+        For a more ergonomic approach, use ``job.get_import_run()``
+        which calls this method internally.
+
+        Args:
+            data_import_id: The ID of the data import.
+
+        Returns:
+            The Run created by or associated with the import.
+
+        Raises:
+            ValueError: If the data import has no associated run.
+        """
+        response = await self._low_level_client.get(data_import_id)
+        run_id = response.data_import.run_id
+        if not run_id:
+            raise ValueError("Data import does not have an associated run.")
+        return await self.client.async_.runs.get(run_id=run_id)
+
     async def detect_config(
         self,
         file_path: str | Path,
diff --git a/python/lib/sift_client/resources/sync_stubs/__init__.pyi b/python/lib/sift_client/resources/sync_stubs/__init__.pyi
index 7b9282198..d76dd4942 100644
--- a/python/lib/sift_client/resources/sync_stubs/__init__.pyi
+++ b/python/lib/sift_client/resources/sync_stubs/__init__.pyi
@@ -692,6 +692,25 @@ class DataImportAPI:
         """
         ...
 
+    def get_run(self, data_import_id: str) -> Run:
+        """Get the run associated with a data import.
+
+        The ``data_import_id`` is available on the job returned by
+        ``import_from_path`` via ``job.job_details.data_import_id``.
+        For a more ergonomic approach, use ``job.get_import_run()``
+        which calls this method internally.
+
+        Args:
+            data_import_id: The ID of the data import.
+
+        Returns:
+            The Run created by or associated with the import.
+
+        Raises:
+            ValueError: If the data import has no associated run.
+        """
+        ...
+
     def import_from_path(
         self,
         file_path: str | Path,
diff --git a/python/lib/sift_client/sift_types/job.py b/python/lib/sift_client/sift_types/job.py
index 6d3adbe2d..676cdc8c2 100644
--- a/python/lib/sift_client/sift_types/job.py
+++ b/python/lib/sift_client/sift_types/job.py
@@ -19,6 +19,7 @@
     from pathlib import Path
 
     from sift_client.client import SiftClient
+    from sift_client.sift_types.run import Run
 
 
 class JobType(str, Enum):
@@ -315,6 +316,22 @@ def wait_until_complete(
         self._update(completed_job)
         return self
 
+    def get_import_run(self) -> Run:
+        """Get the run created by this data import job.
+
+        Returns:
+            The Run associated with this import.
+
+        Raises:
+            ValueError: If this is not a data import job or the import
+                has no associated run.
+        """
+        if self.job_type != JobType.DATA_IMPORT:
+            raise ValueError("get_import_run() is only valid for data import jobs.")
+        if not isinstance(self.job_details, DataImportDetails):
+            raise ValueError("Job does not have data import details.")
+        return self.client.data_import.get_run(self.job_details.data_import_id)
+
     def wait_and_download(
         self,
         *,

From f049daaa2f5543e2d59a945a5fb39365a7b4113b Mon Sep 17 00:00:00 2001
From: Wei Qi Lu <wei@siftstack.com>
Date: Thu, 9 Apr 2026 14:52:01 -0700
Subject: [PATCH 41/52] add model validation for parquet single/multi channel

---
 python/lib/sift_client/sift_types/data_import.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/python/lib/sift_client/sift_types/data_import.py b/python/lib/sift_client/sift_types/data_import.py
index 6e75376c6..0925f08fd 100644
--- a/python/lib/sift_client/sift_types/data_import.py
+++ b/python/lib/sift_client/sift_types/data_import.py
@@ -452,6 +452,16 @@ class ParquetSingleChannelPerRowImportConfig(ImportConfigBase):
     footer_length: int = 0
     complex_types_import_mode: ParquetComplexTypesImportMode = ParquetComplexTypesImportMode.IGNORE
 
+    @model_validator(mode="after")
+    def _check_channel_config(self) -> ParquetSingleChannelPerRowImportConfig:
+        if self.single_channel is None and self.multi_channel is None:
+            raise ValueError("Exactly one of 'single_channel' or 'multi_channel' must be set.")
+        if self.single_channel is not None and self.multi_channel is not None:
+            raise ValueError(
+                "Exactly one of 'single_channel' or 'multi_channel' must be set, not both."
+            )
+        return self
+
     def _to_proto(self) -> ParquetConfigProto:
         scpr = ParquetSingleChannelPerRowConfigProto(
             time_column=self.time_column._to_proto(),

From 8ff5fde74d26e7fd175738619d53eab8161dc073 Mon Sep 17 00:00:00 2001
From: Wei Qi Lu <wei@siftstack.com>
Date: Fri, 10 Apr 2026 13:30:02 -0700
Subject: [PATCH 42/52] refactor parquet time column detection

---
 .../lib/sift_client/resources/data_imports.py | 52 +++++++++++++------
 .../lib/sift_client/sift_types/data_import.py | 12 +++--
 2 files changed, 44 insertions(+), 20 deletions(-)

diff --git a/python/lib/sift_client/resources/data_imports.py b/python/lib/sift_client/resources/data_imports.py
index ef532eccc..96c1693fa 100644
--- a/python/lib/sift_client/resources/data_imports.py
+++ b/python/lib/sift_client/resources/data_imports.py
@@ -26,6 +26,8 @@
 from sift_client.sift_types.run import Run
 
 if TYPE_CHECKING:
+    from collections.abc import Iterable
+
     from sift_client.client import SiftClient
     from sift_client.sift_types.job import Job
 
@@ -318,6 +320,24 @@ def _parse_csv_detect_response(proto) -> CsvImportConfig:
     return csv_config
 
 
+def _infer_time_column(columns: Iterable[tuple[str, ChannelDataType, str]]) -> str | None:
+    """Find a likely time column from a sequence of (name, data_type, path) tuples.
+
+    The backend only detects arrow timestamp types. This falls back to the first
+    integer column whose name starts with "time".
+    """
+    _integer_types = {
+        ChannelDataType.INT_32,
+        ChannelDataType.INT_64,
+        ChannelDataType.UINT_32,
+        ChannelDataType.UINT_64,
+    }
+    for name, data_type, path in columns:
+        if data_type in _integer_types and name.lower().startswith("time"):
+            return path
+    return None
+
+
 def _parse_parquet_detect_response(
     proto, filename: str, footer_offset: int, footer_length: int
 ) -> ParquetFlatDatasetImportConfig | ParquetSingleChannelPerRowImportConfig:
@@ -332,29 +352,27 @@ def _parse_parquet_detect_response(
                 dc for dc in parquet_config.data_columns if dc.path != time_path
             ]
         else:
-            # The backend only detects arrow timestamp types. Fall back to
-            # an integer column whose name starts with "time".
-            _integer_types = {
-                ChannelDataType.INT_32,
-                ChannelDataType.INT_64,
-                ChannelDataType.UINT_32,
-                ChannelDataType.UINT_64,
-            }
-            match = None
-            for dc in parquet_config.data_columns:
-                if dc.data_type in _integer_types and dc.name.lower().startswith("time"):
-                    match = dc
-                    break
-            if match is not None:
-                parquet_config.time_column = ParquetTimeColumn(path=match.path)
+            inferred = _infer_time_column(
+                (dc.name, dc.data_type, dc.path) for dc in parquet_config.data_columns
+            )
+            if inferred is not None:
+                parquet_config.time_column = ParquetTimeColumn(path=inferred)
                 parquet_config.data_columns = [
-                    c for c in parquet_config.data_columns if c.path != match.path
+                    c for c in parquet_config.data_columns if c.path != inferred
                 ]
         return parquet_config
     elif proto.HasField("single_channel_per_row"):
-        return ParquetSingleChannelPerRowImportConfig._from_proto(
+        parquet_config = ParquetSingleChannelPerRowImportConfig._from_proto(
             proto, footer_offset=footer_offset, footer_length=footer_length
         )
+        if not parquet_config.time_column.path:
+            inferred = _infer_time_column(
+                (col.column_config.name, ChannelDataType(col.column_config.data_type), col.path)
+                for col in proto.single_channel_per_row.columns
+            )
+            if inferred is not None:
+                parquet_config.time_column = ParquetTimeColumn(path=inferred)
+        return parquet_config
     raise ValueError(f"Unsupported parquet layout in DetectConfig response for '{filename}'.")
 
 
diff --git a/python/lib/sift_client/sift_types/data_import.py b/python/lib/sift_client/sift_types/data_import.py
index 0925f08fd..e07d8eab6 100644
--- a/python/lib/sift_client/sift_types/data_import.py
+++ b/python/lib/sift_client/sift_types/data_import.py
@@ -432,7 +432,9 @@ class ParquetSingleChannelPerRowImportConfig(ImportConfigBase):
     """Configuration for importing a Parquet file where each row represents
     a single channel's data point.
 
-    Exactly one of ``single_channel`` or ``multi_channel`` must be set.
+    Exactly one of ``single_channel`` or ``multi_channel`` must be set before
+    importing. When returned by ``detect_config()``, neither field is populated
+    and must be filled in before passing the config to ``import_from_path()``.
 
     Attributes:
         time_column: Time column configuration.
@@ -454,8 +456,6 @@ class ParquetSingleChannelPerRowImportConfig(ImportConfigBase):
 
     @model_validator(mode="after")
     def _check_channel_config(self) -> ParquetSingleChannelPerRowImportConfig:
-        if self.single_channel is None and self.multi_channel is None:
-            raise ValueError("Exactly one of 'single_channel' or 'multi_channel' must be set.")
         if self.single_channel is not None and self.multi_channel is not None:
             raise ValueError(
                 "Exactly one of 'single_channel' or 'multi_channel' must be set, not both."
@@ -463,6 +463,12 @@ def _check_channel_config(self) -> ParquetSingleChannelPerRowImportConfig:
         return self
 
     def _to_proto(self) -> ParquetConfigProto:
+        if self.single_channel is None and self.multi_channel is None:
+            raise ValueError(
+                "Either 'single_channel' or 'multi_channel' must be set before importing. "
+                "If this config was returned by detect_config(), set one of these fields "
+                "to specify the channel layout."
+            )
         scpr = ParquetSingleChannelPerRowConfigProto(
             time_column=self.time_column._to_proto(),
         )

From 67712783462952d3f1216107a84c8152a3d52bf3 Mon Sep 17 00:00:00 2001
From: Wei Qi Lu <wei@siftstack.com>
Date: Fri, 10 Apr 2026 13:42:06 -0700
Subject: [PATCH 43/52] fix mypy error: use a distinct variable for the single-channel-per-row config

---
 python/lib/sift_client/resources/data_imports.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/python/lib/sift_client/resources/data_imports.py b/python/lib/sift_client/resources/data_imports.py
index 96c1693fa..5196c5d74 100644
--- a/python/lib/sift_client/resources/data_imports.py
+++ b/python/lib/sift_client/resources/data_imports.py
@@ -362,17 +362,17 @@ def _parse_parquet_detect_response(
                 ]
         return parquet_config
     elif proto.HasField("single_channel_per_row"):
-        parquet_config = ParquetSingleChannelPerRowImportConfig._from_proto(
+        parquet_scpr_config = ParquetSingleChannelPerRowImportConfig._from_proto(
             proto, footer_offset=footer_offset, footer_length=footer_length
         )
-        if not parquet_config.time_column.path:
+        if not parquet_scpr_config.time_column.path:
             inferred = _infer_time_column(
                 (col.column_config.name, ChannelDataType(col.column_config.data_type), col.path)
                 for col in proto.single_channel_per_row.columns
             )
             if inferred is not None:
-                parquet_config.time_column = ParquetTimeColumn(path=inferred)
-        return parquet_config
+                parquet_scpr_config.time_column = ParquetTimeColumn(path=inferred)
+        return parquet_scpr_config
     raise ValueError(f"Unsupported parquet layout in DetectConfig response for '{filename}'.")
 
 

From 261e091a56af72c13993e4c247bbe5a5348d5285 Mon Sep 17 00:00:00 2001
From: Wei Qi Lu <wei@siftstack.com>
Date: Fri, 10 Apr 2026 20:08:23 -0700
Subject: [PATCH 44/52] simplify _resolve_data_type_key logic

---
 python/lib/sift_client/resources/data_imports.py | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/python/lib/sift_client/resources/data_imports.py b/python/lib/sift_client/resources/data_imports.py
index 5196c5d74..736fadb36 100644
--- a/python/lib/sift_client/resources/data_imports.py
+++ b/python/lib/sift_client/resources/data_imports.py
@@ -294,15 +294,13 @@ def _read_sample() -> bytes:
 
 def _resolve_data_type_key(ext: str, data_type: DataTypeKey | None) -> DataTypeKey:
     """Resolve the data type key from file extension and explicit override."""
-    if ext in (".parquet", ".pqt"):
-        if data_type is None:
-            raise ValueError(
-                "Parquet files require 'data_type' to be specified. "
-                "Use DataTypeKey.PARQUET_FLATDATASET or DataTypeKey.PARQUET_SINGLE_CHANNEL_PER_ROW."
-            )
-        return data_type
     if data_type is not None:
         return data_type
+    if ext in (".parquet", ".pqt"):
+        raise ValueError(
+            "Parquet files require 'data_type' to be specified. "
+            "Use DataTypeKey.PARQUET_FLATDATASET or DataTypeKey.PARQUET_SINGLE_CHANNEL_PER_ROW."
+        )
     if ext not in EXTENSION_TO_DATA_TYPE_KEY:
         raise ValueError(
             f"Unsupported file extension '{ext}'. "

From 59af2a864828091a4bef0bdcf87b9f1c31cd5920 Mon Sep 17 00:00:00 2001
From: Wei Qi Lu <wei@siftstack.com>
Date: Fri, 10 Apr 2026 20:22:28 -0700
Subject: [PATCH 45/52] autofill the run's asset during import

---
 python/lib/sift_client/sift_types/run.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/python/lib/sift_client/sift_types/run.py b/python/lib/sift_client/sift_types/run.py
index 22d22c556..77de6e736 100644
--- a/python/lib/sift_client/sift_types/run.py
+++ b/python/lib/sift_client/sift_types/run.py
@@ -143,7 +143,8 @@ def import_data(
         """Import data from a file into this run.
 
         Convenience method that calls ``client.data_imports.import_from_path``
-        with this run pre-filled.
+        with this run pre-filled. If the run has exactly one asset,
+        ``asset`` is inferred automatically.
 
         Args:
             file_path: Path to the local file to import.
@@ -155,6 +156,9 @@ def import_data(
         Returns:
             A ``Job`` handle for the pending import.
         """
+        if asset is None and len(self.asset_ids) == 1:
+            asset = self.asset_ids[0]
+
         return self.client.data_import.import_from_path(
             file_path,
             asset=asset,

From f999e9752e54c843f92d582bd7fff15f1ed461a8 Mon Sep 17 00:00:00 2001
From: Wei Qi Lu <wei@siftstack.com>
Date: Fri, 10 Apr 2026 20:35:02 -0700
Subject: [PATCH 46/52] updated docstrings and fixed Run.import_data to infer
 an Asset object instead of an asset ID

---
 python/lib/sift_client/sift_types/data_import.py | 5 +----
 python/lib/sift_client/sift_types/run.py         | 2 +-
 2 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/python/lib/sift_client/sift_types/data_import.py b/python/lib/sift_client/sift_types/data_import.py
index e07d8eab6..0700463b5 100644
--- a/python/lib/sift_client/sift_types/data_import.py
+++ b/python/lib/sift_client/sift_types/data_import.py
@@ -108,8 +108,6 @@ class CsvTimeColumn(TimeColumnBase):
 
     Attributes:
         column: The 1-indexed column number of the time column.
-        format: The time format used in this column.
-        relative_start_time: Required when using a relative time format.
     """
 
     column: int
@@ -250,8 +248,7 @@ class ParquetTimeColumn(TimeColumnBase):
 
     Attributes:
         path: The column path in the Parquet schema (e.g. ``"timestamp"``).
-        format: The time format used in this column.
-        relative_start_time: Required when using a relative time format.
+        format: The time format. Defaults to ``ABSOLUTE_UNIX_NANOSECONDS``.
     """
 
     path: str
diff --git a/python/lib/sift_client/sift_types/run.py b/python/lib/sift_client/sift_types/run.py
index 77de6e736..ec6690896 100644
--- a/python/lib/sift_client/sift_types/run.py
+++ b/python/lib/sift_client/sift_types/run.py
@@ -157,7 +157,7 @@ def import_data(
             A ``Job`` handle for the pending import.
         """
         if asset is None and len(self.asset_ids) == 1:
-            asset = self.asset_ids[0]
+            asset = self.client.assets.get(asset_id=self.asset_ids[0])
 
         return self.client.data_import.import_from_path(
             file_path,

From e074a4cbad533d9c4561f12531f239b2b97b1c56 Mon Sep 17 00:00:00 2001
From: Wei Qi Lu <wei@siftstack.com>
Date: Fri, 10 Apr 2026 20:53:10 -0700
Subject: [PATCH 47/52] refactor show_progress helper to the base class

---
 python/lib/sift_client/resources/_base.py        | 4 ++++
 python/lib/sift_client/resources/data_imports.py | 8 ++------
 python/lib/sift_client/resources/jobs.py         | 6 +++---
 3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/python/lib/sift_client/resources/_base.py b/python/lib/sift_client/resources/_base.py
index 890cf2f44..33d7e2659 100644
--- a/python/lib/sift_client/resources/_base.py
+++ b/python/lib/sift_client/resources/_base.py
@@ -3,6 +3,7 @@
 from abc import ABC
 from typing import TYPE_CHECKING, Any, TypeVar
 
+from sift_client._internal.util.file import resolve_show_progress
 from sift_client.sift_types.tag import Tag
 from sift_client.util import cel_utils as cel
 
@@ -34,6 +35,9 @@ def grpc_client(self) -> GrpcClient:
     def rest_client(self) -> RestClient:
         return self.client.rest_client
 
+    def _show_progress(self) -> bool:
+        return resolve_show_progress(is_sync=getattr(self, "_is_sync", False))
+
     def _apply_client_to_instance(self, instance: T) -> T:
         instance._apply_client_to_instance(self.client)
         return instance
diff --git a/python/lib/sift_client/resources/data_imports.py b/python/lib/sift_client/resources/data_imports.py
index 736fadb36..3395c4909 100644
--- a/python/lib/sift_client/resources/data_imports.py
+++ b/python/lib/sift_client/resources/data_imports.py
@@ -5,11 +5,7 @@
 
 from sift_client._internal.low_level_wrappers.data_imports import DataImportsLowLevelClient
 from sift_client._internal.util.executor import run_sync_function
-from sift_client._internal.util.file import (
-    extract_parquet_footer,
-    resolve_show_progress,
-    upload_file,
-)
+from sift_client._internal.util.file import extract_parquet_footer, upload_file
 from sift_client.resources._base import ResourceBase
 from sift_client.sift_types.asset import Asset
 from sift_client.sift_types.channel import ChannelDataType
@@ -154,7 +150,7 @@ async def import_from_path(
             await _prepare_parquet_config(config, path)
 
         if show_progress is None:
-            show_progress = resolve_show_progress(is_sync=getattr(self, "_is_sync", False))
+            show_progress = self._show_progress()
 
         _, upload_url = await self._low_level_client.create_from_upload(config)
 
diff --git a/python/lib/sift_client/resources/jobs.py b/python/lib/sift_client/resources/jobs.py
index 5e2bbdf9b..5a9eb38c6 100644
--- a/python/lib/sift_client/resources/jobs.py
+++ b/python/lib/sift_client/resources/jobs.py
@@ -11,7 +11,7 @@
 
 from sift_client._internal.low_level_wrappers.jobs import JobsLowLevelClient
 from sift_client._internal.util.executor import run_sync_function
-from sift_client._internal.util.file import download_file, extract_zip, resolve_show_progress
+from sift_client._internal.util.file import download_file, extract_zip
 from sift_client.resources._base import ResourceBase
 from sift_client.sift_types.job import DataExportStatusDetails, Job, JobStatus, JobType
 from sift_client.util import cel_utils as cel
@@ -193,7 +193,7 @@ async def wait_until_complete(
         """
         job_id = job._id_or_error if isinstance(job, Job) else job
         if show_progress is None:
-            show_progress = resolve_show_progress(is_sync=getattr(self, "_is_sync", False))
+            show_progress = self._show_progress()
 
         start = time.monotonic()
         with alive_bar(
@@ -256,7 +256,7 @@ async def wait_and_download(
         """
         job_id = job._id_or_error if isinstance(job, Job) else job
         if show_progress is None:
-            show_progress = resolve_show_progress(is_sync=getattr(self, "_is_sync", False))
+            show_progress = self._show_progress()
 
         completed_job = await self.wait_until_complete(
             job=job_id,

From 78703e54cc66c2b9243f70d60d8a31d638507068 Mon Sep 17 00:00:00 2001
From: Wei Qi Lu <wei@siftstack.com>
Date: Fri, 10 Apr 2026 21:02:52 -0700
Subject: [PATCH 48/52] add client binding test for data imports

---
 .../sift_client/_tests/resources/test_data_imports.py    | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/python/lib/sift_client/_tests/resources/test_data_imports.py b/python/lib/sift_client/_tests/resources/test_data_imports.py
index de82148c9..a2cabf953 100644
--- a/python/lib/sift_client/_tests/resources/test_data_imports.py
+++ b/python/lib/sift_client/_tests/resources/test_data_imports.py
@@ -4,6 +4,7 @@
 
 import pytest
 
+from sift_client.resources import DataImportAPI, DataImportAPIAsync
 from sift_client.resources.data_imports import _resolve_data_type_key
 from sift_client.sift_types.channel import ChannelDataType
 from sift_client.sift_types.data_import import (
@@ -23,6 +24,14 @@
 )
 
 
+@pytest.mark.integration
+def test_client_binding(sift_client):
+    assert sift_client.data_import
+    assert isinstance(sift_client.data_import, DataImportAPI)
+    assert sift_client.async_.data_import
+    assert isinstance(sift_client.async_.data_import, DataImportAPIAsync)
+
+
 @pytest.fixture
 def csv_config():
     return CsvImportConfig(

From d736d1e3b9c82a24df1a01d6a314074711532555 Mon Sep 17 00:00:00 2001
From: Wei Qi Lu <wei@siftstack.com>
Date: Fri, 10 Apr 2026 21:17:20 -0700
Subject: [PATCH 49/52] add a base class for data columns shared by csv,
 parquet, and hdf5

---
 .../lib/sift_client/sift_types/data_import.py | 84 ++++++++-----------
 1 file changed, 34 insertions(+), 50 deletions(-)

diff --git a/python/lib/sift_client/sift_types/data_import.py b/python/lib/sift_client/sift_types/data_import.py
index 0700463b5..c62a4c0e8 100644
--- a/python/lib/sift_client/sift_types/data_import.py
+++ b/python/lib/sift_client/sift_types/data_import.py
@@ -103,37 +103,16 @@ def _check_relative_start_time(self) -> TimeColumnBase:
         return self
 
 
-class CsvTimeColumn(TimeColumnBase):
-    """Time column configuration for CSV imports.
+class DataColumnBase(BaseModel, ABC):
+    """Base class for data column definitions.
 
     Attributes:
-        column: The 1-indexed column number of the time column.
-    """
-
-    column: int
-
-    def _to_proto(self) -> CsvTimeColumnProto:
-        proto = CsvTimeColumnProto(
-            column_number=self.column,
-            format=self.format.value,
-        )
-        if self.relative_start_time is not None:
-            proto.relative_start_time.CopyFrom(to_pb_timestamp(self.relative_start_time))
-        return proto
-
-
-class CsvDataColumn(BaseModel):
-    """A data column definition for CSV imports.
-
-    Attributes:
-        column: The 1-indexed column number.
         name: Channel name.
         data_type: The data type of the channel values.
         units: Optional units string.
         description: Optional channel description.
     """
 
-    column: int
     name: str
     data_type: ChannelDataType
     units: str = ""
@@ -154,6 +133,35 @@ class ImportConfigBase(BaseModel, ABC):
     run_id: str | None = None
 
 
+class CsvTimeColumn(TimeColumnBase):
+    """Time column configuration for CSV imports.
+
+    Attributes:
+        column: The 1-indexed column number of the time column.
+    """
+
+    column: int
+
+    def _to_proto(self) -> CsvTimeColumnProto:
+        proto = CsvTimeColumnProto(
+            column_number=self.column,
+            format=self.format.value,
+        )
+        if self.relative_start_time is not None:
+            proto.relative_start_time.CopyFrom(to_pb_timestamp(self.relative_start_time))
+        return proto
+
+
+class CsvDataColumn(DataColumnBase):
+    """A data column definition for CSV imports.
+
+    Attributes:
+        column: The 1-indexed column number.
+    """
+
+    column: int
+
+
 class CsvImportConfig(ImportConfigBase):
     """Configuration for importing a CSV file.
 
@@ -281,22 +289,14 @@ def _from_proto(cls, proto: ParquetTimeColumnProto) -> ParquetTimeColumn:
         )
 
 
-class ParquetDataColumn(BaseModel):
+class ParquetDataColumn(DataColumnBase):
     """A data column definition for Parquet flat dataset imports.
 
     Attributes:
         path: The column path in the Parquet schema.
-        name: Channel name.
-        data_type: The data type of the channel values.
-        units: Optional units string.
-        description: Optional channel description.
     """
 
     path: str
-    name: str
-    data_type: ChannelDataType
-    units: str = ""
-    description: str = ""
 
 
 class ParquetFlatDatasetImportConfig(ImportConfigBase):
@@ -395,22 +395,14 @@ def _from_proto(
         )
 
 
-class ParquetSingleChannelConfig(BaseModel):
+class ParquetSingleChannelConfig(DataColumnBase):
     """Configuration for a single-channel Parquet single-channel-per-row import.
 
     Attributes:
         data_path: The column path containing channel data.
-        name: Channel name.
-        data_type: The data type of the channel values.
-        units: Optional units string.
-        description: Optional channel description.
     """
 
     data_path: str
-    name: str
-    data_type: ChannelDataType
-    units: str = ""
-    description: str = ""
 
 
 class ParquetMultiChannelConfig(BaseModel):
@@ -587,7 +579,7 @@ def _to_proto(self) -> TDMSConfigProto:
         return proto
 
 
-class Hdf5DataColumn(BaseModel):
+class Hdf5DataColumn(DataColumnBase):
     """A dataset mapping for HDF5 imports.
 
     Each entry maps a time/value dataset pair to a channel.
@@ -597,10 +589,6 @@ class Hdf5DataColumn(BaseModel):
         time_index: Column index within the time dataset. Defaults to 0.
         value_dataset: HDF5 path to the value dataset.
         value_index: Column index within the value dataset. Defaults to 0.
-        name: Channel name.
-        data_type: The data type of the channel values.
-        units: Optional units string.
-        description: Optional channel description.
         time_field: For compound dataset types, the field name to use for time.
         value_field: For compound dataset types, the field name to use for value.
     """
@@ -609,10 +597,6 @@ class Hdf5DataColumn(BaseModel):
     time_index: int = 0
     value_dataset: str
     value_index: int = 0
-    name: str
-    data_type: ChannelDataType
-    units: str = ""
-    description: str = ""
     time_field: str | None = None
     value_field: str | None = None
 

From 3cc3607071e82ac9b1843ea257670677c9fd68cd Mon Sep 17 00:00:00 2001
From: Wei Qi Lu <wei@siftstack.com>
Date: Fri, 10 Apr 2026 21:32:21 -0700
Subject: [PATCH 50/52] update the upload_file progress bar to be more detailed

---
 python/lib/sift_client/_internal/util/file.py | 32 +++++++++++++++----
 1 file changed, 26 insertions(+), 6 deletions(-)

diff --git a/python/lib/sift_client/_internal/util/file.py b/python/lib/sift_client/_internal/util/file.py
index dc2cc1999..5e0269110 100644
--- a/python/lib/sift_client/_internal/util/file.py
+++ b/python/lib/sift_client/_internal/util/file.py
@@ -17,6 +17,23 @@
     from sift_client.transport.rest_transport import RestClient
 
 
+class _ProgressReader:
+    """Wraps a file object to report read progress to an alive_bar callback."""
+
+    def __init__(self, file_object, progress_bar):
+        self._file_object = file_object
+        self._progress_bar = progress_bar
+
+    def read(self, size=-1):
+        chunk = self._file_object.read(size)
+        if chunk:
+            self._progress_bar(len(chunk))
+        return chunk
+
+    def __getattr__(self, name):
+        return getattr(self._file_object, name)
+
+
 def resolve_show_progress(*, is_sync: bool) -> bool:
     """Resolve the show_progress setting from the global config.
 
@@ -50,19 +67,22 @@ def upload_file(
     Raises:
         ValueError: If the upload request fails.
     """
+    file_size = file_path.stat().st_size
+
     with alive_bar(
+        file_size,
         title=f"Upload [{file_path.name}]",
-        bar=None,
         spinner="dots_waves",
         spinner_length=7,
-        monitor=False,
-        stats=False,
+        unit="B",
+        scale="SI",
         disable=not show_progress,
-    ):
-        with open(file_path, "rb") as f:
+    ) as bar:
+        with open(file_path, "rb") as file:
+            wrapped = _ProgressReader(file, bar)
             response = rest_client.post(
                 signed_url,
-                data=f,
+                data=wrapped,
                 headers={"Content-Disposition": f'attachment; filename="{file_path.name}"'},
             )
             response.raise_for_status()

From 8b6539070b5abfd00505bc95d097c588012ac1d0 Mon Sep 17 00:00:00 2001
From: Wei Qi Lu <wei@siftstack.com>
Date: Mon, 13 Apr 2026 10:39:18 -0700
Subject: [PATCH 51/52] removed CH10 references; CH10 imports are no longer supported

---
 .../low_level_wrappers/data_imports.py        |  3 ---
 .../lib/sift_client/resources/data_imports.py | 14 ++++--------
 .../lib/sift_client/sift_types/data_import.py | 22 -------------------
 3 files changed, 4 insertions(+), 35 deletions(-)

diff --git a/python/lib/sift_client/_internal/low_level_wrappers/data_imports.py b/python/lib/sift_client/_internal/low_level_wrappers/data_imports.py
index 34315ac6b..b0219124b 100644
--- a/python/lib/sift_client/_internal/low_level_wrappers/data_imports.py
+++ b/python/lib/sift_client/_internal/low_level_wrappers/data_imports.py
@@ -14,7 +14,6 @@
 
 from sift_client._internal.low_level_wrappers.base import LowLevelClientBase
 from sift_client.sift_types.data_import import (
-    Ch10ImportConfig,
     CsvImportConfig,
     Hdf5ImportConfig,
     ImportConfig,
@@ -41,8 +40,6 @@ def _set_config_on_request(
         config, (ParquetFlatDatasetImportConfig, ParquetSingleChannelPerRowImportConfig)
     ):
         request.parquet_config.CopyFrom(config._to_proto())
-    elif isinstance(config, Ch10ImportConfig):
-        request.ch10_config.CopyFrom(config._to_proto())
     elif isinstance(config, TdmsImportConfig):
         request.tdms_config.CopyFrom(config._to_proto())
     elif isinstance(config, Hdf5ImportConfig):
diff --git a/python/lib/sift_client/resources/data_imports.py b/python/lib/sift_client/resources/data_imports.py
index 3395c4909..f40876234 100644
--- a/python/lib/sift_client/resources/data_imports.py
+++ b/python/lib/sift_client/resources/data_imports.py
@@ -11,7 +11,6 @@
 from sift_client.sift_types.channel import ChannelDataType
 from sift_client.sift_types.data_import import (
     EXTENSION_TO_DATA_TYPE_KEY,
-    Ch10ImportConfig,
     CsvImportConfig,
     DataTypeKey,
     ImportConfig,
@@ -63,7 +62,7 @@ async def import_from_path(
 
         When ``config`` is omitted the file format is auto-detected via
         ``detect_config`` (CSV and Parquet only). For other formats
-        (TDMS, HDF5, CH10), ``config`` must be provided.
+        (TDMS and HDF5), ``config`` must be provided.
         When ``asset`` is provided it overrides the config value;
         otherwise the config's ``asset_name`` is used.
         If neither ``run`` nor ``run_name`` is provided (and none is
@@ -134,14 +133,10 @@ async def import_from_path(
         if run is not None and run_name is not None:
             raise ValueError("'run' and 'run_name' are mutually exclusive.")
         if run is not None:
-            if isinstance(config, Ch10ImportConfig):
-                raise ValueError(
-                    "'run' is not supported for Ch10ImportConfig. Use 'run_name' instead."
-                )
             config.run_id = run._id_or_error if isinstance(run, Run) else run
         elif run_name is not None:
             config.run_name = run_name
-        elif not config.run_name and (isinstance(config, Ch10ImportConfig) or not config.run_id):
+        elif not config.run_name and not config.run_id:
             config.run_name = path.name
 
         if isinstance(
@@ -204,9 +199,8 @@ async def detect_config(
         provided.
 
         Only CSV and Parquet files are currently supported for auto-detection.
-        For other formats (TDMS, HDF5, CH10), create the config manually
-        using ``TdmsImportConfig``, ``Hdf5ImportConfig``, or
-        ``Ch10ImportConfig``.
+        For other formats (TDMS, HDF5), create the config manually
+        using ``TdmsImportConfig`` or ``Hdf5ImportConfig``.
 
         For CSV files, the server scans the first two rows for an optional
         JSON metadata row. Row 1 is checked first; row 2 is checked only
diff --git a/python/lib/sift_client/sift_types/data_import.py b/python/lib/sift_client/sift_types/data_import.py
index c62a4c0e8..62208a678 100644
--- a/python/lib/sift_client/sift_types/data_import.py
+++ b/python/lib/sift_client/sift_types/data_import.py
@@ -8,7 +8,6 @@
 from pydantic import BaseModel, model_validator
 from sift.common.type.v1.channel_config_pb2 import ChannelConfig as ChannelConfigProto
 from sift.data_imports.v2.data_imports_pb2 import (
-    DATA_TYPE_KEY_CH10,
     DATA_TYPE_KEY_CSV,
     DATA_TYPE_KEY_HDF5,
     DATA_TYPE_KEY_PARQUET_FLATDATASET,
@@ -19,7 +18,6 @@
     PARQUET_COMPLEX_TYPES_IMPORT_MODE_IGNORE,
     PARQUET_COMPLEX_TYPES_IMPORT_MODE_STRING,
 )
-from sift.data_imports.v2.data_imports_pb2 import Ch10Config as Ch10ConfigProto
 from sift.data_imports.v2.data_imports_pb2 import CsvConfig as CsvConfigProto
 from sift.data_imports.v2.data_imports_pb2 import CsvTimeColumn as CsvTimeColumnProto
 from sift.data_imports.v2.data_imports_pb2 import Hdf5Config as Hdf5ConfigProto
@@ -70,14 +68,12 @@ class DataTypeKey(Enum):
     PARQUET_FLATDATASET = DATA_TYPE_KEY_PARQUET_FLATDATASET
     PARQUET_SINGLE_CHANNEL_PER_ROW = DATA_TYPE_KEY_PARQUET_SINGLE_CHANNEL_PER_ROW
     TDMS = DATA_TYPE_KEY_TDMS
-    CH10 = DATA_TYPE_KEY_CH10
     HDF5 = DATA_TYPE_KEY_HDF5
 
 
 EXTENSION_TO_DATA_TYPE_KEY: dict[str, DataTypeKey] = {
     ".csv": DataTypeKey.CSV,
     ".tdms": DataTypeKey.TDMS,
-    ".ch10": DataTypeKey.CH10,
     ".h5": DataTypeKey.HDF5,
     ".hdf5": DataTypeKey.HDF5,
 }
@@ -537,23 +533,6 @@ def _from_proto(
         )
 
 
-class Ch10ImportConfig(ImportConfigBase):
-    """Configuration for importing a CH10 file.
-
-    Attributes:
-        scale_values: Whether to apply EU (engineering unit) scaling to channel values.
-    """
-
-    scale_values: bool = False
-
-    def _to_proto(self) -> Ch10ConfigProto:
-        return Ch10ConfigProto(
-            asset_name=self.asset_name,
-            run_name=self.run_name or "",
-            scale_values=self.scale_values,
-        )
-
-
 class TdmsImportConfig(ImportConfigBase):
     """Configuration for importing a TDMS file.
 
@@ -655,7 +634,6 @@ def _to_proto(self) -> Hdf5ConfigProto:
     CsvImportConfig,
     ParquetFlatDatasetImportConfig,
     ParquetSingleChannelPerRowImportConfig,
-    Ch10ImportConfig,
     TdmsImportConfig,
     Hdf5ImportConfig,
 ]

From 542b5fa7212dc0be6f53772448c5a6f143a158c9 Mon Sep 17 00:00:00 2001
From: Wei Qi Lu <wei@siftstack.com>
Date: Mon, 13 Apr 2026 10:48:35 -0700
Subject: [PATCH 52/52] sync stubs update

---
 .../_tests/resources/test_data_imports.py        | 16 ----------------
 .../resources/sync_stubs/__init__.pyi            |  7 +++----
 2 files changed, 3 insertions(+), 20 deletions(-)

diff --git a/python/lib/sift_client/_tests/resources/test_data_imports.py b/python/lib/sift_client/_tests/resources/test_data_imports.py
index a2cabf953..a3819cf1c 100644
--- a/python/lib/sift_client/_tests/resources/test_data_imports.py
+++ b/python/lib/sift_client/_tests/resources/test_data_imports.py
@@ -8,7 +8,6 @@
 from sift_client.resources.data_imports import _resolve_data_type_key
 from sift_client.sift_types.channel import ChannelDataType
 from sift_client.sift_types.data_import import (
-    Ch10ImportConfig,
     CsvDataColumn,
     CsvImportConfig,
     CsvTimeColumn,
@@ -131,21 +130,6 @@ def test_absolute_time_does_not_require_start_time(self):
         assert col.relative_start_time is None
 
 
-class TestCh10Config:
-    def test_to_proto(self):
-        config = Ch10ImportConfig(asset_name="my_asset", run_name="run1", scale_values=True)
-        proto = config._to_proto()
-        assert proto.asset_name == "my_asset"
-        assert proto.run_name == "run1"
-        assert proto.scale_values is True
-
-    def test_to_proto_defaults(self):
-        config = Ch10ImportConfig(asset_name="my_asset")
-        proto = config._to_proto()
-        assert proto.run_name == ""
-        assert proto.scale_values is False
-
-
 class TestTdmsConfig:
     def test_to_proto(self):
         config = TdmsImportConfig(
diff --git a/python/lib/sift_client/resources/sync_stubs/__init__.pyi b/python/lib/sift_client/resources/sync_stubs/__init__.pyi
index d76dd4942..0e9d18b76 100644
--- a/python/lib/sift_client/resources/sync_stubs/__init__.pyi
+++ b/python/lib/sift_client/resources/sync_stubs/__init__.pyi
@@ -651,9 +651,8 @@ class DataImportAPI:
         provided.
 
         Only CSV and Parquet files are currently supported for auto-detection.
-        For other formats (TDMS, HDF5, CH10), create the config manually
-        using ``TdmsImportConfig``, ``Hdf5ImportConfig``, or
-        ``Ch10ImportConfig``.
+        For other formats (TDMS, HDF5), create the config manually
+        using ``TdmsImportConfig`` or ``Hdf5ImportConfig``.
 
         For CSV files, the server scans the first two rows for an optional
         JSON metadata row. Row 1 is checked first; row 2 is checked only
@@ -732,7 +731,7 @@ class DataImportAPI:
 
         When ``config`` is omitted the file format is auto-detected via
         ``detect_config`` (CSV and Parquet only). For other formats
-        (TDMS, HDF5, CH10), ``config`` must be provided.
+        (TDMS and HDF5), ``config`` must be provided.
         When ``asset`` is provided it overrides the config value;
         otherwise the config's ``asset_name`` is used.
         If neither ``run`` nor ``run_name`` is provided (and none is