Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 74 additions & 0 deletions bluemath_tk/core/decorators.py
Original file line number Diff line number Diff line change
Expand Up @@ -513,3 +513,77 @@ def wrapper(
)

return wrapper


def validate_gp_data(func):
    """
    Validate data in ExactGPInterpolation class fit method.

    The returned wrapper checks every argument and, only if all checks pass,
    calls ``func`` with the eight arguments forwarded positionally in the
    order they appear in the wrapper signature.

    Parameters
    ----------
    func : callable
        The function to be decorated

    Returns
    -------
    callable
        The decorated function

    Notes
    -----
    The wrapper raises:

    - ``ValueError`` if ``subset_data`` or ``target_data`` is None, if a
      directional variable is not a column of its DataFrame, or if
      ``verbose`` is not an integer >= 0.
    - ``TypeError`` if ``subset_data`` / ``target_data`` is not a pandas
      DataFrame, if a directional-variables argument is not a list, if a
      custom scale factor is not a dict, or if ``normalize_target_data`` is
      not a bool.

    The list / dict arguments default to ``None`` and are replaced by a fresh
    empty container on every call. Using ``[]`` / ``{}`` directly as defaults
    would share one object across all calls, so a decorated function that
    mutates its argument would leak state into later calls.
    """

    # Imported locally so this block does not rely on the module-level imports.
    from typing import Optional

    @functools.wraps(func)
    def wrapper(
        self,
        subset_data: pd.DataFrame,
        target_data: pd.DataFrame,
        subset_directional_variables: Optional[list[str]] = None,
        target_directional_variables: Optional[list[str]] = None,
        subset_custom_scale_factor: Optional[dict] = None,
        normalize_target_data: bool = True,
        target_custom_scale_factor: Optional[dict] = None,
        verbose: int = 1,
    ):
        # Materialise per-call defaults (never share a container between calls).
        if subset_directional_variables is None:
            subset_directional_variables = []
        if target_directional_variables is None:
            target_directional_variables = []
        if subset_custom_scale_factor is None:
            subset_custom_scale_factor = {}
        if target_custom_scale_factor is None:
            target_custom_scale_factor = {}

        # Input DataFrames: presence first, then type.
        if subset_data is None:
            raise ValueError("Subset data cannot be None")
        elif not isinstance(subset_data, pd.DataFrame):
            raise TypeError("Subset data must be a pandas DataFrame")
        if target_data is None:
            raise ValueError("Target data cannot be None")
        elif not isinstance(target_data, pd.DataFrame):
            raise TypeError("Target data must be a pandas DataFrame")

        # Directional variables must be lists of existing column names.
        if not isinstance(subset_directional_variables, list):
            raise TypeError("Subset directional variables must be a list")
        for directional_variable in subset_directional_variables:
            if directional_variable not in subset_data.columns:
                raise ValueError(
                    f"Directional variable {directional_variable} "
                    f"not found in subset data"
                )
        if not isinstance(target_directional_variables, list):
            raise TypeError("Target directional variables must be a list")
        for directional_variable in target_directional_variables:
            if directional_variable not in target_data.columns:
                raise ValueError(
                    f"Directional variable {directional_variable} "
                    f"not found in target data"
                )

        # Remaining options: plain type / range checks.
        if not isinstance(subset_custom_scale_factor, dict):
            raise TypeError("Subset custom scale factor must be a dict")
        if not isinstance(normalize_target_data, bool):
            raise TypeError("Normalize target data must be a bool")
        if not isinstance(target_custom_scale_factor, dict):
            raise TypeError("Target custom scale factor must be a dict")
        if not isinstance(verbose, int) or verbose < 0:
            raise ValueError("Verbose must be an integer >= 0")

        return func(
            self,
            subset_data,
            target_data,
            subset_directional_variables,
            target_directional_variables,
            subset_custom_scale_factor,
            normalize_target_data,
            target_custom_scale_factor,
            verbose,
        )

    return wrapper
3 changes: 2 additions & 1 deletion bluemath_tk/datamining/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,5 +13,6 @@
from .pca import PCA
from .som import SOM

# Optionally, define the module's `__all__` variable to control what gets imported when using `from module import *`.
# Optionally, define the module's `__all__` variable to control what gets imported
# when using `from module import *`.
__all__ = ["KMA", "LHS", "MDA", "PCA", "SOM"]
9 changes: 8 additions & 1 deletion bluemath_tk/datamining/kma.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
Status: Under development (Working)
"""

import platform

import numpy as np
import pandas as pd
from scipy.spatial.distance import cdist
Expand Down Expand Up @@ -269,12 +271,17 @@ def _create_pyclustering_model(

# Build kwargs for pyclustering
kwargs = {}
# Use the Python implementation (ccore=False) on macOS to avoid architecture
# compatibility issues with the native C++ library (x86_64 vs arm64).
# On other platforms, use the faster C++ implementation (ccore=True, default).
if platform.system() == "Darwin": # macOS
kwargs["ccore"] = False
if self.distance_metric is not None:
# Map common metric names to pyclustering format if needed
kwargs["ccore"] = False # Use Python implementation
# Note: pyclustering's distance metric handling varies by algorithm
# For simplicity, we'll let pyclustering use defaults
# Advanced users can modify the model directly if needed
pass

# Import and create the appropriate algorithm
if self.algorithm_name == "kmeans":
Expand Down
Loading