From 0f813bc7d7130c6c40e74614680a4f14b52a5a3f Mon Sep 17 00:00:00 2001 From: bocklund Date: Sun, 17 Aug 2025 12:40:17 -0700 Subject: [PATCH 01/14] Implement pydantic models for datasets Currently we just pass through the models and dump them out to dicts to put in the PickleableTinyDB --- espei/datasets/__init__.py | 2 + espei/datasets/dataset_models.py | 97 +++++++++++++++++++++++++++ espei/{datasets.py => datasets/db.py} | 28 +++++--- 3 files changed, 119 insertions(+), 8 deletions(-) create mode 100644 espei/datasets/__init__.py create mode 100644 espei/datasets/dataset_models.py rename espei/{datasets.py => datasets/db.py} (94%) diff --git a/espei/datasets/__init__.py b/espei/datasets/__init__.py new file mode 100644 index 00000000..18dc966a --- /dev/null +++ b/espei/datasets/__init__.py @@ -0,0 +1,2 @@ +from .dataset_models import * +from .db import * \ No newline at end of file diff --git a/espei/datasets/dataset_models.py b/espei/datasets/dataset_models.py new file mode 100644 index 00000000..2f17a921 --- /dev/null +++ b/espei/datasets/dataset_models.py @@ -0,0 +1,97 @@ +from typing import Literal, Optional, Union, TypeAlias +from pydantic import BaseModel, Field + +__all__ = [ + "Dataset", + "BroadcastSinglePhaseFixedConfigurationDataset", + "ActivityPropertyDataset", + "EquilibriumPropertyDataset", + "ZPFDataset", +] + +ComponentName: TypeAlias = str +PhaseName: TypeAlias = str +PhaseCompositionType: TypeAlias = Union[ + tuple[PhaseName, list[ComponentName], list[float | None]], # The usual definition ["LIQUID", ["B"], [0.5]] + tuple[PhaseName, list[ComponentName], list[float | None], bool] # Handle the disordered flag +] +PhaseRegionType: TypeAlias = list[PhaseCompositionType] + +class Dataset(BaseModel): + pass + +class Solver(BaseModel): + mode: Literal["manual"] = Field(default="manual") + sublattice_site_ratios: list[float] + # TODO: migrate to list[list[list[float]]] + sublattice_configurations: list[list[ComponentName | list[ComponentName]]] + 
sublattice_occupancies: list[list[float | list[float]]] # TODO: optional and validate against configurations + +class BroadcastSinglePhaseFixedConfigurationDataset(Dataset): + components: list[ComponentName] = Field(min_length=1) + phases: list[PhaseName] = Field(min_length=1, max_length=1) + solver: Solver + conditions: dict[str, float | list[float]] + output: str + values: list[list[list[float]]] + excluded_model_contributions: list[str] = Field(default_factory=list) + reference: str = Field(default="") + bibtex: str = Field(default="") + dataset_author: str = Field(default="") + comment: str = Field(default="") + disabled: bool = Field(default=False) + + +# TODO: would be great to remove +class ActivityDataReferenceState(BaseModel): + phases: list[PhaseName] = Field(min_length=1) + conditions: dict[str, float] + + +# TODO: refactor to merge this with EquilibriumPropertyDataset +class ActivityPropertyDataset(Dataset): + components: list[ComponentName] = Field(min_length=1) + phases: list[PhaseName] = Field(min_length=1, max_length=1) + conditions: dict[str, float | list[float]] + reference_state: ActivityDataReferenceState + output: str + values: list[list[list[float]]] + reference: str = Field(default="") + bibtex: str = Field(default="") + dataset_author: str = Field(default="") + comment: str = Field(default="") + disabled: bool = Field(default=False) + + +class ReferenceStates(BaseModel): + phase: PhaseName + fixed_state_variables: dict[str, float] | None = Field(default=None, description="Fixed potentials for the reference state", examples=[{"T": 298.15, "P": 101325}]) + + +class EquilibriumPropertyDataset(Dataset): + components: list[ComponentName] = Field(min_length=1) + phases: list[PhaseName] = Field(min_length=1, max_length=1) + conditions: dict[str, float | list[float]] + reference_states: dict[ComponentName, ReferenceStates] + output: str + values: list[list[list[float]]] + reference: str = Field(default="") + bibtex: str = Field(default="") + 
dataset_author: str = Field(default="") + comment: str = Field(default="") + disabled: bool = Field(default=False) + + +class ZPFDataset(Dataset): + components: list[ComponentName] = Field(min_length=1) + phases: list[str] = Field(min_length=1) + conditions: dict[str, float | list[float]] + broadcast_conditions: Literal[False] = Field(default=False) # TODO: migrate and remove, since True was never supported + output: Literal["ZPF"] + values: list[PhaseRegionType] # TODO: validate to be of same shape as conditions + excluded_model_contributions: list[str] = Field(default_factory=list) + reference: str = Field(default="") + bibtex: str = Field(default="") + dataset_author: str = Field(default="") + comment: str = Field(default="") + disabled: bool = Field(default=False) diff --git a/espei/datasets.py b/espei/datasets/db.py similarity index 94% rename from espei/datasets.py rename to espei/datasets/db.py index ffa27f98..27ea23ad 100644 --- a/espei/datasets.py +++ b/espei/datasets/db.py @@ -1,5 +1,5 @@ import fnmatch, json, os -from typing import Any, Dict, List +from typing import Any, Dict, List, TypeAlias import numpy as np from tinydb.storages import MemoryStorage @@ -7,8 +7,8 @@ from espei.utils import PickleableTinyDB -# Create a type -Dataset = Dict[str, Any] +from .dataset_models import Dataset, ActivityPropertyDataset, BroadcastSinglePhaseFixedConfigurationDataset, EquilibriumPropertyDataset, ZPFDataset + class DatasetError(Exception): """Exception raised when datasets are invalid.""" @@ -42,7 +42,7 @@ def recursive_map(f, x): return f(x) -def check_dataset(dataset: Dataset): +def check_dataset(dataset: dict[str, Any]) -> Dataset: """Ensure that the dataset is valid and consistent. 
Currently supports the following validation checks: @@ -64,7 +64,7 @@ def check_dataset(dataset: Dataset): Returns ------- - None + Dataset Raises ------ @@ -206,8 +206,20 @@ def check_dataset(dataset: Dataset): if isinstance(subl, (list, tuple)) and sorted(subl) != subl: raise DatasetError('Sublattice {} in configuration {} is must be sorted in alphabetic order ({})'.format(subl, configuration, sorted(subl))) + if is_zpf: + dataset_obj = ZPFDataset(**clean_dataset(dataset)) + elif is_activity: + dataset_obj = ActivityPropertyDataset(**clean_dataset(dataset)) + elif is_equilibrium: + dataset_obj = EquilibriumPropertyDataset(**clean_dataset(dataset)) + elif is_single_phase: + dataset_obj = BroadcastSinglePhaseFixedConfigurationDataset(**clean_dataset(dataset)) + else: + raise ValueError(f"Unknown dataset type for dataset {dataset}") + return dataset_obj + -def clean_dataset(dataset: Dataset) -> Dataset: +def clean_dataset(dataset: dict[str, Any]) -> dict[str, Any]: """ Clean an ESPEI dataset dictionary. 
@@ -333,8 +345,8 @@ def load_datasets(dataset_filenames, include_disabled=False) -> PickleableTinyDB if not include_disabled and d.get('disabled', False): # The dataset is disabled and not included continue - check_dataset(d) - ds_database.insert(clean_dataset(d)) + dataset_obj = check_dataset(d) + ds_database.insert(dataset_obj.model_dump()) except ValueError as e: raise ValueError('JSON Error in {}: {}'.format(fname, e)) except DatasetError as e: From b48213cef9a5317065bc40e39db5dc215ba7d9f3 Mon Sep 17 00:00:00 2001 From: bocklund Date: Sun, 17 Aug 2025 13:46:49 -0700 Subject: [PATCH 02/14] Migrate ZPF-specific check_dataset functions to ZPFDataset validator --- espei/datasets/dataset_models.py | 72 +++++++++++++++++++++++++++++- espei/datasets/db.py | 75 ++------------------------------ tests/test_datasets.py | 26 ++++++++++- 3 files changed, 98 insertions(+), 75 deletions(-) diff --git a/espei/datasets/dataset_models.py b/espei/datasets/dataset_models.py index 2f17a921..fae2d1f0 100644 --- a/espei/datasets/dataset_models.py +++ b/espei/datasets/dataset_models.py @@ -1,5 +1,6 @@ -from typing import Literal, Optional, Union, TypeAlias -from pydantic import BaseModel, Field +from typing import Literal, Optional, Union, TypeAlias, Self +from pydantic import BaseModel, Field, model_validator, field_validator +import numpy as np __all__ = [ "Dataset", @@ -9,6 +10,10 @@ "ZPFDataset", ] +class DatasetError(Exception): + """Exception raised when datasets are invalid.""" + pass + ComponentName: TypeAlias = str PhaseName: TypeAlias = str PhaseCompositionType: TypeAlias = Union[ @@ -95,3 +100,66 @@ class ZPFDataset(Dataset): dataset_author: str = Field(default="") comment: str = Field(default="") disabled: bool = Field(default=False) + + @model_validator(mode="after") + def validate_condition_value_shape_agreement(self) -> Self: + values_shape = (len(self.values),) + num_temperature = np.atleast_1d(self.conditions["T"]).size + num_pressure = 
np.atleast_1d(self.conditions["P"]).size + if num_pressure != 1: + raise DatasetError("Non-scalar pressures are not currently supported") + conditions_shape = (num_temperature,) + if conditions_shape != values_shape: + raise DatasetError("Shape of conditions (T): {} does not match the shape of the values {}.".format(conditions_shape, values_shape)) + return self + + @model_validator(mode="after") + def validate_phases_entered_match_phases_used(self) -> Self: + phases_entered = set(self.phases) + phases_used = set() + for phase_region in self.values: + for phase_composition in phase_region: + phases_used.add(phase_composition[0]) + if len(phases_entered - phases_used) > 0: + raise DatasetError("Phases entered {} do not match phases used {}.".format(phases_entered, phases_used)) + return self + + @model_validator(mode="after") + def validate_components_entered_match_components_used(self) -> Self: + components_entered = set(self.components) + for i, phase_region in enumerate(self.values): + for j, phase_compositions in enumerate(phase_region): + phase_composition_components = set(phase_compositions[1]) + if not components_entered.issuperset(phase_composition_components): + raise DatasetError("Components were used in phase region {} ({}) for phase composition {} ({}) that are not specified as components in the dataset ()", i,phase_region, j, phase_compositions, components_entered) + independent_components = components_entered - phase_composition_components - {'VA'} + if len(independent_components) != 1: + raise DatasetError('Degree of freedom error: expected 1 independent component, got {} for entered components {} and phase composition components {} in phase region {} ({}) for phase composition {} ({})'.format(len(independent_components), components_entered, phase_composition_components, i, phase_region, j, phase_compositions)) + return self + + @field_validator("values", mode="after") + @classmethod + def validate_phase_compositions(cls, values: 
list[PhaseRegionType]) -> list[PhaseRegionType]: + for i, phase_region in enumerate(values): + for j, phase_composition in enumerate(phase_region): + phase = phase_composition[0] + component_list = phase_composition[1] + mole_fraction_list = phase_composition[2] + # check that the phase is a string, components a list of strings, + # and the fractions are a list of float + if not isinstance(phase, str): + raise DatasetError('The first element in phase composition {} ({}) for phase region {} ({}) should be a string. Instead it is a {} of value {}'.format(j, phase_composition, i, phase_region, type(phase), phase)) + if not all([isinstance(comp, str) for comp in component_list]): + raise DatasetError('The second element in phase composition {} ({}) for phase region {} ({}) should be a list of strings. Instead it is a {} of value {}'.format(j, phase_composition, i, phase_region, type(component_list), component_list)) + if not all([(isinstance(mole_frac, (int, float)) or mole_frac is None) for mole_frac in mole_fraction_list]): + raise DatasetError('The last element in phase composition {} ({}) for phase region {} ({}) should be a list of numbers. 
Instead it is a {} of value {}'.format(j, phase_composition, i, phase_region, type(mole_fraction_list), mole_fraction_list)) + # check that the shape of components list and mole fractions list is the same + if len(component_list) != len(mole_fraction_list): + raise DatasetError('The length of the components list and mole fractions list in phase composition {} ({}) for phase region {} ({}) should be the same.'.format(j, phase_composition, i, phase_region)) + # check that all mole fractions are less than one + mf_sum = np.nansum(np.array(mole_fraction_list, dtype=np.float64)) + if any([mf is not None for mf in mole_fraction_list]) and mf_sum > 1.0: + raise DatasetError('Mole fractions for phase composition {} ({}) for phase region {} ({}) sum to greater than one.'.format(j, phase_composition, i, phase_region)) + if any([(mf is not None) and (mf < 0.0) for mf in mole_fraction_list]): + raise DatasetError('Got unallowed negative mole fraction for phase composition {} ({}) for phase region {} ({}).'.format(j, phase_composition, i, phase_region)) + return values \ No newline at end of file diff --git a/espei/datasets/db.py b/espei/datasets/db.py index 27ea23ad..40a9b902 100644 --- a/espei/datasets/db.py +++ b/espei/datasets/db.py @@ -7,13 +7,9 @@ from espei.utils import PickleableTinyDB -from .dataset_models import Dataset, ActivityPropertyDataset, BroadcastSinglePhaseFixedConfigurationDataset, EquilibriumPropertyDataset, ZPFDataset +from .dataset_models import Dataset, ActivityPropertyDataset, BroadcastSinglePhaseFixedConfigurationDataset, EquilibriumPropertyDataset, ZPFDataset, DatasetError -class DatasetError(Exception): - """Exception raised when datasets are invalid.""" - pass - def recursive_map(f, x): """ @@ -73,10 +69,7 @@ def check_dataset(dataset: dict[str, Any]) -> Dataset: """ is_equilibrium = 'solver' not in dataset.keys() and dataset['output'] != 'ZPF' is_activity = dataset['output'].startswith('ACR') - is_zpf = dataset['output'] == 'ZPF' is_single_phase = 
'solver' in dataset.keys() - if not any((is_equilibrium, is_single_phase, is_zpf)): - raise DatasetError("Cannot determine type of dataset") components = dataset['components'] conditions = dataset['conditions'] values = dataset['values'] @@ -121,21 +114,6 @@ def check_dataset(dataset: dict[str, Any]) -> Dataset: conditions_shape = (num_pressure, num_temperature, num_configs) if conditions_shape != values_shape: raise DatasetError('Shape of conditions (P, T, configs): {} does not match the shape of the values {}.'.format(conditions_shape, values_shape)) - elif is_zpf: - values_shape = (len(values)) - conditions_shape = (num_temperature) - if conditions_shape != values_shape: - raise DatasetError('Shape of conditions (T): {} does not match the shape of the values {}.'.format(conditions_shape, values_shape)) - - # check that all of the correct phases are present - if is_zpf: - phases_entered = set(phases) - phases_used = set() - for zpf in values: - for tieline in zpf: - phases_used.add(tieline[0]) - if len(phases_entered - phases_used) > 0: - raise DatasetError('Phases entered {} do not match phases used {}.'.format(phases_entered, phases_used)) # check that all of the components used match the components entered components_entered = set(components) @@ -152,41 +130,9 @@ def check_dataset(dataset: dict[str, Any]) -> Dataset: components_used.update({c.split('_')[1] for c in comp_conditions.keys()}) # mass balance of components comp_dof = len(comp_conditions.keys()) - elif is_zpf: - for zpf in values: - for tieline in zpf: - tieline_comps = set(tieline[1]) - components_used.update(tieline_comps) - if len(components_entered - tieline_comps - {'VA'}) != 1: - raise DatasetError('Degree of freedom error for entered components {} in tieline {} of ZPF {}'.format(components_entered, tieline, zpf)) - # handle special case of mass balance in ZPFs - comp_dof = 1 - if len(components_entered - components_used - {'VA'}) > comp_dof or len(components_used - components_entered) > 0: + 
if (is_single_phase or is_activity or is_equilibrium) and (len(components_entered - components_used - {'VA'}) > comp_dof or len(components_used - components_entered) > 0): raise DatasetError('Components entered {} do not match components used {}.'.format(components_entered, components_used)) - # check that the ZPF values are formatted properly - if is_zpf: - for zpf in values: - for tieline in zpf: - phase = tieline[0] - component_list = tieline[1] - mole_fraction_list = tieline[2] - # check that the phase is a string, components a list of strings, - # and the fractions are a list of float - if not isinstance(phase, str): - raise DatasetError('The first element in the tieline {} for the ZPF point {} should be a string. Instead it is a {} of value {}'.format(tieline, zpf, type(phase), phase)) - if not all([isinstance(comp, str) for comp in component_list]): - raise DatasetError('The second element in the tieline {} for the ZPF point {} should be a list of strings. Instead it is a {} of value {}'.format(tieline, zpf, type(component_list), component_list)) - if not all([(isinstance(mole_frac, (int, float)) or mole_frac is None) for mole_frac in mole_fraction_list]): - raise DatasetError('The last element in the tieline {} for the ZPF point {} should be a list of numbers. 
Instead it is a {} of value {}'.format(tieline, zpf, type(mole_fraction_list), mole_fraction_list)) - # check that the shape of components list and mole fractions list is the same - if len(component_list) != len(mole_fraction_list): - raise DatasetError('The length of the components list and mole fractions list in tieline {} for the ZPF point {} should be the same.'.format(tieline, zpf)) - # check that all mole fractions are less than one - mf_sum = np.nansum(np.array(mole_fraction_list, dtype=np.float64)) - if any([mf is not None for mf in mole_fraction_list]) and mf_sum > 1.0: - raise DatasetError('Mole fractions for tieline {} for the ZPF point {} sum to greater than one.'.format(tieline, zpf)) - # check that the site ratios are valid as well as site occupancies, if applicable if is_single_phase: nconfigs = len(sublattice_configurations) @@ -206,7 +152,7 @@ def check_dataset(dataset: dict[str, Any]) -> Dataset: if isinstance(subl, (list, tuple)) and sorted(subl) != subl: raise DatasetError('Sublattice {} in configuration {} is must be sorted in alphabetic order ({})'.format(subl, configuration, sorted(subl))) - if is_zpf: + if dataset["output"] == "ZPF": dataset_obj = ZPFDataset(**clean_dataset(dataset)) elif is_activity: dataset_obj = ActivityPropertyDataset(**clean_dataset(dataset)) @@ -248,20 +194,7 @@ def clean_dataset(dataset: dict[str, Any]) -> dict[str, Any]: if occupancies is not None: solver["sublattice_occupancies"] = recursive_map(float, occupancies) - if dataset["output"] == "ZPF": - values = dataset["values"] - new_values = [] - for tieline in values: - new_tieline = [] - for tieline_point in tieline: - if all([comp is None for comp in tieline_point[2]]): - # this is a null tieline point - new_tieline.append(tieline_point) - else: - new_tieline.append([tieline_point[0], tieline_point[1], recursive_map(float, tieline_point[2])]) - new_values.append(new_tieline) - dataset["values"] = new_values - else: + if dataset["output"] != "ZPF": # values should 
be all numerical dataset["values"] = recursive_map(float, dataset["values"]) diff --git a/tests/test_datasets.py b/tests/test_datasets.py index 65d914c2..2734a517 100644 --- a/tests/test_datasets.py +++ b/tests/test_datasets.py @@ -4,6 +4,7 @@ from .testing_data import CU_MG_EXP_ACTIVITY, CU_MG_DATASET_THERMOCHEMICAL_STRING_VALUES, CU_MG_DATASET_ZPF_STRING_VALUES, LI_SN_LIQUID_DATA, dataset_multi_valid_ternary from .fixtures import datasets_db +from pydantic import ValidationError dataset_single_valid = { "components": ["AL", "NI", "VA"], @@ -294,6 +295,21 @@ ], } +dataset_zpf_negative_mole_fraction = { + "components": ["AL", "NI", "VA"], + "phases": ["AL3NI2", "BCC_B2"], + "conditions": { + "P": 101325, + "T": [1348] + }, + "output": "ZPF", + "values": [ + [["AL3NI2", ["NI"], [-0.5]], ["BCC_B2", ["NI"], [None]]], # mole fraction is negative + ], +} + + + dataset_single_unsorted_interaction = { "components": ["AL", "NI", "VA"], "phases": ["BCC_B2"], @@ -382,7 +398,7 @@ def test_check_datasets_raises_with_incorrect_components(): def test_check_datasets_raises_with_malformed_zpf(): """Passed datasets that have malformed ZPF values should raise.""" - with pytest.raises(DatasetError): + with pytest.raises((DatasetError, ValidationError)): check_dataset(dataset_multi_malformed_zpfs_components_not_list) with pytest.raises(DatasetError): check_dataset(dataset_multi_malformed_zpfs_fractions_do_not_match_components) @@ -409,6 +425,12 @@ def test_check_datasets_raises_with_zpf_fractions_greater_than_one(): check_dataset(dataset_multi_mole_fractions_as_percents) +def test_check_datasets_raises_with_negative_zpf_fractions(): + """Passed datasets that have negative mole fractions should raise.""" + with pytest.raises(DatasetError): + check_dataset(dataset_zpf_negative_mole_fraction) + + def test_check_datasets_raises_with_unsorted_interactions(): """Passed datasets that have sublattice interactions not in sorted order should raise.""" with pytest.raises(DatasetError): @@ -425,7 
+447,7 @@ def test_datasets_convert_thermochemical_string_values_producing_correct_value(d def test_datasets_convert_zpf_string_values_producing_correct_value(datasets_db): """Strings where floats are expected should give correct answers for ZPF datasets""" - ds = clean_dataset(CU_MG_DATASET_ZPF_STRING_VALUES) + ds = check_dataset(CU_MG_DATASET_ZPF_STRING_VALUES).model_dump() assert np.issubdtype(np.array([t[0][2] for t in ds['values']]).dtype, np.number) assert np.issubdtype(np.array(ds['conditions']['T']).dtype, np.number) assert np.issubdtype(np.array(ds['conditions']['P']).dtype, np.number) From fb9cc5d48011c0b759d7d7570e917c0d4aca2061 Mon Sep 17 00:00:00 2001 From: bocklund Date: Sun, 17 Aug 2025 14:01:58 -0700 Subject: [PATCH 03/14] Deprecate clean_dataset as the behavior is in pydantic now --- espei/datasets/dataset_models.py | 4 ++- espei/datasets/db.py | 42 ++++++-------------------------- tests/test_datasets.py | 7 +++--- 3 files changed, 14 insertions(+), 39 deletions(-) diff --git a/espei/datasets/dataset_models.py b/espei/datasets/dataset_models.py index fae2d1f0..4dc7209f 100644 --- a/espei/datasets/dataset_models.py +++ b/espei/datasets/dataset_models.py @@ -30,7 +30,9 @@ class Solver(BaseModel): sublattice_site_ratios: list[float] # TODO: migrate to list[list[list[float]]] sublattice_configurations: list[list[ComponentName | list[ComponentName]]] - sublattice_occupancies: list[list[float | list[float]]] # TODO: optional and validate against configurations + sublattice_occupancies: list[list[float | list[float]]] | None = Field(default=None) + + class BroadcastSinglePhaseFixedConfigurationDataset(Dataset): components: list[ComponentName] = Field(min_length=1) diff --git a/espei/datasets/db.py b/espei/datasets/db.py index 40a9b902..44b4dfa8 100644 --- a/espei/datasets/db.py +++ b/espei/datasets/db.py @@ -1,6 +1,6 @@ import fnmatch, json, os from typing import Any, Dict, List, TypeAlias - +import warnings import numpy as np from tinydb.storages import 
MemoryStorage from tinydb import where @@ -153,13 +153,13 @@ def check_dataset(dataset: dict[str, Any]) -> Dataset: raise DatasetError('Sublattice {} in configuration {} is must be sorted in alphabetic order ({})'.format(subl, configuration, sorted(subl))) if dataset["output"] == "ZPF": - dataset_obj = ZPFDataset(**clean_dataset(dataset)) + dataset_obj = ZPFDataset(**dataset) elif is_activity: - dataset_obj = ActivityPropertyDataset(**clean_dataset(dataset)) + dataset_obj = ActivityPropertyDataset(**dataset) elif is_equilibrium: - dataset_obj = EquilibriumPropertyDataset(**clean_dataset(dataset)) + dataset_obj = EquilibriumPropertyDataset(**dataset) elif is_single_phase: - dataset_obj = BroadcastSinglePhaseFixedConfigurationDataset(**clean_dataset(dataset)) + dataset_obj = BroadcastSinglePhaseFixedConfigurationDataset(**dataset) else: raise ValueError(f"Unknown dataset type for dataset {dataset}") return dataset_obj @@ -167,37 +167,9 @@ def check_dataset(dataset: dict[str, Any]) -> Dataset: def clean_dataset(dataset: dict[str, Any]) -> dict[str, Any]: """ - Clean an ESPEI dataset dictionary. - - Parameters - ---------- - dataset: Dataset - Dictionary of the standard ESPEI dataset. dataset : dic - - Returns - ------- - Dataset - Modified dataset that has been cleaned - - Notes - ----- - Assumes a valid, checked dataset. 
Currently handles - * Converting expected numeric values to floats - + No-op """ - dataset["conditions"] = {k: recursive_map(float, v) for k, v in dataset["conditions"].items()} - - solver = dataset.get("solver") - if solver is not None: - solver["sublattice_site_ratios"] = recursive_map(float, solver["sublattice_site_ratios"]) - occupancies = solver.get("sublattice_occupancies") - if occupancies is not None: - solver["sublattice_occupancies"] = recursive_map(float, occupancies) - - if dataset["output"] != "ZPF": - # values should be all numerical - dataset["values"] = recursive_map(float, dataset["values"]) - + warnings.warn(f"clean_dataset deprecated will be removed in ESPEI 0.11. Behavior has been migrated to the pydantic dataset implementations in espei.datasets.dataset_models.", DeprecationWarning) return dataset diff --git a/tests/test_datasets.py b/tests/test_datasets.py index 2734a517..9c6f3642 100644 --- a/tests/test_datasets.py +++ b/tests/test_datasets.py @@ -1,6 +1,7 @@ +from copy import deepcopy import pytest import numpy as np -from espei.datasets import DatasetError, check_dataset, clean_dataset, apply_tags +from espei.datasets import DatasetError, check_dataset, apply_tags from .testing_data import CU_MG_EXP_ACTIVITY, CU_MG_DATASET_THERMOCHEMICAL_STRING_VALUES, CU_MG_DATASET_ZPF_STRING_VALUES, LI_SN_LIQUID_DATA, dataset_multi_valid_ternary from .fixtures import datasets_db @@ -439,7 +440,7 @@ def test_check_datasets_raises_with_unsorted_interactions(): def test_datasets_convert_thermochemical_string_values_producing_correct_value(datasets_db): """Strings where floats are expected should give correct answers for thermochemical datasets""" - ds = clean_dataset(CU_MG_DATASET_THERMOCHEMICAL_STRING_VALUES) + ds = check_dataset(CU_MG_DATASET_THERMOCHEMICAL_STRING_VALUES).model_dump() assert np.issubdtype(np.array(ds['values']).dtype, np.number) assert np.issubdtype(np.array(ds['conditions']['T']).dtype, np.number) assert 
np.issubdtype(np.array(ds['conditions']['P']).dtype, np.number) @@ -468,7 +469,7 @@ def test_non_equilibrium_thermo_data_with_species_passes_checker(): def test_applying_tags(datasets_db): """Test that applying tags updates the appropriate values""" - dataset = clean_dataset(CU_MG_DATASET_THERMOCHEMICAL_STRING_VALUES) + dataset = deepcopy(CU_MG_DATASET_THERMOCHEMICAL_STRING_VALUES) # overwrite tags for this test dataset["tags"] = ["testtag"] datasets_db.insert(dataset) From b3d8e9bd35865e45c52a7877479c2309f0898ef7 Mon Sep 17 00:00:00 2001 From: bocklund Date: Sun, 17 Aug 2025 16:10:01 -0700 Subject: [PATCH 04/14] Fix max length --- espei/datasets/dataset_models.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/espei/datasets/dataset_models.py b/espei/datasets/dataset_models.py index 4dc7209f..02d8cef9 100644 --- a/espei/datasets/dataset_models.py +++ b/espei/datasets/dataset_models.py @@ -58,7 +58,7 @@ class ActivityDataReferenceState(BaseModel): # TODO: refactor to merge this with EquilibriumPropertyDataset class ActivityPropertyDataset(Dataset): components: list[ComponentName] = Field(min_length=1) - phases: list[PhaseName] = Field(min_length=1, max_length=1) + phases: list[PhaseName] = Field(min_length=1) conditions: dict[str, float | list[float]] reference_state: ActivityDataReferenceState output: str @@ -77,7 +77,7 @@ class ReferenceStates(BaseModel): class EquilibriumPropertyDataset(Dataset): components: list[ComponentName] = Field(min_length=1) - phases: list[PhaseName] = Field(min_length=1, max_length=1) + phases: list[PhaseName] = Field(min_length=1) conditions: dict[str, float | list[float]] reference_states: dict[ComponentName, ReferenceStates] output: str From 5a0b8879b9ef9107c7c5dfa4217a6e400ed853bf Mon Sep 17 00:00:00 2001 From: bocklund Date: Sun, 17 Aug 2025 16:39:36 -0700 Subject: [PATCH 05/14] Migrate check_datasets validators to pydantic models --- espei/datasets/dataset_models.py | 61 ++++++++++++++++++++++++++++++++ 
espei/datasets/db.py | 58 ++++-------------------------- 2 files changed, 67 insertions(+), 52 deletions(-) diff --git a/espei/datasets/dataset_models.py b/espei/datasets/dataset_models.py index 02d8cef9..89290558 100644 --- a/espei/datasets/dataset_models.py +++ b/espei/datasets/dataset_models.py @@ -49,6 +49,67 @@ class BroadcastSinglePhaseFixedConfigurationDataset(Dataset): disabled: bool = Field(default=False) + @model_validator(mode="after") + def validate_components_entered_match_components_used(self) -> Self: + components_entered = set(self.components) + components_used = set() + for config in self.solver.sublattice_configurations: + for subl in config: + if isinstance(subl, list): + components_used.update(set(subl)) + else: + components_used.add(subl) + # Don't count vacancies as a component here + components_difference = components_entered.symmetric_difference(components_used) - {"VA"} + if len(components_difference) != 0: + raise DatasetError(f'Components entered {components_entered} do not match components used {components_used} ({components_difference} different).') + return self + + @model_validator(mode="after") + def validate_condition_value_shape_agreement(self) -> Self: + values_shape = np.array(self.values).shape + num_configs = len(self.solver.sublattice_configurations) + num_temperature = np.atleast_1d(self.conditions["T"]).size + num_pressure = np.atleast_1d(self.conditions["P"]).size + conditions_shape = (num_pressure, num_temperature, num_configs) + if conditions_shape != values_shape: + raise DatasetError(f'Shape of conditions (P, T, configs): {conditions_shape} does not match the shape of the values {values_shape}.') + return self + + @model_validator(mode="after") + def validate_configuration_occupancy_shape_agreement(self) -> Self: + sublattice_configurations = self.solver.sublattice_configurations + sublattice_site_ratios = self.solver.sublattice_site_ratios + sublattice_occupancies = self.solver.sublattice_occupancies + # check for 
mixing + is_mixing = any([any([isinstance(subl, list) for subl in config]) for config in sublattice_configurations]) + # pad the values of sublattice occupancies if there is no mixing + # just for the purposes of checking validity + if sublattice_occupancies is None and not is_mixing: + sublattice_occupancies = [None]*len(sublattice_configurations) + elif sublattice_occupancies is None: + raise DatasetError(f'At least one sublattice in the following sublattice configurations is mixing, but the "sublattice_occupancies" key is empty: {sublattice_configurations}') + + # check that the site ratios are valid as well as site occupancies, if applicable + nconfigs = len(sublattice_configurations) + noccupancies = len(sublattice_occupancies) + if nconfigs != noccupancies: + raise DatasetError(f'Number of sublattice configurations ({nconfigs}) does not match the number of sublattice occupancies ({noccupancies})') + for configuration, occupancy in zip(sublattice_configurations, sublattice_occupancies): + if len(configuration) != len(sublattice_site_ratios): + raise DatasetError(f'Sublattice configuration {configuration} and sublattice site ratio {sublattice_site_ratios} describe different numbers of sublattices ({len(configuration)} and {len(sublattice_site_ratios)}).') + if is_mixing: + configuration_shape = tuple(len(sl) if isinstance(sl, list) else 1 for sl in configuration) + occupancy_shape = tuple(len(sl) if isinstance(sl, list) else 1 for sl in occupancy) + if configuration_shape != occupancy_shape: + raise DatasetError(f'The shape of sublattice configuration {configuration} ({configuration_shape}) does not match the shape of occupancies {occupancy} ({occupancy_shape})') + # check that sublattice interactions are in sorted. 
Related to sorting in espei.core_utils.get_samples + for subl in configuration: + if isinstance(subl, (list, tuple)) and sorted(subl) != subl: + raise DatasetError(f'Sublattice {subl} in configuration {configuration} is must be sorted in alphabetic order ({sorted(subl)})') + return self + + # TODO: would be great to remove class ActivityDataReferenceState(BaseModel): phases: list[PhaseName] = Field(min_length=1) diff --git a/espei/datasets/db.py b/espei/datasets/db.py index 44b4dfa8..94a262f7 100644 --- a/espei/datasets/db.py +++ b/espei/datasets/db.py @@ -69,23 +69,10 @@ def check_dataset(dataset: dict[str, Any]) -> Dataset: """ is_equilibrium = 'solver' not in dataset.keys() and dataset['output'] != 'ZPF' is_activity = dataset['output'].startswith('ACR') - is_single_phase = 'solver' in dataset.keys() components = dataset['components'] conditions = dataset['conditions'] values = dataset['values'] phases = dataset['phases'] - if is_single_phase: - solver = dataset['solver'] - sublattice_configurations = solver['sublattice_configurations'] - sublattice_site_ratios = solver['sublattice_site_ratios'] - sublattice_occupancies = solver.get('sublattice_occupancies', None) - # check for mixing - is_mixing = any([any([isinstance(subl, list) for subl in config]) for config in sublattice_configurations]) - # pad the values of sublattice occupancies if there is no mixing - if sublattice_occupancies is None and not is_mixing: - sublattice_occupancies = [None]*len(sublattice_configurations) - elif sublattice_occupancies is None: - raise DatasetError('At least one sublattice in the following sublattice configurations is mixing, but the "sublattice_occupancies" key is empty: {}'.format(sublattice_configurations)) if is_equilibrium: conditions = dataset['conditions'] comp_conditions = {k: v for k, v in conditions.items() if k.startswith('X_')} @@ -108,49 +95,16 @@ def check_dataset(dataset: dict[str, Any]) -> Dataset: conditions_shape = (num_pressure, num_temperature, 
num_x_conds[0]) if conditions_shape != values_shape: raise DatasetError('Shape of conditions (P, T, compositions): {} does not match the shape of the values {}.'.format(conditions_shape, values_shape)) - elif is_single_phase: - values_shape = np.array(values).shape - num_configs = len(dataset['solver']['sublattice_configurations']) - conditions_shape = (num_pressure, num_temperature, num_configs) - if conditions_shape != values_shape: - raise DatasetError('Shape of conditions (P, T, configs): {} does not match the shape of the values {}.'.format(conditions_shape, values_shape)) # check that all of the components used match the components entered - components_entered = set(components) - components_used = set() - if is_single_phase: - for config in sublattice_configurations: - for sl in config: - if isinstance(sl, list): - components_used.update(set(sl)) - else: - components_used.add(sl) - comp_dof = 0 - elif is_equilibrium: + if is_equilibrium: # and is_activity + components_entered = set(components) + components_used = set() components_used.update({c.split('_')[1] for c in comp_conditions.keys()}) # mass balance of components comp_dof = len(comp_conditions.keys()) - if (is_single_phase or is_activity or is_equilibrium) and (len(components_entered - components_used - {'VA'}) > comp_dof or len(components_used - components_entered) > 0): - raise DatasetError('Components entered {} do not match components used {}.'.format(components_entered, components_used)) - - # check that the site ratios are valid as well as site occupancies, if applicable - if is_single_phase: - nconfigs = len(sublattice_configurations) - noccupancies = len(sublattice_occupancies) - if nconfigs != noccupancies: - raise DatasetError('Number of sublattice configurations ({}) does not match the number of sublattice occupancies ({})'.format(nconfigs, noccupancies)) - for configuration, occupancy in zip(sublattice_configurations, sublattice_occupancies): - if len(configuration) != 
len(sublattice_site_ratios): - raise DatasetError('Sublattice configuration {} and sublattice site ratio {} describe different numbers of sublattices ({} and {}).'.format(configuration, sublattice_site_ratios, len(configuration), len(sublattice_site_ratios))) - if is_mixing: - configuration_shape = tuple(len(sl) if isinstance(sl, list) else 1 for sl in configuration) - occupancy_shape = tuple(len(sl) if isinstance(sl, list) else 1 for sl in occupancy) - if configuration_shape != occupancy_shape: - raise DatasetError('The shape of sublattice configuration {} ({}) does not match the shape of occupancies {} ({})'.format(configuration, configuration_shape, occupancy, occupancy_shape)) - # check that sublattice interactions are in sorted. Related to sorting in espei.core_utils.get_samples - for subl in configuration: - if isinstance(subl, (list, tuple)) and sorted(subl) != subl: - raise DatasetError('Sublattice {} in configuration {} is must be sorted in alphabetic order ({})'.format(subl, configuration, sorted(subl))) + if len(components_entered - components_used - {'VA'}) > comp_dof or len(components_used - components_entered) > 0: + raise DatasetError('Components entered {} do not match components used {}.'.format(components_entered, components_used)) if dataset["output"] == "ZPF": dataset_obj = ZPFDataset(**dataset) @@ -158,7 +112,7 @@ def check_dataset(dataset: dict[str, Any]) -> Dataset: dataset_obj = ActivityPropertyDataset(**dataset) elif is_equilibrium: dataset_obj = EquilibriumPropertyDataset(**dataset) - elif is_single_phase: + elif 'solver' in dataset.keys(): dataset_obj = BroadcastSinglePhaseFixedConfigurationDataset(**dataset) else: raise ValueError(f"Unknown dataset type for dataset {dataset}") From d2f64271906d835067c67d993ed2ab275f10bac8 Mon Sep 17 00:00:00 2001 From: bocklund Date: Sun, 17 Aug 2025 16:48:27 -0700 Subject: [PATCH 06/14] Cleanup of activity check_dataset stuff Activity-specific checks weren't done at all and it's all subsumed by 
equilibrium --- espei/datasets/dataset_models.py | 2 -- espei/datasets/db.py | 12 +++++------- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/espei/datasets/dataset_models.py b/espei/datasets/dataset_models.py index 89290558..82e765cd 100644 --- a/espei/datasets/dataset_models.py +++ b/espei/datasets/dataset_models.py @@ -33,7 +33,6 @@ class Solver(BaseModel): sublattice_occupancies: list[list[float | list[float]]] | None = Field(default=None) - class BroadcastSinglePhaseFixedConfigurationDataset(Dataset): components: list[ComponentName] = Field(min_length=1) phases: list[PhaseName] = Field(min_length=1, max_length=1) @@ -48,7 +47,6 @@ class BroadcastSinglePhaseFixedConfigurationDataset(Dataset): comment: str = Field(default="") disabled: bool = Field(default=False) - @model_validator(mode="after") def validate_components_entered_match_components_used(self) -> Self: components_entered = set(self.components) diff --git a/espei/datasets/db.py b/espei/datasets/db.py index 94a262f7..3cd2844e 100644 --- a/espei/datasets/db.py +++ b/espei/datasets/db.py @@ -68,17 +68,11 @@ def check_dataset(dataset: dict[str, Any]) -> Dataset: If an error is found in the dataset """ is_equilibrium = 'solver' not in dataset.keys() and dataset['output'] != 'ZPF' - is_activity = dataset['output'].startswith('ACR') components = dataset['components'] conditions = dataset['conditions'] values = dataset['values'] phases = dataset['phases'] if is_equilibrium: - conditions = dataset['conditions'] - comp_conditions = {k: v for k, v in conditions.items() if k.startswith('X_')} - if is_activity: - ref_state = dataset['reference_state'] - elif is_equilibrium: for el, vals in dataset.get('reference_states', {}).items(): if 'phase' not in vals: raise DatasetError(f'Reference state for element {el} must define the `phase` key with the reference phase name.') @@ -87,6 +81,8 @@ def check_dataset(dataset: dict[str, Any]) -> Dataset: num_pressure = np.atleast_1d(conditions['P']).size 
num_temperature = np.atleast_1d(conditions['T']).size if is_equilibrium: + conditions = dataset['conditions'] + comp_conditions = {k: v for k, v in conditions.items() if k.startswith('X_')} values_shape = np.array(values).shape # check each composition condition is the same shape num_x_conds = [len(v) for _, v in comp_conditions.items()] @@ -98,6 +94,8 @@ def check_dataset(dataset: dict[str, Any]) -> Dataset: # check that all of the components used match the components entered if is_equilibrium: # and is_activity + conditions = dataset['conditions'] + comp_conditions = {k: v for k, v in conditions.items() if k.startswith('X_')} components_entered = set(components) components_used = set() components_used.update({c.split('_')[1] for c in comp_conditions.keys()}) @@ -108,7 +106,7 @@ def check_dataset(dataset: dict[str, Any]) -> Dataset: if dataset["output"] == "ZPF": dataset_obj = ZPFDataset(**dataset) - elif is_activity: + elif dataset['output'].startswith('ACR'): dataset_obj = ActivityPropertyDataset(**dataset) elif is_equilibrium: dataset_obj = EquilibriumPropertyDataset(**dataset) From a3610feed7be93635fdd603daf3e1535b4e7c558 Mon Sep 17 00:00:00 2001 From: bocklund Date: Sun, 17 Aug 2025 18:20:08 -0700 Subject: [PATCH 07/14] Migrate equilibrium and activity check_datasets functionality to pydantic models Added some new tests that were previously uncovered --- espei/datasets/dataset_models.py | 77 ++++++++++++++- espei/datasets/db.py | 41 +------- .../equilibrium_thermochemical_error.py | 2 +- tests/test_datasets.py | 96 +++++++++++++++++++ 4 files changed, 173 insertions(+), 43 deletions(-) diff --git a/espei/datasets/dataset_models.py b/espei/datasets/dataset_models.py index 82e765cd..1a9d82d7 100644 --- a/espei/datasets/dataset_models.py +++ b/espei/datasets/dataset_models.py @@ -109,12 +109,15 @@ def validate_configuration_occupancy_shape_agreement(self) -> Self: # TODO: would be great to remove -class
ActivityDataReferenceState(Dataset): phases: list[PhaseName] = Field(min_length=1) conditions: dict[str, float] # TODO: refactor to merge this with EquilibriumPropertyDataset +# The validator functions are exactly duplicated in EquilibriumPropertyDataset +# The duplication simplifies the implementation since the activity special case is +# ultimately meant to be removed once activity is a PyCalphad Workspace property class ActivityPropertyDataset(Dataset): components: list[ComponentName] = Field(min_length=1) phases: list[PhaseName] = Field(min_length=1) @@ -128,6 +131,36 @@ class ActivityPropertyDataset(Dataset): comment: str = Field(default="") disabled: bool = Field(default=False) + @model_validator(mode="after") + def validate_condition_value_shape_agreement(self) -> Self: + conditions = self.conditions + comp_conditions = {k: v for k, v in conditions.items() if k.startswith('X_')} + num_temperature = np.atleast_1d(self.conditions["T"]).size + num_pressure = np.atleast_1d(self.conditions["P"]).size + # check each composition condition is the same shape + num_x_conds = [np.atleast_1d(vals).size for _, vals in comp_conditions.items()] + if num_x_conds.count(num_x_conds[0]) != len(num_x_conds): + raise DatasetError(f'All compositions in conditions are not the same shape. Note that conditions cannot be broadcast. 
Composition conditions are {comp_conditions}') + conditions_shape = (num_pressure, num_temperature, num_x_conds[0]) + values_shape = np.array(self.values).shape + if conditions_shape != values_shape: + raise DatasetError(f'Shape of conditions (P, T, compositions): {conditions_shape} does not match the shape of the values {values_shape}.') + return self + + @model_validator(mode="after") + def validate_components_entered_match_components_used(self) -> Self: + conditions = self.conditions + comp_conditions = {ky: vl for ky, vl in conditions.items() if ky.startswith('X_')} + components_entered = set(self.components) + components_used = set() + components_used.update({c.split('_')[1] for c in comp_conditions.keys()}) + if not components_entered.issuperset(components_used): + raise DatasetError(f"Components were used as conditions that are not present in the specified components: {components_used - components_entered}.") + independent_components = components_entered - components_used - {'VA'} + if len(independent_components) != 1: + raise DatasetError(f"Degree of freedom error: expected 1 independent component, got {len(independent_components)} for entered components {components_entered} and {components_used} used in the conditions.") + return self + class ReferenceStates(BaseModel): phase: PhaseName @@ -138,15 +171,55 @@ class EquilibriumPropertyDataset(Dataset): components: list[ComponentName] = Field(min_length=1) phases: list[PhaseName] = Field(min_length=1) conditions: dict[str, float | list[float]] - reference_states: dict[ComponentName, ReferenceStates] output: str values: list[list[list[float]]] + reference_states: dict[ComponentName, ReferenceStates] | None = Field(default=None) reference: str = Field(default="") bibtex: str = Field(default="") dataset_author: str = Field(default="") comment: str = Field(default="") disabled: bool = Field(default=False) + @model_validator(mode="after") + def validate_condition_value_shape_agreement(self) -> Self: + conditions = 
self.conditions + comp_conditions = {k: v for k, v in conditions.items() if k.startswith('X_')} + num_temperature = np.atleast_1d(self.conditions["T"]).size + num_pressure = np.atleast_1d(self.conditions["P"]).size + # check each composition condition is the same shape + num_x_conds = [np.atleast_1d(vals).size for _, vals in comp_conditions.items()] + if num_x_conds.count(num_x_conds[0]) != len(num_x_conds): + raise DatasetError(f'All compositions in conditions are not the same shape. Note that conditions cannot be broadcast. Composition conditions are {comp_conditions}') + conditions_shape = (num_pressure, num_temperature, num_x_conds[0]) + values_shape = np.array(self.values).shape + if conditions_shape != values_shape: + raise DatasetError(f'Shape of conditions (P, T, compositions): {conditions_shape} does not match the shape of the values {values_shape}.') + return self + + @model_validator(mode="after") + def validate_components_entered_match_components_used(self) -> Self: + conditions = self.conditions + comp_conditions = {ky: vl for ky, vl in conditions.items() if ky.startswith('X_')} + components_entered = set(self.components) + components_used = set() + components_used.update({c.split('_')[1] for c in comp_conditions.keys()}) + if not components_entered.issuperset(components_used): + raise DatasetError(f"Components were used as conditions that are not present in the specified components: {components_used - components_entered}.") + independent_components = components_entered - components_used - {'VA'} + if len(independent_components) != 1: + raise DatasetError(f"Degree of freedom error: expected 1 independent component, got {len(independent_components)} for entered components {components_entered} and {components_used} used in the conditions.") + return self + + @model_validator(mode="after") + def validate_reference_state_fully_specified_if_used(self) -> Self: + """If there is a reference state specified, the components in the reference state must match the 
dataset components""" + components_entered = set(self.components) - {"VA"} + if self.reference_states is not None: + reference_state_components = set(self.reference_states.keys()) - {"VA"} + if components_entered != reference_state_components: + raise DatasetError(f"If used, reference states in equilibrium property must define a reference state for all components in the calculation. Got {components_entered} entered components and {reference_state_components} in the reference states ({components_entered.symmetric_difference(reference_state_components)} non-matching).") + return self + class ZPFDataset(Dataset): components: list[ComponentName] = Field(min_length=1) diff --git a/espei/datasets/db.py b/espei/datasets/db.py index 3cd2844e..34ebde77 100644 --- a/espei/datasets/db.py +++ b/espei/datasets/db.py @@ -67,53 +67,14 @@ def check_dataset(dataset: dict[str, Any]) -> Dataset: DatasetError If an error is found in the dataset """ - is_equilibrium = 'solver' not in dataset.keys() and dataset['output'] != 'ZPF' - components = dataset['components'] - conditions = dataset['conditions'] - values = dataset['values'] - phases = dataset['phases'] - if is_equilibrium: - for el, vals in dataset.get('reference_states', {}).items(): - if 'phase' not in vals: - raise DatasetError(f'Reference state for element {el} must define the `phase` key with the reference phase name.') - - # check that the shape of conditions match the values - num_pressure = np.atleast_1d(conditions['P']).size - num_temperature = np.atleast_1d(conditions['T']).size - if is_equilibrium: - conditions = dataset['conditions'] - comp_conditions = {k: v for k, v in conditions.items() if k.startswith('X_')} - values_shape = np.array(values).shape - # check each composition condition is the same shape - num_x_conds = [len(v) for _, v in comp_conditions.items()] - if num_x_conds.count(num_x_conds[0]) != len(num_x_conds): - raise DatasetError('All compositions in conditions are not the same shape. 
Note that conditions cannot be broadcast. Composition conditions are {}'.format(comp_conditions)) - conditions_shape = (num_pressure, num_temperature, num_x_conds[0]) - if conditions_shape != values_shape: - raise DatasetError('Shape of conditions (P, T, compositions): {} does not match the shape of the values {}.'.format(conditions_shape, values_shape)) - - # check that all of the components used match the components entered - if is_equilibrium: # and is_activity - conditions = dataset['conditions'] - comp_conditions = {k: v for k, v in conditions.items() if k.startswith('X_')} - components_entered = set(components) - components_used = set() - components_used.update({c.split('_')[1] for c in comp_conditions.keys()}) - # mass balance of components - comp_dof = len(comp_conditions.keys()) - if len(components_entered - components_used - {'VA'}) > comp_dof or len(components_used - components_entered) > 0: - raise DatasetError('Components entered {} do not match components used {}.'.format(components_entered, components_used)) - if dataset["output"] == "ZPF": dataset_obj = ZPFDataset(**dataset) elif dataset['output'].startswith('ACR'): dataset_obj = ActivityPropertyDataset(**dataset) - elif is_equilibrium: - dataset_obj = EquilibriumPropertyDataset(**dataset) elif 'solver' in dataset.keys(): dataset_obj = BroadcastSinglePhaseFixedConfigurationDataset(**dataset) else: - raise ValueError(f"Unknown dataset type for dataset {dataset}") + dataset_obj = EquilibriumPropertyDataset(**dataset) return dataset_obj diff --git a/espei/error_functions/equilibrium_thermochemical_error.py b/espei/error_functions/equilibrium_thermochemical_error.py index bd96194c..6352811d 100644 --- a/espei/error_functions/equilibrium_thermochemical_error.py +++ b/espei/error_functions/equilibrium_thermochemical_error.py @@ -87,7 +87,7 @@ def build_eqpropdata(data: tinydb.database.Document, # Models are now modified in response to the data from this data # TODO: build a reference state MetaProperty 
with the reference state information, maybe just-in-time, below - if 'reference_states' in data: + if data.get("reference_states") is not None: property_output = output[:-1] if output.endswith('R') else output # unreferenced model property so we can tell shift_reference_state what to build. reference_states = [] for el, vals in data['reference_states'].items(): diff --git a/tests/test_datasets.py b/tests/test_datasets.py index 9c6f3642..72078f97 100644 --- a/tests/test_datasets.py +++ b/tests/test_datasets.py @@ -396,6 +396,36 @@ def test_check_datasets_raises_with_incorrect_components(): with pytest.raises(DatasetError): check_dataset(dataset_multi_incorrect_components_underspecified) + # equilibrium datasets underspecified + ds_eq_underspecified = { + "components": ["NI"], + "phases": ["LIQUID"], + "conditions": { + "P": 101325, + "T": [1348, 1176, 977], + "X_NI": 0.5 + }, + "output": "HM", + "values": [[[-1000], [-900], [-800]]] + } + with pytest.raises(DatasetError): + check_dataset(ds_eq_underspecified) + + # equilibrium datasets overspecified + ds_eq_overspecified = { + "components": ["CU", "MG", "NI"], + "phases": ["LIQUID"], + "conditions": { + "P": 101325, + "T": [1348, 1176, 977], + "X_NI": 0.5 + }, + "output": "HM", + "values": [[[-1000], [-900], [-800]]] + } + with pytest.raises(DatasetError): + check_dataset(ds_eq_overspecified) + def test_check_datasets_raises_with_malformed_zpf(): """Passed datasets that have malformed ZPF values should raise.""" @@ -415,6 +445,72 @@ def test_check_datasets_raises_with_malformed_sublattice_configurations(): check_dataset(dataset_single_malformed_site_ratios) +def test_check_datasets_raises_with_equilibrium_conditions_and_values_shapes_mismatch(): + """Passed equilibrium datasets that have mismatched condition and values shapes should raise.""" + COND_VALS_SHAPE_GOOD = { + "components": ["CU", "MG"], + "phases": ["LIQUID"], + "conditions": {"P": [101325, 1e5], "T": [1400, 1500, 1600], "X_MG": [0.5, 0.6, 0.7, 0.8]}, + 
"reference_states": { + "CU": {"phase": "LIQUID"}, + "MG": {"phase": "LIQUID"} + }, + "output": "HMR", + "values": np.zeros((2, 3, 4)).tolist(), + "reference": "equilibrium thermochemical tests", + } + # Good shape should not raise + check_dataset(COND_VALS_SHAPE_GOOD) + + COND_VALS_SHAPE_DISAGREEMENT_1_1_2 = { + "components": ["CU", "MG", "NI"], + "phases": ["LIQUID"], + "conditions": {"P": 101325, "T": [1400], "X_MG": [0.5, 0.6], "X_NI": [0.5, 0.6]}, + "reference_states": { + "CU": {"phase": "LIQUID"}, + "MG": {"phase": "LIQUID"}, + "NI": {"phase": "LIQUID"} + }, + "output": "HMR", + "values": [[[0]]], + "reference": "equilibrium thermochemical tests", + } + with pytest.raises(DatasetError): + check_dataset(COND_VALS_SHAPE_DISAGREEMENT_1_1_2) + + COND_VALS_SHAPE_DISAGREEMENT_1_2_2 = { + "components": ["CU", "MG"], + "phases": ["LIQUID"], + "conditions": {"P": 101325, "T": [1400, 1500], "X_MG": [0.5, 0.6]}, + "reference_states": { + "CU": {"phase": "LIQUID"}, + "MG": {"phase": "LIQUID"} + }, + "output": "HMR", + "values": [[[0, 0]]], + "reference": "equilibrium thermochemical tests", + } + with pytest.raises(DatasetError): + check_dataset(COND_VALS_SHAPE_DISAGREEMENT_1_2_2) + + # we don't broadcast over compositions, so composition conditions shapes need to match + MISMATCHED_COMPOSITION_CONDS = { + "components": ["CU", "MG", "NI"], + "phases": ["LIQUID"], + "conditions": {"P": 101325, "T": [1400], "X_MG": [0.5, 0.6], "X_NI": [0.5]}, + "reference_states": { + "CU": {"phase": "LIQUID"}, + "MG": {"phase": "LIQUID"}, + "NI": {"phase": "LIQUID"} + }, + "output": "HMR", + "values": [[[0, 0]]], + "reference": "equilibrium thermochemical tests", + } + with pytest.raises(DatasetError): + check_dataset(MISMATCHED_COMPOSITION_CONDS) + + def test_check_datasets_works_on_activity_data(): """Passed activity datasets should work correctly.""" check_dataset(CU_MG_EXP_ACTIVITY) From 1fa67243cf8d337aecac191775d9a0a69911a0c1 Mon Sep 17 00:00:00 2001 From: bocklund Date: Sun, 17 Aug 
2025 18:23:54 -0700 Subject: [PATCH 08/14] Delete recursive_map as dead code --- espei/datasets/db.py | 28 ---------------------------- tests/test_core_utils.py | 14 -------------- 2 files changed, 42 deletions(-) diff --git a/espei/datasets/db.py b/espei/datasets/db.py index 34ebde77..87e593a9 100644 --- a/espei/datasets/db.py +++ b/espei/datasets/db.py @@ -10,34 +10,6 @@ from .dataset_models import Dataset, ActivityPropertyDataset, BroadcastSinglePhaseFixedConfigurationDataset, EquilibriumPropertyDataset, ZPFDataset, DatasetError - -def recursive_map(f, x): - """ - map, but over nested lists - - Parameters - ---------- - f : callable - Function to apply to x - x : list or value - Value passed to v - - Returns - ------- - list or value - """ - if isinstance(x, list): - if [isinstance(xx, list) for xx in x]: - # we got a nested list - return [recursive_map(f, xx) for xx in x] - else: - # it's a list with some values inside - return list(map(f, x)) - else: - # not a list, probably just a singular value - return f(x) - - def check_dataset(dataset: dict[str, Any]) -> Dataset: """Ensure that the dataset is valid and consistent. 
diff --git a/tests/test_core_utils.py b/tests/test_core_utils.py index 5c150f1e..9fd45dca 100644 --- a/tests/test_core_utils.py +++ b/tests/test_core_utils.py @@ -2,7 +2,6 @@ import tinydb from espei.core_utils import get_prop_data, filter_configurations, filter_temperatures, symmetry_filter, ravel_zpf_values -from espei.datasets import recursive_map from espei.sublattice_tools import recursive_tuplify from espei.utils import PickleableTinyDB, MemoryStorage from espei.error_functions.non_equilibrium_thermochemical_error import get_prop_samples @@ -55,19 +54,6 @@ def test_get_data_for_a_minimal_example(): assert desired_data['values'] == np.array([[[34720.0]]]) -def test_recursive_map(): - """Test that recursive map function works""" - - strings = [[["1.0"], ["5.5", "8.8"], ["10.7"]]] - floats = [[[1.0], [5.5, 8.8], [10.7]]] - - assert recursive_map(float, strings) == floats - assert recursive_map(str, floats) == strings - assert recursive_map(float, "1.234") == 1.234 - assert recursive_map(int, ["1", "2", "5"]) == [1, 2, 5] - assert recursive_map(float, ["1.0", ["0.5", "0.5"]]) == [1.0, [0.5, 0.5]] - - def test_get_prop_samples_ravels_correctly(): """get_prop_samples should ravel non-equilibrium thermochemical data correctly""" desired_data = [{ From c31d9ae447ba4a62c0b689b02291da941515f118 Mon Sep 17 00:00:00 2001 From: bocklund Date: Sun, 17 Aug 2025 18:44:10 -0700 Subject: [PATCH 09/14] Ensure tags are present in the dataset models --- espei/datasets/dataset_models.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/espei/datasets/dataset_models.py b/espei/datasets/dataset_models.py index 1a9d82d7..d702ca6b 100644 --- a/espei/datasets/dataset_models.py +++ b/espei/datasets/dataset_models.py @@ -8,6 +8,7 @@ "ActivityPropertyDataset", "EquilibriumPropertyDataset", "ZPFDataset", + "DatasetError", ] class DatasetError(Exception): @@ -46,6 +47,7 @@ class BroadcastSinglePhaseFixedConfigurationDataset(Dataset): dataset_author: str = Field(default="") comment: str 
= Field(default="") disabled: bool = Field(default=False) + tags: list[str] = Field(default_factory=list) @model_validator(mode="after") def validate_components_entered_match_components_used(self) -> Self: @@ -130,6 +132,7 @@ class ActivityPropertyDataset(Dataset): dataset_author: str = Field(default="") comment: str = Field(default="") disabled: bool = Field(default=False) + tags: list[str] = Field(default_factory=list) @model_validator(mode="after") def validate_condition_value_shape_agreement(self) -> Self: @@ -179,6 +182,7 @@ class EquilibriumPropertyDataset(Dataset): dataset_author: str = Field(default="") comment: str = Field(default="") disabled: bool = Field(default=False) + tags: list[str] = Field(default_factory=list) @model_validator(mode="after") def validate_condition_value_shape_agreement(self) -> Self: @@ -234,6 +238,7 @@ class ZPFDataset(Dataset): dataset_author: str = Field(default="") comment: str = Field(default="") disabled: bool = Field(default=False) + tags: list[str] = Field(default_factory=list) @model_validator(mode="after") def validate_condition_value_shape_agreement(self) -> Self: From 8a9d38f14822bcafa33f4ea283496c7ddb8935a5 Mon Sep 17 00:00:00 2001 From: bocklund Date: Sun, 17 Aug 2025 18:47:10 -0700 Subject: [PATCH 10/14] Multiple dataset cleanups: - Add __all__ for datasets - implement to_Dataset - deprecate check_dataset --- espei/datasets/dataset_models.py | 32 +++++++++++++- espei/datasets/db.py | 63 +++++++-------------------- tests/test_datasets.py | 74 ++++++++++++++++---------------- 3 files changed, 84 insertions(+), 85 deletions(-) diff --git a/espei/datasets/dataset_models.py b/espei/datasets/dataset_models.py index d702ca6b..74673031 100644 --- a/espei/datasets/dataset_models.py +++ b/espei/datasets/dataset_models.py @@ -1,4 +1,4 @@ -from typing import Literal, Optional, Union, TypeAlias, Self +from typing import Any, Literal, Union, TypeAlias, Self from pydantic import BaseModel, Field, model_validator, field_validator 
import numpy as np @@ -9,6 +9,7 @@ "EquilibriumPropertyDataset", "ZPFDataset", "DatasetError", + "to_Dataset", ] class DatasetError(Exception): @@ -301,4 +302,31 @@ def validate_phase_compositions(cls, values: list[PhaseRegionType]) -> list[Phas raise DatasetError('Mole fractions for phase composition {} ({}) for phase region {} ({}) sum to greater than one.'.format(j, phase_composition, i, phase_region)) if any([(mf is not None) and (mf < 0.0) for mf in mole_fraction_list]): raise DatasetError('Got unallowed negative mole fraction for phase composition {} ({}) for phase region {} ({}).'.format(j, phase_composition, i, phase_region)) - return values \ No newline at end of file + return values + + +def to_Dataset(candidate: dict[str, Any]) -> Dataset: + """Return a validated Dataset object for a dataset dict. Raises if a validated dataset cannot be created. + + Parameters + ---------- + candidate : dict[str, Any] + Dictionary describing an ESPEI dataset. + + Returns + ------- + Dataset + + Raises + ------ + DatasetError + If an error is found in the dataset + """ + if candidate["output"] == "ZPF": + return ZPFDataset.model_validate(candidate) + elif candidate['output'].startswith('ACR'): + return ActivityPropertyDataset.model_validate(candidate) + elif 'solver' in candidate.keys(): + return BroadcastSinglePhaseFixedConfigurationDataset.model_validate(candidate) + else: + return EquilibriumPropertyDataset.model_validate(candidate) diff --git a/espei/datasets/db.py b/espei/datasets/db.py index 87e593a9..53124e8c 100644 --- a/espei/datasets/db.py +++ b/espei/datasets/db.py @@ -6,56 +6,26 @@ from tinydb import where from espei.utils import PickleableTinyDB +from .dataset_models import to_Dataset, Dataset, ActivityPropertyDataset, BroadcastSinglePhaseFixedConfigurationDataset, EquilibriumPropertyDataset, ZPFDataset, DatasetError -from .dataset_models import Dataset, ActivityPropertyDataset, BroadcastSinglePhaseFixedConfigurationDataset, EquilibriumPropertyDataset, 
ZPFDataset, DatasetError +__all__ = [ + "load_datasets", + "recursive_glob", + "apply_tags", + "check_dataset", + "clean_dataset" +] - -def check_dataset(dataset: dict[str, Any]) -> Dataset: - """Ensure that the dataset is valid and consistent. - - Currently supports the following validation checks: - * data shape is valid - * phases and components used match phases and components entered - * individual shapes of keys, such as ZPF, sublattice configs and site ratios - - Planned validation checks: - * all required keys are present - - Note that this follows some of the implicit assumptions in ESPEI at the time - of writing, such that conditions are only P, T, configs for single phase and - essentially only T for ZPF data. - - Parameters - ---------- - dataset : Dataset - Dictionary of the standard ESPEI dataset. - - Returns - ------- - Dataset - - Raises - ------ - DatasetError - If an error is found in the dataset - """ - if dataset["output"] == "ZPF": - dataset_obj = ZPFDataset(**dataset) - elif dataset['output'].startswith('ACR'): - dataset_obj = ActivityPropertyDataset(**dataset) - elif 'solver' in dataset.keys(): - dataset_obj = BroadcastSinglePhaseFixedConfigurationDataset(**dataset) - else: - dataset_obj = EquilibriumPropertyDataset(**dataset) - return dataset_obj +def check_dataset(dataset: dict[str, Any]) -> dict[str, Any]: + """Ensure that the dataset is valid and consistent by round-tripping through pydantic.""" + warnings.warn(f"check_dataset is deprecated will be removed in ESPEI 0.11. Behavior has been migrated to the pydantic dataset implementations in espei.datasets.dataset_models. To get a Dataset object, use espei.datasets.to_Dataset.", DeprecationWarning) + return to_Dataset(dataset).model_dump() def clean_dataset(dataset: dict[str, Any]) -> dict[str, Any]: - """ - No-op - """ - warnings.warn(f"clean_dataset deprecated will be removed in ESPEI 0.11. 
Behavior has been migrated to the pydantic dataset implementations in espei.datasets.dataset_models.", DeprecationWarning) - return dataset + """Ensure that the dataset is valid and consistent by round-tripping through pydantic.""" + warnings.warn(f"clean_dataset is deprecated will be removed in ESPEI 0.11. Behavior has been migrated to the pydantic dataset implementations in espei.datasets.dataset_models. To get a Dataset object, use espei.datasets.to_Dataset.", DeprecationWarning) + return to_Dataset(dataset).model_dump() def apply_tags(datasets: PickleableTinyDB, tags): @@ -135,8 +105,7 @@ def load_datasets(dataset_filenames, include_disabled=False) -> PickleableTinyDB if not include_disabled and d.get('disabled', False): # The dataset is disabled and not included continue - dataset_obj = check_dataset(d) - ds_database.insert(dataset_obj.model_dump()) + ds_database.insert(to_Dataset(d).model_dump()) except ValueError as e: raise ValueError('JSON Error in {}: {}'.format(fname, e)) except DatasetError as e: diff --git a/tests/test_datasets.py b/tests/test_datasets.py index 72078f97..437423bc 100644 --- a/tests/test_datasets.py +++ b/tests/test_datasets.py @@ -1,7 +1,7 @@ from copy import deepcopy import pytest import numpy as np -from espei.datasets import DatasetError, check_dataset, apply_tags +from espei.datasets import DatasetError, to_Dataset, apply_tags, BroadcastSinglePhaseFixedConfigurationDataset, ZPFDataset from .testing_data import CU_MG_EXP_ACTIVITY, CU_MG_DATASET_THERMOCHEMICAL_STRING_VALUES, CU_MG_DATASET_ZPF_STRING_VALUES, LI_SN_LIQUID_DATA, dataset_multi_valid_ternary from .fixtures import datasets_db @@ -366,35 +366,35 @@ def test_check_datasets_run_on_good_data(): """Passed valid datasets that should raise DatasetError.""" - check_dataset(dataset_single_valid) - check_dataset(dataset_multi_valid) - check_dataset(dataset_multi_valid_ternary) + to_Dataset(dataset_single_valid) + to_Dataset(dataset_multi_valid) + 
to_Dataset(dataset_multi_valid_ternary) def test_check_datasets_raises_on_misaligned_data(): """Passed datasets that have misaligned data and conditions should raise DatasetError.""" with pytest.raises(DatasetError): - check_dataset(dataset_single_misaligned) + to_Dataset(dataset_single_misaligned) with pytest.raises(DatasetError): - check_dataset(dataset_multi_misaligned) + to_Dataset(dataset_multi_misaligned) def test_check_datasets_raises_with_incorrect_zpf_phases(): """Passed datasets that have incorrect phases entered than used should raise.""" with pytest.raises(DatasetError): - check_dataset(dataset_multi_incorrect_phases) + to_Dataset(dataset_multi_incorrect_phases) def test_check_datasets_raises_with_incorrect_components(): """Passed datasets that have incorrect components entered vs. used should raise.""" with pytest.raises(DatasetError): - check_dataset(dataset_single_incorrect_components_overspecified) + to_Dataset(dataset_single_incorrect_components_overspecified) with pytest.raises(DatasetError): - check_dataset(dataset_single_incorrect_components_underspecified) + to_Dataset(dataset_single_incorrect_components_underspecified) with pytest.raises(DatasetError): - check_dataset(dataset_multi_incorrect_components_overspecified) + to_Dataset(dataset_multi_incorrect_components_overspecified) with pytest.raises(DatasetError): - check_dataset(dataset_multi_incorrect_components_underspecified) + to_Dataset(dataset_multi_incorrect_components_underspecified) # equilibrium datasets underspecified ds_eq_underspecified = { @@ -409,7 +409,7 @@ def test_check_datasets_raises_with_incorrect_components(): "values": [[[-1000], [-900], [-800]]] } with pytest.raises(DatasetError): - check_dataset(ds_eq_underspecified) + to_Dataset(ds_eq_underspecified) # equilibrium datasets overspecified ds_eq_overspecified = { @@ -424,25 +424,25 @@ def test_check_datasets_raises_with_incorrect_components(): "values": [[[-1000], [-900], [-800]]] } with pytest.raises(DatasetError): - 
check_dataset(ds_eq_overspecified) + to_Dataset(ds_eq_overspecified) def test_check_datasets_raises_with_malformed_zpf(): """Passed datasets that have malformed ZPF values should raise.""" with pytest.raises((DatasetError, ValidationError)): - check_dataset(dataset_multi_malformed_zpfs_components_not_list) + to_Dataset(dataset_multi_malformed_zpfs_components_not_list) with pytest.raises(DatasetError): - check_dataset(dataset_multi_malformed_zpfs_fractions_do_not_match_components) + to_Dataset(dataset_multi_malformed_zpfs_fractions_do_not_match_components) with pytest.raises(DatasetError): - check_dataset(dataset_multi_malformed_zpfs_components_do_not_match_fractions) + to_Dataset(dataset_multi_malformed_zpfs_components_do_not_match_fractions) def test_check_datasets_raises_with_malformed_sublattice_configurations(): """Passed datasets that have malformed ZPF values should raise.""" with pytest.raises(DatasetError): - check_dataset(dataset_single_malformed_site_occupancies) + to_Dataset(dataset_single_malformed_site_occupancies) with pytest.raises(DatasetError): - check_dataset(dataset_single_malformed_site_ratios) + to_Dataset(dataset_single_malformed_site_ratios) def test_check_datasets_raises_with_equilibrium_conditions_and_values_shapes_mismatch(): @@ -460,7 +460,7 @@ def test_check_datasets_raises_with_equilibrium_conditions_and_values_shapes_mis "reference": "equilibrium thermochemical tests", } # Good shape should not raise - check_dataset(COND_VALS_SHAPE_GOOD) + to_Dataset(COND_VALS_SHAPE_GOOD) COND_VALS_SHAPE_DISAGREEMENT_1_1_2 = { "components": ["CU", "MG", "NI"], @@ -476,7 +476,7 @@ def test_check_datasets_raises_with_equilibrium_conditions_and_values_shapes_mis "reference": "equilibrium thermochemical tests", } with pytest.raises(DatasetError): - check_dataset(COND_VALS_SHAPE_DISAGREEMENT_1_1_2) + to_Dataset(COND_VALS_SHAPE_DISAGREEMENT_1_1_2) COND_VALS_SHAPE_DISAGREEMENT_1_2_2 = { "components": ["CU", "MG"], @@ -491,7 +491,7 @@ def 
test_check_datasets_raises_with_equilibrium_conditions_and_values_shapes_mis "reference": "equilibrium thermochemical tests", } with pytest.raises(DatasetError): - check_dataset(COND_VALS_SHAPE_DISAGREEMENT_1_2_2) + to_Dataset(COND_VALS_SHAPE_DISAGREEMENT_1_2_2) # we don't broadcast over compositions, so composition conditions shapes need to match MISMATCHED_COMPOSITION_CONDS = { @@ -508,51 +508,53 @@ def test_check_datasets_raises_with_equilibrium_conditions_and_values_shapes_mis "reference": "equilibrium thermochemical tests", } with pytest.raises(DatasetError): - check_dataset(MISMATCHED_COMPOSITION_CONDS) + to_Dataset(MISMATCHED_COMPOSITION_CONDS) def test_check_datasets_works_on_activity_data(): """Passed activity datasets should work correctly.""" - check_dataset(CU_MG_EXP_ACTIVITY) + to_Dataset(CU_MG_EXP_ACTIVITY) def test_check_datasets_raises_with_zpf_fractions_greater_than_one(): """Passed datasets that have mole fractions greater than one should raise.""" with pytest.raises(DatasetError): - check_dataset(dataset_multi_mole_fractions_as_percents) + to_Dataset(dataset_multi_mole_fractions_as_percents) def test_check_datasets_raises_with_negative_zpf_fractions(): """Passed datasets that have negative mole fractions should raise.""" with pytest.raises(DatasetError): - check_dataset(dataset_zpf_negative_mole_fraction) + to_Dataset(dataset_zpf_negative_mole_fraction) def test_check_datasets_raises_with_unsorted_interactions(): """Passed datasets that have sublattice interactions not in sorted order should raise.""" with pytest.raises(DatasetError): - check_dataset(dataset_single_unsorted_interaction) + to_Dataset(dataset_single_unsorted_interaction) def test_datasets_convert_thermochemical_string_values_producing_correct_value(datasets_db): """Strings where floats are expected should give correct answers for thermochemical datasets""" - ds = check_dataset(CU_MG_DATASET_THERMOCHEMICAL_STRING_VALUES).model_dump() - assert 
np.issubdtype(np.array(ds['values']).dtype, np.number) - assert np.issubdtype(np.array(ds['conditions']['T']).dtype, np.number) - assert np.issubdtype(np.array(ds['conditions']['P']).dtype, np.number) + ds = to_Dataset(CU_MG_DATASET_THERMOCHEMICAL_STRING_VALUES) + assert isinstance(ds, BroadcastSinglePhaseFixedConfigurationDataset) + assert np.issubdtype(np.array(ds.values).dtype, np.number) + assert np.issubdtype(np.array(ds.conditions['T']).dtype, np.number) + assert np.issubdtype(np.array(ds.conditions['P']).dtype, np.number) def test_datasets_convert_zpf_string_values_producing_correct_value(datasets_db): """Strings where floats are expected should give correct answers for ZPF datasets""" - ds = check_dataset(CU_MG_DATASET_ZPF_STRING_VALUES).model_dump() - assert np.issubdtype(np.array([t[0][2] for t in ds['values']]).dtype, np.number) - assert np.issubdtype(np.array(ds['conditions']['T']).dtype, np.number) - assert np.issubdtype(np.array(ds['conditions']['P']).dtype, np.number) + ds = to_Dataset(CU_MG_DATASET_ZPF_STRING_VALUES) + assert isinstance(ds, ZPFDataset) + assert np.issubdtype(np.array([t[0][2] for t in ds.values]).dtype, np.number) + assert np.issubdtype(np.array(ds.conditions['T']).dtype, np.number) + assert np.issubdtype(np.array(ds.conditions['P']).dtype, np.number) def test_check_datasets_raises_if_configs_occupancies_not_aligned(datasets_db): """Checking datasets that don't have the same number/shape of configurations/occupancies should raise.""" with pytest.raises(DatasetError): - check_dataset(dataset_mismatched_configs_occupancies) + to_Dataset(dataset_mismatched_configs_occupancies) # Expected to fail, since the dataset checker cannot determine that species are used in the configurations and components should only contain pure elements. 
@@ -560,7 +562,7 @@ def test_check_datasets_raises_if_configs_occupancies_not_aligned(datasets_db): def test_non_equilibrium_thermo_data_with_species_passes_checker(): """Non-equilibrium thermochemical data that use species in the configurations should pass the dataset checker. """ - check_dataset(LI_SN_LIQUID_DATA) + to_Dataset(LI_SN_LIQUID_DATA) def test_applying_tags(datasets_db): From 02299fdaf10e3cda111310c9476fbcaa76b0191f Mon Sep 17 00:00:00 2001 From: bocklund Date: Sun, 17 Aug 2025 18:56:58 -0700 Subject: [PATCH 11/14] Refactor modules back to simple datasets module --- .../dataset_models.py => datasets.py} | 155 +++++++++++++++++- espei/datasets/__init__.py | 2 - espei/datasets/db.py | 137 ---------------- 3 files changed, 149 insertions(+), 145 deletions(-) rename espei/{datasets/dataset_models.py => datasets.py} (78%) delete mode 100644 espei/datasets/__init__.py delete mode 100644 espei/datasets/db.py diff --git a/espei/datasets/dataset_models.py b/espei/datasets.py similarity index 78% rename from espei/datasets/dataset_models.py rename to espei/datasets.py index 74673031..bd81f9cb 100644 --- a/espei/datasets/dataset_models.py +++ b/espei/datasets.py @@ -1,21 +1,38 @@ from typing import Any, Literal, Union, TypeAlias, Self +import warnings from pydantic import BaseModel, Field, model_validator, field_validator import numpy as np +import fnmatch, json, os +from tinydb.storages import MemoryStorage +from tinydb import where + +from espei.utils import PickleableTinyDB __all__ = [ + # Models "Dataset", "BroadcastSinglePhaseFixedConfigurationDataset", "ActivityPropertyDataset", "EquilibriumPropertyDataset", "ZPFDataset", + + # Errors (when validating models) "DatasetError", + + # User-facing API + "load_datasets", + "recursive_glob", + "apply_tags", "to_Dataset", + + # Deprecated + "check_dataset", + "clean_dataset", ] -class DatasetError(Exception): - """Exception raised when datasets are invalid.""" - pass +# Type aliases - used to clarify intent +# e.g. 
when we want a ComponentName rather than a str (even though that's what it is) ComponentName: TypeAlias = str PhaseName: TypeAlias = str PhaseCompositionType: TypeAlias = Union[ @@ -24,9 +41,16 @@ class DatasetError(Exception): ] PhaseRegionType: TypeAlias = list[PhaseCompositionType] + +class DatasetError(Exception): + """Exception raised when datasets are invalid.""" + pass + + class Dataset(BaseModel): pass + class Solver(BaseModel): mode: Literal["manual"] = Field(default="manual") sublattice_site_ratios: list[float] @@ -111,13 +135,11 @@ def validate_configuration_occupancy_shape_agreement(self) -> Self: return self -# TODO: would be great to remove class ActivityDataReferenceState(Dataset): phases: list[PhaseName] = Field(min_length=1) conditions: dict[str, float] - -# TODO: refactor to merge this with EquilibriumPropertyDataset +# TODO: refactor ActivityPropertyDataset to merge with EquilibriumPropertyDataset # The validator functions are exactly duplicated in EquilibriumPropertyDataset # The duplication simplifies the implementation since the activity special case is # ultimately meant to be removed once activity is a PyCalphad Workspace property @@ -330,3 +352,124 @@ def to_Dataset(candidate: dict[str, Any]) -> Dataset: return BroadcastSinglePhaseFixedConfigurationDataset.model_validate(candidate) else: return EquilibriumPropertyDataset.model_validate(candidate) + + +def apply_tags(datasets: PickleableTinyDB, tags): + """ + Modify datasets using the tags system + + Parameters + ---------- + datasets : PickleableTinyDB + Datasets to modify + tags : dict + Dictionary of {tag: update_dict} + + Returns + ------- + None + + Notes + ----- + In general, everything replaces or is additive. We use the following update rules: + 1. If the update value is a list, extend the existing list (empty list if key does not exist) + 2. If the update value is scalar, override the previous (deleting any old value, if present) + 3. 
If the update value is a dict, update the exist dict (empty dict if dict does not exist) + 4. Otherwise, the value is updated, overriding the previous + + Examples + -------- + >>> from espei.utils import PickleableTinyDB + >>> from tinydb.storages import MemoryStorage + >>> ds = PickleableTinyDB(storage=MemoryStorage) + >>> doc_id = ds.insert({'tags': ['dft'], 'excluded_model_contributions': ['contrib']}) + >>> my_tags = {'dft': {'excluded_model_contributions': ['idmix', 'mag'], 'weight': 5.0}} + >>> from espei.datasets import apply_tags + >>> apply_tags(ds, my_tags) + >>> all_data = ds.all() + >>> all(d['excluded_model_contributions'] == ['contrib', 'idmix', 'mag'] for d in all_data) + True + >>> all(d['weight'] == 5.0 for d in all_data) + True + + """ + for tag, update_dict in tags.items(): + matching_datasets = datasets.search(where("tags").test(lambda x: tag in x)) + for newkey, newval in update_dict.items(): + for match in matching_datasets: + if isinstance(newval, list): + match[newkey] = match.get(newkey, []) + newval + elif np.isscalar(newval): + match[newkey] = newval + elif isinstance(newval, dict): + d = match.get(newkey, dict()) + d.update(newval) + match[newkey] = d + else: + match[newkey] = newval + datasets.update(match, doc_ids=[match.doc_id]) + + +def load_datasets(dataset_filenames, include_disabled=False) -> PickleableTinyDB: + """ + Create a PickelableTinyDB with the data from a list of filenames. 
+ + Parameters + ---------- + dataset_filenames : [str] + List of filenames to load as datasets + + Returns + ------- + PickleableTinyDB + """ + ds_database = PickleableTinyDB(storage=MemoryStorage) + for fname in dataset_filenames: + with open(fname) as file_: + try: + d = json.load(file_) + if not include_disabled and d.get('disabled', False): + # The dataset is disabled and not included + continue + ds_database.insert(to_Dataset(d).model_dump()) + except ValueError as e: + raise ValueError('JSON Error in {}: {}'.format(fname, e)) + except DatasetError as e: + raise DatasetError('Dataset Error in {}: {}'.format(fname, e)) + return ds_database + + +def recursive_glob(start, pattern='*.json'): + """ + Recursively glob for the given pattern from the start directory. + + Parameters + ---------- + start : str + Path of the directory to walk while for file globbing + pattern : str + Filename pattern to match in the glob. + + Returns + ------- + [str] + List of matched filenames + + """ + matches = [] + for root, dirnames, filenames in os.walk(start, followlinks=True): + for filename in fnmatch.filter(filenames, pattern): + matches.append(os.path.join(root, filename)) + return sorted(matches) + + +def check_dataset(dataset: dict[str, Any]) -> dict[str, Any]: + """Ensure that the dataset is valid and consistent by round-tripping through pydantic.""" + warnings.warn(f"check_dataset is deprecated will be removed in ESPEI 0.11. Behavior has been migrated to the pydantic dataset implementations in espei.datasets.dataset_models. To get a Dataset object, use espei.datasets.to_Dataset.", DeprecationWarning) + return to_Dataset(dataset).model_dump() + + +def clean_dataset(dataset: dict[str, Any]) -> dict[str, Any]: + """Ensure that the dataset is valid and consistent by round-tripping through pydantic.""" + warnings.warn(f"clean_dataset is deprecated will be removed in ESPEI 0.11. 
Behavior has been migrated to the pydantic dataset implementations in espei.datasets.dataset_models. To get a Dataset object, use espei.datasets.to_Dataset.", DeprecationWarning) + return to_Dataset(dataset).model_dump() diff --git a/espei/datasets/__init__.py b/espei/datasets/__init__.py deleted file mode 100644 index 18dc966a..00000000 --- a/espei/datasets/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -from .dataset_models import * -from .db import * \ No newline at end of file diff --git a/espei/datasets/db.py b/espei/datasets/db.py deleted file mode 100644 index 53124e8c..00000000 --- a/espei/datasets/db.py +++ /dev/null @@ -1,137 +0,0 @@ -import fnmatch, json, os -from typing import Any, Dict, List, TypeAlias -import warnings -import numpy as np -from tinydb.storages import MemoryStorage -from tinydb import where - -from espei.utils import PickleableTinyDB -from .dataset_models import to_Dataset, Dataset, ActivityPropertyDataset, BroadcastSinglePhaseFixedConfigurationDataset, EquilibriumPropertyDataset, ZPFDataset, DatasetError - -__all__ = [ - "load_datasets", - "recursive_glob", - "apply_tags", - "check_dataset", - "clean_dataset" -] - -def check_dataset(dataset: dict[str, Any]) -> dict[str, Any]: - """Ensure that the dataset is valid and consistent by round-tripping through pydantic.""" - warnings.warn(f"check_dataset is deprecated will be removed in ESPEI 0.11. Behavior has been migrated to the pydantic dataset implementations in espei.datasets.dataset_models. To get a Dataset object, use espei.datasets.to_Dataset.", DeprecationWarning) - return to_Dataset(dataset).model_dump() - - -def clean_dataset(dataset: dict[str, Any]) -> dict[str, Any]: - """Ensure that the dataset is valid and consistent by round-tripping through pydantic.""" - warnings.warn(f"clean_dataset is deprecated will be removed in ESPEI 0.11. Behavior has been migrated to the pydantic dataset implementations in espei.datasets.dataset_models. 
To get a Dataset object, use espei.datasets.to_Dataset.", DeprecationWarning) - return to_Dataset(dataset).model_dump() - - -def apply_tags(datasets: PickleableTinyDB, tags): - """ - Modify datasets using the tags system - - Parameters - ---------- - datasets : PickleableTinyDB - Datasets to modify - tags : dict - Dictionary of {tag: update_dict} - - Returns - ------- - None - - Notes - ----- - In general, everything replaces or is additive. We use the following update rules: - 1. If the update value is a list, extend the existing list (empty list if key does not exist) - 2. If the update value is scalar, override the previous (deleting any old value, if present) - 3. If the update value is a dict, update the exist dict (empty dict if dict does not exist) - 4. Otherwise, the value is updated, overriding the previous - - Examples - -------- - >>> from espei.utils import PickleableTinyDB - >>> from tinydb.storages import MemoryStorage - >>> ds = PickleableTinyDB(storage=MemoryStorage) - >>> doc_id = ds.insert({'tags': ['dft'], 'excluded_model_contributions': ['contrib']}) - >>> my_tags = {'dft': {'excluded_model_contributions': ['idmix', 'mag'], 'weight': 5.0}} - >>> from espei.datasets import apply_tags - >>> apply_tags(ds, my_tags) - >>> all_data = ds.all() - >>> all(d['excluded_model_contributions'] == ['contrib', 'idmix', 'mag'] for d in all_data) - True - >>> all(d['weight'] == 5.0 for d in all_data) - True - - """ - for tag, update_dict in tags.items(): - matching_datasets = datasets.search(where("tags").test(lambda x: tag in x)) - for newkey, newval in update_dict.items(): - for match in matching_datasets: - if isinstance(newval, list): - match[newkey] = match.get(newkey, []) + newval - elif np.isscalar(newval): - match[newkey] = newval - elif isinstance(newval, dict): - d = match.get(newkey, dict()) - d.update(newval) - match[newkey] = d - else: - match[newkey] = newval - datasets.update(match, doc_ids=[match.doc_id]) - - -def load_datasets(dataset_filenames, 
include_disabled=False) -> PickleableTinyDB: - """ - Create a PickelableTinyDB with the data from a list of filenames. - - Parameters - ---------- - dataset_filenames : [str] - List of filenames to load as datasets - - Returns - ------- - PickleableTinyDB - """ - ds_database = PickleableTinyDB(storage=MemoryStorage) - for fname in dataset_filenames: - with open(fname) as file_: - try: - d = json.load(file_) - if not include_disabled and d.get('disabled', False): - # The dataset is disabled and not included - continue - ds_database.insert(to_Dataset(d).model_dump()) - except ValueError as e: - raise ValueError('JSON Error in {}: {}'.format(fname, e)) - except DatasetError as e: - raise DatasetError('Dataset Error in {}: {}'.format(fname, e)) - return ds_database - - -def recursive_glob(start, pattern='*.json'): - """ - Recursively glob for the given pattern from the start directory. - - Parameters - ---------- - start : str - Path of the directory to walk while for file globbing - pattern : str - Filename pattern to match in the glob. 
- - Returns - ------- - [str] - List of matched filenames - - """ - matches = [] - for root, dirnames, filenames in os.walk(start, followlinks=True): - for filename in fnmatch.filter(filenames, pattern): - matches.append(os.path.join(root, filename)) - return sorted(matches) From 42a6e3a355ba9cee111d78c0f39e5ace76768c91 Mon Sep 17 00:00:00 2001 From: bocklund Date: Sun, 17 Aug 2025 18:57:51 -0700 Subject: [PATCH 12/14] Ruff check datasets.py --- espei/datasets.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/espei/datasets.py b/espei/datasets.py index bd81f9cb..092dda3d 100644 --- a/espei/datasets.py +++ b/espei/datasets.py @@ -2,7 +2,9 @@ import warnings from pydantic import BaseModel, Field, model_validator, field_validator import numpy as np -import fnmatch, json, os +import fnmatch +import json +import os from tinydb.storages import MemoryStorage from tinydb import where @@ -465,11 +467,11 @@ def recursive_glob(start, pattern='*.json'): def check_dataset(dataset: dict[str, Any]) -> dict[str, Any]: """Ensure that the dataset is valid and consistent by round-tripping through pydantic.""" - warnings.warn(f"check_dataset is deprecated will be removed in ESPEI 0.11. Behavior has been migrated to the pydantic dataset implementations in espei.datasets.dataset_models. To get a Dataset object, use espei.datasets.to_Dataset.", DeprecationWarning) + warnings.warn("check_dataset is deprecated will be removed in ESPEI 0.11. Behavior has been migrated to the pydantic dataset implementations in espei.datasets.dataset_models. To get a Dataset object, use espei.datasets.to_Dataset.", DeprecationWarning) return to_Dataset(dataset).model_dump() def clean_dataset(dataset: dict[str, Any]) -> dict[str, Any]: """Ensure that the dataset is valid and consistent by round-tripping through pydantic.""" - warnings.warn(f"clean_dataset is deprecated will be removed in ESPEI 0.11. 
Behavior has been migrated to the pydantic dataset implementations in espei.datasets.dataset_models. To get a Dataset object, use espei.datasets.to_Dataset.", DeprecationWarning) + warnings.warn("clean_dataset is deprecated will be removed in ESPEI 0.11. Behavior has been migrated to the pydantic dataset implementations in espei.datasets.dataset_models. To get a Dataset object, use espei.datasets.to_Dataset.", DeprecationWarning) return to_Dataset(dataset).model_dump() From b01668a20f7608410f3ca97a1d54bb796fc3e643 Mon Sep 17 00:00:00 2001 From: bocklund Date: Sun, 17 Aug 2025 18:59:54 -0700 Subject: [PATCH 13/14] Delete comment field --- espei/datasets.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/espei/datasets.py b/espei/datasets.py index 092dda3d..e433bac0 100644 --- a/espei/datasets.py +++ b/espei/datasets.py @@ -72,7 +72,6 @@ class BroadcastSinglePhaseFixedConfigurationDataset(Dataset): reference: str = Field(default="") bibtex: str = Field(default="") dataset_author: str = Field(default="") - comment: str = Field(default="") disabled: bool = Field(default=False) tags: list[str] = Field(default_factory=list) @@ -155,7 +154,6 @@ class ActivityPropertyDataset(Dataset): reference: str = Field(default="") bibtex: str = Field(default="") dataset_author: str = Field(default="") - comment: str = Field(default="") disabled: bool = Field(default=False) tags: list[str] = Field(default_factory=list) @@ -205,7 +203,6 @@ class EquilibriumPropertyDataset(Dataset): reference: str = Field(default="") bibtex: str = Field(default="") dataset_author: str = Field(default="") - comment: str = Field(default="") disabled: bool = Field(default=False) tags: list[str] = Field(default_factory=list) @@ -261,7 +258,6 @@ class ZPFDataset(Dataset): reference: str = Field(default="") bibtex: str = Field(default="") dataset_author: str = Field(default="") - comment: str = Field(default="") disabled: bool = Field(default=False) tags: list[str] = Field(default_factory=list) From 
2ffd7d61ebca6d79364fb3c21baaa9563abe9ddb Mon Sep 17 00:00:00 2001 From: bocklund Date: Sun, 17 Aug 2025 19:07:59 -0700 Subject: [PATCH 14/14] Refactor types to common datasets. Datasets pretty much only have to implement a value and the validators --- espei/datasets.py | 83 +++++++++++++++++-------------------------- tests/testing_data.py | 5 --- 2 files changed, 32 insertions(+), 56 deletions(-) diff --git a/espei/datasets.py b/espei/datasets.py index e433bac0..0bf65eaf 100644 --- a/espei/datasets.py +++ b/espei/datasets.py @@ -49,10 +49,7 @@ class DatasetError(Exception): pass -class Dataset(BaseModel): - pass - - +# Used by BroadcastSinglePhaseFixedConfigurationDataset to define internal DOF class Solver(BaseModel): mode: Literal["manual"] = Field(default="manual") sublattice_site_ratios: list[float] @@ -61,19 +58,41 @@ class Solver(BaseModel): sublattice_occupancies: list[list[float | list[float]]] | None = Field(default=None) -class BroadcastSinglePhaseFixedConfigurationDataset(Dataset): +# Activity dataset special case reference state +class ActivityDataReferenceState(BaseModel): + phases: list[PhaseName] = Field(min_length=1) + conditions: dict[str, float] + + +# More general reference states for equilibrium property datasets +class ReferenceStates(BaseModel): + phase: PhaseName + fixed_state_variables: dict[str, float] | None = Field(default=None, description="Fixed potentials for the reference state", examples=[{"T": 298.15, "P": 101325}]) + + +class Dataset(BaseModel): components: list[ComponentName] = Field(min_length=1) - phases: list[PhaseName] = Field(min_length=1, max_length=1) - solver: Solver + phases: list[PhaseName] = Field(min_length=1) conditions: dict[str, float | list[float]] output: str - values: list[list[list[float]]] - excluded_model_contributions: list[str] = Field(default_factory=list) + # TODO: weights + + # Control + disabled: bool = Field(default=False) + tags: list[str] = Field(default_factory=list) + + # Metadata reference: str = 
Field(default="") bibtex: str = Field(default="") dataset_author: str = Field(default="") - disabled: bool = Field(default=False) - tags: list[str] = Field(default_factory=list) + + +class BroadcastSinglePhaseFixedConfigurationDataset(Dataset): + phases: list[PhaseName] = Field(min_length=1, max_length=1) + values: list[list[list[float]]] + solver: Solver + conditions: dict[str, float | list[float]] + excluded_model_contributions: list[str] = Field(default_factory=list) @model_validator(mode="after") def validate_components_entered_match_components_used(self) -> Self: @@ -136,26 +155,13 @@ def validate_configuration_occupancy_shape_agreement(self) -> Self: return self -class ActivityDataReferenceState(Dataset): - phases: list[PhaseName] = Field(min_length=1) - conditions: dict[str, float] - # TODO: refactor ActivityPropertyDataset to merge with EquilibriumPropertyDataset # The validator functions are exactly duplicated in EquilibriumPropertyDataset # The duplication simplifies the implementation since the activity special case is # ultimately meant to be removed once activity is a PyCalphad Workspace property class ActivityPropertyDataset(Dataset): - components: list[ComponentName] = Field(min_length=1) - phases: list[PhaseName] = Field(min_length=1) - conditions: dict[str, float | list[float]] - reference_state: ActivityDataReferenceState - output: str values: list[list[list[float]]] - reference: str = Field(default="") - bibtex: str = Field(default="") - dataset_author: str = Field(default="") - disabled: bool = Field(default=False) - tags: list[str] = Field(default_factory=list) + reference_state: ActivityDataReferenceState @model_validator(mode="after") def validate_condition_value_shape_agreement(self) -> Self: @@ -188,23 +194,9 @@ def validate_components_entered_match_components_used(self) -> Self: return self -class ReferenceStates(BaseModel): - phase: PhaseName - fixed_state_variables: dict[str, float] | None = Field(default=None, description="Fixed 
potentials for the reference state", examples=[{"T": 298.15, "P": 101325}]) - - class EquilibriumPropertyDataset(Dataset): - components: list[ComponentName] = Field(min_length=1) - phases: list[PhaseName] = Field(min_length=1) - conditions: dict[str, float | list[float]] - output: str values: list[list[list[float]]] reference_states: dict[ComponentName, ReferenceStates] | None = Field(default=None) - reference: str = Field(default="") - bibtex: str = Field(default="") - dataset_author: str = Field(default="") - disabled: bool = Field(default=False) - tags: list[str] = Field(default_factory=list) @model_validator(mode="after") def validate_condition_value_shape_agreement(self) -> Self: @@ -248,18 +240,7 @@ def validate_reference_state_fully_specified_if_used(self) -> Self: class ZPFDataset(Dataset): - components: list[ComponentName] = Field(min_length=1) - phases: list[str] = Field(min_length=1) - conditions: dict[str, float | list[float]] - broadcast_conditions: Literal[False] = Field(default=False) # TODO: migrate and remove, since True was never supported - output: Literal["ZPF"] - values: list[PhaseRegionType] # TODO: validate to be of same shape as conditions - excluded_model_contributions: list[str] = Field(default_factory=list) - reference: str = Field(default="") - bibtex: str = Field(default="") - dataset_author: str = Field(default="") - disabled: bool = Field(default=False) - tags: list[str] = Field(default_factory=list) + values: list[PhaseRegionType] @model_validator(mode="after") def validate_condition_value_shape_agreement(self) -> Self: diff --git a/tests/testing_data.py b/tests/testing_data.py index 73c9540e..dc528c19 100644 --- a/tests/testing_data.py +++ b/tests/testing_data.py @@ -584,7 +584,6 @@ "P": 101325, "T": [1337.97, 1262.238] }, - "broadcast_conditions": false, "output": "ZPF", "values": [ [["LIQUID", ["MG"], [0.0246992]], ["FCC_A1", ["MG"], [null]]], @@ -695,7 +694,6 @@ "P": "101325", "T": ["1337.97", "1262.238"] }, - 
"broadcast_conditions": false, "output": "ZPF", "values": [ [["LIQUID", ["MG"], ["0.0246992"]], ["FCC_A1", ["MG"], [null]]], @@ -713,7 +711,6 @@ "P": 101325, "T": [733.15] }, - "broadcast_conditions": false, "output": "ZPF", "values": [ [["__HYPERPLANE__", ["CU"], [0.05]], ["HCP_A3", ["CU"], [null]], ["CUMG2", ["CU"], [null]]] @@ -964,7 +961,6 @@ CR_NI_ZPF_DATA = { "components": ["CR", "NI", "VA"], "phases": ["BCC_A2", "FCC_A1"], - "broadcast_conditions": False, "conditions": { "T": [1073, 1173, 1273, 1373, 1548], "P": [101325.0] @@ -1478,7 +1474,6 @@ LI_SN_ZPF_DATA = { "components": ["LI", "SN"], "phases": ["LIQUID", "LI7SN2"], - "broadcast_conditions": False, "conditions": { "T": [1040], "P": [101325.0]