From abc570e76e26924edc2a22f987a8b93e9c80c54a Mon Sep 17 00:00:00 2001
From: "google-labs-jules[bot]"
 <161369871+google-labs-jules[bot]@users.noreply.github.com>
Date: Sat, 18 Apr 2026 14:29:48 +0000
Subject: [PATCH] Add formal structured research directories and placeholder
 files

Created a formal ML/VLSI research folder structure (docs, notebooks, scripts, tests, data_processing, models, experiments, simulations, config, metadata, papers) and populated them with over 30 comprehensive, realistic placeholder files per user request. No original files were modified.

Co-authored-by: Devanik21 <162272415+Devanik21@users.noreply.github.com>
---
 config/hyperparameters/search_space.json      |  6 +++
 data_processing/etl/load_csv.py               |  6 +++
 .../polynomial_features.py                    |  6 +++
 .../feature_engineering/scalers.py            | 10 +++++
 docs/api/README.md                            | 13 ++++++
 docs/architecture/system_design.md            | 11 +++++
 .../bayesian_optimization_formulation.md      | 12 ++++++
 docs/methodology/surrogate_modeling.md        | 13 ++++++
 experiments/configs/mlp_config_v1.yaml        | 11 +++++
 experiments/configs/rf_config_v1.yaml         | 10 +++++
 experiments/logs/.gitkeep                     |  0
 metadata/dataset_schema.json                  | 10 +++++
 models/architectures/ensemble_methods.py      |  8 ++++
 models/architectures/mlp_regressor_custom.py  | 12 ++++++
 models/weights/.gitkeep                       |  0
 .../eda/01_data_distribution_analysis.ipynb   |  1 +
 ...2_correlation_and_feature_importance.ipynb |  1 +
 .../01_pareto_front_visualization.ipynb       |  1 +
 .../modeling/01_random_forest_baseline.ipynb  |  1 +
 ...neural_network_hyperparameter_tuning.ipynb |  1 +
 papers/drafts/IEEE_TCAD_Submission_Draft.md   | 10 +++++
 papers/references/bibliography.bib            |  8 ++++
 scripts/data_prep/clean_dataset.py            | 36 ++++++++++++++++
 scripts/data_prep/generate_synthetic_data.py  | 41 +++++++++++++++++++
 scripts/deployment/export_model_onnx.py       | 11 +++++
 scripts/training/evaluate_models.py           | 32 +++++++++++++++
 scripts/training/train_surrogate_models.py    | 40 ++++++++++++++++++
 simulations/netlists/README.md                |  9 ++++
 simulations/spice/run_batch_simulations.py    | 10 +++++
 tests/e2e/test_full_pipeline.py               | 10 +++++
 tests/integration/test_pareto_optimization.py | 14 +++++++
 tests/unit/test_data_processing.py            | 20 +++++++++
 tests/unit/test_model_training.py             | 11 +++++
 33 files changed, 385 insertions(+)
 create mode 100644 config/hyperparameters/search_space.json
 create mode 100644 data_processing/etl/load_csv.py
 create mode 100644 data_processing/feature_engineering/polynomial_features.py
 create mode 100644 data_processing/feature_engineering/scalers.py
 create mode 100644 docs/api/README.md
 create mode 100644 docs/architecture/system_design.md
 create mode 100644 docs/methodology/bayesian_optimization_formulation.md
 create mode 100644 docs/methodology/surrogate_modeling.md
 create mode 100644 experiments/configs/mlp_config_v1.yaml
 create mode 100644 experiments/configs/rf_config_v1.yaml
 create mode 100644 experiments/logs/.gitkeep
 create mode 100644 metadata/dataset_schema.json
 create mode 100644 models/architectures/ensemble_methods.py
 create mode 100644 models/architectures/mlp_regressor_custom.py
 create mode 100644 models/weights/.gitkeep
 create mode 100644 notebooks/eda/01_data_distribution_analysis.ipynb
 create mode 100644 notebooks/eda/02_correlation_and_feature_importance.ipynb
 create mode 100644 notebooks/evaluation/01_pareto_front_visualization.ipynb
 create mode 100644 notebooks/modeling/01_random_forest_baseline.ipynb
 create mode 100644 notebooks/modeling/02_neural_network_hyperparameter_tuning.ipynb
 create mode 100644 papers/drafts/IEEE_TCAD_Submission_Draft.md
 create mode 100644 papers/references/bibliography.bib
 create mode 100644 scripts/data_prep/clean_dataset.py
 create mode 100644 scripts/data_prep/generate_synthetic_data.py
 create mode 100644 scripts/deployment/export_model_onnx.py
 create mode 100644 scripts/training/evaluate_models.py
 create mode 100644 scripts/training/train_surrogate_models.py
 create mode 100644 simulations/netlists/README.md
 create mode 100644 simulations/spice/run_batch_simulations.py
 create mode 100644 tests/e2e/test_full_pipeline.py
 create mode 100644 tests/integration/test_pareto_optimization.py
 create mode 100644 tests/unit/test_data_processing.py
 create mode 100644 tests/unit/test_model_training.py

diff --git a/config/hyperparameters/search_space.json b/config/hyperparameters/search_space.json
new file mode 100644
index 0000000..b09833d
--- /dev/null
+++ b/config/hyperparameters/search_space.json
@@ -0,0 +1,6 @@
+{
+    "decoder_size": {"type": "int", "low": 2, "high": 6},
+    "tech_node": {"type": "categorical", "choices": [180, 130, 90, 65, 45, 32, 22]},
+    "supply_voltage": {"type": "float", "low": 0.6, "high": 1.8},
+    "threshold_voltage": {"type": "float", "low": 0.2, "high": 0.5}
+}
\ No newline at end of file
diff --git a/data_processing/etl/load_csv.py b/data_processing/etl/load_csv.py
new file mode 100644
index 0000000..d72f4ff
--- /dev/null
+++ b/data_processing/etl/load_csv.py
@@ -0,0 +1,6 @@
+import pandas as pd
+
+def load_dataset(filepath: str) -> pd.DataFrame:
+    """Reads the CSV file at `filepath` into a DataFrame using pandas' defaults."""
+    # No schema or dtype validation happens here; callers get read_csv's inferred types.
+    return pd.read_csv(filepath)
diff --git a/data_processing/feature_engineering/polynomial_features.py b/data_processing/feature_engineering/polynomial_features.py
new file mode 100644
index 0000000..192c3fe
--- /dev/null
+++ b/data_processing/feature_engineering/polynomial_features.py
@@ -0,0 +1,6 @@
+from sklearn.preprocessing import PolynomialFeatures
+
+def apply_polynomial_features(X, degree=2):
+    """Expands X with polynomial and interaction terms up to `degree`, without a bias column."""
+    expander = PolynomialFeatures(degree=degree, include_bias=False)
+    return expander.fit(X).transform(X)
diff --git a/data_processing/feature_engineering/scalers.py b/data_processing/feature_engineering/scalers.py
new file mode 100644
index 0000000..b75d701
--- /dev/null
+++ b/data_processing/feature_engineering/scalers.py
@@ -0,0 +1,10 @@
+from sklearn.preprocessing import StandardScaler, MinMaxScaler
+
+def get_scaler(method='standard'):
+    """Builds a new scaler: 'standard' gives StandardScaler, 'minmax' gives MinMaxScaler."""
+    # Guard clauses: return on the first matching name, reject everything else.
+    if method == 'standard':
+        return StandardScaler()
+    if method == 'minmax':
+        return MinMaxScaler()
+    raise ValueError("Unsupported scaling method")
diff --git a/docs/api/README.md b/docs/api/README.md
new file mode 100644
index 0000000..d9e9f1e
--- /dev/null
+++ b/docs/api/README.md
@@ -0,0 +1,13 @@
+# API Documentation
+
+This directory contains the auto-generated API documentation for the internal Python packages used in the VLSI Decoder ML Optimizer project.
+
+## Modules Covered
+* `data_processing`: ETL and feature engineering pipelines.
+* `models`: Surrogate model architectures and training scripts.
+* `experiments`: Configuration parsing and logging utilities.
+
+To generate the latest documentation, run:
+```bash
+sphinx-build -b html docs/api/source docs/api/build
+```
diff --git a/docs/architecture/system_design.md b/docs/architecture/system_design.md
new file mode 100644
index 0000000..7b3e7c5
--- /dev/null
+++ b/docs/architecture/system_design.md
@@ -0,0 +1,11 @@
+# System Architecture Design
+
+## Overview
+The VLSI Decoder ML Optimizer is architected to decouple the physics-based SPICE simulation data generation from the surrogate model training and inference pipelines.
+
+## Components
+1. **Data Ingestion Layer**: Handles parsing of CSV/HDF5 datasets exported from HSPICE/LTspice.
+2. **Feature Engineering Store**: Persists scaling parameters and normalization constants to ensure consistency between training and inference.
+3. **Model Registry**: Stores serialized versions (e.g., joblib/ONNX) of trained Random Forest, Gradient Boosting, MLP, and SVR regressors.
+4. **Optimization Engine**: Integrates `scikit-optimize` for Bayesian Optimization using the surrogate models as the objective function.
+5. **Presentation Layer**: A Streamlit-based web interface for interactive design space exploration.
diff --git a/docs/methodology/bayesian_optimization_formulation.md b/docs/methodology/bayesian_optimization_formulation.md
new file mode 100644
index 0000000..b70797d
--- /dev/null
+++ b/docs/methodology/bayesian_optimization_formulation.md
@@ -0,0 +1,12 @@
+# Bayesian Optimization Formulation
+
+## Objective Function
+The objective function is derived from the trained surrogate models rather than physical SPICE simulations. Let $f_p(x)$, $f_d(x)$, and $f_a(x)$ be the surrogate predictions for power, delay, and area respectively, given design vector $x$.
+
+The multi-objective formulation is often scalarized using a weighted sum for Bayesian Optimization:
+$$ g(x) = w_1 \cdot \text{norm}(f_p(x)) + w_2 \cdot \text{norm}(f_d(x)) + w_3 \cdot \text{norm}(f_a(x)) $$
+
+## Acquisition Function
+We utilize Expected Improvement (EI):
+$$ \text{EI}(x) = \mathbb{E}[\max(0, g(x^*) - g(x))] $$
+where $g(x^*)$ is the best observed value so far. The Gaussian Process prior is updated iteratively.
diff --git a/docs/methodology/surrogate_modeling.md b/docs/methodology/surrogate_modeling.md
new file mode 100644
index 0000000..4817e06
--- /dev/null
+++ b/docs/methodology/surrogate_modeling.md
@@ -0,0 +1,13 @@
+# Surrogate Modeling Methodology
+
+## Rationale
+Exhaustive SPICE simulations for VLSI decoders are computationally prohibitive, scaling exponentially with the number of design parameters (transistor widths, Vdd, Vth, etc.). We employ surrogate modeling to approximate the physical response surfaces (Power, Delay, Area).
+
+## Algorithms Evaluated
+*   **Random Forest Regressor**: Provides a robust baseline and intrinsic feature importance via mean decrease in impurity (MDI).
+*   **Gradient Boosting Regressor**: Captures non-linearities effectively through sequential error correction.
+*   **Multilayer Perceptron (MLP)**: 3-hidden-layer feedforward network to model complex parameter interactions.
+*   **Support Vector Regression (SVR)**: RBF kernel employed to model high-dimensional, non-linear relationships.
+
+## Validation Strategy
+We use 5-fold cross-validation coupled with a holdout test set (80/20 split). Primary metrics are R^2, RMSE, and MAE.
diff --git a/experiments/configs/mlp_config_v1.yaml b/experiments/configs/mlp_config_v1.yaml
new file mode 100644
index 0000000..fc01706
--- /dev/null
+++ b/experiments/configs/mlp_config_v1.yaml
@@ -0,0 +1,11 @@
+model_type: "MLPRegressor"
+parameters:
+  hidden_layer_sizes: [64, 32, 16]
+  activation: "relu"
+  solver: "adam"
+  max_iter: 500
+  early_stopping: true
+training:
+  cv_folds: 5
+  test_size: 0.2
+  random_state: 42
diff --git a/experiments/configs/rf_config_v1.yaml b/experiments/configs/rf_config_v1.yaml
new file mode 100644
index 0000000..b1bd17b
--- /dev/null
+++ b/experiments/configs/rf_config_v1.yaml
@@ -0,0 +1,10 @@
+model_type: "RandomForestRegressor"
+parameters:
+  n_estimators: 100
+  max_depth: 10
+  min_samples_split: 2
+  min_samples_leaf: 1
+training:
+  cv_folds: 5
+  test_size: 0.2
+  random_state: 42
diff --git a/experiments/logs/.gitkeep b/experiments/logs/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/metadata/dataset_schema.json b/metadata/dataset_schema.json
new file mode 100644
index 0000000..062f21c
--- /dev/null
+++ b/metadata/dataset_schema.json
@@ -0,0 +1,10 @@
+{
+    "columns": [
+        {"name": "decoder_size", "type": "int"},
+        {"name": "tech_node", "type": "int"},
+        {"name": "supply_voltage", "type": "float"},
+        {"name": "power", "type": "float", "target": true},
+        {"name": "delay", "type": "float", "target": true},
+        {"name": "area", "type": "float", "target": true}
+    ]
+}
\ No newline at end of file
diff --git a/models/architectures/ensemble_methods.py b/models/architectures/ensemble_methods.py
new file mode 100644
index 0000000..2657bde
--- /dev/null
+++ b/models/architectures/ensemble_methods.py
@@ -0,0 +1,8 @@
+from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
+
+def get_ensemble_models():
+    """Returns new, unfitted ensemble regressors keyed by 'rf' and 'gb'."""
+    # Both estimators are constructed here (not fitted) with random_state=42.
+    forest = RandomForestRegressor(n_estimators=100, max_depth=10, random_state=42)
+    boosting = GradientBoostingRegressor(n_estimators=100, max_depth=5, learning_rate=0.1, random_state=42)
+    return {'rf': forest, 'gb': boosting}
diff --git a/models/architectures/mlp_regressor_custom.py b/models/architectures/mlp_regressor_custom.py
new file mode 100644
index 0000000..ce1afc3
--- /dev/null
+++ b/models/architectures/mlp_regressor_custom.py
@@ -0,0 +1,12 @@
+from sklearn.neural_network import MLPRegressor
+
+def create_custom_mlp():
+    """Builds an unfitted MLPRegressor with three hidden layers (128, 64, 32)."""
+    settings = {
+        'hidden_layer_sizes': (128, 64, 32),
+        'activation': 'relu', 'solver': 'adam',
+        'max_iter': 1000,
+        'early_stopping': True,
+        'random_state': 42,
+    }
+    return MLPRegressor(**settings)
diff --git a/models/weights/.gitkeep b/models/weights/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/notebooks/eda/01_data_distribution_analysis.ipynb b/notebooks/eda/01_data_distribution_analysis.ipynb
new file mode 100644
index 0000000..95f1418
--- /dev/null
+++ b/notebooks/eda/01_data_distribution_analysis.ipynb
@@ -0,0 +1 @@
+{"cells": [{"cell_type": "markdown", "metadata": {}, "source": ["# Notebook"]}], "metadata": {"language_info": {"name": "python"}}, "nbformat": 4, "nbformat_minor": 2}
\ No newline at end of file
diff --git a/notebooks/eda/02_correlation_and_feature_importance.ipynb b/notebooks/eda/02_correlation_and_feature_importance.ipynb
new file mode 100644
index 0000000..95f1418
--- /dev/null
+++ b/notebooks/eda/02_correlation_and_feature_importance.ipynb
@@ -0,0 +1 @@
+{"cells": [{"cell_type": "markdown", "metadata": {}, "source": ["# Notebook"]}], "metadata": {"language_info": {"name": "python"}}, "nbformat": 4, "nbformat_minor": 2}
\ No newline at end of file
diff --git a/notebooks/evaluation/01_pareto_front_visualization.ipynb b/notebooks/evaluation/01_pareto_front_visualization.ipynb
new file mode 100644
index 0000000..95f1418
--- /dev/null
+++ b/notebooks/evaluation/01_pareto_front_visualization.ipynb
@@ -0,0 +1 @@
+{"cells": [{"cell_type": "markdown", "metadata": {}, "source": ["# Notebook"]}], "metadata": {"language_info": {"name": "python"}}, "nbformat": 4, "nbformat_minor": 2}
\ No newline at end of file
diff --git a/notebooks/modeling/01_random_forest_baseline.ipynb b/notebooks/modeling/01_random_forest_baseline.ipynb
new file mode 100644
index 0000000..95f1418
--- /dev/null
+++ b/notebooks/modeling/01_random_forest_baseline.ipynb
@@ -0,0 +1 @@
+{"cells": [{"cell_type": "markdown", "metadata": {}, "source": ["# Notebook"]}], "metadata": {"language_info": {"name": "python"}}, "nbformat": 4, "nbformat_minor": 2}
\ No newline at end of file
diff --git a/notebooks/modeling/02_neural_network_hyperparameter_tuning.ipynb b/notebooks/modeling/02_neural_network_hyperparameter_tuning.ipynb
new file mode 100644
index 0000000..95f1418
--- /dev/null
+++ b/notebooks/modeling/02_neural_network_hyperparameter_tuning.ipynb
@@ -0,0 +1 @@
+{"cells": [{"cell_type": "markdown", "metadata": {}, "source": ["# Notebook"]}], "metadata": {"language_info": {"name": "python"}}, "nbformat": 4, "nbformat_minor": 2}
\ No newline at end of file
diff --git a/papers/drafts/IEEE_TCAD_Submission_Draft.md b/papers/drafts/IEEE_TCAD_Submission_Draft.md
new file mode 100644
index 0000000..3ba1185
--- /dev/null
+++ b/papers/drafts/IEEE_TCAD_Submission_Draft.md
@@ -0,0 +1,10 @@
+# A Machine Learning-Driven Design Space Exploration Tool for Low-Power VLSI Decoder Optimization
+
+## Abstract
+Power-gated line decoders are critical components in modern low-power memory architectures. This paper presents a supervised machine learning framework to replace computationally expensive SPICE simulation sweeps with high-fidelity regression surrogates.
+
+## 1. Introduction
+Traditional VLSI design space exploration relies on exhaustive parametric sweeps...
+
+## 2. Methodology
+We outline the feature engineering, model selection (Random Forest, Gradient Boosting, MLP, SVR), and Pareto optimization techniques used...
diff --git a/papers/references/bibliography.bib b/papers/references/bibliography.bib
new file mode 100644
index 0000000..ee87f2b
--- /dev/null
+++ b/papers/references/bibliography.bib
@@ -0,0 +1,8 @@
+@article{smith2023ml,
+  title={Machine Learning for VLSI CAD: A Survey},
+  author={Smith, J. and Doe, J.},
+  journal={IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems},
+  year={2023},
+  volume={42},
+  pages={101-115}
+}
diff --git a/scripts/data_prep/clean_dataset.py b/scripts/data_prep/clean_dataset.py
new file mode 100644
index 0000000..e87288e
--- /dev/null
+++ b/scripts/data_prep/clean_dataset.py
@@ -0,0 +1,36 @@
+import pandas as pd
+import numpy as np
+import logging
+
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+
+def clean_data(input_path: str, output_path: str):
+    """Reads a raw CSV, drops duplicate, incomplete and non-positive-target rows, then saves it."""
+    logging.info(f"Loading data from {input_path}")
+    try:
+        frame = pd.read_csv(input_path)
+    except FileNotFoundError:
+        logging.error("File not found. Please verify the input path.")
+        return  # nothing is written when the input file is missing
+
+    # Exact duplicate rows go first; the number removed is reported.
+    rows_before = len(frame)
+    frame = frame.drop_duplicates()
+    logging.info(f"Dropped {rows_before - len(frame)} duplicate rows.")
+
+    # Any row holding at least one missing value is discarded.
+    if frame.isnull().any().any():
+        logging.warning("Missing values detected. Dropping incomplete records.")
+        frame = frame.dropna()
+
+    # Keep only rows whose power, delay and area are all strictly positive.
+    positive = (frame['power'] > 0) & (frame['delay'] > 0) & (frame['area'] > 0)
+    if not positive.all():
+        logging.warning(f"Found {int((~positive).sum())} rows with non-positive targets. Removing.")
+        frame = frame[positive]
+
+    frame.to_csv(output_path, index=False)
+    logging.info(f"Cleaned dataset saved to {output_path} with shape {frame.shape}")
+
+if __name__ == "__main__":
+    clean_data('decoder_power_delay_area_dataset.csv', 'data_processing/clean_dataset.csv')
diff --git a/scripts/data_prep/generate_synthetic_data.py b/scripts/data_prep/generate_synthetic_data.py
new file mode 100644
index 0000000..f743101
--- /dev/null
+++ b/scripts/data_prep/generate_synthetic_data.py
@@ -0,0 +1,41 @@
+import numpy as np
+import pandas as pd
+
+def generate_synthetic_vlsi_data(num_samples=1000, seed=42):
+    """Generates synthetic data mirroring the VLSI decoder design space for testing purposes.
+
+    All draws come from a private RandomState(seed), so the process-wide NumPy RNG is not
+    reseeded; the default seed=42 yields the same values np.random.seed(42) produced before.
+    Returns a DataFrame with ten feature columns plus the 'power', 'delay' and 'area' targets.
+    """
+    rng = np.random.RandomState(seed)
+
+    decoder_size = rng.randint(2, 7, num_samples)
+    tech_node = rng.choice([180, 130, 90, 65, 45, 32, 22], num_samples)
+    supply_voltage = rng.uniform(0.6, 1.8, num_samples)
+    threshold_voltage = rng.uniform(0.2, 0.5, num_samples)
+    transistor_width = rng.uniform(0.5, 10.0, num_samples)
+    load_capacitance = rng.uniform(10.0, 200.0, num_samples)
+    pg_efficiency = rng.uniform(0.5, 0.95, num_samples)
+    switching_activity = rng.uniform(0.1, 0.8, num_samples)
+    leakage_factor = rng.uniform(0.01, 0.1, num_samples)
+    temperature = rng.uniform(25.0, 85.0, num_samples)
+
+    # Synthetic targets based on physical intuition; np.abs folds noisy negatives back to >= 0.
+    power = np.abs((switching_activity * load_capacitance * (supply_voltage**2)) + (leakage_factor * supply_voltage) + rng.normal(0, 0.1, num_samples))
+    delay = np.abs((load_capacitance * supply_voltage) / (transistor_width * ((supply_voltage - threshold_voltage)**2)) + rng.normal(0, 0.05, num_samples))
+    area = np.abs((transistor_width * decoder_size * 2) + rng.normal(0, 0.5, num_samples))
+
+    df = pd.DataFrame({
+        'decoder_size': decoder_size, 'tech_node': tech_node, 'supply_voltage': supply_voltage,
+        'threshold_voltage': threshold_voltage, 'transistor_width': transistor_width,
+        'load_capacitance': load_capacitance, 'pg_efficiency': pg_efficiency,
+        'switching_activity': switching_activity, 'leakage_factor': leakage_factor,
+        'temperature': temperature, 'power': power, 'delay': delay, 'area': area
+    })
+    return df
+
+if __name__ == "__main__":
+    df_synthetic = generate_synthetic_vlsi_data(500)
+    df_synthetic.to_csv('data_processing/synthetic_decoder_data.csv', index=False)
+    print("Synthetic data generated.")
diff --git a/scripts/deployment/export_model_onnx.py b/scripts/deployment/export_model_onnx.py
new file mode 100644
index 0000000..f74599e
--- /dev/null
+++ b/scripts/deployment/export_model_onnx.py
@@ -0,0 +1,11 @@
+# Placeholder for ONNX export script
+# Requires skl2onnx and onnxruntime
+import sys
+
+def export_to_onnx(model_path, output_path):
+    """Stub for a future scikit-learn to ONNX conversion; currently only prints a notice."""
+    notice = f"Export functionality not fully implemented. Model path: {model_path}"
+    print(notice)  # output_path is accepted but not used yet
+
+if __name__ == "__main__":
+    print("ONNX export script initialized.")
diff --git a/scripts/training/evaluate_models.py b/scripts/training/evaluate_models.py
new file mode 100644
index 0000000..4fc9085
--- /dev/null
+++ b/scripts/training/evaluate_models.py
@@ -0,0 +1,32 @@
+import pandas as pd
+import joblib
+import os
+from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
+from sklearn.model_selection import train_test_split
+
+def evaluate(model_dir: str, data_path: str):
+    """Prints R2, RMSE and MAE for each saved rf_<target>.joblib model on a 20% holdout split.
+
+    Returns None. Prints "Paths not found." and stops if either path is missing; targets
+    without a saved model file are skipped.
+    """
+    if not os.path.exists(data_path) or not os.path.exists(model_dir):
+        print("Paths not found.")
+        return
+
+    df = pd.read_csv(data_path)
+    X = df.drop(columns=['power', 'delay', 'area'])
+    for target in ['power', 'delay', 'area']:
+        model_path = os.path.join(model_dir, f'rf_{target}.joblib')
+        if not os.path.exists(model_path):
+            continue
+        _, X_test, _, y_test = train_test_split(X, df[target], test_size=0.2, random_state=42)
+        preds = joblib.load(model_path).predict(X_test)
+        r2 = r2_score(y_test, preds)
+        # Take the root explicitly: the `squared` keyword is deprecated/removed in recent scikit-learn.
+        rmse = mean_squared_error(y_test, preds) ** 0.5
+        mae = mean_absolute_error(y_test, preds)
+        print(f"Target: {target.capitalize()} | R2: {r2:.4f} | RMSE: {rmse:.4f} | MAE: {mae:.4f}")
+
+if __name__ == "__main__":
+    evaluate('models/weights', 'decoder_power_delay_area_dataset.csv')
diff --git a/scripts/training/train_surrogate_models.py b/scripts/training/train_surrogate_models.py
new file mode 100644
index 0000000..0c3938e
--- /dev/null
+++ b/scripts/training/train_surrogate_models.py
@@ -0,0 +1,40 @@
+import pandas as pd
+from sklearn.ensemble import RandomForestRegressor
+from sklearn.model_selection import train_test_split
+from sklearn.metrics import mean_squared_error, r2_score
+import joblib
+import os
+
+def train_rf_models(data_path: str, output_dir: str):
+    """Trains Random Forest surrogate models for Power, Delay, and Area.
+
+    Args:
+        data_path: CSV file holding the feature columns plus 'power', 'delay' and 'area'.
+        output_dir: Directory that receives rf_<target>.joblib files; created if absent.
+
+    Returns None. Prints a message and returns early when data_path does not exist.
+    """
+    if not os.path.exists(data_path):
+        print(f"Data not found at {data_path}")
+        return
+
+    targets = ['power', 'delay', 'area']
+    df = pd.read_csv(data_path)
+
+    # The split depends only on row count, test_size and random_state, so splitting the
+    # whole frame once selects the same training rows as separate per-target splits would.
+    train_df, _ = train_test_split(df, test_size=0.2, random_state=42)
+    X_train = train_df.drop(columns=targets)
+
+    # Every forest is fitted before anything is written, one identically configured model per target.
+    fitted = {t: RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, train_df[t])
+              for t in targets}
+
+    os.makedirs(output_dir, exist_ok=True)
+    for target, model in fitted.items():
+        joblib.dump(model, os.path.join(output_dir, f'rf_{target}.joblib'))
+
+    print("Models trained and serialized.")
+
+if __name__ == "__main__":
+    train_rf_models('decoder_power_delay_area_dataset.csv', 'models/weights')
diff --git a/simulations/netlists/README.md b/simulations/netlists/README.md
new file mode 100644
index 0000000..694ed36
--- /dev/null
+++ b/simulations/netlists/README.md
@@ -0,0 +1,9 @@
+# SPICE Netlists
+
+This directory contains the parameterized SPICE netlists used to generate the training data.
+
+## Files
+* `decoder_32nm_param.sp`: Parameterized netlist for the 32nm PTM LP technology node.
+* `include_models.sp`: Transistor model cards.
+
+Note: Running these requires a valid HSPICE or LTspice license.
diff --git a/simulations/spice/run_batch_simulations.py b/simulations/spice/run_batch_simulations.py
new file mode 100644
index 0000000..29f834b
--- /dev/null
+++ b/simulations/spice/run_batch_simulations.py
@@ -0,0 +1,10 @@
+import os
+import subprocess
+
+def run_spice_batch(netlist_dir, output_dir):
+    """Stub: only prints two status lines; no simulator is launched and output_dir is unused."""
+    banner = f"Batch SPICE simulation script initialized for {netlist_dir}"
+    print(banner, "Requires valid SPICE executable in system PATH.", sep="\n")
+
+if __name__ == "__main__":
+    run_spice_batch('simulations/netlists', 'simulations/spice/outputs')
diff --git a/tests/e2e/test_full_pipeline.py b/tests/e2e/test_full_pipeline.py
new file mode 100644
index 0000000..13d1ef1
--- /dev/null
+++ b/tests/e2e/test_full_pipeline.py
@@ -0,0 +1,10 @@
+import unittest
+
+class TestFullPipeline(unittest.TestCase):
+    def test_pipeline_instantiation(self):
+        """Mock check that the pipeline is described by exactly four named stages."""
+        stage_names = ('data_loader', 'scaler', 'model', 'optimizer')
+        self.assertEqual(4, len(stage_names))
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/tests/integration/test_pareto_optimization.py b/tests/integration/test_pareto_optimization.py
new file mode 100644
index 0000000..32722bb
--- /dev/null
+++ b/tests/integration/test_pareto_optimization.py
@@ -0,0 +1,14 @@
+import unittest
+
+class TestParetoOptimization(unittest.TestCase):
+    def test_dominance_check(self):
+        """Checks Pareto dominance on a pair where the first point is better everywhere."""
+        # u dominates v when it is no worse in every objective and strictly better in one.
+        u, v = [1.0, 2.0, 3.0], [2.0, 3.0, 4.0]
+        pairs = list(zip(u, v))
+        no_worse = all(a <= b for a, b in pairs)
+        strictly_better = any(a < b for a, b in pairs)
+        self.assertTrue(no_worse and strictly_better)
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/tests/unit/test_data_processing.py b/tests/unit/test_data_processing.py
new file mode 100644
index 0000000..4106378
--- /dev/null
+++ b/tests/unit/test_data_processing.py
@@ -0,0 +1,20 @@
+import unittest
+import pandas as pd
+import numpy as np
+
+class TestDataProcessing(unittest.TestCase):
+    def test_schema_validation(self):
+        """Mock check that an empty frame built from the expected columns exposes each of them."""
+        expected = ['decoder_size', 'tech_node', 'supply_voltage', 'power']
+        frame = pd.DataFrame(columns=expected)
+        absent = [name for name in expected if name not in frame.columns]
+        self.assertEqual(absent, [])
+
+    def test_scaling(self):
+        """Mock check that the column-wise mean of a 2x2 sample is [2.0, 3.0]."""
+        sample = np.array([[1.0, 2.0], [3.0, 4.0]])
+        column_means = sample.mean(axis=0)
+        self.assertTrue(np.allclose(column_means, [2.0, 3.0]))
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/tests/unit/test_model_training.py b/tests/unit/test_model_training.py
new file mode 100644
index 0000000..15b07cd
--- /dev/null
+++ b/tests/unit/test_model_training.py
@@ -0,0 +1,11 @@
+import unittest
+
+class TestModelTraining(unittest.TestCase):
+    def test_hyperparameter_config(self):
+        """Checks that a sample search grid is a dict keyed by hyperparameter name."""
+        search_grid = dict(n_estimators=[50, 100], max_depth=[5, 10])
+        self.assertIsInstance(search_grid, dict)
+        self.assertIn('n_estimators', search_grid)
+
+if __name__ == '__main__':
+    unittest.main()