From 30d30b8ac40054bafed28da6b937505b1371d15d Mon Sep 17 00:00:00 2001 From: devdklv <155496477+devdklv@users.noreply.github.com> Date: Fri, 11 Oct 2024 00:07:53 -0500 Subject: [PATCH 1/5] Update ElasticNet.py --- elasticnet/models/ElasticNet.py | 75 ++++++++++++++++++++++++++++----- 1 file changed, 65 insertions(+), 10 deletions(-) diff --git a/elasticnet/models/ElasticNet.py b/elasticnet/models/ElasticNet.py index 017e925..9038096 100644 --- a/elasticnet/models/ElasticNet.py +++ b/elasticnet/models/ElasticNet.py @@ -1,17 +1,72 @@ +import numpy as np +from sklearn.model_selection import train_test_split +class CustomElasticNet: + def __init__(self, alpha=1.0, l1_ratio=0.5, tolerance=1e-4, max_iterations=1000): + self.alpha = alpha + self.l1_ratio = l1_ratio + self.tolerance = tolerance + self.max_iterations = max_iterations + self.coefficients_ = None + self.intercept_ = None -class ElasticNetModel(): - def __init__(self): - pass + def fit(self, X, y): + # Inserting a column for the intercept term + X = np.c_[np.ones(X.shape[0]), X] + num_samples, num_features = X.shape + + # Initializing model coefficients in the below + self.coefficients_ = np.zeros(num_features) + + for iteration in range(self.max_iterations): + predictions = X @ self.coefficients_ + residuals = y - predictions + + # Updating model coefficients with ElasticNet regularization + for j in range(num_features): + if j == 0: # Special handling for the intercept + gradient = -2 * np.sum(residuals) / num_samples + self.coefficients_[j] -= self.alpha * gradient + else: + gradient = -2 * (X[:, j] @ residuals) / num_samples + l1_penalty = self.l1_ratio * self.alpha * np.sign(self.coefficients_[j]) + l2_penalty = (1 - self.l1_ratio) * self.alpha * self.coefficients_[j] + self.coefficients_[j] -= self.alpha * (gradient + l1_penalty + l2_penalty) + + # Checking if the gradient is below the tolerance level for convergence + if np.sum(np.abs(gradient)) < self.tolerance: + break + self.intercept_ = 
self.coefficients_[0] + self.coefficients_ = self.coefficients_[1:] + return ElasticNetResults(self.intercept_, self.coefficients_) - def fit(self, X, y): - return ElasticNetModelResults() +class ElasticNetResults: + def __init__(self, intercept, coefficients): + self.intercept_ = intercept + self.coefficients_ = coefficients + + def predict(self, X): + return self.intercept_ + X @ self.coefficients_ + +# Root Mean Squared Error (RMSE) +def rmse(y_actual, y_predicted): + return np.sqrt(np.mean((y_actual - y_predicted) ** 2)) + +# Usage +if __name__ == "__main__": + # Creating synthetic data + X = np.random.rand(100, 3) + y = 3 * X[:, 0] + 2 * X[:, 1] + X[:, 2] + np.random.randn(100) + # Spliting data for training (70%) and testing (30%) + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42) -class ElasticNetModelResults(): - def __init__(self): - pass + # Initializing and train the ElasticNet model + model = CustomElasticNet(alpha=0.1, l1_ratio=0.7) + results = model.fit(X_train, y_train) - def predict(self, x): - return 0.5 + # Predicting on test data and evaluate performance + y_pred = results.predict(X_test) + test_rmse = rmse(y_test, y_pred) + print("Test RMSE:", test_rmse) From 9e5afcc294ce5690b61da6956b40a8eadd967031 Mon Sep 17 00:00:00 2001 From: devdklv <155496477+devdklv@users.noreply.github.com> Date: Fri, 11 Oct 2024 00:09:01 -0500 Subject: [PATCH 2/5] Update README.md --- README.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/README.md b/README.md index c1e8359..a8cc17a 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,21 @@ # Project 1 +# Group Member - Dev Kumar(A20546714) +# How to excute the pyhton code for Linear regression with ElasticNet regularization. +Step 1 :- Install pyhton in your system. +Step 2 :- Open terminal and open the directory ~/Project1_ML-CS584-/elasticnet/models +Step 3 :- write the below commmnd in terminal:- +pyhton ElasticNet.py Put your README here. 
Answer the following questions. * What does the model you have implemented do and when should it be used? +Answer- The ElasticNet model implemented in the python code combines the concepts of L1 (Lasso) and L2 (Ridge) regularization in linear regression, making it effective in scenarios with many predictors and potential multicollinearity. By tuning the l1_ratio parameter, ElasticNet achieves a balance between L1 and L2 penalties, which encourages sparsity by selecting a subset of features while still providing regularization to reduce the risk of overfitting. This approach is particularly useful when there are more predictors than observations or when predictors are correlated, as it stabilizes the estimates and improves interpretability. It is well-suited for situations where feature selection is important while also retaining the regularization benefits of Ridge regression to enhance prediction accuracy. + * How did you test your model to determine if it is working reasonably correctly? +Answer- To assess the effectiveness of the ElasticNet model, I generated a synthetic dataset that establishes a clear relationship between the predictors and the response variable. After fitting the model to this dataset, I used the predict method to generate predictions. I evaluated the model's performance using the Root Mean Squared Error (RMSE), which quantifies the average difference between the predicted and actual values. A lower RMSE indicates that the model's predictions are closely aligned with the true values, demonstrating strong performance. Additionally, I observed the convergence behavior by examining the stability of the model's coefficients during training and can perform cross-validation to further assess the model's reliability across different data subsets. + * What parameters have you exposed to users of your implementation in order to tune performance? (Also perhaps provide some basic usage examples.) 
+Answer- The ElasticNet implementation allows users to fine-tune model performance through several adjustable parameters. These include alpha, which controls the overall strength of the regularization; l1_ratio, which specifies the proportion between L1 and L2 regularization to influence model sparsity; tol, which sets the tolerance level for stopping criteria during training; and max_iter, which determines the maximum number of iterations for optimization. Users can easily configure these parameters by creating an instance of the model like this: model = ElasticNetModel(alpha=0.5, l1_ratio=0.8, tol=1e-4, max_iter=500). After fitting the model to the training data with results = model.fit(X_train, y_train), they can generate predictions using predictions = results.predict(X_test). To evaluate the model's performance, RMSE can be calculated with rmse = calculate_rmse(y_test, predictions). This structure provides flexibility for customizing the model based on the specific characteristics of the dataset and the desired outcomes. + * Are there specific inputs that your implementation has trouble with? Given more time, could you work around these or is it fundamental? +Answer- The current implementation of the ElasticNet model may struggle with certain types of input data, particularly in cases of extreme multicollinearity, where features are highly correlated and can result in unstable coefficient estimates. Additionally, the model does not address datasets containing missing values, as it lacks the functionality for imputation or filtering those instances. The optimization process may also fail to converge for specific configurations of the alpha and l1_ratio parameters, especially in high-dimensional settings with limited observations. 
With more time, these challenges could be tackled by enhancing preprocessing steps, such as applying feature selection methods, implementing strategies for missing value imputation, and refining the optimization algorithm with adaptive techniques to improve convergence. However, some issues related to extreme multicollinearity might require substantial changes to the model or the introduction of supplementary regularization techniques to effectively manage complexity. From bfed65dbdba6becdefb8657ea7903fa4d0a89326 Mon Sep 17 00:00:00 2001 From: devdklv <155496477+devdklv@users.noreply.github.com> Date: Fri, 11 Oct 2024 00:36:49 -0500 Subject: [PATCH 3/5] Final Update ElasticNet.py --- elasticnet/models/ElasticNet.py | 79 +++++++++++++++++---------------- 1 file changed, 41 insertions(+), 38 deletions(-) diff --git a/elasticnet/models/ElasticNet.py b/elasticnet/models/ElasticNet.py index 9038096..35c17e4 100644 --- a/elasticnet/models/ElasticNet.py +++ b/elasticnet/models/ElasticNet.py @@ -1,72 +1,75 @@ import numpy as np +import pandas as pd from sklearn.model_selection import train_test_split +from sklearn.preprocessing import StandardScaler -class CustomElasticNet: - def __init__(self, alpha=1.0, l1_ratio=0.5, tolerance=1e-4, max_iterations=1000): +# ElasticNetModel code +class ElasticNetModel: + def __init__(self, alpha=1.0, l1_ratio=0.5, convergence_threshold=1e-4, max_iterations=1000): self.alpha = alpha self.l1_ratio = l1_ratio - self.tolerance = tolerance + self.convergence_threshold = convergence_threshold self.max_iterations = max_iterations self.coefficients_ = None self.intercept_ = None def fit(self, X, y): - # Inserting a column for the intercept term - X = np.c_[np.ones(X.shape[0]), X] + # Adding a bias term to the features + X = np.c_[np.ones(X.shape[0]), X] # Bias term (intercept) num_samples, num_features = X.shape - # Initializing model coefficients in the below + # Initializing coefficients to zero self.coefficients_ = np.zeros(num_features) - for 
iteration in range(self.max_iterations): + for _ in range(self.max_iterations): predictions = X @ self.coefficients_ residuals = y - predictions - # Updating model coefficients with ElasticNet regularization - for j in range(num_features): - if j == 0: # Special handling for the intercept + # Updating coefficients using the ElasticNet regularization + for index in range(num_features): + if index == 0: # Intercept term gradient = -2 * np.sum(residuals) / num_samples - self.coefficients_[j] -= self.alpha * gradient + self.coefficients_[index] -= self.alpha * gradient else: - gradient = -2 * (X[:, j] @ residuals) / num_samples - l1_penalty = self.l1_ratio * self.alpha * np.sign(self.coefficients_[j]) - l2_penalty = (1 - self.l1_ratio) * self.alpha * self.coefficients_[j] - self.coefficients_[j] -= self.alpha * (gradient + l1_penalty + l2_penalty) + gradient = -2 * (X[:, index] @ residuals) / num_samples + l1_penalty = self.l1_ratio * self.alpha * np.sign(self.coefficients_[index]) + l2_penalty = (1 - self.l1_ratio) * self.alpha * self.coefficients_[index] + self.coefficients_[index] -= self.alpha * (gradient + l1_penalty + l2_penalty) - # Checking if the gradient is below the tolerance level for convergence - if np.sum(np.abs(gradient)) < self.tolerance: + # Checking for convergence + if np.sum(np.abs(gradient)) < self.convergence_threshold: break self.intercept_ = self.coefficients_[0] self.coefficients_ = self.coefficients_[1:] - return ElasticNetResults(self.intercept_, self.coefficients_) - -class ElasticNetResults: - def __init__(self, intercept, coefficients): - self.intercept_ = intercept - self.coefficients_ = coefficients def predict(self, X): return self.intercept_ + X @ self.coefficients_ -# Root Mean Squared Error (RMSE) -def rmse(y_actual, y_predicted): - return np.sqrt(np.mean((y_actual - y_predicted) ** 2)) +# Function to compute Root Mean Squared Error (RMSE) +def compute_rmse(actual_values, predicted_values): + return np.sqrt(np.mean((actual_values - 
predicted_values) ** 2)) -# Usage +# Main execution of the code if __name__ == "__main__": - # Creating synthetic data - X = np.random.rand(100, 3) - y = 3 * X[:, 0] + 2 * X[:, 1] + X[:, 2] + np.random.randn(100) + # Reading data from CSV file + dataset = pd.read_csv('output.csv') + features = dataset[['x_0', 'x_1']].values + target = dataset['y'].values + + # Splitting the dataset into training (70%) and testing (30%) sets + X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.3, random_state=42) - # Spliting data for training (70%) and testing (30%) - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42) + # Normalizing the features + feature_scaler = StandardScaler() + X_train = feature_scaler.fit_transform(X_train) + X_test = feature_scaler.transform(X_test) - # Initializing and train the ElasticNet model - model = CustomElasticNet(alpha=0.1, l1_ratio=0.7) - results = model.fit(X_train, y_train) + # Initializing and train the ElasticNet regression model + elastic_net = ElasticNetModel(alpha=0.01, l1_ratio=0.7) # Adjusted learning rate + elastic_net.fit(X_train, y_train) - # Predicting on test data and evaluate performance - y_pred = results.predict(X_test) - test_rmse = rmse(y_test, y_pred) + # Predicting the target values for the test set and evaluate the model's performance + y_pred = elastic_net.predict(X_test) + test_rmse = compute_rmse(y_test, y_pred) print("Test RMSE:", test_rmse) From 8a5f89f16f9dbcbca6e384af1cac727b05ffac2e Mon Sep 17 00:00:00 2001 From: devdklv <155496477+devdklv@users.noreply.github.com> Date: Fri, 11 Oct 2024 00:38:11 -0500 Subject: [PATCH 4/5] output csv uploaded output csv uploaded --- elasticnet/models/output.csv | 101 +++++++++++++++++++++++++++++++++++ 1 file changed, 101 insertions(+) create mode 100644 elasticnet/models/output.csv diff --git a/elasticnet/models/output.csv b/elasticnet/models/output.csv new file mode 100644 index 0000000..0cc2452 --- /dev/null +++ 
b/elasticnet/models/output.csv @@ -0,0 +1,101 @@ +x_0,x_1,y +7.739560485559633,4.388784397520523,29.709806843149742 +8.585979199113824,6.973680290593639,39.053538757722606 +0.9417734788764953,9.756223516367559,32.15170532018366 +7.61139701990353,7.860643052769538,39.788378908263226 +1.2811363267554587,4.503859378955672,17.107608245257826 +3.7079802423258124,9.267649888486018,36.35965833624105 +6.438651200806645,8.227616132708299,38.56920929042836 +4.4341419882733115,2.272387217847769,16.749839509417615 +5.545847870158348,0.6381725610417532,13.801196213338853 +8.27631171992582,6.316643991220649,36.49768357332057 +7.580877400853739,3.545259681298684,26.71321081857424 +9.706980243949033,8.931211213221976,47.08571282152163 +7.783834970737619,1.9463870785196757,22.319015940341387 +4.667210037270342,0.43803765787228777,11.615120704087467 +1.5428949206754783,6.830489532424546,24.668848692860198 +7.447621559078171,9.6750973243421,44.78789581940869 +3.2582535813815197,3.704597060348689,18.63336149306855 +4.695558112758079,1.894713590842857,16.02684005471137 +1.2992150533547164,4.757049262259337,17.83681058405125 +2.2690934905088413,6.698139946825103,25.73288260402345 +4.371519188723307,8.326781960578375,34.77719580288213 +7.002651020022491,3.123666413820411,24.51004109225049 +8.32259801395201,8.047643574968019,41.77267618488309 +3.874783790301745,2.883281039302441,17.329816437343744 +6.824955039749755,1.397524836093098,18.820098706090754 +1.9990820247510832,0.07362269751005512,5.243281821159454 +7.869243775021384,6.648508565920322,36.701670583649104 +7.051653786263351,7.807290310219679,38.416739695962406 +4.5891577553833995,5.687411959528937,27.2496003675164 +1.3979699812765745,1.1453007353597344,7.254665001646242 +6.6840296179047165,4.710962061431325,28.7526928238568 +5.652361064811888,7.649988574160256,35.44237231323271 +6.347183200005908,5.535794006579958,30.216424084695806 +5.592071607454136,3.039500980626122,21.27390782063172 
+0.30817834567939406,4.367173892323624,14.571534168145961 +2.145846728195292,4.085286437246362,17.488482066733322 +8.53403073268166,2.3393948586534075,25.11780654168258 +0.5830274168906602,2.8138389202199656,10.728156956530041 +2.935937577666836,6.6191651472689506,26.65646221336616 +5.570321523412783,7.8389820910641355,35.5921746760112 +6.643135403273876,4.063868614400706,26.263147746776 +8.140203846660347,1.669729199077039,22.27332869849732 +0.22712073133860478,0.9004786077564175,4.049435844760506 +7.223593505964503,4.618772302513873,29.25055997673402 +1.612717790336018,5.010447751033635,19.169092755956182 +1.5231210271316842,6.96320375077736,24.92642705117019 +4.461562755740307,3.810212260964825,21.177989455239423 +3.015120891478765,6.302825931188885,25.792015051985086 +3.6181261055339045,0.87649919316101,11.078674501753667 +1.1800590212051532,9.618976645495145,32.08830572076834 +9.085806907076071,6.997071338107496,40.05314927062917 +2.6586996145951955,9.69176377347724,35.576381902454244 +7.787509039657946,7.168901891589956,38.3722304710098 +4.493615021437886,2.72241561845159,18.03732023534801 +0.9639096215349929,9.026023965438416,29.969066243706433 +4.557762898336111,2.023633647952303,16.22058229562318 +3.0595662415065252,5.79219568941896,24.668589315710488 +1.767727829392317,8.566142840923755,30.135198473713075 +7.585195298352101,7.194629559509368,37.729751490638094 +4.320930397751037,6.2730884070244315,28.538859774181546 +5.840979689127356,6.4984660155482,32.22083403234598 +0.8444432113988909,4.1580740217060965,15.125492880793061 +0.4161417386189248,4.9399081924451895,16.638625758121815 +3.298612123327853,1.445241888660469,11.795460331800115 +1.0340296772255164,5.87644572177712,20.673579145384927 +1.705929685368861,9.251201183767972,32.138824173040746 +5.8106113970039495,3.4686980453483707,23.05053391901564 +5.9091549148141675,0.22803871029697498,13.446893238637358 +9.585592132414453,4.823034369429003,34.68744122537043 
+7.827352272502862,0.8272999992243857,19.237876124460865 +4.866583308381603,4.907069943545209,25.469919380165678 +9.37826454974983,5.717280523760754,36.94354631162184 +4.734894010569538,2.669756630918936,18.48437344865367 +3.315689973425522,5.206724024715378,23.25156046030632 +4.389114603050467,0.21612079880330426,10.354435799156562 +8.262919241943578,8.961607718397667,44.442311065247495 +1.4024908899861077,5.540361435390494,20.41633742630235 +1.0857574113544355,6.722400930398117,23.54803444479628 +2.812337838390083,6.594226346919018,26.564690207784746 +7.269946142868826,7.68647491917657,38.63790169852301 +1.0774094595589656,9.160118451376078,30.558868552276692 +2.302139908948808,0.3741255617617978,6.615415355984668 +5.548524693914834,3.7092228386243873,23.343832199011715 +8.297897431324131,8.082514720643019,41.869613947092034 +3.1713889282271532,9.52899395069745,35.97777404888581 +2.909178381401186,5.150571292317146,22.09561204105639 +2.5596509056760275,9.360435700489633,34.293352760976774 +1.6460781758201815,0.4491061939232899,5.684916967231668 +4.350970600030379,9.92375564055837,39.362165053294426 +8.916772662549139,7.486080194569492,41.24463342806176 +8.90792490878525,8.934466396978632,45.64562072894205 +5.18858360386449,3.1592905183079303,20.860285442488397 +7.72012432110988,6.616612631677611,36.260869418672236 +3.7365772887371005,0.9446666806151527,11.296805792513064 +7.46789611349026,2.624605159228647,23.78440996684577 +9.368131505337793,2.4097057500568475,26.980636512056375 +1.2275793241148603,8.311126721249062,28.535688009276225 +1.5328431662449404,1.7926830815773909,9.187069733128924 +5.993827915208435,8.745620408374645,39.20083202908983 +1.9643466571457324,3.1032367290009475,14.256054743431552 From 875d0fd79d25ee103169dca319a60945bcb95b2c Mon Sep 17 00:00:00 2001 From: devdklv <155496477+devdklv@users.noreply.github.com> Date: Fri, 11 Oct 2024 00:42:57 -0500 Subject: [PATCH 5/5] Final Update README.md Final Update README.md --- README.md | 12 
+++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index a8cc17a..d1654fd 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,12 @@ # Project 1 # Group Member - Dev Kumar(A20546714) # How to excute the pyhton code for Linear regression with ElasticNet regularization. -Step 1 :- Install pyhton in your system. -Step 2 :- Open terminal and open the directory ~/Project1_ML-CS584-/elasticnet/models -Step 3 :- write the below commmnd in terminal:- -pyhton ElasticNet.py +# Step 1 :- Install python in your system. +# Step 2 :- Open terminal and open the directory ~/Project1_ML-CS584-/ and run the below command +# python generate_regression_data.py -N 100 -m 2 3 -b 1 -scale 0.1 -rnge 0 10 -seed 42 -output_file output.csv +# Step 3 :- Move the output.csv file to ~/Project1_ML-CS584-/elasticnet/models/ +# Step 4 :- Open terminal and open the directory ~/Project1_ML-CS584-/elasticnet/models/ and run the below command in terminal:- +# python ElasticNet.py Put your README here. Answer the following questions. @@ -12,7 +14,7 @@ Put your README here. Answer the following questions. Answer- The ElasticNet model implemented in the python code combines the concepts of L1 (Lasso) and L2 (Ridge) regularization in linear regression, making it effective in scenarios with many predictors and potential multicollinearity. By tuning the l1_ratio parameter, ElasticNet achieves a balance between L1 and L2 penalties, which encourages sparsity by selecting a subset of features while still providing regularization to reduce the risk of overfitting. This approach is particularly useful when there are more predictors than observations or when predictors are correlated, as it stabilizes the estimates and improves interpretability. It is well-suited for situations where feature selection is important while also retaining the regularization benefits of Ridge regression to enhance prediction accuracy. 
* How did you test your model to determine if it is working reasonably correctly? -Answer- To assess the effectiveness of the ElasticNet model, I generated a synthetic dataset that establishes a clear relationship between the predictors and the response variable. After fitting the model to this dataset, I used the predict method to generate predictions. I evaluated the model's performance using the Root Mean Squared Error (RMSE), which quantifies the average difference between the predicted and actual values. A lower RMSE indicates that the model's predictions are closely aligned with the true values, demonstrating strong performance. Additionally, I observed the convergence behavior by examining the stability of the model's coefficients during training and can perform cross-validation to further assess the model's reliability across different data subsets. +Answer- To assess the effectiveness of the ElasticNet model, I used output.csv, which was generated from the generate_regression_data.py file, as the training and testing data. After fitting the model to this dataset (70% training and 30% testing), I used the predict method to generate predictions. I evaluated the model's performance using the Root Mean Squared Error (RMSE), which quantifies the average difference between the predicted and actual values. A lower RMSE indicates that the model's predictions are closely aligned with the true values, demonstrating strong performance. Additionally, I observed the convergence behavior by examining the stability of the model's coefficients during training and can perform cross-validation to further assess the model's reliability across different data subsets. * What parameters have you exposed to users of your implementation in order to tune performance? (Also perhaps provide some basic usage examples.) Answer- The ElasticNet implementation allows users to fine-tune model performance through several adjustable parameters.
These include alpha, which controls the overall strength of the regularization; l1_ratio, which specifies the proportion between L1 and L2 regularization to influence model sparsity; tol, which sets the tolerance level for stopping criteria during training; and max_iter, which determines the maximum number of iterations for optimization. Users can easily configure these parameters by creating an instance of the model like this: model = ElasticNetModel(alpha=0.5, l1_ratio=0.8, tol=1e-4, max_iter=500). After fitting the model to the training data with results = model.fit(X_train, y_train), they can generate predictions using predictions = results.predict(X_test). To evaluate the model's performance, RMSE can be calculated with rmse = calculate_rmse(y_test, predictions). This structure provides flexibility for customizing the model based on the specific characteristics of the dataset and the desired outcomes.