# Logistic---Regr2.py — logistic-regression classifier for MNIST digits.
# (GitHub page-scrape header removed; it was UI text, not Python.)
# (Scraped line-number gutter 1-95 removed; it was page residue, not code.)
# # Import necessary libraries
# import numpy as np
# import matplotlib.pyplot as plt
# from sklearn.datasets import fetch_openml
# from sklearn.model_selection import train_test_split
# from sklearn.linear_model import LogisticRegression
# from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
# # Load the MNIST dataset
# print("Loading the MNIST dataset...")
# mnist = fetch_openml('mnist_784', version=1)
# X, y = mnist.data, mnist.target
# # Convert labels to integers
# y = y.astype(int)
# # Display dataset information
# print(f"Shape of dataset: {X.shape}")
# print(f"Sample labels: {np.unique(y)}")
# # Split the dataset into training and testing sets
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# # Normalize the data (scale pixel values between 0 and 1)
# X_train = X_train / 255.0
# X_test = X_test / 255.0
# # Initialize and train the Logistic Regression model
# print("Training the Logistic Regression model...")
# log_reg = LogisticRegression(max_iter=1000, solver='lbfgs')
# log_reg.fit(X_train, y_train)
# # Make predictions on the test set
# print("Making predictions...")
# y_pred = log_reg.predict(X_test)
# # Evaluate the model
# accuracy = accuracy_score(y_test, y_pred)
# print(f"Accuracy: {accuracy * 100:.2f}%")
# print("\nClassification Report:")
# print(classification_report(y_test, y_pred))
# # Display confusion matrix
# conf_matrix = confusion_matrix(y_test, y_pred)
# print("\nConfusion Matrix:")
# print(conf_matrix)
# # Visualize a few test samples and their predictions
# print("Displaying some test samples with predictions...")
# n_samples = 5
# indices = np.random.choice(len(X_test), n_samples, replace=False)
# plt.figure(figsize=(10, 5))
# for i, idx in enumerate(indices):
# plt.subplot(1, n_samples, i + 1)
# plt.imshow(X_test.iloc[idx].values.reshape(28, 28), cmap='gray')
# plt.title(f"Pred: {y_pred[idx]}")
# plt.axis('off')
# plt.show()
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
def _load_data():
    """Fetch MNIST (70k flattened 28x28 grayscale digits) and return (X, y).

    Returns the features as a pandas DataFrame and the labels as an int
    Series. as_frame=True is made explicit because the plotting code below
    relies on DataFrame .iloc access — don't depend on fetch_openml's
    'auto' default.
    """
    print("Loading MNIST...")
    X, y = fetch_openml('mnist_784', version=1, return_X_y=True, as_frame=True)
    # OpenML delivers the labels as strings ('0'..'9'); convert once here.
    return X, y.astype(int)


def _show_predictions(X_test, y_pred, n_samples=5):
    """Display the first n_samples test digits with their predicted labels."""
    print("Sample predictions:")
    # Start a fresh figure so repeated runs don't stack subplots onto
    # whatever figure happens to be current.
    plt.figure(figsize=(2 * n_samples, 2))
    for i in range(n_samples):
        plt.subplot(1, n_samples, i + 1)
        # Each row is a flat 784-vector; reshape back to 28x28 for imshow.
        plt.imshow(X_test.iloc[i].values.reshape(28, 28), cmap='gray')
        plt.title("Pred: %s" % y_pred[i])
        plt.axis('off')
    plt.show()


def main():
    """Train a logistic-regression digit classifier on MNIST and report accuracy."""
    X, y = _load_data()

    # Hold out 20% for testing; fixed seed for reproducibility.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42)

    # Scale pixel values from [0, 255] to [0, 1]. Reassignment (not /=)
    # avoids mutating views of the split DataFrames in place, which can
    # trip pandas' SettingWithCopy machinery.
    X_train = X_train / 255.0
    X_test = X_test / 255.0

    print("Training model...")
    model = LogisticRegression(max_iter=1000, solver='lbfgs')
    model.fit(X_train, y_train)

    # Predict on the held-out set and report overall accuracy.
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    print(f"Accuracy: {acc * 100:.2f}%")

    _show_predictions(X_test, y_pred)


if __name__ == "__main__":
    main()