You are on page 1 of 5

Handwritten Digit Recognition with Logistic Regression

Roll Number: K20SW002, K20SW016, K20SW017 | Subject: Data Science & Analytics (Pr)
Task:
Step 1: Load and explore the MNIST dataset
from sklearn.datasets import fetch_openml
import matplotlib.pyplot as plt

import pandas as pd

Step 2: Preprocess the data


# Load the MNIST dataset from OpenML.
# `version=1` pins the dataset revision, and `as_frame=True` guarantees that
# `.data` / `.target` are pandas objects, which the `.values` calls below need.
mnist = fetch_openml('mnist_784', version=1, as_frame=True)

# Convert the pandas objects to NumPy arrays: pixel values are divided by 255
# and stored as float32, labels are cast to integers.
X = mnist.data.values.astype('float32') / 255.0
y = mnist.target.values.astype('int')

# Display the first few images and their labels.
n_preview = 4
fig, axes = plt.subplots(1, n_preview, figsize=(10, 3))
# zip() stops at the shortest input (the axes), so only the first
# `n_preview` samples are drawn.
for ax, pixels, label in zip(axes, X, y):
    # Each row is reshaped to a 28x28 image for display.
    ax.imshow(pixels.reshape(28, 28), cmap='gray')
    ax.set_title(f"Label: {label}")
    ax.axis('off')

plt.show()

Output:
Step 4: Build a logistic regression model
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Rebuild the feature matrix and labels from the loaded dataset so this step
# can run on its own: pixels divided by 255 as float32, labels as integers.
X = mnist.data.values.astype('float32') / 255.0
y = mnist.target.values.astype('int')

# Flatten the images: one row per sample, all remaining dimensions collapsed.
X_flattened = X.reshape((X.shape[0], -1))

# Split the data into training and testing sets BEFORE standardizing.
# Fitting the scaler on the full dataset would let test-set statistics leak
# into the training features. The row split is unchanged because the same
# `random_state` is used.
X_train, X_test, y_train, y_test = train_test_split(
    X_flattened, y, test_size=0.2, random_state=42
)

# Standardize the pixel values: learn mean/variance from the training split
# only, then apply that same transformation to the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

Step 6: Visualize predicted and actual labels


from sklearn.linear_model import LogisticRegression

# Build the logistic-regression classifier and fit it on the training split
# in one expression; `fit` returns the estimator itself.
# NOTE(review): max_iter=100 may be too few iterations for the solver to
# converge on this data -- check for a ConvergenceWarning when running.
model = LogisticRegression(max_iter=100).fit(X_train, y_train)

# Score the fitted model on the held-out test split and report it as a
# percentage with two decimals.
accuracy = model.score(X_test, y_test)
print(f"Accuracy on the test set: {accuracy:.2%}")

Output:

import numpy as np

# Predictions on the test set
y_pred = model.predict(X_test)

# Visualize a few test images with their predicted and actual labels.
n_preview = 4

# X_test holds standardized features, not pixel intensities; undo the scaling
# so the plotted images show the digits rather than the standardized values.
preview_images = scaler.inverse_transform(X_test[:n_preview])

fig, axes = plt.subplots(1, n_preview, figsize=(10, 3))
# zip() stops at the shortest input, so only `n_preview` samples are drawn.
for ax, pixels, actual, predicted in zip(axes, preview_images, y_test, y_pred):
    ax.imshow(pixels.reshape(28, 28), cmap='gray')
    ax.set_title(f"Actual: {actual}, Predicted: {predicted}")
    ax.axis('off')

plt.show()

Output:

Step 7: Capture and predict your own handwritten digit


import cv2

# Read the image as a single-channel grayscale array.
image = cv2.imread('/content/WhatsApp Image 2023-10-12 at 9.28.47 AM.jpg',
                   cv2.IMREAD_GRAYSCALE)

# cv2.imread signals failure by returning None rather than raising, so the
# result is checked before anything else touches it.
if image is None:
    print("Error: Unable to load the image.")
else:
    # Resize the image to 28x28 so it reshapes to the same 28x28 layout that
    # the preview code uses for the training samples.
    image = cv2.resize(image, (28, 28))

    # Flatten to a single row and divide by 255, mirroring the preprocessing
    # applied to the training data, then reuse the already-fitted scaler.
    # NOTE(review): MNIST digits are light strokes on a dark background; a
    # photo of ink on paper may need inverting first -- confirm with the image.
    image_flattened = image.reshape(1, -1).astype('float32') / 255.0
    image_flattened_scaled = scaler.transform(image_flattened)

    # Predict the digit; `prediction` is an array with one entry.
    prediction = model.predict(image_flattened_scaled)
    print(f"Predicted digit: {prediction}")
Given Image:

Output:

THE END

You might also like