
Ques 1: Write a program to extract data from a dataset using Python. Use the
head(), tail(), and info() commands on the imported data. Create a heat matrix
(correlation heatmap) and a pairplot for the imported dataset.
Code:
import pandas as pd
df = pd.read_csv("titanic_train.csv")
df.head()
Output:

df.tail()

df.info()
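Alongside info(), it is often useful to count missing values per column before plotting (in the Kaggle Titanic training set these are typically Age, Cabin, and Embarked); a minimal check:

# Count missing values in each column
df.isnull().sum()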

Heat matrix
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
df = pd.read_csv("titanic_train.csv")
# Exclude non-numeric columns
numeric_columns = df.select_dtypes(include=['number']).columns
numeric_df = df[numeric_columns]
# Create a heatmap
plt.figure(figsize=(12, 8))
heatmap_data = numeric_df.corr()
sns.heatmap(heatmap_data, annot=True, cmap='coolwarm', fmt=".2f")
plt.title('Heatmap for Titanic Dataset')
plt.show()
Output:

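A note on the heat matrix above: numeric_df.corr() computes, by default, the pairwise Pearson correlation coefficient (a value between -1 and 1) for every pair of numeric columns, and the heatmap simply colour-codes that matrix. A single coefficient can also be read directly from it, for example:

# Correlation between Pclass and Fare (expected to be negative, since
# lower Pclass numbers correspond to more expensive classes)
print(heatmap_data.loc['Pclass', 'Fare'])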
pair plots
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

df = pd.read_csv("titanic_train.csv")

# Filling missing values in the 'Age' column with the mean value
# (assignment is used instead of inplace=True, which is deprecated for
# this chained pattern in recent pandas versions)
df['Age'] = df['Age'].fillna(df['Age'].mean())

# Create a pairplot
sns.pairplot(df, hue='Survived', markers=["o", "s"])

# Show the plot
plt.show()

Output:

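On the full data frame the pairplot can be slow and cluttered; seaborn's vars parameter restricts it to a chosen subset of numeric columns. A minimal sketch, using an illustrative subset of three columns:

# Pairplot over a subset of columns, still coloured by survival
sns.pairplot(df, vars=['Age', 'Fare', 'Pclass'], hue='Survived', markers=["o", "s"])
plt.show()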
Ques 2: Write a program to implement linear and logistic regression.
Code:
Linear regression
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.impute import SimpleImputer
import numpy as np

df = pd.read_csv("titanic_train.csv")
# Handling missing values in the 'Age' column using SimpleImputer
imputer = SimpleImputer(strategy='mean')
df['Age'] = imputer.fit_transform(df[['Age']])
# Selecting the features and target variable
X = df[['Age']].values
y = df['Fare'].values
# Splitting the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Creating a linear regression model
model = LinearRegression()
# Training the model
model.fit(X_train, y_train)
# Making predictions on the test set
y_pred = model.predict(X_test)
# Evaluating the model
mse = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error: {mse}')
# Predicting Fare for a new Age

new_age = np.array([[25]]) # Replace 25 with the desired age
predicted_fare = model.predict(new_age)
print(f'Predicted Fare for Age {new_age[0, 0]}: {predicted_fare[0]}')
# Plotting the linear regression line
plt.scatter(X_test, y_test, color='blue', label='Actual Fare')
plt.plot(X_test, y_pred, color='red', linewidth=3, label='Linear Regression Line')
plt.scatter(new_age, predicted_fare, color='green', marker='*', s=200,
            label=f'Predicted Fare for Age {new_age[0, 0]}')
plt.title('Linear Regression Model')
plt.xlabel('Age')
plt.ylabel('Fare')
plt.legend()
plt.show()
Output:

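The mean squared error printed above is MSE = (1/n) * Σ (y_i − ŷ_i)², which is in squared fare units; its square root (RMSE) is easier to interpret. A small follow-up, reusing the mse variable from the code above:

# RMSE is in the same units as Fare, so it reads as a typical prediction error
rmse = np.sqrt(mse)
print(f'Root Mean Squared Error: {rmse:.2f}')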
Logistic Regression
Code:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, f1_score, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

# Read the CSV data
df = pd.read_csv("titanic_train.csv")

# Drop columns that are not needed for modeling
df = df.drop(['PassengerId', 'Name', 'Ticket', 'Cabin'], axis=1)

# Convert categorical variables to numerical
df['Sex'] = df['Sex'].map({'male': 0, 'female': 1})
df['Embarked'] = df['Embarked'].map({'S': 0, 'C': 1, 'Q': 2})

# Fill missing values in 'Age' with the median
df['Age'] = df['Age'].fillna(df['Age'].median())

# Fill missing values in 'Embarked' with the most common value
df['Embarked'] = df['Embarked'].fillna(df['Embarked'].mode()[0])

# Split the data into features (X) and target variable (y)
X = df.drop('Survived', axis=1)
y = df['Survived']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create and train the logistic regression model
# (max_iter raised from the default to avoid convergence warnings)
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test)

# Calculate evaluation metrics
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)

# Print the metrics
print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"F1 Score: {f1:.2f}")
print(f"Confusion Matrix:\n{conf_matrix}")

# Plot the confusion matrix
plt.figure(figsize=(6, 6))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', cbar=False,
            xticklabels=['Not Survived', 'Survived'],
            yticklabels=['Not Survived', 'Survived'])
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title('Confusion Matrix')
plt.show()

Output:

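A note on the metrics above: scikit-learn's confusion matrix puts actual classes on the rows and predicted classes on the columns, so for this binary problem it has the layout [[TN, FP], [FN, TP]]. The printed scores follow directly from those counts: accuracy = (TP + TN) / total, precision = TP / (TP + FP), recall = TP / (TP + FN), and F1 = 2 * precision * recall / (precision + recall). The counts can be unpacked for a manual sanity check:

# Recompute precision by hand from the confusion matrix
tn, fp, fn, tp = conf_matrix.ravel()
print(f"Manual precision: {tp / (tp + fp):.2f}")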
Ques 3: Write a program to implement the naïve Bayesian classifier for a sample
training data set stored as a CSV file. Compute the accuracy of the classifier
on a few test data sets.
Code:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

df = pd.read_csv("titanic_train.csv")

# Preprocess the data
df = df[['Survived', 'Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked']]
df['Sex'] = df['Sex'].map({'male': 0, 'female': 1})
df['Embarked'] = df['Embarked'].map({'S': 0, 'C': 1, 'Q': 2})
df['Age'] = df['Age'].fillna(df['Age'].median())
df['Embarked'] = df['Embarked'].fillna(df['Embarked'].mode()[0])

# Split the data into features and target
X = df.drop('Survived', axis=1)
y = df['Survived']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create and train the Naive Bayes classifier
nb_classifier = GaussianNB()
nb_classifier.fit(X_train, y_train)

# Make predictions on the test set
y_pred = nb_classifier.predict(X_test)

# Evaluate the classifier
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
classification_rep = classification_report(y_test, y_pred)

print(f'Accuracy: {accuracy}')
print(f'Confusion Matrix:\n{conf_matrix}')
print(f'Classification Report:\n{classification_rep}')

Output:

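GaussianNB models each feature, per class, as a Gaussian: it learns a class prior plus a mean and a variance for every feature. After fitting, these parameters can be inspected (attribute names as in recent scikit-learn versions, where var_ replaced the older sigma_):

# Learned parameters of the Gaussian Naive Bayes model
print(nb_classifier.class_prior_)  # P(Survived=0), P(Survived=1)
print(nb_classifier.theta_)        # per-class mean of each feature
print(nb_classifier.var_)          # per-class variance of each feature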
Ques 4: Write a program to implement the k-nearest neighbors (KNN) and Support
Vector Machine (SVM) algorithms for classification.
Code:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report

# Load the kyphosis dataset
df = pd.read_csv("kyphosis.csv")

# Display the first few rows of the dataset
print("Dataset Preview:")
print(df.head())

# Separate features (X) and target variable (y)
X = df.drop("Kyphosis", axis=1)
y = df["Kyphosis"]

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the features (important for SVM, and also for KNN,
# which is distance-based)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# K-nearest neighbors (KNN) algorithm, fitted on the scaled features
knn_model = KNeighborsClassifier(n_neighbors=3)
knn_model.fit(X_train_scaled, y_train)

# Predictions using KNN
knn_predictions = knn_model.predict(X_test_scaled)

# SVM algorithm
svm_model = SVC(kernel='linear')
svm_model.fit(X_train_scaled, y_train)

# Predictions using SVM
svm_predictions = svm_model.predict(X_test_scaled)

# Evaluate the models
print("\nKNN Accuracy:", accuracy_score(y_test, knn_predictions))
print("\nClassification Report for KNN:")
print(classification_report(y_test, knn_predictions))

print("\nSVM Accuracy:", accuracy_score(y_test, svm_predictions))


print("\nClassification Report for SVM:")
print(classification_report(y_test, svm_predictions))

Output:

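The choice n_neighbors=3 above is arbitrary; a simple way to pick k is cross-validated accuracy on the training split. A minimal sketch, reusing the scaled training data from the code above:

from sklearn.model_selection import cross_val_score

# 5-fold cross-validated accuracy for a few candidate values of k
for k in [1, 3, 5, 7, 9]:
    scores = cross_val_score(KNeighborsClassifier(n_neighbors=k), X_train_scaled, y_train, cv=5)
    print(f"k={k}: mean CV accuracy = {scores.mean():.3f}")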
Ques 5: Implement classification of a given dataset using random forest and decision
tree.
Code:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

df = pd.read_csv("kyphosis.csv")

# Display the first few rows of the dataset
print(df.head())

# Split the data into features (X) and target variable (y)
X = df.drop('Kyphosis', axis=1)
y = df['Kyphosis']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Decision Tree Classifier
dt_classifier = DecisionTreeClassifier(random_state=42)
dt_classifier.fit(X_train, y_train)

# Predictions on the test set
dt_predictions = dt_classifier.predict(X_test)

# Evaluate Decision Tree
print("\nDecision Tree Classifier:")
print("Accuracy:", accuracy_score(y_test, dt_predictions))

print("Classification Report:")
print(classification_report(y_test, dt_predictions))

# Random Forest Classifier
rf_classifier = RandomForestClassifier(random_state=42)
rf_classifier.fit(X_train, y_train)

# Predictions on the test set
rf_predictions = rf_classifier.predict(X_test)

# Evaluate Random Forest
print("\nRandom Forest Classifier:")
print("Accuracy:", accuracy_score(y_test, rf_predictions))
print("Classification Report:")
print(classification_report(y_test, rf_predictions))

Output:

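One advantage of the random forest is that it exposes impurity-based feature importances after fitting. A short sketch using the rf_classifier trained above (the kyphosis features are assumed to be the usual Age, Number, and Start columns):

# Rank the features by impurity-based importance
importances = pd.Series(rf_classifier.feature_importances_, index=X.columns)
print(importances.sort_values(ascending=False))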
Ques 6: Build an Artificial Neural Network (ANN) by implementing the
backpropagation algorithm, and test it using appropriate data sets.
Code:
import numpy as np

# Sigmoid activation function and its derivative
# (when derivative=True, x is expected to already be a sigmoid output)
def sigmoid(x, derivative=False):
    if derivative:
        return x * (1 - x)
    return 1 / (1 + np.exp(-x))

# Input data for the XOR problem
X = np.array([[0, 0],
              [0, 1],
              [1, 0],
              [1, 1]])

# Target labels for XOR
y = np.array([[0],
              [1],
              [1],
              [0]])

# Set random seed for reproducibility
np.random.seed(42)

# Neural network architecture
input_layer_size = 2
hidden_layer_size = 4
output_layer_size = 1

# Initialize weights randomly in [-1, 1) (this simple network omits bias terms)
weights_input_hidden = 2 * np.random.random((input_layer_size, hidden_layer_size)) - 1
weights_hidden_output = 2 * np.random.random((hidden_layer_size, output_layer_size)) - 1

# Training parameters
learning_rate = 0.5
epochs = 10000
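
The loop below implements the delta rule form of backpropagation: each layer's error term is delta = error * s'(output), where s' is the sigmoid derivative, and each weight matrix is then updated as W += learning_rate * (layer input)^T . delta. With a squared-error loss this is plain gradient descent on the two weight matrices.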

# Training the neural network using backpropagation
for epoch in range(epochs):
    # Forward pass
    hidden_layer_input = np.dot(X, weights_input_hidden)
    hidden_layer_output = sigmoid(hidden_layer_input)

    output_layer_input = np.dot(hidden_layer_output, weights_hidden_output)
    predicted_output = sigmoid(output_layer_input)

    # Calculate the error
    error = y - predicted_output

    # Backpropagation
    output_error_term = error * sigmoid(predicted_output, derivative=True)
    hidden_error = output_error_term.dot(weights_hidden_output.T)
    hidden_error_term = hidden_error * sigmoid(hidden_layer_output, derivative=True)

    # Update weights
    weights_hidden_output += hidden_layer_output.T.dot(output_error_term) * learning_rate
    weights_input_hidden += X.T.dot(hidden_error_term) * learning_rate

# Test the trained neural network
test_data = np.array([[0, 0],
                      [0, 1],
                      [1, 0],
                      [1, 1]])

hidden_test = sigmoid(test_data.dot(weights_input_hidden))
predicted_output_test = sigmoid(hidden_test.dot(weights_hidden_output))

print("Predicted Output after Training:")


print(predicted_output_test)
Output:

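Because the network outputs sigmoid probabilities, a natural final check is to threshold them at 0.5 and compare with the XOR targets:

# Convert probabilities to hard 0/1 labels and compare with the targets
predicted_labels = (predicted_output_test > 0.5).astype(int)
print("Thresholded predictions:", predicted_labels.ravel())
print("XOR targets:            ", y.ravel())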
