# Professional Documents
# Culture Documents
# Bar chart: how many columns the DataFrame has of each dtype.
dtype_counts = df.dtypes.value_counts()
dtype_counts.plot(
    kind="bar",
    title="Types of Data",
    xlabel="Data Type",
    ylabel="No.of columns",
    rot=0,
    color=["crimson", "orange"],
)
plt.show()
# Segregating columns by dtype: object ("O") columns are treated as
# categorical, everything else as numeric.
cat_cols = [col for col in df.columns if df[col].dtype == "O"]
num_cols = [col for col in df.columns if df[col].dtype != "O"]
# Per-column percentage of "?" placeholder values (this dataset marks
# missing entries with the string "?").
null_freq = []
for col in df.columns:
    counts = dict(df[col].value_counts())
    if "?" in counts:
        null_freq.append(counts["?"] * 100 / len(df))
    else:
        null_freq.append(0)
# Only 2% of the rows have null values and hence the rows can be dropped
# directly instead of applying any imputation techniques.
# Bar chart of the per-column missing-value percentages computed above.
missing_pct = pd.Series(dict(zip(df.columns, null_freq)))
missing_pct.plot(
    kind="bar",
    rot=0,
    title="Missing Value Frequency",
    xlabel="Column Name",
    ylabel="Percentage of missing values",
    color=["orange", "crimson"],
)
plt.show()
# Pre-processing: turn the "?" placeholders into None, drop the (~2% of)
# rows containing them, and rebuild a contiguous index.  NOTE: plain
# reset_index() keeps the old index as a new "index" column, which the
# later cells drop explicitly — do not change to drop=True.
df = df.replace({"?": None}).dropna().reset_index()
# Train-Test Split: 90/10 split with a fixed seed; "a16" is the target
# column and is popped out of both feature frames after splitting.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    df,
    df["a16"],
    test_size=0.10,
    train_size=0.90,
    random_state=0,
)
X_train.pop("a16")
X_test.pop("a16")
X_train
# Perceptron
class Perceptron:
    """Single-layer perceptron trained with the classic perceptron rule.

    NOTE(review): only two methods are visible in this chunk.  Later
    cells call ``Perceptron(X, y, 0.5, i)`` and ``p.test(...)``, so an
    ``__init__`` (setting ``self.X``, ``self.y``, ``self.learning_rate``,
    ``self.epochs``) and a ``test`` method presumably exist but were
    lost in extraction — confirm against the original notebook.
    """

    # Activation function.
    def __activation_function(self, x):
        # Unit step: fire (1.0) only for strictly positive input.
        return 1.0 if (x > 0) else 0.0

    # The model training or fitting by updating weights.
    def fit(self):
        n_rows, n_cols = self.X.shape
        # One weight per feature plus a bias term at index 0.
        self.weights = np.zeros((n_cols + 1, 1))
        for epoch in range(self.epochs):
            for index, features in enumerate(self.X.values):
                # Prepend a constant 1 so weights[0] acts as the bias.
                feature_transposed = np.insert(features, 0, 1).reshape(-1, 1)
                predicted_target = self.__activation_function(np.dot(feature_transposed.T, self.weights))
                # Non-zero flag means the sample was misclassified.
                flag = np.squeeze(predicted_target) - self.y[index]
                if flag != 0:
                    # Perceptron update: w += lr * (target - prediction) * x
                    self.weights += self.learning_rate * ((self.y[index] - predicted_target) * feature_transposed)
# Train the perceptron, then score it on held-out and training data.
# NOTE: p, X, y, X_test and y_test come from earlier notebook cells.
p.fit()
# Test Accuracy
X_test_clean = pd.DataFrame(X_test).reset_index().drop(["index"], axis=1)
y_test_clean = np.array(pd.DataFrame(y_test).reset_index().drop(["index"], axis=1))
p.test(X_test_clean, y_test_clean)
# Train Accuracy
p.test(X, y)
# Tuning the number of epochs: retrain for 20..180 epochs (step 20) and
# record train/test accuracy at each setting.
train_acc = []
test_acc = []
epochs = []
for i in range(20, 200, 20):
    # Re-index the split frames so positional access in fit()/test() works.
    y = np.array(pd.DataFrame(y_train).reset_index().drop(["index"], axis=1))
    X = pd.DataFrame(X_train).reset_index().drop(["index"], axis=1)
    # features X, target y, learning rate 0.5, i is the epoch count
    p = Perceptron(X, y, 0.5, i)
    p.fit()
    train_acc.append(p.test(X, y))
    test_acc.append(
        p.test(
            pd.DataFrame(X_test).reset_index().drop(["index"], axis=1),
            np.array(pd.DataFrame(y_test).reset_index().drop(["index"], axis=1)),
        )
    )
    epochs.append(i)
# Plot train vs test accuracy as a function of the epoch count.
fig = plt.figure(figsize=(10, 10))
plt.plot(epochs, train_acc)
plt.plot(epochs, test_acc)
plt.xlabel("Number of Epochs")
plt.ylabel("Accuracy of the model")
plt.legend(["Train Accuracy", "Test Accuracy"])
plt.show()
# Collect the tuning results into a single DataFrame, one row per
# epoch setting.
tuning = pd.DataFrame(
    {
        "epochs": np.array(epochs).squeeze(),
        "train accuracy": np.array(train_acc).squeeze(),
        "test accuracy": np.array(test_acc).squeeze(),
    }
)
def diff(row):
    """Return the train-minus-test accuracy gap for one tuning row.

    Uses positional access via ``iloc`` (column 1 = train accuracy,
    column 2 = test accuracy).  Plain ``row[1]`` on a string-labelled
    Series relied on the deprecated integer-as-label positional
    fallback, which is removed in pandas 3.0.
    """
    return row.iloc[1] - row.iloc[2]
# Per-row gap between train and test accuracy, via the diff helper.
tuning["Difference"] = tuning.apply(diff,axis=1)
# Sort ascending so the smallest train/test gap comes first.
# NOTE(review): the sorted frame is not assigned back — in a notebook
# it is only displayed.
tuning.sort_values(by="Difference")
# Task 2: Titanic Dataset Perceptron
# Load the Titanic train/test CSVs (Colab paths) and take a first look.
train_df = pd.read_csv('/content/train.csv')
test_df = pd.read_csv('/content/test.csv')
# Keep both frames in one list so feature engineering can touch each.
combine = [train_df, test_df]
print(train_df.columns.values)
# Notebook-style display cells: structure and summary stats of both frames.
train_df.head()
train_df.tail()
train_df.info()
print('_'*40)
test_df.info()
train_df.describe()
# include=['O'] summarises the object (string) columns only.
train_df.describe(include=['O'])
# Mean survival rate per passenger class, sex, sibling/spouse count and
# parent/child count, each sorted by survival rate (display-only cells).
train_df[['Pclass', 'Survived']].groupby(['Pclass'], as_index=False).mean().sort_values(by='Survived', ascending=False)
train_df[["Sex", "Survived"]].groupby(['Sex'], as_index=False).mean().sort_values(by='Survived', ascending=False)
train_df[["SibSp", "Survived"]].groupby(['SibSp'], as_index=False).mean().sort_values(by='Survived', ascending=False)
train_df[["Parch", "Survived"]].groupby(['Parch'], as_index=False).mean().sort_values(by='Survived', ascending=False)
# Age histograms, one facet per Survived value (0/1).
g = sns.FacetGrid(train_df, col='Survived')
g.map(plt.hist, 'Age', bins=20)
# NOTE(review): the 'Title' column is used here but its creation
# (presumably extracted from 'Name') is not visible in this chunk —
# likely lost in extraction; confirm against the original notebook.
pd.crosstab(train_df['Title'], train_df['Sex'])
train_df.head()
# Drop identifier-like columns with no predictive signal; PassengerId is
# kept in test_df (needed for submission files).
train_df = train_df.drop(['Name', 'PassengerId'], axis=1)
test_df = test_df.drop(['Name'], axis=1)
combine = [train_df, test_df]
train_df.shape, test_df.shape
train_df.head()
# Placeholder for median-age guesses: 2 sexes x 3 passenger classes.
guess_ages = np.zeros((2,3))
guess_ages
# NOTE(review): 'guess_df' and 'dataset' are undefined in this chunk —
# the loop over `combine` that builds them appears to be missing; these
# two lines look like fragments of that loop.
age_guess = guess_df.median()
dataset['Age'] = dataset['Age'].astype(int)
train_df.head()
# Bin Age into 5 equal-width bands and inspect survival per band.
train_df['AgeBand'] = pd.cut(train_df['Age'], 5)
train_df[['AgeBand', 'Survived']].groupby(['AgeBand'], as_index=False).mean().sort_values(by='AgeBand', ascending=True)
# NOTE(review): 'FamilySize' and 'IsAlone' are referenced before any
# visible code creates them — presumably derived from SibSp/Parch in a
# missing cell; confirm against the original notebook.
train_df[['FamilySize', 'Survived']].groupby(['FamilySize'], as_index=False).mean().sort_values(by='Survived', ascending=False)
train_df[['IsAlone', 'Survived']].groupby(['IsAlone'], as_index=False).mean()
train_df.head()
# Most common embarkation port, to be used for filling missing 'Embarked'.
freq_port = train_df.Embarked.dropna().mode()[0]
freq_port
train_df.head()
# Fill the missing test-set fare(s) with the median fare.  Assign the
# result back instead of `fillna(..., inplace=True)` on the column:
# chained inplace fillna is deprecated and unreliable under pandas
# copy-on-write (it may silently fail to modify test_df).
test_df['Fare'] = test_df['Fare'].fillna(test_df['Fare'].dropna().median())
test_df.head()
# Bin Fare into 4 quantile-based bands and inspect survival per band.
train_df['FareBand'] = pd.qcut(train_df['Fare'], 4)
train_df[['FareBand', 'Survived']].groupby(['FareBand'], as_index=False).mean().sort_values(by='FareBand', ascending=True)
train_df.head(10)
test_df.head(10)
# Perceptron
# NOTE(review): Perceptron() is called with no arguments and used via
# fit(X, y)/predict/score — the scikit-learn estimator API, not the
# custom Perceptron class defined earlier (whose fit() takes no data
# arguments).  A `from sklearn.linear_model import Perceptron` import
# is presumably missing from this chunk; X_train/Y_train/X_test for the
# Titanic data are also not built in the visible code — confirm.
perceptron = Perceptron()
perceptron.fit(X_train, Y_train)
Y_pred = perceptron.predict(X_test)
# Training-set accuracy as a percentage, rounded to 2 decimal places.
acc_perceptron = round(perceptron.score(X_train, Y_train) * 100, 2)
acc_perceptron
# Observation:
# 1. CRX Dataset:
# 2. Titanic Dataset:
# Learning:
# B.4 Conclusion:
# 1. Titanic Dataset:
# 2. Further Investigation:
# 3. Iterative Process: