Professional Documents
Culture Documents
Lab 7 Questions
Lab 7 Questions
#RollNo : 222CD017
#ML LAB 7
def pre_processing(df):
    """Split a DataFrame into a feature table and a target column.

    The last column of ``df`` is treated as the target; all remaining
    columns are treated as features.

    Args:
        df: pandas DataFrame whose final column holds the class labels.

    Returns:
        A tuple ``(X, y)`` where ``X`` is ``df`` without its last column
        and ``y`` is the last column.
    """
    target_column = df.columns[-1]
    X = df.drop([target_column], axis=1)
    y = df[target_column]
    return X, y
class NaiveBayes:
    """Naive Bayes classifier for categorical (string-valued) features.

    All probabilities are plain relative frequencies computed from the
    training data:

    * ``class_priors[outcome]``            -- P(outcome)
    * ``likelihoods[feature][val_outcome]`` -- P(feature == val | outcome),
      keyed by the string ``val + '_' + outcome``
    * ``pred_priors[feature][val]``        -- P(feature == val)

    Feature values and class labels must be strings, because the
    likelihood keys are built by string concatenation.
    """

    def __init__(self):
        # Placeholders only; every attribute is populated by fit().
        self.features = []
        self.likelihoods = {}
        self.class_priors = {}
        self.pred_priors = {}
        self.X_train = None
        self.y_train = None
        self.train_size = 0
        self.num_feats = 0

    def fit(self, X, y):
        """Estimate priors and likelihoods from the training data.

        Args:
            X: pandas DataFrame of categorical features (string values).
            y: class labels (strings), one per row of ``X``.
        """
        self.features = list(X.columns)
        self.X_train = X
        self.y_train = y
        self.train_size = X.shape[0]
        self.num_feats = X.shape[1]

        # Start from empty tables so that re-fitting does not keep
        # entries left over from an earlier call.
        self.likelihoods = {}
        self.class_priors = {}
        self.pred_priors = {}

        outcomes = np.unique(self.y_train)
        for feature in self.features:
            self.likelihoods[feature] = {}
            self.pred_priors[feature] = {}
            for feat_val in np.unique(self.X_train[feature]):
                self.pred_priors[feature].update({feat_val: 0})
                for outcome in outcomes:
                    # Pre-seed every (value, outcome) pair with 0 so that
                    # combinations never seen together still have a key.
                    self.likelihoods[feature].update({feat_val+'_'+outcome: 0})
                    self.class_priors.update({outcome: 0})

        self._calc_class_prior()
        self._calc_likelihoods()
        self._calc_predictor_prior()

    def _calc_class_prior(self):
        """Fill ``class_priors`` with P(outcome) for each class label."""
        labels = np.asarray(self.y_train)
        for outcome in np.unique(labels):
            outcome_count = (labels == outcome).sum()
            self.class_priors[outcome] = outcome_count / self.train_size

    def _calc_likelihoods(self):
        """Fill ``likelihoods`` with P(feature value | outcome)."""
        labels = np.asarray(self.y_train)
        for feature in self.features:
            for outcome in np.unique(labels):
                # Positional boolean mask: does not rely on X and y
                # sharing the same index labels.
                in_class = labels == outcome
                outcome_count = in_class.sum()
                counts = self.X_train[feature][in_class].value_counts().to_dict()
                for feat_val, count in counts.items():
                    key = feat_val + '_' + outcome
                    self.likelihoods[feature][key] = count / outcome_count

    def _calc_predictor_prior(self):
        """Fill ``pred_priors`` with P(feature value) for every feature."""
        for feature in self.features:
            counts = self.X_train[feature].value_counts().to_dict()
            for feat_val, count in counts.items():
                self.pred_priors[feature][feat_val] = count / self.train_size

    def predict(self, X):
        """Predict the most probable class for each query row.

        Args:
            X: 2-D array-like; each row lists one value per feature, in
                the same order as the columns passed to ``fit``.

        Returns:
            numpy array with one predicted class label per row.

        Raises:
            KeyError: if a row contains a feature value that was not
                present in the training data.
        """
        results = []
        X = np.array(X)
        for query in X:
            probs_outcome = {}
            for outcome in np.unique(self.y_train):
                prior = self.class_priors[outcome]
                likelihood = 1
                evidence = 1
                for feature, feat_val in zip(self.features, query):
                    likelihood *= self.likelihoods[feature][feat_val + '_' + outcome]
                    evidence *= self.pred_priors[feature][feat_val]
                posterior = (likelihood * prior) / evidence
                probs_outcome[outcome] = posterior
            # On ties the first label in sorted (np.unique) order wins.
            results.append(max(probs_outcome, key=probs_outcome.get))
        return np.array(results)
if __name__ == "__main__":
    #Weather Dataset
    print("\ndataset:")
    # NOTE(review): `dataset` is not defined in the visible part of this
    # file; it has to be supplied before this line for the script to run.
    df = pd.DataFrame(dataset)
    #print(df)

    # fit() needs the feature table and the label column separately;
    # the last column of df is taken as the label.
    X, y = pre_processing(df)

    nb_clf = NaiveBayes()
    nb_clf.fit(X, y)

    # Each query is a single row of feature values, in column order.
    query_rows = [
        ['Salty', 'Hot', 'Soft'],
        ['Spicy', 'Hot', 'Soft'],
        ['Salty', 'Hot', 'Hard'],
    ]
    for number, row in enumerate(query_rows, start=1):
        query = np.array([row])
        print("Query {}:- {} ---> {}".format(number, query,
                                             nb_clf.predict(query)))
# Output of the Naive Bayes script above:
#
# dataset:
# Train Accuracy: 70.0
# Query 1:- [['Salty' 'Hot' 'Soft']] ---> ['No']
# Query 2:- [['Spicy' 'Hot' 'Soft']] ---> ['Yes']
# Query 3:- [['Salty' 'Hot' 'Hard']] ---> ['Yes']
0.9736842105263158
# Cost-complexity pruning of a decision tree.
# NOTE(review): `clf`, `X_train`, `y_train`, `X_test`, `y_test`, `plt`, `tree`,
# `DecisionTreeClassifier` and `accuracy_score` are not defined in the visible
# part of this file; presumably `clf` is an already-fitted decision tree and
# the rest come from earlier imports / a train-test split -- confirm.

# Draw the current tree on a 16x8 figure.
plt.figure(figsize=(16,8))
tree.plot_tree(clf)
plt.show()
# Compute the pruning path of the tree for the training data.
path=clf.cost_complexity_pruning_path(X_train,y_train)
# `path` exposes two attributes: ccp_alphas and impurities
ccp_alphas,impurities=path.ccp_alphas,path.impurities
print("ccp alpha wil give list of values :",ccp_alphas)
print("***********************************************************")
print("Impurities in Decision Tree :",impurities)
# Refit a new tree with a fixed pruning strength (ccp_alpha=0.02) and a fixed
# random seed, replacing the previous `clf`.
clf=DecisionTreeClassifier(random_state=0,ccp_alpha=0.02)
clf.fit(X_train,y_train)
# Draw the pruned tree with rounded, colour-filled nodes.
plt.figure(figsize=(12,8))
tree.plot_tree(clf,rounded=True,filled=True)
plt.show()
# Score the pruned tree on the test split. The result is neither stored nor
# printed here; presumably this was the last expression of a notebook cell.
accuracy_score(y_test,clf.predict(X_test))
0.9736842105263158