# You are on page 1 of 3

import matplotlib.pyplot as plt

from sklearn.datasets import load_breast_cancer

# Binary classifier: breast-cancer dataset bundled with scikit-learn.
# The loader has to be called; the returned object is indexed by key below
# to pull out the feature matrix and the class labels.
df = load_breast_cancer()

x = df['data']
y = df['target']

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Hold out 25% of the samples for evaluation; the fixed seed keeps the split
# reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25, random_state=0)

# Normalize data
# The scaling statistics are learned from the training split only.
ss_train = StandardScaler()
x_train = ss_train.fit_transform(x_train)

# The test split must be transformed with the SAME statistics the model was
# trained under. Fitting a second scaler on the test data would put the two
# splits on different scales and leak test-set information into evaluation.
# `ss_test` is kept as an alias so later code that refers to it still works.
ss_test = ss_train
x_test = ss_test.transform(x_test)

from sklearn.linear_model import LogisticRegression

# Train a logistic-regression model on the training split; fit() returns the
# estimator itself, so construction and training are chained.
logistic_classifier = LogisticRegression().fit(x_train, y_train)

# Predict labels for the held-out samples.
y_pred = logistic_classifier.predict(x_test)

# Quick visual check: first five predictions next to the first five true labels.
print(y_pred[:5])
print(y_test[:5])

from sklearn import metrics
from sklearn.metrics import confusion_matrix

import matplotlib.pyplot as plt

# Confusion matrix of true labels (rows) against predicted labels (columns).
cm = confusion_matrix(y_test, y_pred)

# Reuse the matrix computed above instead of computing it a second time.
# For a two-class problem, flattening it row by row yields the counts in the
# order TN, FP, FN, TP.
TN, FP, FN, TP = cm.ravel()

print('True Positive', TP)
print('False Positive', FP)
print('True Negative', TN)
print('False Negative', FN)

# Fraction of held-out samples whose predicted label matches the true label.
print("Logistic Regression's accuracy", metrics.accuracy_score(y_test, y_pred))
# Multiclass classifier: predict the "Drug" column from the patient columns.

import pandas as pd
import sklearn
import matplotlib.pyplot as plt

from sklearn import preprocessing

df_drug = pd.read_csv('/content/drug200.csv')
df_drug.head()

# Columns used as model inputs, in the order they appear in the feature array.
selected_columns = ['Age', 'Sex', 'BP', 'Cholesterol', 'Na_to_K']
x = df_drug[selected_columns].values

# One label encoder per categorical column, each fitted on its explicit list
# of category strings.
le_sex = preprocessing.LabelEncoder().fit(['F','M'])
le_bp = preprocessing.LabelEncoder().fit(['LOW', 'HIGH', 'NORMAL'])
Chol = preprocessing.LabelEncoder().fit([ 'NORMAL', 'HIGH'])

# Sex, BP and Cholesterol sit at positions 1, 2 and 3 of `selected_columns`;
# overwrite each of those columns in place with its integer codes.
for column_index, encoder in enumerate((le_sex, le_bp, Chol), start=1):
    x[:, column_index] = encoder.transform(x[:, column_index])
x[0:10]

# Target labels.
y = df_drug["Drug"]
y[0:10]

from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

# Keep 30% of the rows aside for evaluation; the seed fixes the split.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=3
)

# Entropy-based decision tree limited to four levels.
dt = DecisionTreeClassifier(criterion="entropy", max_depth=4)
dt

dt.fit(x_train, y_train)

# Predictions for the held-out rows, shown next to the true labels.
pt = dt.predict(x_test)
print(pt[:5])
print(y_test[:5])

tree_accuracy = metrics.accuracy_score(y_test, pt)
print("Accuracy of the decision tree: ", tree_accuracy)

from sklearn.tree import plot_tree

from sklearn import tree
import matplotlib.image as mpimg
import numpy as np

plt.figure(figsize=(50,50))

# Label the tree nodes with the same columns, in the same order, that were
# used to build the feature array `x`. A plain list is passed because some
# scikit-learn releases reject a pandas Index for `feature_names`.
featureNames = list(selected_columns)

# Draw the tree that was already trained and evaluated. Calling fit() again
# here would train a fresh model, and since the classifier has no fixed
# random_state the plotted tree could differ from the one whose accuracy was
# reported.
plot_tree(dt,
          filled=True,
          feature_names=featureNames)
plt.show()

# You might also like