# Titanic survival prediction — pandas preprocessing + Gaussian Naive Bayes.
# (Header lines "Professional Documents" / "Culture Documents" were artifacts
# of the document-sharing site this script was scraped from.)
"""Titanic survival prediction using Gaussian Naive Bayes.

Loads the Titanic dataset from a local CSV, drops identifier/unused
columns, one-hot encodes Sex, mean-imputes missing Age and Fare, then
trains and evaluates a GaussianNB classifier on an 80/20 split.
"""
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB

# NOTE(review): hardcoded absolute Windows path — consider a CLI argument.
titanic = pd.read_csv(r"C:\Users\nikam\OneDrive\Desktop\lab\Titanic.csv")
print(titanic)

# Drop identifiers and columns not used as features.
# (Fixes the original SyntaxError where "axis" was split across two lines
# by PDF extraction: "axi" / "s ='columns'".)
titanic.drop(
    ['PassengerId', 'Name', 'SibSp', 'Parch', 'Ticket', 'Cabin', 'Embarked'],
    axis='columns',
    inplace=True,
)
print(titanic.head(20))

target = titanic.Survived
inputs = titanic.drop('Survived', axis='columns')

# One-hot encode Sex into female/male indicator columns.
dummies = pd.get_dummies(inputs.Sex)
print(dummies.head(10))
inputs = pd.concat([inputs, dummies], axis='columns')
print(inputs.head())
inputs.drop('Sex', axis='columns', inplace=True)
print("After removing Sex column")  # fixed message typo "Afeter"
print(inputs.head())

# Show which columns still contain NaNs (the original computed this
# expression but discarded the result — print it so the check is visible).
print(inputs.columns[inputs.isna().any()])
print(inputs.head(20))

# Mean-impute missing Age and Fare values; item assignment (df['col'])
# is safer than attribute assignment for setting DataFrame columns.
inputs['Age'] = inputs['Age'].fillna(inputs['Age'].mean())
print("Filling Null Values")
print(inputs.head(20))
print(inputs.columns[inputs.isna().any()])
inputs['Fare'] = inputs['Fare'].fillna(inputs['Fare'].mean())
print(inputs.columns[inputs.isna().any()])

# 80/20 train/test split; random_state pins the split so the reported
# score is reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    inputs, target, test_size=0.2, random_state=42
)
print("Length of Training", len(X_train))
print("Length of Test", len(X_test))
print("Length of Dataset", len(inputs))

model = GaussianNB()
print(model.fit(X_train, y_train))
print(model.score(X_test, y_test))

# Inspect a few test rows alongside predictions and class probabilities.
print(X_test[:10])
print(y_test[:10])
print(model.predict(X_test[:10]))
print(model.predict_proba(X_test[:10]))
# OUTPUT: (program output followed in the original document)