Professional Documents
Culture Documents
#Ignore wornings
import warnings
warnings.filterwarnings('ignore')
In [2]:
#Data handling
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
In [3]:
#Data split and vecter_conversion(BOW)
import seaborn as sns
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
In [4]:
# Logistic regression model selection and confussion matrix calculation
from sklearn.metrics import
average_precision_score,f1_score,precision_score,recall_score,auc,log_loss,confusio
n_matrix
from sklearn import model_selection, svm
In [5]:
# Read dataset (Clean_dataset with it's score)
data = pd.read_csv("/home/shri/Documents/file1.csv")
data = data[["Score","text"]]
clean_data = data['text'].tolist()
BOW
In [6]:
# BOW vecter coversion
counting_var = CountVectorizer(ngram_range = (1,2), min_df=10, max_features=4000)
bi_gram = counting_var.fit_transform(np.array(clean_data))
In [7]:
# Score
scores = data['Score'].get_values()
len(scores)
Out[7]:
1500
In [8]:
# Array coversion
def convToNpArray(arr):
if(type(arr) == list):
arr = np.array(arr)
return arr
else:
return arr;
In [9]:
# Bi_gram vecters : select 1500
x = bi_gram[0:1500]
# Score/rating of data
y = scores[0:1500]
x_1 = convToNpArray(x_1)
x_test = convToNpArray(x_test)
y_1 = convToNpArray(y_1)
y_test = convToNpArray(y_test)
In [10]:
def confusionMatrix(y_test,pred):
'''
Confusion Matrix
'''
tn, fp, fn, tp = confusion_matrix(y_test, pred).ravel()
tpr = tp/(fn+tp)
tnr = tn/(tn+fp)
fnr = fn/(fn+tp)
fpr = fp/(tn+fp)
print("#"*75)
print("Confusion Matrix")
print("TPR :%f \nTNR : %f\nFPR : %f \nFNR: %f"%(tpr,tnr,fpr,fnr))
print("#"*75)
In [11]:
# Classifier - Algorithm - SVM
# fit the training dataset on the classifier
SVM = svm.SVC(C=1.0, kernel='linear', degree=3, gamma='auto')
SVM.fit(Train_X_Tfidf,Train_Y)
# predict the labels on validation dataset
predictions_SVM = SVM.predict(Test_X_Tfidf)
# Use accuracy_score function to get the accuracy
print("SVM Accuracy Score -> ",accuracy_score(predictions_SVM, Test_Y)*100)
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-11-2acca3fc6fc0> in <module>
2 # fit the training dataset on the classifier
3 SVM = svm.SVC(C=1.0, kernel='linear', degree=3, gamma='auto')
----> 4 SVM.fit(Train_X_Tfidf,Train_Y)
5 # predict the labels on validation dataset
6 predictions_SVM = SVM.predict(Test_X_Tfidf)
Positive line
Line seperate
Negaive line
alt text
Find margin
Build convex hull for positive and negative points
find shortest line connecting hulls
Bisects line.(Max margin obtained)
alt text
For non-linear datapoints : if same of points are in non-linear manner then new
hyperparameter (eta) is defined as per formulation distance form line is
calaculated using :
W = arg min((||W||/2)(c(1/n)(eta1+eta2+....))
(-y.Wi.xi) = -1 {if point is positive}
(-y.Wi.xi) = 1- eta