You are on page 1of 3

In [1]:

#Ignore wornings
import warnings
warnings.filterwarnings('ignore')
In [2]:
#Data handling
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
In [3]:
#Data split and vecter_conversion(BOW)
import seaborn as sns
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
In [4]:
# Logistic regression model selection and confussion matrix calculation
from sklearn.metrics import
average_precision_score,f1_score,precision_score,recall_score,auc,log_loss,confusio
n_matrix
from sklearn import model_selection, svm
In [5]:
# Read dataset (Clean_dataset with it's score)
data = pd.read_csv("/home/shri/Documents/file1.csv")
data = data[["Score","text"]]
clean_data = data['text'].tolist()
BOW
In [6]:
# BOW vecter coversion
counting_var = CountVectorizer(ngram_range = (1,2), min_df=10, max_features=4000)
bi_gram = counting_var.fit_transform(np.array(clean_data))
In [7]:
# Score
scores = data['Score'].get_values()
len(scores)
Out[7]:
1500
In [8]:
# Array coversion
def convToNpArray(arr):
if(type(arr) == list):
arr = np.array(arr)
return arr
else:
return arr;
In [9]:
# Bi_gram vecters : select 1500
x = bi_gram[0:1500]
# Score/rating of data
y = scores[0:1500]

# train and test split


x_1, x_test, y_1, y_test = train_test_split(x,y, test_size=0.3, random_state=0)

x_1 = convToNpArray(x_1)
x_test = convToNpArray(x_test)
y_1 = convToNpArray(y_1)
y_test = convToNpArray(y_test)
In [10]:
def confusionMatrix(y_test,pred):
'''
Confusion Matrix
'''
tn, fp, fn, tp = confusion_matrix(y_test, pred).ravel()
tpr = tp/(fn+tp)
tnr = tn/(tn+fp)
fnr = fn/(fn+tp)
fpr = fp/(tn+fp)
print("#"*75)
print("Confusion Matrix")
print("TPR :%f \nTNR : %f\nFPR : %f \nFNR: %f"%(tpr,tnr,fpr,fnr))
print("#"*75)
In [11]:
# Classifier - Algorithm - SVM
# fit the training dataset on the classifier
SVM = svm.SVC(C=1.0, kernel='linear', degree=3, gamma='auto')
SVM.fit(Train_X_Tfidf,Train_Y)
# predict the labels on validation dataset
predictions_SVM = SVM.predict(Test_X_Tfidf)
# Use accuracy_score function to get the accuracy
print("SVM Accuracy Score -> ",accuracy_score(predictions_SVM, Test_Y)*100)
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-11-2acca3fc6fc0> in <module>
2 # fit the training dataset on the classifier
3 SVM = svm.SVC(C=1.0, kernel='linear', degree=3, gamma='auto')
----> 4 SVM.fit(Train_X_Tfidf,Train_Y)
5 # predict the labels on validation dataset
6 predictions_SVM = SVM.predict(Test_X_Tfidf)

NameError: name 'Train_X_Tfidf' is not defined


L1 Regularization
In [ ]:
model = svm.SVC(C=1.0, kernel='linear', degree=3, gamma='auto')
model.fit(x_1,y_1)
print("Score: ",model.score(x_test,y_test))
In [ ]:
pred = model.predict(x_test)
confusionMatrix(y_test,pred)
support vector machine
Define :
Find best fettng line with points in both direction that habe high probability that
belongs to respected class.
Explain :
In Many hyperplane that seperats datapoints in both direct but to find best fetting
line/plane as to get maximum margin.
Points through which plane/line are called as support vectors.
there are three lines one for best fetting line and around that line remaining two
lines are align for positive and negative side of plane.
alt text

Positive line
Line seperate
Negaive line
alt text

Find margin
Build convex hull for positive and negative points
find shortest line connecting hulls
Bisects line.(Max margin obtained)
alt text

For non-linear datapoints : if same of points are in non-linear manner then new
hyperparameter (eta) is defined as per formulation distance form line is
calaculated using :

W = arg min((||W||/2)(c(1/n)(eta1+eta2+....))
(-y.Wi.xi) = -1 {if point is positive}
(-y.Wi.xi) = 1- eta

You might also like