# Naive Bayes Classification for Text Classification
import pandas as pd
import matplotlib.pyplot as plt
import sklearn
from sklearn.linear_model import LogisticRegression
import numpy as np
import seaborn as sns
#% matplotlib inline
import math
import sklearn.datasets as skd
from sklearn.utils import Bunch
# Build bag-of-words counts and TF-IDF features for the training documents.
# CountVectorizer is imported here because the script used it without importing it.
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer

# NOTE(review): `news_train` is not defined in the visible part of this script;
# presumably a dataset Bunch loaded earlier (its `.data` is passed as the raw
# documents) -- confirm against the missing section.
count_vect = CountVectorizer()
X_train_tf = count_vect.fit_transform(news_train.data)
# Term frequency tells how often a given word appears (a frequent word might
# be important).
print('\n', X_train_tf.shape)  # rows, column info

# Inverse document frequency accounts for words that appear a lot across
# documents: a distinctive word is more valuable for classification than a
# ubiquitous one such as 'the'.
tfidf_transformer = TfidfTransformer()  # type: TfidfTransformer
# X_train_tf holds the total frequency counts in rows & columns form.
X_train_tfidf = tfidf_transformer.fit_transform(X_train_tf)
print(X_train_tfidf.shape)
#print(X_train_tfidf)
# Predict on the new documents and print one prediction per line.
# NOTE(review): `clf` and `X_new_tfidf` are not defined in the visible part of
# this script; presumably a fitted classifier and the TF-IDF features of some
# new documents -- confirm against the missing section.
predicted = clf.predict(X_new_tfidf)
for x in predicted:
    print(x)  # loop body re-indented; it was flush-left and raised IndentationError

# Run the test documents through the already-fitted vectorizer and TF-IDF
# transformer (transform only, no re-fitting), then predict on them.
# `predicted` is overwritten with the test-set predictions.
X_test_tf = count_vect.transform(news_test.data)
X_test_tfidf = tfidf_transformer.transform(X_test_tf)
predicted = clf.predict(X_test_tfidf)