13/06/2024, 08:10 8-text classification - Jupyter Notebook
In [5]: from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
In [6]: # Load the 20 newsgroups dataset
# Strip headers, footers and quoted replies so the model learns from the
# message body only.
n = fetch_20newsgroups(subset='all', remove=('headers', 'footers', 'quotes'))

# Mapping the newsgroups to a binary label. The notebook calls the two
# classes "positive"/"negative" sentiment, but the label is derived purely
# from which newsgroup a post came from: 1 for the groups in `pos`,
# 0 for the groups in `ne`.
pos = ['rec.sport.baseball', 'sci.space']
ne = ['comp.sys.ibm.pc.hardware', 'misc.forsale']

# One lookup table instead of rebuilding `pos + ne` for every document.
# `pos` is applied last so it wins if a group were ever listed in both.
label_of = {**dict.fromkeys(ne, 0), **dict.fromkeys(pos, 1)}

# Combine positive and negative samples; posts from any other newsgroup
# are dropped.
x = []
y = []
for text, target in zip(n.data, n.target):
    group = n.target_names[target]
    if group in label_of:
        x.append(text)
        y.append(label_of[group])

# NOTE(review): the original line is cut off in the exported notebook right
# after "test_size = 0.2, r". It presumably continued with `random_state=...`;
# the actual value is not visible, so 42 is a placeholder — confirm against
# the .ipynb. With a different seed the recorded metrics below the cell may
# not reproduce exactly.
RANDOM_STATE = 42
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=RANDOM_STATE
)

# Fit the TF-IDF vocabulary on the training texts only, then reuse it for
# the test texts so no test information leaks into the features.
v = TfidfVectorizer(stop_words='english')
x_tr_tf = v.fit_transform(x_train)
x_te_tf = v.transform(x_test)

# Train the Naive Bayes classifier and predict the held-out labels.
clf = MultinomialNB()
clf.fit(x_tr_tf, y_train)
y_pred = clf.predict(x_te_tf)

# Report overall accuracy and the per-class precision/recall/F1 table.
a = accuracy_score(y_test, y_pred)
print(f"Accuracy: {a*100:.2f}%")
print("\nClassification Report:")
print(classification_report(y_test, y_pred))
Accuracy: 96.19%
Classification Report:
precision recall f1-score support
0 0.97 0.96 0.96 405
1 0.95 0.97 0.96 383
accuracy 0.96 788
macro avg 0.96 0.96 0.96 788
weighted avg 0.96 0.96 0.96 788
localhost:8888/notebooks/Desktop/ml lab/8/8-text classification.ipynb 1/2
13/06/2024, 08:10 8-text classification - Jupyter Notebook
# Cell In [7]: classify a few hand-written sentences with the vectorizer `v`
# and classifier `clf` fitted in the previous cell.
#
# NOTE(review): the list literal is cut off in the exported notebook; the
# entries below are restored from the "Text: ..." lines in this cell's
# recorded output — confirm against the .ipynb.
t = [
    "I love baseball!",
    "Computer hardware is so frustrating.",
    "The latest news is amazing!",
    "I dislike the constant issues with computer.",
    "They practice yoga.",
    "She disliked her boss",
    "Priya cooks well.",
]

# Reuse the already-fitted vocabulary; calling fit here would change the
# feature space the classifier was trained on.
te_tf = v.transform(t)
pred_labs = clf.predict(te_tf)

# Label 1 is printed as "Positive", anything else as "Negative". The label
# comes from the newsgroup-based classes set up in the training cell.
for te, lab in zip(t, pred_labs):
    se = "Positive" if lab == 1 else "Negative"
    print(f"Text: {te}\nPredicted Sentiment: {se}\n")
Text: I love baseball!
Predicted Sentiment: Positive
Text: Computer hardware is so frustrating.
Predicted Sentiment: Negative
Text: The latest news is amazing!
Predicted Sentiment: Positive
Text: I dislike the constant issues with computer.
Predicted Sentiment: Negative
Text: They practice yoga.
Predicted Sentiment: Positive
Text: She disliked her boss
Predicted Sentiment: Negative
Text: Priya cooks well.
Predicted Sentiment: Positive
localhost:8888/notebooks/Desktop/ml lab/8/8-text classification.ipynb 2/2