Lab Assessment - 3
Code:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Sep 21 10:16:57 2020
@author: nikitha
"""
import nltk
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.stem import WordNetLemmatizer
import numpy as np
# --- Edge-list preprocessing ---------------------------------------------
# Reads whitespace-delimited edge data from data.txt, normalises tabs to
# spaces (data1.doc), splits each line into a source column (data2.doc) and
# a target column (data3.doc), then loads the two columns into the lists
# w1 (from-nodes) and w2 (to-nodes) for later graph construction.
# NOTE(review): the .doc extension is misleading -- these are plain text
# files (paths kept unchanged for compatibility with the rest of the lab).
with open("data.txt", "r") as raw_in, open("data1.doc", "w") as norm_out:
    for line in raw_in:
        # Tabs -> single spaces so a plain split(" ") works below.
        norm_out.write(line.replace('\t', ' '))

with open("data1.doc", "r") as norm_in, \
     open("data2.doc", "w") as from_out, \
     open("data3.doc", "w") as to_out:
    for line in norm_in:
        parts = line.split(" ")
        if len(parts) < 2:
            # Skip malformed/blank lines instead of crashing with IndexError.
            continue
        from_out.write(parts[0])
        from_out.write("\n")
        to_out.write(parts[1])
        to_out.write("\n")

# Context managers guarantee every handle is closed (the original leaked
# file3/file4/file5/fromnodes/tonodes).
with open("data2.doc", "r") as fromnodes:
    w1 = fromnodes.read().split()
with open("data3.doc", "r") as tonodes:
    w2 = tonodes.read().split()
import networkx as nx
import matplotlib.pyplot as plt

# NOTE(review): indentation in this excerpt was lost in document extraction
# and has been reconstructed below; no code tokens were changed.

# DiGraph is already directed -- the Directed=True keyword is merely stored
# as a graph attribute and has no effect.
d = nx.DiGraph(Directed=True)
# Node count of d.  NOTE(review): no nodes/edges are added to d anywhere in
# this excerpt, so this is 0 here -- the graph is presumably built on an
# earlier (missing) page of the document.
n_nodes=d.number_of_nodes()

# Print the degree prestige of every node.
# NOTE(review): degree_prestige is not defined in this excerpt -- presumably
# a dict {node: prestige value} computed on a missing page.
for i in degree_prestige:
    print(i, " : ", degree_prestige[i])

# PROXIMITY PRESTIGE: for each destination node, average the shortest-path
# distance from every node that can reach it.
distance = []
temp_dis = 0
n=0
for dest in d.nodes:
    temp_dis = 0   # sum of shortest-path lengths into dest
    n=0            # count of nodes with a path to dest (includes dest itself)
    for src in d.nodes:
        if (nx.has_path(d,src,dest) == True):
            temp_dis = temp_dis + nx.shortest_path_length(d,source = src,target = dest)
            n=n+1
    if temp_dis == 0:
        # Nothing reaches dest (or only dest itself at distance 0).
        distance.append([dest, 0])
    else:
        # Divides by n-1 to exclude dest's zero-length path to itself.
        # NOTE(review): this is the mean distance into dest, not the full
        # proximity-prestige ratio -- TODO confirm intended formula.
        distance.append([dest, temp_dis/(n - 1)])

print("\nPROXIMITY PRESTIGE :\n")
for i in distance:
    print(str(i[0]) + " : " + str(i[1]))
Output:
Code:
import networkx as nx
import matplotlib.pyplot as plt

# HITS (Hyperlink-Induced Topic Search) demo on a small web-link graph.
# Nodes are sites; a directed edge (u, v) means "u links to v".
# NOTE: the original contained this entire construct-run-draw sequence twice
# verbatim (a page-break duplication artifact); the duplicate is removed.
G = nx.DiGraph(Directed=True)
G.add_edges_from([('Wiki', 'Bing'), ('Wiki', 'Google'), ('Rediff', 'Bing'),
                  ('Bing', 'Google'), ('Altavi', 'Bing'), ('Altavi', 'Google'),
                  ('Yahoo', 'Bing'), ('Yahoo', 'Altavi'), ('Google', 'Wiki'),
                  ('Google', 'Bing'), ('Google', 'Rediff'),
                  ('Google', 'Altavi'), ('Google', 'Yahoo')])

# hubs[v]: how well v points at good authorities; authorities[v]: how well v
# is pointed at by good hubs.  The loose tol=0.04 keeps iterations low.
hubs, authorities = nx.hits(G, max_iter=50, normalized=True, tol=0.04)

plt.figure(figsize=(10, 10))
nx.draw_networkx(G, with_labels=True, node_size=5000)
Code:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Sep 24 16:15:03 2020
@author: nikitha
"""
import pandas as pd
import numpy as np

# Load the credit-card-approval dataset and name its 16 columns generically.
df = pd.read_excel(r'/Users/nikitha/Desktop/xm/cca.xls')
df.columns = ['A1', 'A2', 'A3', 'A4', 'A5', 'A6', 'A7', 'A8', 'A9', 'A10',
              'A11', 'A12', 'A13', 'A14', 'A15', 'CLASS']

# Inspect rows where A1 holds the missing-value marker '?' (the result is
# discarded -- useful interactively, a no-op in a script).
df[df['A1'] == '?']

# Drop those rows by their hard-coded positions, then renumber the index.
# reset_index(drop=True) is equivalent to reset_index() followed by dropping
# the generated 'index' column.
df.drop(df.index[[248, 327, 346, 374, 453, 479, 489, 520, 598, 601, 641, 673]],
        inplace=True)
df = df.reset_index(drop=True)
#df.drop(['level_0'],axis=1,inplace=True)

# Impute '?' in numeric-ish columns with pre-computed column means (as text;
# the columns are converted to numeric later).
df['A3'] = df['A3'].replace({'?': '4.79'})
df['A8'] = df['A8'].replace({'?': '2.42'})
df['A11'] = df['A11'].replace({'?': '2.43'})
df['A15'] = df['A15'].replace({'?': '1031'})

# Same inspect-then-drop pattern for A5.
df[df['A5'] == '?']
df.drop(df.index[[206, 269, 328, 451, 584, 612]], inplace=True)
df = df.reset_index(drop=True)
df.drop(df.index[[523, 527]], inplace=True)

# And for A6.
df[df['A6'] == '?']
df = df.reset_index(drop=True)

# Temporarily mark missing A14 values with '0' (imputed properly below).
df['A14'] = df['A14'].replace({'?': '0'})
# Web Mining Lab Ass 3 -- Page 9 of 13 (page-break artifact from extraction)
# Duplicate of the A14 imputation just above -- a page-break duplication
# artifact in the source document; harmless because the replace is idempotent.
df['A14']=df['A14'].replace(['?'],'0')
df[['A14']] = df[['A14']].apply(pd.to_numeric)
# Column means (inspected interactively) used to choose imputation values.
df.mean(axis = 0, skipna = True)
# NOTE(review): matches the string '0', but A14 was just converted to
# numeric, so this replace likely matches nothing; '1063' is presumably the
# rounded column mean -- TODO confirm.
df['A14']=df['A14'].replace(['0'],'1063')
# Label-encode the categorical columns (mappings chosen by the author).
df['A1']=df['A1'].replace(['a','b'],[0,1])
df['A4']=df['A4'].replace(['u'],'0')
df['A4']=df['A4'].replace(['y'],'1')
df['A4']=df['A4'].replace(['l'],'2')
# Interactive inspection of category levels (results discarded in a script).
df.A5.unique()
df.A6.unique()
df['A6']=df['A6'].replace(['w', 'q', 'm', 'r', 'cc', 'k', 'c', 'd', 'x', 'i', 'e', 'aa', 'ff',
'j'],[0,1,2,3,4,5,6,7,8,9,10,11,12,13])
df.A7.unique()
df['A7']=df['A7'].replace(['v', 'h', 'bb', 'ff', 'j', 'z', 'o','dd', 'n'],[0,1,2,3,4,5,6,7,8])
df['A9']=df['A9'].replace(['t', 'f'],[0,1])
df['A10']=df['A10'].replace(['t', 'f'],[0,1])
df['A12']=df['A12'].replace(['t', 'f'],[0,1])
df.A13.unique()
df['A13']=df['A13'].replace(['g', 's', 'p'],[0,1,2])
df['CLASS']=df['CLASS'].replace(['+','-'],[0,1])
# Impute A2: mark '?' as '0', convert to numeric, then replace the 0
# sentinel with 30 (presumably the rounded column mean -- TODO confirm).
df['A2']=df['A2'].replace(['?'],'0')
df[['A2']] = df[['A2']].apply(pd.to_numeric)
df.mean(axis = 0, skipna = True)
df['A2']=df['A2'].replace([0],30)
# Train/test split and model evaluation.
# NOTE(review): train_test_split, clf, accuracy, classification_report and
# plt are all undefined in this excerpt -- the sklearn/matplotlib imports
# and the classifier / cross-validation setup are on a missing page.
train,test=train_test_split(df,random_state=42)
X_train=train[train.columns[0:14]]  # features A1..A14
y_train=train['CLASS']              # target label
X_test=test[test.columns[0:14]]
y_test=test['CLASS']
print(accuracy.mean())
import numpy as np
model = clf.fit(X_train, y_train)
predicted = model.predict(X_test)
print("\nMETRICS\n",classification_report(y_test, predicted))
# NOTE(review): predicted1 repeats the identical prediction/report above --
# likely another duplication artifact.
predicted1 = model.predict(X_test)
print("\nMETRICS\n",classification_report(y_test, predicted1))
plt.show()
Output: