You are on page 1 of 2

Web Mining

19BCE2483

Anubhav Bhandary

Prob.2.

——

import pandas as pd

import math

import numpy as np

def log_scale(frame, n_docs=5):
    """Return ``log(1 + x)`` of the first *n_docs* rows of *frame*.

    The input frame is not modified. Values are converted to float first so
    the result never has to be written back into integer-typed columns.

    Raises:
        ValueError: if any value is <= -1, where ``log(1 + x)`` is undefined
            (the same error ``math.log`` would raise).
    """
    counts = frame.head(n_docs).astype(float)
    if (counts.to_numpy() <= -1).any():
        raise ValueError("math domain error")
    return np.log1p(counts)


def cosine_similarity(a, b):
    """Return the cosine of the angle between vectors *a* and *b*.

    A zero-length vector has no direction; its similarity to anything is
    reported as 0.0 instead of dividing by zero.
    """
    denom = np.linalg.norm(a) * np.linalg.norm(b)
    if denom == 0:
        return 0.0
    # Plain float keeps printed tuples readable regardless of numpy version.
    return float(np.dot(a, b) / denom)


def pairwise_cosines(vectors):
    """Yield ``(i, j, cosine)`` for every pair of rows with ``i < j``."""
    count = len(vectors)
    for i in range(count):
        for j in range(i + 1, count):
            yield i, j, cosine_similarity(vectors[i], vectors[j])


def most_similar_pair(scores):
    """Return the ``(i, j, cosine)`` triple with the highest cosine.

    Ties keep the first pair seen. Returns ``None`` when *scores* is empty,
    i.e. when there were fewer than two documents to compare.
    """
    best = None
    for i, j, score in scores:
        if best is None or score > best[2]:
            best = (i, j, score)
    return best


def main(path="fileA.csv", n_docs=5):
    """Print log-scaled rows, every pairwise cosine, and the closest pair.

    Reads the CSV at *path*, treats each of the first *n_docs* rows as one
    document vector, and compares all pairs by cosine similarity.
    """
    df = pd.read_csv(path)
    weighted = log_scale(df, n_docs)
    print(weighted.head())

    scores = list(pairwise_cosines(weighted.to_numpy()))
    for i, j, score in scores:
        print(i, j, round(score, 5))

    best = most_similar_pair(scores)
    if best is None:
        print("No document pairs to compare")
    else:
        i, j, score = best
        print("The most similar document is : ", (i, j, round(score, 5)))


if __name__ == "__main__":
    main()

Output:

You might also like