AP19110010110 Lab Assignment-2 - Jupyter Notebook

M.
Sri Phani Bhushan
AP19110010110
In [34]: 
import nltk
Splitting the text file into 1400 different text files
In [58]: 
i=1
punctuations='''()-[]{};:\/,.<>@$''^+1234567890*%&=?'''
with open('cran/cran.all.1400','r') as f:
for line in f:
if line[:2]=='.I':
file=open(f'Dataset assignment-1\{line[3:len(line)-1]}.txt','x')
elif line[:2] in ['.T','.B','.A','.W']:
continue
elif line=='':
file.close()
else:
no_punct_line=""
for char in line:
if(char not in punctuations):
no_punct_line+=char
file.write(no_punct_line)
In [59]: 
file=open(f'Dataset assignment-1\1400.txt','w')
file.close()
Tokenizing and stemming the documents

In [114]: 
from nltk.stem.porter import PorterStemmer
ps = PorterStemmer()
tokens = {}
for i in range(1, 1401):

file = open(f'Dataset assignment-1\{i}.txt')
for line in file:
for word in line.split():
w = str(ps.stem(word))
if w in tokens:
tokens[w][0] = len(tokens[w][1])
tokens[w][1].add(i)
else:
tokens[w] = [1,set()]
tokens[w][1].add(i)
# tokens[w] = 1
file.close()
In [115]: 
print(tokens.keys())
dict_keys(['experiment', 'investig', 'of', 'the', 'aerodynam', 'a', 'win

g', 'in', 'slipstream', 'brenckmanm', 'j', 'ae', 'sc', 'an', 'studi', 'pro
pel', 'wa', 'made', 'order', 'to', 'determin', 'spanwis', 'distribut', 'li
ft', 'increas', 'due', 'at', 'differ', 'angl', 'attack', 'and', 'free', 's
tream', 'veloc', 'ratio', 'result', 'were', 'intend', 'part', 'as', 'eval
u', 'basi', 'for', 'theoret', 'treatment', 'thi', 'problem', 'compar', 'sp
an', 'load', 'curv', 'togeth', 'with', 'support', 'evid', 'show', 'that',
'substanti', 'increment', 'produc', 'by', 'destal', 'or', 'boundarylayerco
ntrol', 'effect', 'integr', 'remain', 'after', 'subtract', 'found', 'agr
e', 'well', 'potenti', 'flow', 'theori', 'empir', 'specif', 'configur', 'e
xperi', 'simpl', 'shear', 'past', 'flat', 'plate', 'incompress', 'fluid',
'small', 'viscos', 'tingyili', 'depart', 'aeronaut', 'engin', 'renssela',
'polytechn', 'institut', 'troy', 'ny', 'highspe', 'viscou', 'twodimensio
n', 'bodi', 'it', 'is', 'usual', 'necessari', 'consid', 'shock', 'wave',
'emit', 'from', 'nose', 'lead', 'edg', 'consequ', 'there', 'exist', 'invis
cid', 'rotat', 'region', 'between', 'boundari', 'layer', 'such', 'situat',
'aris', 'instanc', 'hyperson', 'somewhat', 'prandtl', 'classic', 'boundary
lay', 'origin', 'outsid', 'irrot', 'while', 'must', 'be', 'possibl', 'vort
ic', 'have', 'been', 'recent', 'discuss', 'ferri', 'libbi', 'present', 'pa
In [116]: 
len(tokens)
Out[116]:
7480
In [488]: 
#Sorting the tokens
sorted_tokens=dict(sorted(tokens.items(),key=lambda x:x[1][0],reverse=True))
print(sorted_tokens)
{'of': [1395, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 3
6, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54,
55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 7
3, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91,
92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 10
8, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 1
23, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137,
138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152,
153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167,
168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182,
183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197,
198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212,
213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227,
228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242,
243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257,
258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272,
273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287,
288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302,
In [118]: 
print(sorted_tokens.keys())
'environment', 'page', 'philosophi', 'cut', 'socal', 'paramount', 'countr
i', 'drastic', 'lester', 'classif', 'log', 'jmathphi', 'hypergeometr', 'ow
enpr', 'insert', 'grid', 'grade', 'typifi', 'neighbourhood', 'signal', 'fi
lter', 'db', 'lubric', 'mathieu', 'stratiform', 'apprais', 'elabor', 'wort
h', 'quadrupol', 'shapiro', 'telemet', 'track', 'interplanetari', 'refract
ori', 'aeroquart', 'ward', 'biconvex', 'xy', 'gaussian', 'immers', 'millik
ancb', 'belong', 'lowturbul', 'unfortun', 'ship', 'hodograph', 'zeroord',
'miss', 'mar', 'overshoot', 'brake', 'walker', 'dispers', 'inert', 'cell',
'lowdens', 'photographi', 'annulu', 'holderdw', 'cravenah', 'entrain', 'de
duct', 'utia', 'maxwellian', 'molyneuxwg', 'tnstruct', 'compressibleflow',
'monatom', 'liter', 'unusu', 'yashuram', 'jphyssoc', 'lengthwis', 'aj', 'p
ercentthick', 'idealga', 'rankin', 'seal', 'lowfrequ', 'omit', 'british',
'constrain', 'screen', 'stand', 'sideforc', 'manufactur', 'stratfordb', 'c
oncurr', 'bubbl', 'singlestag', 'stator', 'powel', 'astronaut', 'ensur',
'smoke', 'quasicylindr', 'quasi', 'bank', 'angularli', 'weapon', 'tangen
c', 'tangentcon', 'quasicylind', 'referenc', 'code', 'kettledj', 'strut',
'gooderumpb', 'woodgp', 'visualis', 'visibl', 'royal', 'deposit', 'oil',
'compact', 'white', 'hard', 'eventu', 'said', 'weberj', 'squir', 'sweptw',
'lilleygm', 'civil', 'poiseuil', 'nuclear', 'jappphi', 'eckhausw', 'con',
'hour', 'jd', 'polish', 'stagnationtowal', 'mission', 'evapor', 'entail',
In [119]: 
# creating the dataframe

import pandas as pd
words = list(sorted_tokens.keys())
temp = list(sorted_tokens.values())
count, freq = [], []
for i in range(len(temp)):
count.append(len(temp[i][1]))
freq.append(list(temp[i][1]))
In [120]: 
dict_tokens = {'Tokens': words, 'DOC Frequency':count, 'Document ID': freq}

df = pd.DataFrame.from_dict(dict_tokens)
df.head()
Out[120]:
Tokens DOC Frequency Document ID
0 of 1395 [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...
1 the 1391 [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...
2 and 1342 [1, 2, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, ...
3 a 1307 [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...
4 to 1252 [1, 2, 4, 5, 6, 7, 8, 9, 11, 12, 13, 14, 15, 1...
In [121]: 
#stopwords removal
dict_tokens = {'Tokens': words, 'DOC Frequency':count, 'DocumentID': freq}

df = pd.DataFrame.from_dict(dict_tokens).set_index("Tokens")
stopwords = df.head(30)
df.drop(index=df.index[:30], axis=0, inplace=True)
stopwords.to_csv('stopwords.csv')
In [122]: 
# printing tokens
df=df.sort_values('Tokens')
df.to_csv('index.csv')
In [123]: 
# printing tokens
index_df=pd.read_csv("index.csv")
index_df.head()
Out[123]:
Tokens DOC Frequency DocumentID
0 aaaero 1 [1111]
1 aaaeroconf 1 [899]
2 aasu 1 [722]
3 ab 3 [744, 924, 1381]
4 abbott 1 [1340]
In [124]: 
# Stopwords csv
stopwords_df=pd.read_csv("stopwords.csv")
stopwords_df.head(15)
Out[124]:
Tokens DOC Frequency DocumentID
0 of 1395 [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...
1 the 1391 [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...
2 and 1342 [1, 2, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, ...
3 a 1307 [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...
4 to 1252 [1, 2, 4, 5, 6, 7, 8, 9, 11, 12, 13, 14, 15, 1...
5 in 1241 [1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 14, 15...
6 is 1151 [2, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,...
7 for 1145 [1, 2, 3, 4, 5, 6, 9, 10, 13, 14, 15, 16, 17, ...
8 are 1029 [3, 4, 5, 6, 7, 11, 12, 14, 15, 17, 18, 19, 20...
9 with 1010 [1, 3, 4, 6, 7, 9, 11, 12, 13, 14, 15, 16, 17,...
10 on 913 [7, 8, 9, 11, 13, 14, 15, 17, 18, 19, 21, 22, ...
11 by 854 [1, 2, 4, 6, 7, 9, 13, 14, 15, 16, 17, 20, 21,...
12 that 805 [1, 2, 6, 7, 8, 9, 10, 13, 14, 15, 16, 17, 18,...
13 an 796 [1, 2, 8, 9, 10, 11, 14, 15, 16, 17, 19, 21, 2...
14 at 771 [1, 5, 6, 7, 8, 9, 10, 11, 13, 14, 18, 19, 24,...
Query processing
In [125]: 
i=1
punctuations='''()-[]{};:\/,.<>@$^*%&'''
with open('cran/cran.qry','r') as f:
for line in f:
if line[:2]=='.I':
file=open(f'Cran Query\{i}.txt','x')
i+=1
elif line[:2] in ['.T','.B','.A','.W']:
continue
elif line=='':
file.close()
else:
no_punct_line=""
for char in line:
if(char not in punctuations):
no_punct_line+=char
file.write(no_punct_line)
...
In [126]: 
file=open(f'Cran Query\225.txt','w')
file.close()
In [505]: 
stopwords_list
Out[505]:
['of',
'the',
'and',
'a',
'to',
'in',
'is',
'for',
'are',
'with',
'on',
'by',
'that',
'an',
'at',
'be',
'flow',
'result',
'thi',
'as',
'from',
'it',
'which',
'number',
'effect',
'pressur',
'use',
'present',
'j',
'obtain']
Query execution-AND
In [506]: 
output = dict()
for i in range(1,226):
with open(f'Cran Query\{i}.txt', 'r') as f:
mat = []
for line in f:
if word in sorted_tokens:
mat.append(sorted_tokens[word][1])
k = mat[0]
for i in range(1, len(mat)):
k = k.intersection(mat[i])
if len(k)!=0:
output[i]=list(k)
output
Out[506]:
{9: [329, 142, 1263, 625, 1107, 1300, 1204, 983, 666, 1307, 1213],
3: [1040, 185, 1250, 486],
4: [315, 1323, 131],
1: [2,
3,
4,
1029,
8,
9,
522,
13,
525,
15,
527,
1042,
21,
22,
In [531]: 
# QUERY-OR
output = dict()
for i in range(1,226):
with open(f'Cran Query\{i}.txt', 'r') as f:
mat = []
for line in f:
if word in sorted_tokens:
mat.append(sorted_tokens[word][1])
k = mat[0]
for i in range(1, len(mat)):
k = k.union(mat[i])
if len(k)!=0:
output[i]=list(k)
output
Out[531]:
{7: [1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
16,
17,
Query-1
In [507]: 
# Creating a dataframe for the documents of the query-9

query_df=pd.DataFrame(output[1],columns=["docid"])
query_df["qid"]=1
query_df
Out[507]:
docid qid
0 2 1
1 3 1
2 4 1
3 1029 1
4 8 1
... ... ...
211 493 1
212 1008 1
213 500 1
214 1013 1
215 1014 1
216 rows × 2 columns

In [508]: 
# creating a dataframe for the query and relevance scores
import pandas as pd
re_pd=pd.read_csv('cran/cranqrel.csv',names=["qid","docid","rel"])
re_pd
Out[508]:
qid docid rel
0 1 184 2
1 1 29 2
2 1 31 2
3 1 12 3
4 1 51 3
... ... ... ...
1832 225 1062 3
1833 225 1074 3
1834 225 1075 3
1835 225 1213 3
1836 225 1188 -1
1837 rows × 3 columns
In [509]: 
# merging the dataframe for the relevant retives docs
final_df=pd.merge(re_pd, query_df, on=["qid","docid"], how='inner')

final_df=final_df[final_df.rel<3]
final_df
Out[509]:
qid docid rel
0 1 29 2
1 1 31 2
5 1 486 -1
In [510]: 
# defining values for calculating precision recall
relevant_retrived=len(final_df)
total_relevant=len(re_pd[re_pd.qid == 1])
total_retrived=len(query_df)
In [511]: 
print(relevant_retrived)
print(total_relevant)
print(total_retrived)
29
216
In [512]: 
def precision(a,b):
return a/b
def recall(c,d):
return c/d
In [513]: 
precision_9=precision(relevant_retrived,total_retrived)
recall_9=recall(relevant_retrived,total_relevant)
measures_dict={}
measures_dict["Query1"]=[precision_9,recall_9]
In [514]: 
measures_dict
Out[514]:
{'Query1': [0.013888888888888888, 0.10344827586206896]}
QID-3
In [515]: 
def score(a):
query_df=pd.DataFrame(output[a],columns=["docid"])
query_df["qid"]=a
final_df=pd.merge(re_pd, query_df, on=["qid","docid"], how='inner')
final_df=final_df[final_df.rel<3]
relevant_retrived=len(final_df)
print(final_df)
total_relevant=len(re_pd[re_pd.qid == a])
total_retrived=len(query_df)
precision_3=precision(relevant_retrived,total_retrived)
recall_3=recall(relevant_retrived,total_relevant)
measures_dict["Query"+str(a)]=[precision_3,recall_3]
print("Total relevant documents are {}".format(total_relevant))
print("Total relevant retrived documents are {}".format(relevant_retrived))
print("Total retrived documents are {}".format(total_retrived))
return measures_dict
In [516]: 
score(3)
Empty DataFrame
Columns: [qid, docid, rel]
Index: []
Total relevant documents are 9
Total relevant retrived documents are 0
Total retrived documents are 4
Out[516]:
{'Query1': [0.013888888888888888, 0.10344827586206896], 'Query3': [0.0, 0.

0]}
In [517]: 
measures_dict
Out[517]:
{'Query1': [0.013888888888888888, 0.10344827586206896], 'Query3': [0.0, 0.

0]}
QID-15
In [518]: 
score(15)
qid docid rel
0 15 463 1
2 15 497 -1
Out[518]:
{'Query1': [0.013888888888888888, 0.10344827586206896],
'Query3': [0.0, 0.0],
'Query15': [0.0014336917562724014, 0.6666666666666666]}
In [519]: 
measures_dict
Out[519]:
{'Query1': [0.013888888888888888, 0.10344827586206896],
'Query3': [0.0, 0.0],
'Query15': [0.0014336917562724014, 0.6666666666666666]}
QID-71
In [520]: 
score(71)
qid docid rel
0 71 569 1
1 71 571 1
2 71 1355 2
6 71 572 1
Out[520]:
{'Query1': [0.013888888888888888, 0.10344827586206896],
'Query3': [0.0, 0.0],
'Query15': [0.0014336917562724014, 0.6666666666666666],
'Query71': [0.004381161007667032, 0.4444444444444444]}

In [521]: 
measures_dict
Out[521]:
{'Query1': [0.013888888888888888, 0.10344827586206896],
'Query3': [0.0, 0.0],
'Query15': [0.0014336917562724014, 0.6666666666666666],
'Query71': [0.004381161007667032, 0.4444444444444444]}
QID-2
In [522]: 
score(2)
Empty DataFrame
Index: []
Out[522]:
{'Query1': [0.013888888888888888, 0.10344827586206896],
'Query3': [0.0, 0.0],
'Query15': [0.0014336917562724014, 0.6666666666666666],
'Query71': [0.004381161007667032, 0.4444444444444444],
'Query2': [0.0, 0.0]}
In [523]: 
measures_dict
Out[523]:
{'Query1': [0.013888888888888888, 0.10344827586206896],
'Query3': [0.0, 0.0],
'Query15': [0.0014336917562724014, 0.6666666666666666],
'Query71': [0.004381161007667032, 0.4444444444444444],
'Query2': [0.0, 0.0]}
QID-109
In [524]: 
score(109)
qid docid rel
0 109 860 1
1 109 861 1
5 109 766 -1
Out[524]:
{'Query1': [0.013888888888888888, 0.10344827586206896],
'Query3': [0.0, 0.0],
'Query15': [0.0014336917562724014, 0.6666666666666666],
'Query71': [0.004381161007667032, 0.4444444444444444],
'Query2': [0.0, 0.0],
'Query109': [0.0023961661341853034, 0.5]}
In [525]: 
measures_dict
Out[525]:
{'Query1': [0.013888888888888888, 0.10344827586206896],
'Query3': [0.0, 0.0],
'Query15': [0.0014336917562724014, 0.6666666666666666],
'Query71': [0.004381161007667032, 0.4444444444444444],
'Query2': [0.0, 0.0],
'Query109': [0.0023961661341853034, 0.5]}
QID-6
In [526]: 
score(6)
Empty DataFrame
Index: []
Out[526]:
{'Query1': [0.013888888888888888, 0.10344827586206896],
'Query3': [0.0, 0.0],
'Query15': [0.0014336917562724014, 0.6666666666666666],
'Query71': [0.004381161007667032, 0.4444444444444444],
'Query2': [0.0, 0.0],
'Query109': [0.0023961661341853034, 0.5],
'Query6': [0.0, 0.0]}

QID-192
In [527]: 
score(192)
qid docid rel
0 192 733 1
1 192 734 1
2 192 735 1
3 192 736 1
4 192 641 -1
Out[527]:
{'Query1': [0.013888888888888888, 0.10344827586206896],
'Query3': [0.0, 0.0],
'Query15': [0.0014336917562724014, 0.6666666666666666],
'Query71': [0.004381161007667032, 0.4444444444444444],
'Query2': [0.0, 0.0],
'Query109': [0.0023961661341853034, 0.5],
'Query6': [0.0, 0.0],
'Query192': [0.0049504950495049506, 1.0]}
QID-204
In [528]: 
score(204)
Empty DataFrame
Index: []
Out[528]:
{'Query1': [0.013888888888888888, 0.10344827586206896],
'Query3': [0.0, 0.0],
'Query15': [0.0014336917562724014, 0.6666666666666666],
'Query71': [0.004381161007667032, 0.4444444444444444],
'Query2': [0.0, 0.0],
'Query109': [0.0023961661341853034, 0.5],
'Query6': [0.0, 0.0],
'Query192': [0.0049504950495049506, 1.0],
'Query204': [0.0, 0.0]}

In [529]: 
# creating a dataframe with precision and recall values
values_df=pd.DataFrame.from_dict(measures_dict, orient='index',columns=["Precision","Recall
In [530]: 
values_df
Out[530]:
Precision Recall
Query1 0.013889 0.103448
Query3 0.000000 0.000000
Query15 0.001434 0.666667
Query71 0.004381 0.444444
Query2 0.000000 0.000000
Query109 0.002396 0.500000
Query6 0.000000 0.000000
Query192 0.004950 1.000000
Query204 0.000000 0.000000
In [ ]: 
In [ ]: 

AP19110010110 Lab Assignment-2 - Jupyter Notebook

Uploaded by

Document Information

Copyright

Available Formats

Share this document

Share or Embed Document

Sharing Options

Did you find this document useful?

Is this content inappropriate?

Copyright:

Available Formats

AP19110010110 Lab Assignment-2 - Jupyter Notebook

Uploaded by

Copyright:

Available Formats

M.

Sri Phani Bhushan

Splitting the text file into 1400 different text files

Tokenizing and stemming the documents

from nltk.stem.porter import PorterStemmer

for i in range(1, 1401):

dict_keys(['experiment', 'investig', 'of', 'the', 'aerodynam', 'a', 'win

#Sorting the tokens

# creating the dataframe

dict_tokens = {'Tokens': words, 'DOC Frequency':count, 'Document ID': freq}

Tokens DOC Frequency Document ID

0 of 1395 [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...

1 the 1391 [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...

3 a 1307 [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...

4 to 1252 [1, 2, 4, 5, 6, 7, 8, 9, 11, 12, 13, 14, 15, 1...

dict_tokens = {'Tokens': words, 'DOC Frequency':count, 'DocumentID': freq}

Tokens DOC Frequency DocumentID

3 ab 3 [744, 924, 1381]

Tokens DOC Frequency DocumentID

0 of 1395 [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...

1 the 1391 [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...

3 a 1307 [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...

4 to 1252 [1, 2, 4, 5, 6, 7, 8, 9, 11, 12, 13, 14, 15, 1...

5 in 1241 [1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 14, 15...

6 is 1151 [2, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,...

11 by 854 [1, 2, 4, 6, 7, 9, 13, 14, 15, 16, 17, 20, 21,...

14 at 771 [1, 5, 6, 7, 8, 9, 10, 11, 13, 14, 18, 19, 24,...

3: [1040, 185, 1250, 486],

4: [315, 1323, 131],

# Creating a dataframe for the documents of the query-9

... ... ...

216 rows × 2 columns

# creating a dataframe for the query and relevance scores

qid docid rel

... ... ... ...

1832 225 1062 3

1833 225 1074 3

1834 225 1075 3

1835 225 1213 3

1836 225 1188 -1

1837 rows × 3 columns

# merging the dataframe for the relevant retives docs

final_df=pd.merge(re_pd, query_df, on=["qid","docid"], how='inner')

qid docid rel

# defining values for calculating precision recall

{'Query1': [0.013888888888888888, 0.10344827586206896]}

Columns: [qid, docid, rel]

Total relevant documents are 9

Total relevant retrived documents are 0

Total retrived documents are 4

{'Query1': [0.013888888888888888, 0.10344827586206896], 'Query3': [0.0, 0.

{'Query1': [0.013888888888888888, 0.10344827586206896], 'Query3': [0.0, 0.

qid docid rel

Total relevant documents are 3

Total relevant retrived documents are 2

Total retrived documents are 1395

{'Query1': [0.013888888888888888, 0.10344827586206896],

'Query3': [0.0, 0.0],

'Query15': [0.0014336917562724014, 0.6666666666666666]}

{'Query1': [0.013888888888888888, 0.10344827586206896],

'Query3': [0.0, 0.0],

'Query15': [0.0014336917562724014, 0.6666666666666666]}

qid docid rel

Total relevant documents are 9