Professional Documents
Culture Documents
Lab - 2
Lab - 2
import numpy as np
import pandas as pd
from sklearn import preprocessing
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.metrics.pairwise import linear_kernel, cosine_similarity
from sklearn.svm import SVC
In [24]:
df = pd.read_csv("movies_metadata.csv")
df
c:\Users\vijay\anaconda3\lib\site-packages\IPython\core\interactiveshell.py:3444: DtypeWa
rning: Columns (10) have mixed types.Specify dtype option on import or set low_memory=Fal
se.
exec(code_obj, self.user_global_ns, self.user_ns)
Out[24]:
[{'id': 16,
{'id': 10194, 'name':
'name':
0 False 'Toy Story Collection', 30000000 http://toystory.disney.com/toy-story 862 tt0114709
'Animation'},
...
{'id': 35, '...
[{'id': 12,
'name':
1 False NaN 65000000 NaN 8844 tt0113497
'Adventure'},
{'id': 14, '...
[{'id': 10749,
{'id': 119050, 'name':
'name':
2 False 'Grumpy Old Men 0 NaN 15602 tt0113228
'Romance'},
Collect...
{'id': 35, ...
[{'id': 35,
'name':
3 False NaN 16000000 'Comedy'}, NaN 31357 tt0114885
{'id': 18,
'nam...
[{'id': 18,
'name':
45461 False NaN 0 'Drama'}, http://www.imdb.com/title/tt6209470/ 439050 tt6209470
{'id': 10751,
'n...
[{'id': 18,
45462 False NaN 0 'name': NaN 111109 tt2028550
'Drama'}]
[{'id': 28,
'name':
45463 False NaN 0 'Action'}, NaN 67758 tt0303758
{'id': 18,
'nam...
In [22]:
ls = pd.read_csv("links_small.csv")
ls = ls[ls['tmdbId'].notnull()]['tmdbId'].astype('int')
ls
Out[22]:
0 862
1 8844
2 15602
3 31357
4 11862
...
9120 402672
9121 315011
9122 391698
9123 137608
9124 410803
Name: tmdbId, Length: 9112, dtype: int32
In [23]:
df = df.drop([19730, 29503, 35587])
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_20176/3653394375.py in <module>
----> 1 df = df.drop([19730, 29503, 35587])
c:\Users\vijay\anaconda3\lib\site-packages\pandas\util\_decorators.py in wrapper(*args, *
*kwargs)
309 stacklevel=stacklevel,
310 )
--> 311 return func(*args, ** kwargs)
312
313 return wrapper
In [ ]:
df['id'] = df['id'].astype('int')
df
Out[ ]:
[{'id': 16,
{'id': 10194, 'name':
'name':
0 False 'Toy Story Collection', 30000000 http://toystory.disney.com/toy-story 862 tt0114709
'Animation'},
...
{'id': 35, '...
[{'id': 12,
'name':
1 False NaN 65000000 NaN 8844 tt0113497
'Adventure'},
{'id': 14, '...
[{'id': 10749,
{'id': 119050, 'name':
'name':
2 False 'Grumpy Old Men 0 NaN 15602 tt0113228
'Romance'},
Collect...
{'id': 35, ...
[{'id': 35,
'name':
3 False NaN 16000000 'Comedy'}, NaN 31357 tt0114885
{'id': 18,
'nam...
[{'id': 18,
'name':
45461 False NaN 0 'Drama'}, http://www.imdb.com/title/tt6209470/ 439050 tt6209470
{'id': 10751,
'n...
[{'id': 18,
45462 False NaN 0 'name': NaN 111109 tt2028550
'Drama'}]
[{'id': 28,
'name':
45463 False NaN 0 'Action'}, NaN 67758 tt0303758
{'id': 18,
'nam...
In [ ]:
In [ ]:
sdf = df[df['id'].isin(ls)]
sdf
Out[ ]:
[{'id': 16,
{'id': 10194, 'name':
'name':
0 False 'Toy Story Collection', 30000000 http://toystory.disney.com/toy-story 862 tt0114709
'Animation'},
...
{'id': 35, '...
[{'id': 12,
'name':
1 False NaN 65000000 NaN 8844 tt0113497
'Adventure'},
{'id': 14, '...
[{'id': 10749,
{'id': 119050, 'name':
'name':
2 False 'Grumpy Old Men 0 NaN 15602 tt0113228
'Romance'},
Collect...
{'id': 35, ...
[{'id': 35,
'name':
3 False NaN 16000000 NaN 31357 tt0114885
'Comedy'}, {'id':
18, 'nam...
[{'id': 28,
'name':
40224 False NaN 15000000 NaN 315011 tt4262980
'Action'}, {'id':
12, 'nam...
[{'id': 99,
'name':
40503 False NaN 0 http://www.thebeatlesliveproject.com/ 391698 tt2531318
'Documentary'},
{'id': 104...
[{'id': 12,
{'id': 34055, 'name':
'name':
44821 False 'Pokémon Collection', 16000000 http://movies.warnerbros.com/pk3/ 10991 tt0235679
'Adventure'},
'p...
{'id': 14, '...
[{'id': 12,
{'id': 34055, 'name':
'name': http://www.pokemon.com/us/movies/movie-
44826 False 'Pokémon Collection', 0 12600 tt0287635
'Adventure'}, pokemon...
'p...
{'id': 14, '...
[{'id': 35,
'name':
45265 False NaN 0 NaN 265189 tt2121382
45265 False NaN 0 NaN 265189 tt2121382
'Comedy'}, {'id':
adult belongs_to_collection budget genres homepage id imdb_id origi
18, 'nam...
In [ ]:
sdf['tagline']
Out[ ]:
0 NaN
1 Roll the dice and unleash the excitement!
2 Still Yelling. Still Fighting. Still Ready for...
3 Friends are the people who let you be yourself...
4 Just When His World Is Back To Normal... He's ...
...
40224 A god incarnate. A city doomed.
40503 The band you know. The story you don't.
44821 Pokémon: Spell of the Unknown
44826 NaN
45265 NaN
Name: tagline, Length: 9099, dtype: object
In [ ]:
sdf['tagline'] = sdf['tagline'].fillna('')
C:\Users\vijay\AppData\Local\Temp/ipykernel_20176/3220992391.py:1: SettingWithCopyWarning
:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
In [ ]:
sdf['desc'] = sdf['overview'] + sdf['tagline']
sdf['desc'] = sdf['desc'].fillna('')
sdf['desc']
C:\Users\vijay\AppData\Local\Temp/ipykernel_20176/3947116533.py:1: SettingWithCopyWarning
:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
In [ ]:
tf = TfidfVectorizer(analyzer = 'word', ngram_range = (1, 2), min_df = 0, stop_words = '
english')
tfidf_mat= tf.fit_transform(sdf['desc'])
tfidf_mat.data[:50]
Out[ ]:
array([0.12185153, 0.13497681, 0.13497681, 0.13497681, 0.13497681,
0.13497681, 0.13497681, 0.13497681, 0.13497681, 0.13497681,
0.13497681, 0.13497681, 0.13497681, 0.13497681, 0.13497681,
0.13497681, 0.13497681, 0.13497681, 0.13497681, 0.12916878,
0.13497681, 0.13497681, 0.13497681, 0.13497681, 0.13497681,
0.11703178, 0.13497681, 0.12504792, 0.12916878, 0.13497681,
0.09667133, 0.10519013, 0.07194097, 0.07411981, 0.09136596,
0.07496267, 0.09823554, 0.0939776 , 0.09769493, 0.0681086 ,
0.06719674, 0.0807707 , 0.09482045, 0.0832527 , 0.12916878,
0.34029557, 0.07597422, 0.08975478, 0.08404871, 0.09279959])
In [ ]:
cosine_sim = linear_kernel(tfidf_mat, tfidf_mat)
cosine_sim[0]
Out[ ]:
array([1. , 0.00680476, 0. , ..., 0. , 0.00344913,
0. ])
In [ ]:
from sklearn.feature_extraction.text import CountVectorizer
text = sdf['desc']
vectorizer = CountVectorizer()
for i in text[:100]:
if i != " ":
vectorizer.fit([i])
vector = vectorizer.transform([i])
print(vector.toarray())
[[1 1 1 3 1 1 1 1 3 1 1 1 1 1 1 1 1 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 1
1 1 1 3]]
[[1 1 2 1 2 3 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 3 1 1 1 1 1 1 1 1 1 1 1 1 1 1
1 1 1 1 1 1 1 1 1 5 1 1 1 1 2 1 1 1 1 1 1 1 1]]
[[1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1
1 1 1 3 1 3 1 1 4 1 1 1 1 1 1 1]]
[[1 4 2 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 2 1 2 1 1 1 1 1 2 1 1 1 1 1 1 1
1 3 1 3 1 1 1 1 1 2 1]]
[[2 1 1 1 1 1 1 1 1 1 1 3 1 1 1 3 4 1 1 2 2 1 1 1 1 1 1 3 1 1 1 1 1 1 1 1
1 1 4 3 1 2 1 1 1 2 1 1 1 1]]
[[1 3 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
1 1 3 1 1 1 1 1 1 1 1 1 1 1 1]]
[[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 1 1
1 1]]
[[1 1 1 1 2 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 2 2 1 1 1 1 1
1 1 4 1 2 4 1 1 2 1 1]]
[[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 3 2 1 1 1 1 1 1 1 1 1 3 1 1 1 1 2 1 1 1 1
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 3 1 1 3 1 2]]
[[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 3 1 1 1 1 1 1 1 4 1 1 1 1]]
[[2 1 1 1 1 1 1 2 1 1 1 1 2 3 1 1 1 1 1 1 1 4 1 2 2 1 1 1 2 1 1 4 1 1 2 1
1 1 1 1 2]]
[[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 3 2 1 1 1 1 1 1]]
[[1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 2]]
[[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 3 3 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
1 1 1 5 1 1 1 1 1 1 1 1 2 1]]
[[1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 7 4 1 1 1 1 1 1 1 1 1 1
4 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 6 1 1 1 1 2 1 1 1 1 1 1 1 2]]
[[1 1 1 1 1 1 1 1 1 1 1 1 1 1 3 1 1 1]]
[[1 1 1 1 1 1 1 1 1 1 1 1 1 1 3 1 1 1]]
[[2 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 2]]
[[1 1 2 1 1 2 1 1 1 2 1 1 2 1 1 2 1 3 1 2 2 1 1 1 1 1 2 2 2 1 1 1 1 1 1 1
1 1 1 1 5 1 1 1 1 1 1 1 1 1 1]]
[[1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 4 1 1 1 1 1 1 1 1 1
8 1 1 2 1 1 1 2 1 1 1 1 1]]
[[1 2 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1]]
[[1 1 1 1 1 1 1 2 1 1 1 1 1 1 3 1 1 1 1 1 1 1 2 2 2 1 1 1 2 1 1 1 1 1 1 1
1 1 2 1 1 1 1 1 2 1 1 1 1 1 1 1 1 2 3 1 1 1 2 1]]
[[1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 2 1 1 1 2 1 1 2 1 1 1 1 2 1 1 2 1 1 1 2
1 1 1 1]]
[[2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 2 1 2 1 2 1 1
1 2 1 2 1 1 1 1 5 2 1 1 1 3 1 1 1 1 1 1 1 1 5 1 5 1 1 1 1]]
[[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 2 1 1 1 1 1 1 1 1 2
1 1 1 1 1 1 1 2 1 1 1 1 2 1 1 1 1 1 1]]
[[1 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1
1 1 2]]
[[1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 2 1 1 1 1 1 1 2 1 3 1]]
[[1 1 1 2 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1
1 1 1 1 3 1 1 1 1 1 1 1 1 1]]
[[1 1 1 2 4 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 2 1 1 1 3 1 1 1 1 1 1 1 1 1 3
1 1 1 1 1 1 1 1 2 1 1 4 1 1 1 2 2 1 1]]
[[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]]
[[1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 3 1 3 3 1 1 2]]
[[1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 2 1 2 1 1 1
1 1 2 1 1 1 1 1 1 1 1 4 1 1 1 1 1 5 1 1 3 1 1 1 1 1 1]]
[[ 1 1 1 1 1 3 1 1 1 2 1 2 1 1 1 1 1 1 1 1 1 1 1 1
1 1 2 1 1 1 3 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 5
1 1 1 1 1 1 1 1 1 2 1 1 1 11 1 1 1 5 1 1 1 2 1 1
1 1 1]]
[[1 2 1 3 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 4 1 1 1 1 2 1 1 1 1 1 2 1 1 1 1
3 4 1 1 1 1 2 3 7 1 1 2 1 1 2 2 1]]
[[1 2 4 2 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2
1 1 2 1 1 1 3 1 1 2 1 1 1 4 1 4 3 1 1 1 1 3 1 1 3 1]]
[[1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 3 2 1 1 1 1 1 1 2 1 1 1 1 1 1 1
1 1 1 1 1 1 1 1 2 1 1]]
[[2 1 1 1 1 1 2 1 1 4 1 1 2 1 1 1 1 1 1 1 1 1 1 1 2 1 2 1 1 4 1 1 1 2 1 1
1 1 1 1 3 4 1 1 1 1 1 1 1 1 1 1 1 2 2 1 1 1 1 1 7 1 1 2 1 2 4 2 1 1 1 1
1 2 1 1 2 2 1]]
[[2 2 4 1 1 1 1 1 1 1 2 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1
1 1]]
[[1 2 1 1 1 1 1 3 1 1 1 1 1 1 1 1 1 1 1 1 3 1 1 1 1 1 1 1 1 3 1 1 1 1 1 1
1 1 1 1 1 1 1 1 1 1 1 1 1 1 3 1 1 1 1 1 2 1 1 1 1 1 1 1 1 2 1]]
[[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]]
[[1 1 1 1 1 1 1 1 2 1 1]]
[[1 1 1 3 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 4 1 1 2 1 1 1 2 1 1
1 1 1 1 1 1 1 6 1 1 2 1 1 1 1]]
[[2 8 1 1 1 1 1 1 1 1 2 2 1 1 1 1 1 1 1 3 1 2 7 1 4 3 1 1 6 1 2 1 3 1 1 1
6 1 1 1 4 1 1 2 1 1 2 2 1 1 1 1 1 1 1 1 1 1 2 1 1 1 9 1 6 1 1 1 1 1 1 1
2 1 1]]
[[1 1 1 1 1 2 1 1 1 1 1 1 1 2 1 2 1 1 1 2 2 1 1 1 3 1 1 1 2 1 1 1 1 1 1 1
2 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 3 1 1 2 1 1 1 1 1 1 1 1]]
[[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
1 1 1 1 1]]
[[5 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 3 2 1 1 1 1 1 1 1 5 2 3 2 2 1 2 1 1
1 5 1 1 1 1 1 1 1 1 2 1 1 1 1 1 2 2 2 1 1 1 1 1 1 1 1]]
[[1 1 2 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 2 1 2 1 1 1 1 1 2 1 1 1
1 1 1 1 3 1 2 1 1 1 6 1 4 1 1 1 1 1 1 1 1]]
[[1 1 2 1 2 1 1 1 1 1 1 3 1 1 1 2 1 1 1 1 1 3 1 3 2 1 1 1 2 1 1 1 1 1 1 1
1 1 1 1 2 4 1 1 1 1 1 1 1]]
[[1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 2 1 1]]
[[1 1 3 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 1 1 1 1 1 1 1 1 2 1
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 1 1 2 1 2]]
[[1 1 3 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 2 1 1 3 3 1 1 3 1 1 1 2 1 1 1 1
1 1 1 1 1 1 1 1 3 2 1 3 1 2 1 1 1]]
[[1 2 2 3 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 2 1 2 1 1 1 1 1 1 1 1 1
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 3 1 4 1 5 1 1 1 1 1 1 1 1]]
[[1 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 2 1 2 1 1 1 2 1 1 1 1 1
1 1 1 1 1 1 1 1 1 3 1 2 2 2 1 1 1 1]]
[[1 1 1 3 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 4 1 2 1 1 1 1 1
1 1 1 1 1 1 1 2 1 1 1 2 1 1 3 1 1 1 5 1 1 1 1 1 1 1 1 1]]
[[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 5 1 1 2 1 1 1 1 1 2 1 1 2 1 1 1 1 1
1 1 1 3 1 2 3 1 1]]
1 1 1 3 1 2 3 1 1]]
[[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 3 1 1 1]]
[[1 1 1 1 1 3 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 3 1 1 4 1 4 1 1 1 1 1 1 1 1
4 1 1 1 1 2 1 3 1 1 1 1 1 1 1 1 9 1 5 1 1 1 1 1 1 1 2 1 1 2 1]]
[[1 3 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1]]
[[1 2 1 1 1 1 1 1 2 1 2 1 1 1 2 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2
1 1 1]]
[[1 1 2 1 1 1 2 1 1 1 1 1 1 1 1 4 1 2 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1
1 1 1 1 1 1 1 1 1 1 1 6 1 1 1 1 1 1 1]]
[[1 1 1 2 4 1 2 1 3 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 2 1 1 2 1 1 1 1 1 1 1 1
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 4 1 1 5 1 1 1 1 2 1 3 1 3]]
[[1 1 2 4 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 2 1
1 1 1 1 1 2 1 1 1 1 2 2 2 1 4 1 1 1 1 1 1 1 1 1]]
[[1 1 4 4 1 1 1 1 4 2 1 1 1 1 1 1 4 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
3 1 1 1 1 2 2 1 6 2 1 2 2 1 2 1 1 2 1]]
[[1 2 1 1 1 1 1 1 1 1 1 1 2 1 1 2 3 1 1 1 2 2 1 2 1 1 1 1 1 1 1 1 1 1 1 1
1 1 1 1 2 3 1 3 2 1 1 1 2]]
[[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1]]
[[1 3 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1]]
[[1 1 1 3 1 1 1 3 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1
2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 5 1 3 1 3 1 1 1 1 1]]
[[1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]]
[[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 2 2 1 1 1 1 1 1 1 1 1 1
1 1 3 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 7 2 2 2 1 1 1 1 1 1 1 1]]
[[3 1 1 1 1 2 1 1 1 1 1 1 1 1 3 1 1 1 1 1 1 1 3 1 1 1 1 1 1 1 1 1 1 1 2 1
1 1 1 1 1 1 1 3 1 1 1 2]]
[[1 1 1 1 1 1 1 1 1 1 1 2 1 1 2 1 1 2 1 1 1 1 2 1 2 1 1 1 1 1 1 1 1 1 1 1
1 1 1 1 1 1 1 2 1 1]]
[[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1
1 1 1 1 1 1 2 1 1 1 1 1 1 1 1]]
[[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 3 1 1 1 1 1 5
5 1 1 1 1 1]]
[[1 5 1 1 1 1 4 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 3 1 2 1 2 4 1 2 1
1 1 1 1 1 1 1 1 4 3 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 4 1 1 2 1 1 1 1 1 1 2
1 3 2 1]]
[[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 4 1 1 1 1 1 1 1 1 1 1 1 1 1 1
1 1 1 1 1 1 2 1 1 1 1 4 1 1 1 1 1 1 2 3 1 1 1 3 1 1 1 1 1 1 1]]
[[1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 4 1 1]]
[[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 3 1 2 1]]
[[1 1 4 4 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 3 1 1 1 1
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]]
[[1 1 1 1 2 1 1 1 1 1 2 2 1 1 1 1 3 1 1 1 1 1 1 1 1 1 1 1 3 1 1 3 1 1 1 2
1 1 1 2 1 2 1 1 1 1 1 4 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 6 1 2
1 1 2 2 1 3 1 1 1 1 1 2 1 1 1 1 1]]
[[1 1 1 4 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 4 1 1 1 1 1 1 1 1 1 1 3 1 1 2 1
1 1 1 1 1 1 1 1 1 6 1 3 1 1 1 1 1 1 1 1 1 1 2 7 1 1 1 1 2 1 1 1 1 1 1 1
1 2 1 2 1 1 1 1]]
[[1 3 3 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 2 1 1 1 1 1 1
1 1 1 1 1 1 1 1 1 1 1 1 4 1 1 2 1 1 1 1 3 1 1]]
[[1 1 1 1 1 1 1 1 1 1 1 1]]
[[1 1 1 1 2 1 1 2 1 1 1 2 2 1 1 1 1 1 1 1 1 1 2 3 1 1 3 1 1 1 1 1 1 1 2 1
1 1 1 2 1 1 1 1 1 3 1 1 1 1 1 2 1]]
[[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 3 1 4 1 2 1 1 1 1 1 1 1
5 1 3 1 1 2 1 1 1]]
[[1 1 1 1 1 1 1 1 2 1 1 1 2 1 2 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1]]
[[2 1 1 1 1 1 1 2 1 2 1 1 1 1 1 1 1 1]]
[[2 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 2 1 1 1 1 4 1 1 1 1 1 1 1 1 1 1 1 2 1
4 1 1 1 1 1 1 1]]
[[1 2 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1]]
[[1 1 2 1 1 1 1 1 3 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1
1 1 1 1 1 1 4 1 2 1 1 1 1 1 1 1 1 2]]
[[ 1 1 1 1 1 1 1 5 1 1 2 1 1 1 1 1 1 1 1 1 1 1 2 1
1 2 1 1 1 1 2 1 1 1 2 1 1 2 1 2 1 1 1 1 1 1 1 1
1 1 2 5 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
1 10 1 2 3 1 1 1 1]]
[[1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
1 1 1 1 1 1 1 1 4 1 1 1 1 1 1]]
[[1 1 4 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 2 1 1 1 1 1
1 1 1 3 1 1 1 1 1 1 1 1 1 1 1 1 1 5 1 1 2 1 1 1 1 1 1 1]]
[[1 1 4 5 1 1 1 1 1 2 1 1 1 1 1 3 1 1 1 1 1 4 3 1 1 2 1 1 4 1 1 2 1 1 1 1
1 1 1 1 3 1 1 1 1 1 1 1 1 1 1 2 2 1 1 1 1 2 1 1 6 1 1 2 2 4 1 1 1 1 2 1
1 1 1 1 1]]
[[1 1 2 1 1 1 1 1 1 2 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 2]]
[[1 1 2 1 1 1 1 1 1 2 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 2]]
[[1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 3 1 1 1 1 1 1 1 1 1 1
1 1 1 1 1 1 1 1 1 1 3 1 1 1 1 1 1]]
[[3 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]]
[[1 1 1 1 1 1 1 2 3 1 1 3 1 1 1 1 1 1 1 2 1 1 1 2 3 5 3 1 3 1 1 1 1 1 1 2
1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 3 5 1 1 1 2 2 2 1 1]]
[[1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 2 3 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
1 1 1 3 2 1 1 1 1 1 1 1 1 1]]
[[1 1 1 1 1 1 1 1 2 1 1 2 1 2 1 1 2 1 1 1 1 1 4 1 1 1]]
[[1 1 3 1 1 1 1 1 1 1 1 1 2 2 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1
1 1 1 1 1 2 2 1 1 1 1 1 1 1 1 1 5 1 2 2 1 1 1 1 1 1 1 1]]
[[4 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]]
In [ ]:
sdf = sdf.reset_index()
titles = sdf['title']
indices = pd.Series(sdf.index, index=sdf['title'])
In [ ]:
def get_recommendations(title):
idx = indices[title]
sim_scores = list(enumerate(cosine_sim[idx]))
sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
sim_scores = sim_scores[1:31]
movie_indices = [i[0] for i in sim_scores]
return titles.iloc[movie_indices]
In [25]:
get_recommendations('The Shawshank Redemption')
Out[25]:
8206 Les Misérables
505 Chasers
5650 Sherlock, Jr.
5690 The Blue Dahlia
5983 The Woodsman
4051 Breakout
2729 Coogan's Bluff
408 The Getaway
7409 A Prophet
6193 The 40 Year Old Virgin
5855 Fortress
3766 Brubaker
3520 Ariel
1026 Cool Hand Luke
578 Some Folks Call It a Sling Blade
1852 Runaway Train
4358 Brother
4670 You Only Live Once
1303 Alive and Kicking
2547 Man on the Moon
3443 Uncommon Valor
3074 Silent Fall
2905 American Gigolo
4313 The Weight of Water
8522 Starred Up
8935 Southpaw
6134 The Longest Yard
7535 Toy Story 3
2841 Empire of Passion
916 Raw Deal
Name: title, dtype: object
In [ ]: