You are on page 1of 9

In [20]:

import numpy as np
import pandas as pd
from sklearn import preprocessing
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.metrics.pairwise import linear_kernel, cosine_similarity
from sklearn.svm import SVC

In [24]:

df = pd.read_csv("movies_metadata.csv")
df

c:\Users\vijay\anaconda3\lib\site-packages\IPython\core\interactiveshell.py:3444: DtypeWa
rning: Columns (10) have mixed types.Specify dtype option on import or set low_memory=Fal
se.
exec(code_obj, self.user_global_ns, self.user_ns)
Out[24]:

adult belongs_to_collection budget genres homepage id imdb_id original_langua

[{'id': 16,
{'id': 10194, 'name':
'name':
0 False 'Toy Story Collection', 30000000 http://toystory.disney.com/toy-story 862 tt0114709
'Animation'},
...
{'id': 35, '...

[{'id': 12,
'name':
1 False NaN 65000000 NaN 8844 tt0113497
'Adventure'},
{'id': 14, '...

[{'id': 10749,
{'id': 119050, 'name':
'name':
2 False 'Grumpy Old Men 0 NaN 15602 tt0113228
'Romance'},
Collect...
{'id': 35, ...

[{'id': 35,
'name':
3 False NaN 16000000 'Comedy'}, NaN 31357 tt0114885
{'id': 18,
'nam...

{'id': 96871, 'name': [{'id': 35,


4 False 'Father of the Bride 0 'name': NaN 11862 tt0113041
Col... 'Comedy'}]

... ... ... ... ... ... ... ...

[{'id': 18,
'name':
45461 False NaN 0 'Drama'}, http://www.imdb.com/title/tt6209470/ 439050 tt6209470
{'id': 10751,
'n...

[{'id': 18,
45462 False NaN 0 'name': NaN 111109 tt2028550
'Drama'}]

[{'id': 28,
'name':
45463 False NaN 0 'Action'}, NaN 67758 tt0303758
{'id': 18,
'nam...

45464 False NaN 0 [] NaN 227506 tt0008536


adult belongs_to_collection budget genres homepage id imdb_id original_langua

45465 False NaN 0 [] NaN 461257 tt6980792

45466 rows × 24 columns

In [22]:
ls = pd.read_csv("links_small.csv")
ls = ls[ls['tmdbId'].notnull()]['tmdbId'].astype('int')
ls
Out[22]:
0 862
1 8844
2 15602
3 31357
4 11862
...
9120 402672
9121 315011
9122 391698
9123 137608
9124 410803
Name: tmdbId, Length: 9112, dtype: int32

In [23]:
df = df.drop([19730, 29503, 35587])

---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_20176/3653394375.py in <module>
----> 1 df = df.drop([19730, 29503, 35587])

c:\Users\vijay\anaconda3\lib\site-packages\pandas\util\_decorators.py in wrapper(*args, *
*kwargs)
309 stacklevel=stacklevel,
310 )
--> 311 return func(*args, ** kwargs)
312
313 return wrapper

c:\Users\vijay\anaconda3\lib\site-packages\pandas\core\frame.py in drop(self, labels, axi


s, index, columns, level, inplace, errors)
4904 weight 1.0 0.8
4905 """
-> 4906 return super().drop(
4907 labels=labels,
4908 axis=axis,

c:\Users\vijay\anaconda3\lib\site-packages\pandas\core\generic.py in drop(self, labels, a


xis, index, columns, level, inplace, errors)
4148 for axis, labels in axes.items():
4149 if labels is not None:
-> 4150 obj = obj._drop_axis(labels, axis, level=level, errors=errors)
4151
4152 if inplace:

c:\Users\vijay\anaconda3\lib\site-packages\pandas\core\generic.py in _drop_axis(self, lab


els, axis, level, errors)
4183 new_axis = axis.drop(labels, level=level, errors=errors)
4184 else:
-> 4185 new_axis = axis.drop(labels, errors=errors)
4186 result = self.reindex(** {axis_name: new_axis})
4187

c:\Users\vijay\anaconda3\lib\site-packages\pandas\core\indexes\base.py in drop(self, labe


ls, errors)
ls, errors)
6015 if mask.any():
6016 if errors != "ignore":
-> 6017 raise KeyError(f"{labels[mask]} not found in axis")
6018 indexer = indexer[~mask]
6019 return self.delete(indexer)

KeyError: '[19730 29503 35587] not found in axis'

In [ ]:
df['id'] = df['id'].astype('int')
df
Out[ ]:

adult belongs_to_collection budget genres homepage id imdb_id original_langua

[{'id': 16,
{'id': 10194, 'name':
'name':
0 False 'Toy Story Collection', 30000000 http://toystory.disney.com/toy-story 862 tt0114709
'Animation'},
...
{'id': 35, '...

[{'id': 12,
'name':
1 False NaN 65000000 NaN 8844 tt0113497
'Adventure'},
{'id': 14, '...

[{'id': 10749,
{'id': 119050, 'name':
'name':
2 False 'Grumpy Old Men 0 NaN 15602 tt0113228
'Romance'},
Collect...
{'id': 35, ...

[{'id': 35,
'name':
3 False NaN 16000000 'Comedy'}, NaN 31357 tt0114885
{'id': 18,
'nam...

{'id': 96871, 'name': [{'id': 35,


4 False 'Father of the Bride 0 'name': NaN 11862 tt0113041
Col... 'Comedy'}]

... ... ... ... ... ... ... ...

[{'id': 18,
'name':
45461 False NaN 0 'Drama'}, http://www.imdb.com/title/tt6209470/ 439050 tt6209470
{'id': 10751,
'n...

[{'id': 18,
45462 False NaN 0 'name': NaN 111109 tt2028550
'Drama'}]

[{'id': 28,
'name':
45463 False NaN 0 'Action'}, NaN 67758 tt0303758
{'id': 18,
'nam...

45464 False NaN 0 [] NaN 227506 tt0008536

45465 False NaN 0 [] NaN 461257 tt6980792

45463 rows × 24 columns

In [ ]:
In [ ]:
sdf = df[df['id'].isin(ls)]
sdf
Out[ ]:

adult belongs_to_collection budget genres homepage id imdb_id origi

[{'id': 16,
{'id': 10194, 'name':
'name':
0 False 'Toy Story Collection', 30000000 http://toystory.disney.com/toy-story 862 tt0114709
'Animation'},
...
{'id': 35, '...

[{'id': 12,
'name':
1 False NaN 65000000 NaN 8844 tt0113497
'Adventure'},
{'id': 14, '...

[{'id': 10749,
{'id': 119050, 'name':
'name':
2 False 'Grumpy Old Men 0 NaN 15602 tt0113228
'Romance'},
Collect...
{'id': 35, ...

[{'id': 35,
'name':
3 False NaN 16000000 NaN 31357 tt0114885
'Comedy'}, {'id':
18, 'nam...

{'id': 96871, 'name': [{'id': 35,


4 False 'Father of the Bride 0 'name': NaN 11862 tt0113041
Col... 'Comedy'}]

... ... ... ... ... ... ... ...

[{'id': 28,
'name':
40224 False NaN 15000000 NaN 315011 tt4262980
'Action'}, {'id':
12, 'nam...

[{'id': 99,
'name':
40503 False NaN 0 http://www.thebeatlesliveproject.com/ 391698 tt2531318
'Documentary'},
{'id': 104...

[{'id': 12,
{'id': 34055, 'name':
'name':
44821 False 'Pokémon Collection', 16000000 http://movies.warnerbros.com/pk3/ 10991 tt0235679
'Adventure'},
'p...
{'id': 14, '...

[{'id': 12,
{'id': 34055, 'name':
'name': http://www.pokemon.com/us/movies/movie-
44826 False 'Pokémon Collection', 0 12600 tt0287635
'Adventure'}, pokemon...
'p...
{'id': 14, '...

[{'id': 35,
'name':
45265 False NaN 0 NaN 265189 tt2121382
45265 False NaN 0 NaN 265189 tt2121382
'Comedy'}, {'id':
adult belongs_to_collection budget genres homepage id imdb_id origi
18, 'nam...

9099 rows × 24 columns

In [ ]:
sdf['tagline']
Out[ ]:
0 NaN
1 Roll the dice and unleash the excitement!
2 Still Yelling. Still Fighting. Still Ready for...
3 Friends are the people who let you be yourself...
4 Just When His World Is Back To Normal... He's ...
...
40224 A god incarnate. A city doomed.
40503 The band you know. The story you don't.
44821 Pokémon: Spell of the Unknown
44826 NaN
45265 NaN
Name: tagline, Length: 9099, dtype: object

In [ ]:
sdf['tagline'] = sdf['tagline'].fillna('')

C:\Users\vijay\AppData\Local\Temp/ipykernel_20176/3220992391.py:1: SettingWithCopyWarning
:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_g


uide/indexing.html#returning-a-view-versus-a-copy
sdf['tagline'] = sdf['tagline'].fillna('')

In [ ]:
sdf['desc'] = sdf['overview'] + sdf['tagline']
sdf['desc'] = sdf['desc'].fillna('')
sdf['desc']

C:\Users\vijay\AppData\Local\Temp/ipykernel_20176/3947116533.py:1: SettingWithCopyWarning
:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_g


uide/indexing.html#returning-a-view-versus-a-copy
sdf['desc'] = sdf['overview'] + sdf['tagline']
C:\Users\vijay\AppData\Local\Temp/ipykernel_20176/3947116533.py:2: SettingWithCopyWarning
:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_g


uide/indexing.html#returning-a-view-versus-a-copy
sdf['desc'] = sdf['desc'].fillna('')
Out[ ]:
0 Led by Woody, Andy's toys live happily in his ...
1 When siblings Judy and Peter discover an encha...
2 A family wedding reignites the ancient feud be...
3 Cheated on, mistreated and stepped on, the wom...
4 Just when George Banks has recovered from his ...
...
40224 From the mind behind Evangelion comes a hit la...
40503 The band stormed Europe in 1963, and, in 1964,...
44821 When Molly Hale's sadness of her father's disa...
44821 When Molly Hale's sadness of her father's disa...
44826 All your favorite Pokémon characters are back,...
45265 While holidaying in the French Alps, a Swedish...
Name: desc, Length: 9099, dtype: object

In [ ]:
tf = TfidfVectorizer(analyzer = 'word', ngram_range = (1, 2), min_df = 0, stop_words = '
english')
tfidf_mat= tf.fit_transform(sdf['desc'])
tfidf_mat.data[:50]
Out[ ]:
array([0.12185153, 0.13497681, 0.13497681, 0.13497681, 0.13497681,
0.13497681, 0.13497681, 0.13497681, 0.13497681, 0.13497681,
0.13497681, 0.13497681, 0.13497681, 0.13497681, 0.13497681,
0.13497681, 0.13497681, 0.13497681, 0.13497681, 0.12916878,
0.13497681, 0.13497681, 0.13497681, 0.13497681, 0.13497681,
0.11703178, 0.13497681, 0.12504792, 0.12916878, 0.13497681,
0.09667133, 0.10519013, 0.07194097, 0.07411981, 0.09136596,
0.07496267, 0.09823554, 0.0939776 , 0.09769493, 0.0681086 ,
0.06719674, 0.0807707 , 0.09482045, 0.0832527 , 0.12916878,
0.34029557, 0.07597422, 0.08975478, 0.08404871, 0.09279959])

In [ ]:
cosine_sim = linear_kernel(tfidf_mat, tfidf_mat)
cosine_sim[0]
Out[ ]:
array([1. , 0.00680476, 0. , ..., 0. , 0.00344913,
0. ])

In [ ]:
from sklearn.feature_extraction.text import CountVectorizer
text = sdf['desc']
vectorizer = CountVectorizer()
for i in text[:100]:
if i != " ":
vectorizer.fit([i])
vector = vectorizer.transform([i])
print(vector.toarray())

[[1 1 1 3 1 1 1 1 3 1 1 1 1 1 1 1 1 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 1
1 1 1 3]]
[[1 1 2 1 2 3 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 3 1 1 1 1 1 1 1 1 1 1 1 1 1 1
1 1 1 1 1 1 1 1 1 5 1 1 1 1 2 1 1 1 1 1 1 1 1]]
[[1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1
1 1 1 3 1 3 1 1 4 1 1 1 1 1 1 1]]
[[1 4 2 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 2 1 2 1 1 1 1 1 2 1 1 1 1 1 1 1
1 3 1 3 1 1 1 1 1 2 1]]
[[2 1 1 1 1 1 1 1 1 1 1 3 1 1 1 3 4 1 1 2 2 1 1 1 1 1 1 3 1 1 1 1 1 1 1 1
1 1 4 3 1 2 1 1 1 2 1 1 1 1]]
[[1 3 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
1 1 3 1 1 1 1 1 1 1 1 1 1 1 1]]
[[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 1 1
1 1]]
[[1 1 1 1 2 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 2 2 1 1 1 1 1
1 1 4 1 2 4 1 1 2 1 1]]
[[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 3 2 1 1 1 1 1 1 1 1 1 3 1 1 1 1 2 1 1 1 1
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 3 1 1 3 1 2]]
[[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 3 1 1 1 1 1 1 1 4 1 1 1 1]]
[[2 1 1 1 1 1 1 2 1 1 1 1 2 3 1 1 1 1 1 1 1 4 1 2 2 1 1 1 2 1 1 4 1 1 2 1
1 1 1 1 2]]
[[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 3 2 1 1 1 1 1 1]]
[[1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 2]]
[[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 3 3 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
1 1 1 5 1 1 1 1 1 1 1 1 2 1]]
[[1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 7 4 1 1 1 1 1 1 1 1 1 1
4 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 6 1 1 1 1 2 1 1 1 1 1 1 1 2]]
[[1 1 1 1 1 1 1 1 1 1 1 1 1 1 3 1 1 1]]
[[1 1 1 1 1 1 1 1 1 1 1 1 1 1 3 1 1 1]]
[[2 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 2]]
[[1 1 2 1 1 2 1 1 1 2 1 1 2 1 1 2 1 3 1 2 2 1 1 1 1 1 2 2 2 1 1 1 1 1 1 1
1 1 1 1 5 1 1 1 1 1 1 1 1 1 1]]
[[1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 4 1 1 1 1 1 1 1 1 1
8 1 1 2 1 1 1 2 1 1 1 1 1]]
[[1 2 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1]]
[[1 1 1 1 1 1 1 2 1 1 1 1 1 1 3 1 1 1 1 1 1 1 2 2 2 1 1 1 2 1 1 1 1 1 1 1
1 1 2 1 1 1 1 1 2 1 1 1 1 1 1 1 1 2 3 1 1 1 2 1]]
[[1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 2 1 1 1 2 1 1 2 1 1 1 1 2 1 1 2 1 1 1 2
1 1 1 1]]
[[2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 2 1 2 1 2 1 1
1 2 1 2 1 1 1 1 5 2 1 1 1 3 1 1 1 1 1 1 1 1 5 1 5 1 1 1 1]]
[[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 2 1 1 1 1 1 1 1 1 2
1 1 1 1 1 1 1 2 1 1 1 1 2 1 1 1 1 1 1]]
[[1 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1
1 1 2]]
[[1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 2 1 1 1 1 1 1 2 1 3 1]]
[[1 1 1 2 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1
1 1 1 1 3 1 1 1 1 1 1 1 1 1]]
[[1 1 1 2 4 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 2 1 1 1 3 1 1 1 1 1 1 1 1 1 3
1 1 1 1 1 1 1 1 2 1 1 4 1 1 1 2 2 1 1]]
[[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]]
[[1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 3 1 3 3 1 1 2]]
[[1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 2 1 2 1 1 1
1 1 2 1 1 1 1 1 1 1 1 4 1 1 1 1 1 5 1 1 3 1 1 1 1 1 1]]
[[ 1 1 1 1 1 3 1 1 1 2 1 2 1 1 1 1 1 1 1 1 1 1 1 1
1 1 2 1 1 1 3 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 5
1 1 1 1 1 1 1 1 1 2 1 1 1 11 1 1 1 5 1 1 1 2 1 1
1 1 1]]
[[1 2 1 3 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 4 1 1 1 1 2 1 1 1 1 1 2 1 1 1 1
3 4 1 1 1 1 2 3 7 1 1 2 1 1 2 2 1]]
[[1 2 4 2 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2
1 1 2 1 1 1 3 1 1 2 1 1 1 4 1 4 3 1 1 1 1 3 1 1 3 1]]
[[1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 3 2 1 1 1 1 1 1 2 1 1 1 1 1 1 1
1 1 1 1 1 1 1 1 2 1 1]]
[[2 1 1 1 1 1 2 1 1 4 1 1 2 1 1 1 1 1 1 1 1 1 1 1 2 1 2 1 1 4 1 1 1 2 1 1
1 1 1 1 3 4 1 1 1 1 1 1 1 1 1 1 1 2 2 1 1 1 1 1 7 1 1 2 1 2 4 2 1 1 1 1
1 2 1 1 2 2 1]]
[[2 2 4 1 1 1 1 1 1 1 2 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1
1 1]]
[[1 2 1 1 1 1 1 3 1 1 1 1 1 1 1 1 1 1 1 1 3 1 1 1 1 1 1 1 1 3 1 1 1 1 1 1
1 1 1 1 1 1 1 1 1 1 1 1 1 1 3 1 1 1 1 1 2 1 1 1 1 1 1 1 1 2 1]]
[[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]]
[[1 1 1 1 1 1 1 1 2 1 1]]
[[1 1 1 3 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 4 1 1 2 1 1 1 2 1 1
1 1 1 1 1 1 1 6 1 1 2 1 1 1 1]]
[[2 8 1 1 1 1 1 1 1 1 2 2 1 1 1 1 1 1 1 3 1 2 7 1 4 3 1 1 6 1 2 1 3 1 1 1
6 1 1 1 4 1 1 2 1 1 2 2 1 1 1 1 1 1 1 1 1 1 2 1 1 1 9 1 6 1 1 1 1 1 1 1
2 1 1]]
[[1 1 1 1 1 2 1 1 1 1 1 1 1 2 1 2 1 1 1 2 2 1 1 1 3 1 1 1 2 1 1 1 1 1 1 1
2 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 3 1 1 2 1 1 1 1 1 1 1 1]]
[[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
1 1 1 1 1]]
[[5 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 3 2 1 1 1 1 1 1 1 5 2 3 2 2 1 2 1 1
1 5 1 1 1 1 1 1 1 1 2 1 1 1 1 1 2 2 2 1 1 1 1 1 1 1 1]]
[[1 1 2 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 2 1 2 1 1 1 1 1 2 1 1 1
1 1 1 1 3 1 2 1 1 1 6 1 4 1 1 1 1 1 1 1 1]]
[[1 1 2 1 2 1 1 1 1 1 1 3 1 1 1 2 1 1 1 1 1 3 1 3 2 1 1 1 2 1 1 1 1 1 1 1
1 1 1 1 2 4 1 1 1 1 1 1 1]]
[[1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 2 1 1]]
[[1 1 3 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 1 1 1 1 1 1 1 1 2 1
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 1 1 2 1 2]]
[[1 1 3 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 2 1 1 3 3 1 1 3 1 1 1 2 1 1 1 1
1 1 1 1 1 1 1 1 3 2 1 3 1 2 1 1 1]]
[[1 2 2 3 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 2 1 2 1 1 1 1 1 1 1 1 1
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 3 1 4 1 5 1 1 1 1 1 1 1 1]]
[[1 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 2 1 2 1 1 1 2 1 1 1 1 1
1 1 1 1 1 1 1 1 1 3 1 2 2 2 1 1 1 1]]
[[1 1 1 3 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 4 1 2 1 1 1 1 1
1 1 1 1 1 1 1 2 1 1 1 2 1 1 3 1 1 1 5 1 1 1 1 1 1 1 1 1]]
[[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 5 1 1 2 1 1 1 1 1 2 1 1 2 1 1 1 1 1
1 1 1 3 1 2 3 1 1]]
1 1 1 3 1 2 3 1 1]]
[[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 3 1 1 1]]
[[1 1 1 1 1 3 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 3 1 1 4 1 4 1 1 1 1 1 1 1 1
4 1 1 1 1 2 1 3 1 1 1 1 1 1 1 1 9 1 5 1 1 1 1 1 1 1 2 1 1 2 1]]
[[1 3 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1]]
[[1 2 1 1 1 1 1 1 2 1 2 1 1 1 2 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2
1 1 1]]
[[1 1 2 1 1 1 2 1 1 1 1 1 1 1 1 4 1 2 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1
1 1 1 1 1 1 1 1 1 1 1 6 1 1 1 1 1 1 1]]
[[1 1 1 2 4 1 2 1 3 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 2 1 1 2 1 1 1 1 1 1 1 1
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 4 1 1 5 1 1 1 1 2 1 3 1 3]]
[[1 1 2 4 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 2 1
1 1 1 1 1 2 1 1 1 1 2 2 2 1 4 1 1 1 1 1 1 1 1 1]]
[[1 1 4 4 1 1 1 1 4 2 1 1 1 1 1 1 4 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
3 1 1 1 1 2 2 1 6 2 1 2 2 1 2 1 1 2 1]]
[[1 2 1 1 1 1 1 1 1 1 1 1 2 1 1 2 3 1 1 1 2 2 1 2 1 1 1 1 1 1 1 1 1 1 1 1
1 1 1 1 2 3 1 3 2 1 1 1 2]]
[[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1]]
[[1 3 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1]]
[[1 1 1 3 1 1 1 3 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1
2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 5 1 3 1 3 1 1 1 1 1]]
[[1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]]
[[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 2 2 1 1 1 1 1 1 1 1 1 1
1 1 3 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 7 2 2 2 1 1 1 1 1 1 1 1]]
[[3 1 1 1 1 2 1 1 1 1 1 1 1 1 3 1 1 1 1 1 1 1 3 1 1 1 1 1 1 1 1 1 1 1 2 1
1 1 1 1 1 1 1 3 1 1 1 2]]
[[1 1 1 1 1 1 1 1 1 1 1 2 1 1 2 1 1 2 1 1 1 1 2 1 2 1 1 1 1 1 1 1 1 1 1 1
1 1 1 1 1 1 1 2 1 1]]
[[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1
1 1 1 1 1 1 2 1 1 1 1 1 1 1 1]]
[[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 3 1 1 1 1 1 5
5 1 1 1 1 1]]
[[1 5 1 1 1 1 4 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 3 1 2 1 2 4 1 2 1
1 1 1 1 1 1 1 1 4 3 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 4 1 1 2 1 1 1 1 1 1 2
1 3 2 1]]
[[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 4 1 1 1 1 1 1 1 1 1 1 1 1 1 1
1 1 1 1 1 1 2 1 1 1 1 4 1 1 1 1 1 1 2 3 1 1 1 3 1 1 1 1 1 1 1]]
[[1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 4 1 1]]
[[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 3 1 2 1]]
[[1 1 4 4 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 3 1 1 1 1
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]]
[[1 1 1 1 2 1 1 1 1 1 2 2 1 1 1 1 3 1 1 1 1 1 1 1 1 1 1 1 3 1 1 3 1 1 1 2
1 1 1 2 1 2 1 1 1 1 1 4 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 6 1 2
1 1 2 2 1 3 1 1 1 1 1 2 1 1 1 1 1]]
[[1 1 1 4 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 4 1 1 1 1 1 1 1 1 1 1 3 1 1 2 1
1 1 1 1 1 1 1 1 1 6 1 3 1 1 1 1 1 1 1 1 1 1 2 7 1 1 1 1 2 1 1 1 1 1 1 1
1 2 1 2 1 1 1 1]]
[[1 3 3 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 2 1 1 1 1 1 1
1 1 1 1 1 1 1 1 1 1 1 1 4 1 1 2 1 1 1 1 3 1 1]]
[[1 1 1 1 1 1 1 1 1 1 1 1]]
[[1 1 1 1 2 1 1 2 1 1 1 2 2 1 1 1 1 1 1 1 1 1 2 3 1 1 3 1 1 1 1 1 1 1 2 1
1 1 1 2 1 1 1 1 1 3 1 1 1 1 1 2 1]]
[[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 3 1 4 1 2 1 1 1 1 1 1 1
5 1 3 1 1 2 1 1 1]]
[[1 1 1 1 1 1 1 1 2 1 1 1 2 1 2 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1]]
[[2 1 1 1 1 1 1 2 1 2 1 1 1 1 1 1 1 1]]
[[2 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 2 1 1 1 1 4 1 1 1 1 1 1 1 1 1 1 1 2 1
4 1 1 1 1 1 1 1]]
[[1 2 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1]]
[[1 1 2 1 1 1 1 1 3 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1
1 1 1 1 1 1 4 1 2 1 1 1 1 1 1 1 1 2]]
[[ 1 1 1 1 1 1 1 5 1 1 2 1 1 1 1 1 1 1 1 1 1 1 2 1
1 2 1 1 1 1 2 1 1 1 2 1 1 2 1 2 1 1 1 1 1 1 1 1
1 1 2 5 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
1 10 1 2 3 1 1 1 1]]
[[1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
1 1 1 1 1 1 1 1 4 1 1 1 1 1 1]]
[[1 1 4 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 2 1 1 1 1 1
1 1 1 3 1 1 1 1 1 1 1 1 1 1 1 1 1 5 1 1 2 1 1 1 1 1 1 1]]
[[1 1 4 5 1 1 1 1 1 2 1 1 1 1 1 3 1 1 1 1 1 4 3 1 1 2 1 1 4 1 1 2 1 1 1 1
1 1 1 1 3 1 1 1 1 1 1 1 1 1 1 2 2 1 1 1 1 2 1 1 6 1 1 2 2 4 1 1 1 1 2 1
1 1 1 1 1]]
[[1 1 2 1 1 1 1 1 1 2 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 2]]
[[1 1 2 1 1 1 1 1 1 2 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 2]]
[[1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 3 1 1 1 1 1 1 1 1 1 1
1 1 1 1 1 1 1 1 1 1 3 1 1 1 1 1 1]]
[[3 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]]
[[1 1 1 1 1 1 1 2 3 1 1 3 1 1 1 1 1 1 1 2 1 1 1 2 3 5 3 1 3 1 1 1 1 1 1 2
1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 3 5 1 1 1 2 2 2 1 1]]
[[1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 2 3 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
1 1 1 3 2 1 1 1 1 1 1 1 1 1]]
[[1 1 1 1 1 1 1 1 2 1 1 2 1 2 1 1 2 1 1 1 1 1 4 1 1 1]]
[[1 1 3 1 1 1 1 1 1 1 1 1 2 2 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1
1 1 1 1 1 2 2 1 1 1 1 1 1 1 1 1 5 1 2 2 1 1 1 1 1 1 1 1]]
[[4 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]]

In [ ]:
sdf = sdf.reset_index()
titles = sdf['title']
indices = pd.Series(sdf.index, index=sdf['title'])

In [ ]:
def get_recommendations(title):
idx = indices[title]
sim_scores = list(enumerate(cosine_sim[idx]))
sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
sim_scores = sim_scores[1:31]
movie_indices = [i[0] for i in sim_scores]
return titles.iloc[movie_indices]

In [25]:
get_recommendations('The Shawshank Redemption')
Out[25]:
8206 Les Misérables
505 Chasers
5650 Sherlock, Jr.
5690 The Blue Dahlia
5983 The Woodsman
4051 Breakout
2729 Coogan's Bluff
408 The Getaway
7409 A Prophet
6193 The 40 Year Old Virgin
5855 Fortress
3766 Brubaker
3520 Ariel
1026 Cool Hand Luke
578 Some Folks Call It a Sling Blade
1852 Runaway Train
4358 Brother
4670 You Only Live Once
1303 Alive and Kicking
2547 Man on the Moon
3443 Uncommon Valor
3074 Silent Fall
2905 American Gigolo
4313 The Weight of Water
8522 Starred Up
8935 Southpaw
6134 The Longest Yard
7535 Toy Story 3
2841 Empire of Passion
916 Raw Deal
Name: title, dtype: object

In [ ]:

You might also like