Professional Documents
Culture Documents
import numpy as np
df1 = pd.read_csv("tmdb_5000_credits.csv")
In [22]: df1.head(5)
Out[22]:
movie_id title cast crew
2 [{"cast_id": 1, [{"credit_id":
206647 Spectre "character": "James "54805967c3a36829b5002c41",
Bond", "cr... "de...
3 [{"cast_id": 2, [{"credit_id":
The Dark Knight
49026 "character": "Bruce "52fe4781c3a36847f81398c3",
Rises
Wayne / Ba... "de...
4 [{"cast_id": 5, [{"credit_id":
49529 John Carter "character": "John "52fe479ac3a36847f813eaa3",
Carter", "c... "de...
file:///C:/Users/bh03375/AppData/Local/Microsoft/Windows/INetCache/IE/FEEPS4R0/Movie+recommendation+system-Kaggle.html 1/18
8/1/2021 Movie recommendation system-Kaggle
In [23]: df2.head(5)
Out[23]:
budget genres homepage id keywords
0 [{"id":
[{"id": 28,
1463,
"name":
"name":
237000000 "Action"}, http://www.avatarmovie.com/ 19995
"culture
{"id": 12,
clash"},
"nam...
{"id":...
1
[{"id": 270,
[{"id": 12,
"name":
"name":
300000000 http://disney.go.com/disneypictures/pirates/ 285 "ocean"},
"Adventure"},
{"id": 726,
{"id": 14, "...
"na...
2
[{"id": 28, [{"id": 470,
"name": "name":
245000000 "Action"}, http://www.sonypictures.com/movies/spectre/ 206647 "spy"},
{"id": 12, {"id": 818,
"nam... "name...
3 [{"id": 849,
[{"id": 28,
"name":
"name":
"dc
250000000 "Action"}, http://www.thedarkknightrises.com/ 49026
comics"},
{"id": 80,
{"id":
"nam...
853,...
4
[{"id": 28, [{"id": 818,
"name": "name":
260000000 "Action"}, http://movies.disney.com/john-carter 49529 "based on
{"id": 12, novel"},
"nam... {"id":...
# Join df1 onto df2 using the 'id' column present in both frames.
# The result is assigned back to df2, so re-running this line merges df1 in again.
# NOTE(review): run exactly once per fresh kernel.
df2= df2.merge(df1,on='id')
file:///C:/Users/bh03375/AppData/Local/Microsoft/Windows/INetCache/IE/FEEPS4R0/Movie+recommendation+system-Kaggle.html 2/18
8/1/2021 Movie recommendation system-Kaggle
In [25]: df1.head(5)
Out[25]:
id tittle cast crew
2 [{"credit_id":
[{"cast_id": 1, "character":
206647 Spectre "54805967c3a36829b5002c41",
"James Bond", "cr...
"de...
3 [{"credit_id":
The Dark Knight [{"cast_id": 2, "character":
49026 "52fe4781c3a36847f81398c3",
Rises "Bruce Wayne / Ba...
"de...
4 [{"credit_id":
[{"cast_id": 5, "character":
49529 John Carter "52fe479ac3a36847f813eaa3",
"John Carter", "c...
"de...
file:///C:/Users/bh03375/AppData/Local/Microsoft/Windows/INetCache/IE/FEEPS4R0/Movie+recommendation+system-Kaggle.html 3/18
8/1/2021 Movie recommendation system-Kaggle
In [26]: df2.head(5)
Out[26]:
budget genres homepage id keywords
0 [{"id":
[{"id": 28,
1463,
"name":
"name":
237000000 "Action"}, http://www.avatarmovie.com/ 19995
"culture
{"id": 12,
clash"},
"nam...
{"id":...
1
[{"id": 270,
[{"id": 12,
"name":
"name":
300000000 http://disney.go.com/disneypictures/pirates/ 285 "ocean"},
"Adventure"},
{"id": 726,
{"id": 14, "...
"na...
2
[{"id": 28, [{"id": 470,
"name": "name":
245000000 "Action"}, http://www.sonypictures.com/movies/spectre/ 206647 "spy"},
{"id": 12, {"id": 818,
"nam... "name...
3 [{"id": 849,
[{"id": 28,
"name":
"name":
"dc
250000000 "Action"}, http://www.thedarkknightrises.com/ 49026
comics"},
{"id": 80,
{"id":
"nam...
853,...
4
[{"id": 28, [{"id": 818,
"name": "name":
260000000 "Action"}, http://movies.disney.com/john-carter 49529 "based on
{"id": 12, novel"},
"nam... {"id":...
5 rows × 23 columns
file:///C:/Users/bh03375/AppData/Local/Microsoft/Windows/INetCache/IE/FEEPS4R0/Movie+recommendation+system-Kaggle.html 4/18
8/1/2021 Movie recommendation system-Kaggle
#sns.set_theme(style="whitegrid")
plt.show()
sns.set_style("darkgrid")
sns.countplot(x="original_language", data=df2)
plt.show()
file:///C:/Users/bh03375/AppData/Local/Microsoft/Windows/INetCache/IE/FEEPS4R0/Movie+recommendation+system-Kaggle.html 5/18
8/1/2021 Movie recommendation system-Kaggle
In [29]: df2['original_language'].value_counts()
Out[29]: en 4505
fr 70
es 32
zh 27
de 27
hi 19
ja 16
it 14
cn 12
ru 11
ko 11
pt 9
da 7
sv 5
fa 4
nl 4
he 3
th 3
ar 2
ro 2
ta 2
id 2
cs 2
nb 1
vi 1
xx 1
el 1
te 1
tr 1
ky 1
pl 1
sl 1
af 1
is 1
no 1
ps 1
hu 1
file:///C:/Users/bh03375/AppData/Local/Microsoft/Windows/INetCache/IE/FEEPS4R0/Movie+recommendation+system-Kaggle.html 6/18
8/1/2021 Movie recommendation system-Kaggle
In [39]: sns.set_style("whitegrid")
# Box plot of the budget column of df2, drawn along the x axis.
sns.boxplot(x=df2["budget"])
plt.show()
In [40]: sns.set_style("whitegrid")
# Violin plot of the same budget column, drawn along the x axis.
sns.violinplot(x=df2["budget"])
plt.show()
file:///C:/Users/bh03375/AppData/Local/Microsoft/Windows/INetCache/IE/FEEPS4R0/Movie+recommendation+system-Kaggle.html 7/18
8/1/2021 Movie recommendation system-Kaggle
In [42]: df2.describe()
Out[42]:
budget id popularity revenue runtime vote_aver
sns.set_style("darkgrid")
# NOTE(review): the value_counts() output for this column shows each value is a
# whole stringified list of {"id", "name"} entries, so this plot counts distinct
# genre combinations rather than individual genres — confirm that is intended.
sns.countplot(x="genres", data=df2)
plt.show()
file:///C:/Users/bh03375/AppData/Local/Microsoft/Windows/INetCache/IE/FEEPS4R0/Movie+recommendation+system-Kaggle.html 8/18
8/1/2021 Movie recommendation system-Kaggle
In [44]: df2['genres'].value_counts()
file:///C:/Users/bh03375/AppData/Local/Microsoft/Windows/INetCache/IE/FEEPS4R0/Movie+recommendation+system-Kaggle.html 9/18
8/1/2021 Movie recommendation system-Kaggle
[{"id": 35, "name": "Comedy"}, {"id": 18, "name": "Drama"}, {"id": 10749, "na
me": "Romance"}]
109
[{"id": 80, "name": "Crime"}, {"id": 18, "name": "Drama"}, {"id": 53, "name":
"Thriller"}]
43
[{"id": 18, "name": "Drama"}, {"id": 35, "name": "Comedy"}, {"id": 10749, "na
me": "Romance"}]
35
[{"id": 28, "name": "Action"}, {"id": 80, "name": "Crime"}, {"id": 53, "nam
e": "Thriller"}]
30
[]
28
[{"id": 28, "name": "Action"}, {"id": 80, "name": "Crime"}, {"id": 18, "nam
e": "Drama"}, {"id": 53, "name": "Thriller"}]
25
[{"id": 12, "name": "Adventure"}, {"id": 28, "name": "Action"}, {"id": 53, "n
ame": "Thriller"}]
file:///C:/Users/bh03375/AppData/Local/Microsoft/Windows/INetCache/IE/FEEPS4R0/Movie+recommendation+system-Kaggle.html 10/18
8/1/2021 Movie recommendation system-Kaggle
24
[{"id": 27, "name": "Horror"}, {"id": 9648, "name": "Mystery"}, {"id": 53, "n
ame": "Thriller"}]
23
[{"id": 28, "name": "Action"}, {"id": 35, "name": "Comedy"}, {"id": 80, "nam
e": "Crime"}]
22
[{"id": 18, "name": "Drama"}, {"id": 9648, "name": "Mystery"}, {"id": 53, "na
me": "Thriller"}]
21
...
[{"id": 10751, "name": "Family"}, {"id": 35, "name": "Comedy"}, {"id": 14, "n
ame": "Fantasy"}]
1
[{"id": 12, "name": "Adventure"}, {"id": 16, "name": "Animation"}, {"id": 35,
"name": "Comedy"}, {"id": 14, "name": "Fantasy"}]
1
[{"id": 10770, "name": "TV Movie"}, {"id": 10749, "name": "Romance"}, {"id":
18, "name": "Drama"}]
1
[{"id": 80, "name": "Crime"}, {"id": 28, "name": "Action"}, {"id": 9648, "nam
e": "Mystery"}, {"id": 53, "name": "Thriller"}]
1
[{"id": 12, "name": "Adventure"}, {"id": 28, "name": "Action"}, {"id": 18, "n
ame": "Drama"}, {"id": 53, "name": "Thriller"}]
1
[{"id": 28, "name": "Action"}, {"id": 12, "name": "Adventure"}, {"id": 35, "n
ame": "Comedy"}, {"id": 10751, "name": "Family"}, {"id": 878, "name": "Scienc
e Fiction"}] 1
[{"id": 28, "name": "Action"}, {"id": 12, "name": "Adventure"}, {"id": 80, "n
ame": "Crime"}, {"id": 18, "name": "Drama"}, {"id": 53, "name": "Thriller"},
{"id": 37, "name": "Western"}] 1
[{"id": 12, "name": "Adventure"}, {"id": 35, "name": "Comedy"}, {"id": 18, "n
ame": "Drama"}, {"id": 10751, "name": "Family"}, {"id": 14, "name": "Fantas
y"}] 1
[{"id": 10751, "name": "Family"}, {"id": 14, "name": "Fantasy"}, {"id": 12,
"name": "Adventure"}]
1
[{"id": 12, "name": "Adventure"}, {"id": 28, "name": "Action"}, {"id": 80, "n
ame": "Crime"}, {"id": 9648, "name": "Mystery"}]
1
[{"id": 28, "name": "Action"}, {"id": 35, "name": "Comedy"}, {"id": 10769, "n
ame": "Foreign"}]
1
[{"id": 35, "name": "Comedy"}, {"id": 27, "name": "Horror"}, {"id": 53, "nam
e": "Thriller"}]
1
[{"id": 16, "name": "Animation"}, {"id": 35, "name": "Comedy"}, {"id": 18, "n
ame": "Drama"}, {"id": 10749, "name": "Romance"}]
1
[{"id": 37, "name": "Western"}, {"id": 18, "name": "Drama"}, {"id": 12, "nam
e": "Adventure"}, {"id": 53, "name": "Thriller"}]
1
[{"id": 12, "name": "Adventure"}, {"id": 16, "name": "Animation"}, {"id": 35,
file:///C:/Users/bh03375/AppData/Local/Microsoft/Windows/INetCache/IE/FEEPS4R0/Movie+recommendation+system-Kaggle.html 11/18
8/1/2021 Movie recommendation system-Kaggle
"name": "Comedy"}, {"id": 878, "name": "Science Fiction"}, {"id": 10751, "nam
e": "Family"}] 1
[{"id": 14, "name": "Fantasy"}, {"id": 28, "name": "Action"}, {"id": 35, "nam
e": "Comedy"}, {"id": 80, "name": "Crime"}]
1
[{"id": 28, "name": "Action"}, {"id": 14, "name": "Fantasy"}, {"id": 27, "nam
e": "Horror"}]
1
[{"id": 35, "name": "Comedy"}, {"id": 18, "name": "Drama"}, {"id": 10751, "na
me": "Family"}, {"id": 14, "name": "Fantasy"}]
1
[{"id": 18, "name": "Drama"}, {"id": 37, "name": "Western"}, {"id": 80, "nam
e": "Crime"}]
1
[{"id": 53, "name": "Thriller"}, {"id": 14, "name": "Fantasy"}, {"id": 35, "n
ame": "Comedy"}, {"id": 10751, "name": "Family"}, {"id": 9648, "name": "Myste
ry"}] 1
[{"id": 18, "name": "Drama"}, {"id": 14, "name": "Fantasy"}, {"id": 9648, "na
me": "Mystery"}, {"id": 10749, "name": "Romance"}]
1
[{"id": 12, "name": "Adventure"}, {"id": 14, "name": "Fantasy"}, {"id": 18,
"name": "Drama"}, {"id": 35, "name": "Comedy"}, {"id": 10402, "name": "Musi
c"}] 1
[{"id": 18, "name": "Drama"}, {"id": 35, "name": "Comedy"}, {"id": 27, "nam
e": "Horror"}]
1
[{"id": 27, "name": "Horror"}, {"id": 18, "name": "Drama"}, {"id": 878, "nam
e": "Science Fiction"}]
1
[{"id": 35, "name": "Comedy"}, {"id": 53, "name": "Thriller"}, {"id": 80, "na
me": "Crime"}, {"id": 10749, "name": "Romance"}]
1
[{"id": 80, "name": "Crime"}, {"id": 53, "name": "Thriller"}, {"id": 35, "nam
e": "Comedy"}]
1
[{"id": 35, "name": "Comedy"}, {"id": 10749, "name": "Romance"}, {"id": 1076
9, "name": "Foreign"}]
1
In [45]: df2.shape
In [46]: C= df2['vote_average'].mean()  # mean of the vote_average column over all rows of df2
Out[46]: 6.092171559442011
file:///C:/Users/bh03375/AppData/Local/Microsoft/Windows/INetCache/IE/FEEPS4R0/Movie+recommendation+system-Kaggle.html 12/18
8/1/2021 Movie recommendation system-Kaggle
In [47]: m= df2['vote_count'].quantile(0.9)  # 0.9 quantile (90th percentile) of vote_count; presumably a minimum-votes cutoff — confirm against the cells that use m
Out[47]: 1838.4000000000015
q_movies.shape
v = x['vote_count']
R = x['vote_average']
Out[51]:
title vote_count vote_average score
329 The Lord of the Rings: The Return of the King 8064 8.1 7.727243
file:///C:/Users/bh03375/AppData/Local/Microsoft/Windows/INetCache/IE/FEEPS4R0/Movie+recommendation+system-Kaggle.html 13/18
8/1/2021 Movie recommendation system-Kaggle
plt.figure(figsize=(12,4))
movies= pop['title']
famous = pop['popularity']
#plt.bar(pop['title'].head(6),pop['popularity'].head(6), align='center',
#color='skyblue')
#plt.gca().invert_yaxis()
plt.xlabel("Popularity")
plt.ylabel("title")
plt.title("Popular Movies")
file:///C:/Users/bh03375/AppData/Local/Microsoft/Windows/INetCache/IE/FEEPS4R0/Movie+recommendation+system-Kaggle.html 14/18
8/1/2021 Movie recommendation system-Kaggle
---------------------------------------------------------------------------
<ipython-input-57-1a7683361379> in <module>()
10
11
13 plt.xlabel("Popularity")
14 plt.ylabel("title")
C:\ProgramData\Anaconda3\lib\site-packages\matplotlib\pyplot.py in bar(left,
height, width, bottom, hold, data, **kwargs)
2702 try:
2705 finally:
C:\ProgramData\Anaconda3\lib\site-packages\matplotlib\__init__.py in inner(a
x, *args, **kwargs)
C:\ProgramData\Anaconda3\lib\site-packages\matplotlib\axes\_axes.py in bar(se
lf, left, height, width, bottom, **kwargs)
C:\ProgramData\Anaconda3\lib\site-packages\matplotlib\axes\_axes.py in <listc
omp>(.0)
file:///C:/Users/bh03375/AppData/Local/Microsoft/Windows/INetCache/IE/FEEPS4R0/Movie+recommendation+system-Kaggle.html 15/18
8/1/2021 Movie recommendation system-Kaggle
In [74]: df2[['overview','title']].head(5)
Out[74]:
overview title
3 Following the death of District Attorney Harve... The Dark Knight Rises
#Define a TF-IDF Vectorizer Object. Remove all english stop words such as 'the', 'a'
# Vectorizer with stop_words='english' (presumably scikit-learn's TfidfVectorizer;
# its import is not visible in this part of the notebook — confirm).
tfidf = TfidfVectorizer(stop_words='english')
# Replace missing overviews with an empty string before vectorizing; this overwrites
# the 'overview' column of df2.
df2['overview'] = df2['overview'].fillna('')
#Construct the required TF-IDF matrix by fitting and transforming the data
tfidf_matrix = tfidf.fit_transform(df2['overview'])
# Last expression of the cell: show the dimensions of the resulting matrix.
tfidf_matrix.shape
file:///C:/Users/bh03375/AppData/Local/Microsoft/Windows/INetCache/IE/FEEPS4R0/Movie+recommendation+system-Kaggle.html 16/18
8/1/2021 Movie recommendation system-Kaggle
In [62]: # Function that takes in movie title as input and outputs most similar movies
# Get the pairwise similarity scores of all movies with that movie
sim_scores = list(enumerate(cosine_sim[idx]))
sim_scores = sim_scores[1:11]
return df2['title'].iloc[movie_indices]
1359 Batman
1181 JFK
In [64]: get_recommendations('Avatar')
1610 Hanna
847 Semi-Pro
775 Supernova
In [75]: # Parse the stringified features into their corresponding python objects
df2[feature] = df2[feature].apply(literal_eval)
file:///C:/Users/bh03375/AppData/Local/Microsoft/Windows/INetCache/IE/FEEPS4R0/Movie+recommendation+system-Kaggle.html 17/18
8/1/2021 Movie recommendation system-Kaggle
In [76]: # Get the director's name from the crew feature. If director is not listed, return NaN
def get_director(x):
    """Return the name of the first crew entry whose job is 'Director'.

    Args:
        x: Iterable of crew entries. Each entry is indexed with the keys
            'job' and 'name' (presumably dicts parsed from the crew
            column — confirm against the caller).

    Returns:
        The 'name' value of the first entry whose 'job' equals 'Director',
        or ``np.nan`` when no entry matches (including an empty iterable).
    """
    for i in x:
        if i['job'] == 'Director':
            # Stop at the first director found; later entries are ignored.
            return i['name']
    # No director listed in this crew.
    return np.nan
In [ ]:
file:///C:/Users/bh03375/AppData/Local/Microsoft/Windows/INetCache/IE/FEEPS4R0/Movie+recommendation+system-Kaggle.html 18/18