You are on page 1of 18

8/1/2021 Movie recommendation system-Kaggle

In [20]: import pandas as pd

import numpy as np

import matplotlib.pyplot as plt

import seaborn as sns

df1 = pd.read_csv("tmdb_5000_credits.csv")

In [21]: df2 = pd.read_csv("tmdb_5000_movies.csv")

In [22]: df1.head(5)

Out[22]:
movie_id title cast crew

0 [{"cast_id": 242, [{"credit_id":


19995 Avatar "character": "Jake "52fe48009251416c750aca23",
Sully", "... "de...

1 Pirates of the [{"cast_id": 4, [{"credit_id":


285 Caribbean: At "character": "Captain "52fe4232c3a36847f800b579",
World's End Jack Spa... "de...

2 [{"cast_id": 1, [{"credit_id":
206647 Spectre "character": "James "54805967c3a36829b5002c41",
Bond", "cr... "de...

3 [{"cast_id": 2, [{"credit_id":
The Dark Knight
49026 "character": "Bruce "52fe4781c3a36847f81398c3",
Rises
Wayne / Ba... "de...

4 [{"cast_id": 5, [{"credit_id":
49529 John Carter "character": "John "52fe479ac3a36847f813eaa3",
Carter", "c... "de...

file:///C:/Users/bh03375/AppData/Local/Microsoft/Windows/INetCache/IE/FEEPS4R0/Movie+recommendation+system-Kaggle.html 1/18
8/1/2021 Movie recommendation system-Kaggle

In [23]: df2.head(5)

Out[23]:
budget genres homepage id keywords

0 [{"id":
[{"id": 28,
1463,
"name":
"name":
237000000 "Action"}, http://www.avatarmovie.com/ 19995
"culture
{"id": 12,
clash"},
"nam...
{"id":...

1
[{"id": 270,
[{"id": 12,
"name":
"name":
300000000 http://disney.go.com/disneypictures/pirates/ 285 "ocean"},
"Adventure"},
{"id": 726,
{"id": 14, "...
"na...

2
[{"id": 28, [{"id": 470,
"name": "name":
245000000 "Action"}, http://www.sonypictures.com/movies/spectre/ 206647 "spy"},
{"id": 12, {"id": 818,
"nam... "name...

3 [{"id": 849,
[{"id": 28,
"name":
"name":
"dc
250000000 "Action"}, http://www.thedarkknightrises.com/ 49026
comics"},
{"id": 80,
{"id":
"nam...
853,...

4
[{"id": 28, [{"id": 818,
"name": "name":
260000000 "Action"}, http://movies.disney.com/john-carter 49529 "based on
{"id": 12, novel"},
"nam... {"id":...

In [24]: df1.columns = ['id','tittle','cast','crew']

df2= df2.merge(df1,on='id')

file:///C:/Users/bh03375/AppData/Local/Microsoft/Windows/INetCache/IE/FEEPS4R0/Movie+recommendation+system-Kaggle.html 2/18
8/1/2021 Movie recommendation system-Kaggle

In [25]: df1.head(5)

Out[25]:
id tittle cast crew

0 [{"cast_id": 242, [{"credit_id":


19995 Avatar "character": "Jake Sully", "52fe48009251416c750aca23",
"... "de...

1 Pirates of the [{"credit_id":


[{"cast_id": 4, "character":
285 Caribbean: At World's "52fe4232c3a36847f800b579",
"Captain Jack Spa...
End "de...

2 [{"credit_id":
[{"cast_id": 1, "character":
206647 Spectre "54805967c3a36829b5002c41",
"James Bond", "cr...
"de...

3 [{"credit_id":
The Dark Knight [{"cast_id": 2, "character":
49026 "52fe4781c3a36847f81398c3",
Rises "Bruce Wayne / Ba...
"de...

4 [{"credit_id":
[{"cast_id": 5, "character":
49529 John Carter "52fe479ac3a36847f813eaa3",
"John Carter", "c...
"de...

file:///C:/Users/bh03375/AppData/Local/Microsoft/Windows/INetCache/IE/FEEPS4R0/Movie+recommendation+system-Kaggle.html 3/18
8/1/2021 Movie recommendation system-Kaggle

In [26]: df2.head(5)

Out[26]:
budget genres homepage id keywords

0 [{"id":
[{"id": 28,
1463,
"name":
"name":
237000000 "Action"}, http://www.avatarmovie.com/ 19995
"culture
{"id": 12,
clash"},
"nam...
{"id":...

1
[{"id": 270,
[{"id": 12,
"name":
"name":
300000000 http://disney.go.com/disneypictures/pirates/ 285 "ocean"},
"Adventure"},
{"id": 726,
{"id": 14, "...
"na...

2
[{"id": 28, [{"id": 470,
"name": "name":
245000000 "Action"}, http://www.sonypictures.com/movies/spectre/ 206647 "spy"},
{"id": 12, {"id": 818,
"nam... "name...

3 [{"id": 849,
[{"id": 28,
"name":
"name":
"dc
250000000 "Action"}, http://www.thedarkknightrises.com/ 49026
comics"},
{"id": 80,
{"id":
"nam...
853,...

4
[{"id": 28, [{"id": 818,
"name": "name":
260000000 "Action"}, http://movies.disney.com/john-carter 49529 "based on
{"id": 12, novel"},
"nam... {"id":...

5 rows × 23 columns

file:///C:/Users/bh03375/AppData/Local/Microsoft/Windows/INetCache/IE/FEEPS4R0/Movie+recommendation+system-Kaggle.html 4/18
8/1/2021 Movie recommendation system-Kaggle

In [34]: #sns.barplot(x="original_language", y="id", kind="count", data=df2)

import seaborn as sns

#sns.set_theme(style="whitegrid")

sns.barplot(x="original_language", y="id", data=df2)

plt.show()

In [37]: import seaborn as sns

sns.set_style("darkgrid")

sns.countplot(x="original_language", data=df2)

plt.show()

file:///C:/Users/bh03375/AppData/Local/Microsoft/Windows/INetCache/IE/FEEPS4R0/Movie+recommendation+system-Kaggle.html 5/18
8/1/2021 Movie recommendation system-Kaggle

In [29]: df2['original_language'].value_counts()

Out[29]: en 4505

fr 70

es 32

zh 27

de 27

hi 19

ja 16

it 14

cn 12

ru 11

ko 11

pt 9

da 7

sv 5

fa 4

nl 4

he 3

th 3

ar 2

ro 2

ta 2

id 2

cs 2

nb 1

vi 1

xx 1

el 1

te 1

tr 1

ky 1

pl 1

sl 1

af 1

is 1

no 1

ps 1

hu 1

Name: original_language, dtype: int64

file:///C:/Users/bh03375/AppData/Local/Microsoft/Windows/INetCache/IE/FEEPS4R0/Movie+recommendation+system-Kaggle.html 6/18
8/1/2021 Movie recommendation system-Kaggle

In [39]: sns.set_style("whitegrid")

sns.boxplot(x=df2["budget"])

plt.show()

In [40]: sns.set_style("whitegrid")

sns.violinplot(x=df2["budget"])

plt.show()

file:///C:/Users/bh03375/AppData/Local/Microsoft/Windows/INetCache/IE/FEEPS4R0/Movie+recommendation+system-Kaggle.html 7/18
8/1/2021 Movie recommendation system-Kaggle

In [42]: df2.describe()

Out[42]:
budget id popularity revenue runtime vote_aver

count 4.803000e+03 4803.000000 4803.000000 4.803000e+03 4801.000000 4803.0000

mean 2.904504e+07 57165.484281 21.492301 8.226064e+07 106.875859 6.092172

std 4.072239e+07 88694.614033 31.816650 1.628571e+08 22.611935 1.194612

min 0.000000e+00 5.000000 0.000000 0.000000e+00 0.000000 0.000000

25% 7.900000e+05 9014.500000 4.668070 0.000000e+00 94.000000 5.600000

50% 1.500000e+07 14629.000000 12.921594 1.917000e+07 103.000000 6.200000

75% 4.000000e+07 58610.500000 28.313505 9.291719e+07 118.000000 6.800000

max 3.800000e+08 459488.000000 875.581305 2.787965e+09 338.000000 10.000000

In [43]: import seaborn as sns

sns.set_style("darkgrid")

sns.countplot(x="genres", data=df2)

plt.show()

file:///C:/Users/bh03375/AppData/Local/Microsoft/Windows/INetCache/IE/FEEPS4R0/Movie+recommendation+system-Kaggle.html 8/18
8/1/2021 Movie recommendation system-Kaggle

In [44]: df2['genres'].value_counts()

file:///C:/Users/bh03375/AppData/Local/Microsoft/Windows/INetCache/IE/FEEPS4R0/Movie+recommendation+system-Kaggle.html 9/18
8/1/2021 Movie recommendation system-Kaggle

Out[44]: [{"id": 18, "name": "Drama"}]


370

[{"id": 35, "name": "Comedy"}]


282

[{"id": 18, "name": "Drama"}, {"id": 10749, "name": "Romance"}]


164

[{"id": 35, "name": "Comedy"}, {"id": 10749, "name": "Romance"}]


144

[{"id": 35, "name": "Comedy"}, {"id": 18, "name": "Drama"}]


142

[{"id": 35, "name": "Comedy"}, {"id": 18, "name": "Drama"}, {"id": 10749, "na
me": "Romance"}]
109

[{"id": 27, "name": "Horror"}, {"id": 53, "name": "Thriller"}]


88

[{"id": 99, "name": "Documentary"}]


68

[{"id": 27, "name": "Horror"}]


64

[{"id": 18, "name": "Drama"}, {"id": 53, "name": "Thriller"}]


62

[{"id": 18, "name": "Drama"}, {"id": 35, "name": "Comedy"}]


46

[{"id": 80, "name": "Crime"}, {"id": 18, "name": "Drama"}, {"id": 53, "name":
"Thriller"}]
43

[{"id": 28, "name": "Action"}, {"id": 53, "name": "Thriller"}]


40

[{"id": 18, "name": "Drama"}, {"id": 36, "name": "History"}]


37

[{"id": 35, "name": "Comedy"}, {"id": 10751, "name": "Family"}]


36

[{"id": 28, "name": "Action"}, {"id": 35, "name": "Comedy"}]


36

[{"id": 18, "name": "Drama"}, {"id": 35, "name": "Comedy"}, {"id": 10749, "na
me": "Romance"}]
35

[{"id": 80, "name": "Crime"}, {"id": 18, "name": "Drama"}]


33

[{"id": 35, "name": "Comedy"}, {"id": 80, "name": "Crime"}]


30

[{"id": 28, "name": "Action"}, {"id": 80, "name": "Crime"}, {"id": 53, "nam
e": "Thriller"}]
30

[]
28

[{"id": 18, "name": "Drama"}, {"id": 80, "name": "Crime"}]


26

[{"id": 28, "name": "Action"}, {"id": 80, "name": "Crime"}, {"id": 18, "nam
e": "Drama"}, {"id": 53, "name": "Thriller"}]
25

[{"id": 16, "name": "Animation"}, {"id": 10751, "name": "Family"}]


25

[{"id": 18, "name": "Drama"}, {"id": 10402, "name": "Music"}]


24

[{"id": 12, "name": "Adventure"}, {"id": 28, "name": "Action"}, {"id": 53, "n
ame": "Thriller"}]
file:///C:/Users/bh03375/AppData/Local/Microsoft/Windows/INetCache/IE/FEEPS4R0/Movie+recommendation+system-Kaggle.html 10/18
8/1/2021 Movie recommendation system-Kaggle

24

[{"id": 27, "name": "Horror"}, {"id": 9648, "name": "Mystery"}, {"id": 53, "n
ame": "Thriller"}]
23

[{"id": 53, "name": "Thriller"}]


23

[{"id": 28, "name": "Action"}, {"id": 35, "name": "Comedy"}, {"id": 80, "nam
e": "Crime"}]
22

[{"id": 18, "name": "Drama"}, {"id": 9648, "name": "Mystery"}, {"id": 53, "na
me": "Thriller"}]
21

...

[{"id": 10751, "name": "Family"}, {"id": 35, "name": "Comedy"}, {"id": 14, "n
ame": "Fantasy"}]
1

[{"id": 12, "name": "Adventure"}, {"id": 16, "name": "Animation"}, {"id": 35,
"name": "Comedy"}, {"id": 14, "name": "Fantasy"}]
1

[{"id": 10770, "name": "TV Movie"}, {"id": 10749, "name": "Romance"}, {"id":
18, "name": "Drama"}]
1

[{"id": 80, "name": "Crime"}, {"id": 28, "name": "Action"}, {"id": 9648, "nam
e": "Mystery"}, {"id": 53, "name": "Thriller"}]
1

[{"id": 12, "name": "Adventure"}, {"id": 28, "name": "Action"}, {"id": 18, "n
ame": "Drama"}, {"id": 53, "name": "Thriller"}]
1

[{"id": 28, "name": "Action"}, {"id": 12, "name": "Adventure"}, {"id": 35, "n
ame": "Comedy"}, {"id": 10751, "name": "Family"}, {"id": 878, "name": "Scienc
e Fiction"}] 1

[{"id": 28, "name": "Action"}, {"id": 12, "name": "Adventure"}, {"id": 80, "n
ame": "Crime"}, {"id": 18, "name": "Drama"}, {"id": 53, "name": "Thriller"},
{"id": 37, "name": "Western"}] 1

[{"id": 12, "name": "Adventure"}, {"id": 35, "name": "Comedy"}, {"id": 18, "n
ame": "Drama"}, {"id": 10751, "name": "Family"}, {"id": 14, "name": "Fantas
y"}] 1

[{"id": 10751, "name": "Family"}, {"id": 14, "name": "Fantasy"}, {"id": 12,
"name": "Adventure"}]
1

[{"id": 12, "name": "Adventure"}, {"id": 28, "name": "Action"}, {"id": 80, "n
ame": "Crime"}, {"id": 9648, "name": "Mystery"}]
1

[{"id": 28, "name": "Action"}, {"id": 35, "name": "Comedy"}, {"id": 10769, "n
ame": "Foreign"}]
1

[{"id": 35, "name": "Comedy"}, {"id": 27, "name": "Horror"}, {"id": 53, "nam
e": "Thriller"}]
1

[{"id": 16, "name": "Animation"}, {"id": 35, "name": "Comedy"}, {"id": 18, "n
ame": "Drama"}, {"id": 10749, "name": "Romance"}]
1

[{"id": 37, "name": "Western"}, {"id": 18, "name": "Drama"}, {"id": 12, "nam
e": "Adventure"}, {"id": 53, "name": "Thriller"}]
1

[{"id": 12, "name": "Adventure"}, {"id": 16, "name": "Animation"}, {"id": 35,

file:///C:/Users/bh03375/AppData/Local/Microsoft/Windows/INetCache/IE/FEEPS4R0/Movie+recommendation+system-Kaggle.html 11/18
8/1/2021 Movie recommendation system-Kaggle

"name": "Comedy"}, {"id": 878, "name": "Science Fiction"}, {"id": 10751, "nam
e": "Family"}] 1

[{"id": 10749, "name": "Romance"}, {"id": 53, "name": "Thriller"}]


1

[{"id": 10749, "name": "Romance"}, {"id": 28, "name": "Action"}]


1

[{"id": 14, "name": "Fantasy"}, {"id": 28, "name": "Action"}, {"id": 35, "nam
e": "Comedy"}, {"id": 80, "name": "Crime"}]
1

[{"id": 28, "name": "Action"}, {"id": 14, "name": "Fantasy"}, {"id": 27, "nam
e": "Horror"}]
1

[{"id": 35, "name": "Comedy"}, {"id": 18, "name": "Drama"}, {"id": 10751, "na
me": "Family"}, {"id": 14, "name": "Fantasy"}]
1

[{"id": 18, "name": "Drama"}, {"id": 37, "name": "Western"}, {"id": 80, "nam
e": "Crime"}]
1

[{"id": 53, "name": "Thriller"}, {"id": 14, "name": "Fantasy"}, {"id": 35, "n
ame": "Comedy"}, {"id": 10751, "name": "Family"}, {"id": 9648, "name": "Myste
ry"}] 1

[{"id": 18, "name": "Drama"}, {"id": 14, "name": "Fantasy"}, {"id": 9648, "na
me": "Mystery"}, {"id": 10749, "name": "Romance"}]
1

[{"id": 12, "name": "Adventure"}, {"id": 14, "name": "Fantasy"}, {"id": 18,
"name": "Drama"}, {"id": 35, "name": "Comedy"}, {"id": 10402, "name": "Musi
c"}] 1

[{"id": 18, "name": "Drama"}, {"id": 35, "name": "Comedy"}, {"id": 27, "nam
e": "Horror"}]
1

[{"id": 27, "name": "Horror"}, {"id": 18, "name": "Drama"}, {"id": 878, "nam
e": "Science Fiction"}]
1

[{"id": 35, "name": "Comedy"}, {"id": 53, "name": "Thriller"}, {"id": 80, "na
me": "Crime"}, {"id": 10749, "name": "Romance"}]
1

[{"id": 80, "name": "Crime"}, {"id": 53, "name": "Thriller"}, {"id": 35, "nam
e": "Comedy"}]
1

[{"id": 35, "name": "Comedy"}, {"id": 10749, "name": "Romance"}, {"id": 1076
9, "name": "Foreign"}]
1

[{"id": 10769, "name": "Foreign"}, {"id": 53, "name": "Thriller"}]


1

Name: genres, Length: 1175, dtype: int64

In [45]: df2.shape

Out[45]: (4803, 23)

In [46]: C= df2['vote_average'].mean()

Out[46]: 6.092171559442011

file:///C:/Users/bh03375/AppData/Local/Microsoft/Windows/INetCache/IE/FEEPS4R0/Movie+recommendation+system-Kaggle.html 12/18
8/1/2021 Movie recommendation system-Kaggle

In [47]: m= df2['vote_count'].quantile(0.9)

Out[47]: 1838.4000000000015

In [48]: q_movies = df2.copy().loc[df2['vote_count'] >= m]

q_movies.shape

Out[48]: (481, 23)

In [49]: def weighted_rating(x, m=m, C=C):

v = x['vote_count']

R = x['vote_average']

# Calculation based on the IMDB formula

return (v/(v+m) * R) + (m/(m+v) * C)

In [50]: q_movies['score'] = q_movies.apply(weighted_rating, axis=1)

In [51]: #Sort movies based on score calculated above

q_movies = q_movies.sort_values('score', ascending=False)

#Print the top 15 movies

q_movies[['title', 'vote_count', 'vote_average', 'score']].head(10)

Out[51]:
title vote_count vote_average score

1881 The Shawshank Redemption 8205 8.5 8.059258

662 Fight Club 9413 8.3 7.939256

65 The Dark Knight 12002 8.2 7.920020

3232 Pulp Fiction 8428 8.3 7.904645

96 Inception 13752 8.1 7.863239

3337 The Godfather 5893 8.4 7.851236

95 Interstellar 10867 8.1 7.809479

809 Forrest Gump 7927 8.2 7.803188

329 The Lord of the Rings: The Return of the King 8064 8.1 7.727243

1990 The Empire Strikes Back 5879 8.2 7.697884

file:///C:/Users/bh03375/AppData/Local/Microsoft/Windows/INetCache/IE/FEEPS4R0/Movie+recommendation+system-Kaggle.html 13/18
8/1/2021 Movie recommendation system-Kaggle

In [57]: pop= df2.sort_values('popularity', ascending=False)

import matplotlib.pyplot as plt

plt.figure(figsize=(12,4))

movies= pop['title']

famous = pop['popularity']

#plt.bar(pop['title'].head(6),pop['popularity'].head(6), align='center',

#color='skyblue')

#plt.gca().invert_yaxis()

plt.bar(movies,famous, color ='maroon',width = 0.4)

plt.xlabel("Popularity")

plt.ylabel("title")

plt.title("Popular Movies")

file:///C:/Users/bh03375/AppData/Local/Microsoft/Windows/INetCache/IE/FEEPS4R0/Movie+recommendation+system-Kaggle.html 14/18
8/1/2021 Movie recommendation system-Kaggle

---------------------------------------------------------------------------

TypeError Traceback (most recent call last)

<ipython-input-57-1a7683361379> in <module>()

10

11

---> 12 plt.bar(movies,famous, color ='maroon',width = 0.4)

13 plt.xlabel("Popularity")

14 plt.ylabel("title")

C:\ProgramData\Anaconda3\lib\site-packages\matplotlib\pyplot.py in bar(left,
height, width, bottom, hold, data, **kwargs)

2702 try:

2703 ret = ax.bar(left, height, width=width, bottom=bottom, data=d


ata,

-> 2704 **kwargs)

2705 finally:

2706 ax._hold = washold

C:\ProgramData\Anaconda3\lib\site-packages\matplotlib\__init__.py in inner(a
x, *args, **kwargs)

1895 warnings.warn(msg % (label_namer, func.__name__),

1896 RuntimeWarning, stacklevel=2)

-> 1897 return func(ax, *args, **kwargs)

1898 pre_doc = inner.__doc__

1899 if pre_doc is None:

C:\ProgramData\Anaconda3\lib\site-packages\matplotlib\axes\_axes.py in bar(se
lf, left, height, width, bottom, **kwargs)

2103 if align == 'center':

2104 if orientation == 'vertical':

-> 2105 left = [left[i] - width[i] / 2. for i in xrange(len(l


eft))]

2106 elif orientation == 'horizontal':

2107 bottom = [bottom[i] - height[i] / 2.

C:\ProgramData\Anaconda3\lib\site-packages\matplotlib\axes\_axes.py in <listc
omp>(.0)

2103 if align == 'center':

2104 if orientation == 'vertical':

-> 2105 left = [left[i] - width[i] / 2. for i in xrange(len(l


eft))]

2106 elif orientation == 'horizontal':

2107 bottom = [bottom[i] - height[i] / 2.

TypeError: unsupported operand type(s) for -: 'str' and 'float'

file:///C:/Users/bh03375/AppData/Local/Microsoft/Windows/INetCache/IE/FEEPS4R0/Movie+recommendation+system-Kaggle.html 15/18
8/1/2021 Movie recommendation system-Kaggle

In [74]: df2[['overview','title']].head(5)

Out[74]:
overview title

0 In the 22nd century, a paraplegic Marine is di... Avatar

1 Pirates of the Caribbean: At World's


Captain Barbossa, long believed to be dead, ha...
End

2 A cryptic message from Bond’s past sends him


Spectre
o...

3 Following the death of District Attorney Harve... The Dark Knight Rises

4 John Carter is a war-weary, former military ca... John Carter

In [59]: #Import TfIdfVectorizer from scikit-learn

from sklearn.feature_extraction.text import TfidfVectorizer

#Define a TF-IDF Vectorizer Object. Remove all english stop words such as 'th
e', 'a'

tfidf = TfidfVectorizer(stop_words='english')

#Replace NaN with an empty string

df2['overview'] = df2['overview'].fillna('')

#Construct the required TF-IDF matrix by fitting and transforming the data

tfidf_matrix = tfidf.fit_transform(df2['overview'])

#Output the shape of tfidf_matrix

tfidf_matrix.shape

Out[59]: (4803, 20978)

In [60]: # Import linear_kernel


from sklearn.metrics.pairwise import linear_kernel

# Compute the cosine similarity matrix

cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

In [61]: #Construct a reverse map of indices and movie titles

indices = pd.Series(df2.index, index=df2['title']).drop_duplicates()

file:///C:/Users/bh03375/AppData/Local/Microsoft/Windows/INetCache/IE/FEEPS4R0/Movie+recommendation+system-Kaggle.html 16/18
8/1/2021 Movie recommendation system-Kaggle

In [62]: # Function that takes in movie title as input and outputs most similar movies

def get_recommendations(title, cosine_sim=cosine_sim):

# Get the index of the movie that matches the title


idx = indices[title]

# Get the pairwsie similarity scores of all movies with that movie

sim_scores = list(enumerate(cosine_sim[idx]))

# Sort the movies based on the similarity scores

sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)

# Get the scores of the 10 most similar movies

sim_scores = sim_scores[1:11]

# Get the movie indices

movie_indices = [i[0] for i in sim_scores]

# Return the top 10 most similar movies

return df2['title'].iloc[movie_indices]

In [63]: get_recommendations('The Dark Knight Rises')

Out[63]: 65 The Dark Knight

299 Batman Forever

428 Batman Returns

1359 Batman

3854 Batman: The Dark Knight Returns, Part 2

119 Batman Begins

2507 Slow Burn

9 Batman v Superman: Dawn of Justice

1181 JFK

210 Batman & Robin

Name: title, dtype: object

In [64]: get_recommendations('Avatar')

Out[64]: 3604 Apollo 18

2130 The American

634 The Matrix

1341 The Inhabited Island

529 Tears of the Sun

1610 Hanna

311 The Adventures of Pluto Nash

847 Semi-Pro

775 Supernova

2628 Blood and Chocolate

Name: title, dtype: object

In [75]: # Parse the stringified features into their corresponding python objects

from ast import literal_eval

features = ['cast', 'crew', 'keywords', 'genres']

for feature in features:

df2[feature] = df2[feature].apply(literal_eval)

file:///C:/Users/bh03375/AppData/Local/Microsoft/Windows/INetCache/IE/FEEPS4R0/Movie+recommendation+system-Kaggle.html 17/18
8/1/2021 Movie recommendation system-Kaggle

In [76]: # Get the director's name from the crew feature. If director is not listed, re
turn NaN

def get_director(x):

for i in x:

if i['job'] == 'Director':

return i['name']

return np.nan

In [ ]:

file:///C:/Users/bh03375/AppData/Local/Microsoft/Windows/INetCache/IE/FEEPS4R0/Movie+recommendation+system-Kaggle.html 18/18

You might also like