# Professional Documents
# Culture Documents
import os

# NOTE(review): this binds the top-level matplotlib package to `plt`, not
# matplotlib.pyplot -- confirm which one the rest of the file expects.
import matplotlib as plt
import numpy as np
import pandas as pd
import seaborn as sns
# Load the Bollywood dataset and take a first look at its structure.
# The bare expressions below only display something in an interactive
# session (REPL / notebook); run as a plain script they are evaluated and
# their results discarded.
os.getcwd()
# All relative paths from here on resolve against the dataset directory.
os.chdir(r"E:\PFA\Datasets")
bollywood = pd.read_csv("bollywood.csv")

# Quick structural overview: first rows, column labels, index, dtypes.
bollywood.head(10)
bollywood.columns
bollywood.index
bollywood.dtypes

# Missing-value count per column (summing down the rows) and summary stats.
bollywood.isnull().sum(axis="rows")
bollywood.describe()

# Label each row by its movie name.
bollywood.index = bollywood['MovieName']

# Preview of the frame without the now-redundant MovieName column.
# `columns=` is used instead of a positional axis argument because pandas
# 2.0 made every argument of DataFrame.drop except `labels` keyword-only,
# so `drop('MovieName', 1)` raises TypeError there.
# The result is intentionally NOT assigned back: the queries further down
# still select 'MovieName' as a regular column.
bollywood.drop(columns='MovieName')
bollywood.columns
# General exploration: five-row leaderboards showing the movie name,
# box-office collection and budget.

# Five films with the largest box-office collection.
bollywood[['MovieName', 'BoxOfficeCollection', 'Budget']].sort_values(
    by='BoxOfficeCollection', ascending=False
).head(5)

# Five films with the largest budget.
bollywood[['MovieName', 'BoxOfficeCollection', 'Budget']].sort_values(
    by='Budget', ascending=False
).head(5)

# Five Action films with the largest box-office collection.
bollywood.loc[
    bollywood['Genre'] == 'Action',
    ['MovieName', 'BoxOfficeCollection', 'Budget'],
].sort_values(by='BoxOfficeCollection', ascending=False).head(5)

# Five Action films with the largest budget.
bollywood.loc[
    bollywood['Genre'] == 'Action',
    ['MovieName', 'BoxOfficeCollection', 'Budget'],
].sort_values(by='Budget', ascending=False).head(5)