You are on page 1 of 11

#Create the given student data frame and perform the given exercises

#Q1 PART A

#Calculate Mean, Median, Maximum, Minimum and Standard Deviation of Marks

import pandas as pd

# Student records: one row per student, values in the order of `columns`.
df = pd.DataFrame(
    [
        ["MBA001", "Ankur", "Male", 34, 320],
        ["MBA002", "Vani", "Female", 25, 350],
        ["MBA003", "Sabana", "Female", 24, 290],
        ["MBA004", "Rohit", "Male", 26, 330],
        ["MBA005", "Ayushi", "Female", 30, 225],
    ],
    columns=["ID", "Name", "Gender", "Age", "Marks"],
)

# Summary statistics of the "Marks" column.
print("Statistics of Marks of Students:")
print("Mean:", df["Marks"].mean())
print("Median:", df["Marks"].median())
print("Maximum:", df["Marks"].max())
print("Minimum:", df["Marks"].min())
# Series.std() uses ddof=1 by default, i.e. the sample standard deviation.
print("Standard Deviation:", df["Marks"].std())

Output:

#Q1 PART B

#Set "ID" as row index

# set_index returns a new DataFrame and leaves the original unchanged, so the
# result must be assigned back for "ID" to actually become the row index.
df = df.set_index("ID")
# Print the re-indexed frame so the result is visible outside a notebook too.
print(df)
Output:

#Q1 PART C

#Add "Experience" column and print data frame

# Experience values, one per row, assigned positionally in row order.
experience = [4, 10, 1, 3, 6]
df["Experience"] = experience

# Show the frame with the new column appended.
print(df)

Output:

#Q1 PART D

#Draw BAR diagram of marks

import matplotlib.pyplot as plt

# Bar chart: one bar per student name, bar height taken from "Marks".
plt.bar(df["Name"], df["Marks"])
plt.title("Marks Data", fontsize=18)
plt.xlabel("Name", fontsize=14)
plt.ylabel("Marks", fontsize=14)
# Render the figure explicitly. Without this a plain script never displays
# the chart, and the next pyplot call would keep drawing on these same axes.
plt.show()
Output:

#Q1 PART E

#Draw PIE chart of age

# Start a fresh figure so the pie is not drawn onto whatever axes are
# currently active (e.g. a previous chart that was never shown).
plt.figure()
# One wedge per student, sized by "Age" and labelled with the student's name.
# "%0.lf%%" renders each share with zero decimals followed by a percent sign
# (the "l" is a length modifier that Python's %-formatting ignores).
# NOTE(review): if one decimal place was intended, this should be "%0.1f%%".
plt.pie(df["Age"], labels=df["Name"], autopct="%0.lf%%")
plt.show()
#Q1 PART F

#Delete Gender column from Data frame and print

# Remove the "Gender" column from df itself (in place), then show what is left.
df.drop(columns=["Gender"], inplace=True)
print(df)

Output:
Question 2

import math

class Math_Class:
    """Three small calculations that print their results.

    An instance stores a circle radius, a number whose factorial is wanted,
    and the coordinates of two points. ``Area``, ``Distance`` and
    ``Factorial`` each print their result and return ``None``.
    """

    # Class-level defaults. __init__ overrides every one of these on the
    # instance except ``distance``, which Distance() fills in.
    radius = 0
    factorial = 10
    d1 = 0
    d2 = 0
    e1 = 0
    e2 = 0
    distance = 0

    def __init__(self, r, f, x1, y1, x2, y2):
        """Store the inputs for the three calculations.

        Args:
            r: Circle radius used by ``Area``.
            f: Number whose factorial ``Factorial`` reports.
            x1: x-coordinate of the first point.
            y1: y-coordinate of the first point.
            x2: x-coordinate of the second point.
            y2: y-coordinate of the second point.
        """
        self.radius = r
        self.factorial = f
        # d1/d2 hold the two x-coordinates, e1/e2 the two y-coordinates.
        self.d1 = x1
        self.d2 = x2
        self.e1 = y1
        self.e2 = y2

    def Area(self):
        """Print the radius and the area of the circle with that radius."""
        print("Radius=", self.radius)
        # math.pi instead of the 3.14 approximation, for full float precision.
        print("Area=", math.pi * self.radius * self.radius)

    def Distance(self):
        """Print the Euclidean distance between (x1, y1) and (x2, y2).

        The value is also stored on the instance as ``self.distance``.
        """
        dx = self.d2 - self.d1
        dy = self.e2 - self.e1
        self.distance = math.hypot(dx, dy)
        print("Distance=", self.distance)

    def Factorial(self):
        """Print the factorial of the stored number.

        Negative numbers have no factorial and zero is reported with its own
        message; both cases print a message instead of a computed value.
        """
        n = self.factorial
        if n < 0:
            print("No factorial for negative numbers")
        elif n == 0:
            print("The factorial for 0 is 1")
        else:
            print("The factorial of", n, "is", math.factorial(n))

# Radius 10, factorial of 5, and the two points (20, 20) and (20, 60).
MathCalculation = Math_Class(r=10, f=5, x1=20, y1=20, x2=20, y2=60)

# Each call prints its own result.
MathCalculation.Area()
MathCalculation.Distance()
MathCalculation.Factorial()

Question 3

#Q3

#Initializing the values

import pandas as pd

import seaborn as sn

import matplotlib.pyplot as plt

import scipy.stats as st

import numpy as np

import statsmodels.api as sm

# Load the "Establishment" sheet of the workbook into df.
df = pd.read_excel(io="AFB_MID_DATA.xlsx", sheet_name="Establishment")

# Bare expression: it only has a visible effect where the environment echoes
# the last expression (e.g. a notebook cell); in a plain script it does nothing.
df
Output:
# Distribution of "Establishment_cost": histogram with a KDE curve overlaid.
# distplot is deprecated in seaborn; histplot with kde=True, density scaling
# and cut=3 is the replacement seaborn documents for it. The original also
# printed the returned Axes object, which only emits its repr, so the figure
# is shown explicitly instead.
sn.histplot(
    df["Establishment_cost"],
    kde=True,
    stat="density",
    kde_kws={"cut": 3},
    label="Establishment_cost",
)
plt.show()

Output:
#Performing t test for Establishment cost with given expected mean of 50 crore

# One-sample t-test of the mean of "Establishment_cost" against the expected
# mean of 50 (crore).
t_value, p_value = st.ttest_1samp(df["Establishment_cost"], 50)

# ttest_1samp reports a two-sided p-value by default; it is halved here for a
# one-tailed alternative and kept to six decimal places.
# NOTE(review): halving is only valid when t_value lies on the side the
# alternative hypothesis predicts. The direction of the alternative is not
# stated here - confirm it, or pass alternative="less"/"greater" to
# ttest_1samp and use its p-value directly.
one_tail_p_value = float("{:.6f}".format(p_value / 2))

# A 95% confidence level corresponds to alpha = 0.05.
alpha = 0.05

print("P-value = ", one_tail_p_value)
print("alpha=", alpha)

# Decision rule: reject the null hypothesis when the p-value is at most alpha.
if one_tail_p_value <= alpha:
    print("Reject null hypothesis, institute does not require INR 50 crore")
else:
    print("Accept null hypothesis, institute requires INR 50 crore")

Output:
Question 4

import pandas as pd

import statsmodels.api as st

#Reading excel file

# Load the "Supermarket" sheet of the workbook.
df = pd.read_excel("AFB_MID_DATA.xlsx", sheet_name="Supermarket")

# Explanatory variables with a constant (intercept) column added.
# In this section `st` is statsmodels.api, per the import just above.
x = st.add_constant(df[["Price", "Advertising", "Incentives", "Salesforce"]])

# Dependent variable, kept as a one-column DataFrame.
y = df[["Sales"]]

# Fit ordinary least squares of Sales on the predictors and print the report.
results = st.OLS(endog=y, exog=x).fit()
print(results.summary())

You might also like