
AB ASSIGNMENT - 3

Web Analytics at Quality Alloys

Prof. Rajiv Kumar

Group - 03

Athul A S MBA21086
Buddha Kinkar Bhaumik MBA21087
Chetan Chaurasia MBA21089
Dhakre Urjeetsingh MBA21090
Gampa Manobhiram MBA21093

#Reading different sheets from the original excel file

import pandas as pd
import seaborn as sn
import matplotlib.pyplot as plt
import scipy.stats as st
import numpy as np

WV = pd.read_excel("Quality_Alloy_Original.xlsx",sheet_name="Weekly Visits")
F = pd.read_excel("Quality_Alloy_Original.xlsx",sheet_name="Financials")
LS = pd.read_excel("Quality_Alloy_Original.xlsx",sheet_name="Lbs. Sold")
DV = pd.read_excel("Quality_Alloy_Original.xlsx",sheet_name="Daily Visits")
D = pd.read_excel("Quality_Alloy_Original.xlsx",sheet_name="Demographics")

#Initialization of datasets from the cleaned worksheets
#(Quality_Alloy.xlsx appears to be a copy of the original workbook with
# column names such as Unique_Visits and Lbs_Sold, which are used throughout below)

visit_df = pd.read_excel("Quality_Alloy.xlsx", sheet_name='Weekly Visits')
fin_df = pd.read_excel("Quality_Alloy.xlsx", sheet_name='Financials')
dem_traff_df = pd.read_excel("Quality_Alloy.xlsx", sheet_name='Dem_All_Traffic_Sources')
dem_df = pd.read_excel("Quality_Alloy.xlsx", sheet_name='Demographics')

#print(visit_df)

#Create visit dataframes for the four periods
#(assuming the periods cover weeks 1-14 Initial, 15-35 Pre-Promotion,
# 36-52 Promotion and 53-66 Post-Promotion, the zero-based row slices below
# are kept consistent between the visit and financial dataframes)

visit_in_df = visit_df[0:14]
#visit_in_df

visit_pp_df = visit_df[14:35]
#visit_pp_df

visit_pr_df = visit_df[35:52]
#visit_pr_df

visit_post_df = visit_df[52:66]
#print(visit_post_df)

#Create financial dataframes for the same four periods

fin_in_df = fin_df[0:14]
#fin_in_df

fin_pp_df = fin_df[14:35]
#fin_pp_df

fin_pr_df = fin_df[35:52]
#fin_pr_df

fin_post_df = fin_df[52:66]
#fin_post_df

Ans. 1: We first create 5 Column Charts to depict the Weekly figures of Website Visits,
Unique Visits, Revenue, Profits, and Pounds Sold over time.

#Website Visits over time


#Website Visits per week

plt.bar(visit_df.Week, visit_df.Visits, width=.8)

plt.xticks(ticks=visit_df.Week,rotation=45, fontsize=4, ha='right')

plt.title('Visits over Time',fontsize=20)


plt.xlabel('Weeks', fontsize=18)
plt.ylabel('Visits', fontsize=18)

Output: [Bar chart: Visits over Time]

#Unique Website Visits over time


#Unique Website Visit per week

plt.bar(visit_df.Week, visit_df.Unique_Visits, width=.8)

plt.xticks(ticks=visit_df.Week,rotation=45, fontsize=4, ha='right')

plt.title('Unique Visits over Time',fontsize=20)


plt.xlabel('Weeks', fontsize=18)
plt.ylabel('Unique Visits', fontsize=18)

Output: [Bar chart: Unique Visits over Time]

#Revenue over time


#Revenue per week

plt.bar(fin_df.Week, fin_df.Revenue, width=.8)

plt.xticks(ticks=fin_df.Week,rotation=45, fontsize=4, ha='right')

plt.title('Revenue over Time',fontsize=20)


plt.xlabel('Weeks', fontsize=18)
plt.ylabel('Revenue', fontsize=18)

Output: [Bar chart: Revenue over Time]

#Profit over time


#Profit per week

plt.bar(fin_df.Week, fin_df.Profit, width=.8)

plt.xticks(ticks=fin_df.Week,rotation=45, fontsize=4, ha='right')

plt.title('Profit over Time',fontsize=20)


plt.xlabel('Weeks', fontsize=18)
plt.ylabel('Profit', fontsize=18)

Output: [Bar chart: Profit over Time]

#Pounds Sold over time


#Lbs. Sold per week

plt.bar(fin_df.Week, fin_df.Lbs_Sold, width=.8)

plt.xticks(ticks=fin_df.Week,rotation=45, fontsize=4, ha='right')

plt.title('Pounds Sold over Time',fontsize=20)


plt.xlabel('Weeks', fontsize=18)
plt.ylabel('Lbs.', fontsize=18)

Output: [Bar chart: Pounds Sold over Time]
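The five chart blocks above repeat the same plotting steps. As a minimal sketch (using only the dataframes and column names already loaded above), they could also be produced in one loop:

#Sketch: generate all five weekly column charts in one loop
charts = [
    (visit_df, "Visits", "Visits over Time"),
    (visit_df, "Unique_Visits", "Unique Visits over Time"),
    (fin_df, "Revenue", "Revenue over Time"),
    (fin_df, "Profit", "Profit over Time"),
    (fin_df, "Lbs_Sold", "Pounds Sold over Time"),
]

for data, column, title in charts:
    plt.figure(figsize=(12, 4))
    plt.bar(data.Week, data[column], width=0.8)
    plt.xticks(ticks=data.Week, rotation=45, fontsize=4, ha='right')
    plt.title(title, fontsize=20)
    plt.xlabel('Weeks', fontsize=18)
    plt.ylabel(column, fontsize=18)
    plt.show()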

Ans. 2: Using the same weekly data, we now prepare a Summary Table for Weekly Visits,
Unique Visits, Revenue, Profit, and Pounds Sold for the 4 different periods individually -
Initial, Pre-Promotion, Promotion, and Post-Promotion. The Summary data include Mean,
Median, Standard Deviation, Minimum, and Maximum. Each period is considered separately
in each of the 4 following tables to analyze them individually.

#Visit and Financial Summary Measures-Initial Period

D={"Visits":
[visit_in_df["Visits"].mean(),visit_in_df["Visits"].median(),visit_in_df["Visits"].std(),visit_in_df["Visits"].
min(),visit_in_df["Visits"].max()],
"Unique Visits":
[visit_in_df["Unique_Visits"].mean(),visit_in_df["Unique_Visits"].median(),visit_in_df["Unique_Visits"].
std(),visit_in_df["Unique_Visits"].min(),visit_in_df["Unique_Visits"].max()],
"Revenue":
[fin_in_df["Revenue"].mean(),fin_in_df["Revenue"].median(),fin_in_df["Revenue"].std(),fin_in_df["Rev
enue"].min(),fin_in_df["Revenue"].max()],
"Profit":
[fin_in_df["Profit"].mean(),fin_in_df["Profit"].median(),fin_in_df["Profit"].std(),fin_in_df["Profit"].min(),fin
_in_df["Profit"].max()],
"Lbs. Sold":
[fin_in_df["Lbs_Sold"].mean(),fin_in_df["Lbs_Sold"].median(),fin_in_df["Lbs_Sold"].std(),fin_in_df["Lb
s_Sold"].min(),fin_in_df["Lbs_Sold"].max()],
}
df = pd.DataFrame(D, index = ["mean", "median", "std. dev.", "minimum", "maximum"])
df

Output: [Summary table for the Initial period]

#Visit and Financial Summary Measures - Pre-Promotion Period

D={"Visits":
[visit_pp_df["Visits"].mean(),visit_pp_df["Visits"].median(),visit_pp_df["Visits"].std(),visit_pp_df["Visits"
].min(),visit_pp_df["Visits"].max()],
"Unique Visits":
[visit_pp_df["Unique_Visits"].mean(),visit_pp_df["Unique_Visits"].median(),visit_pp_df["Unique_Visits
"].std(),visit_pp_df["Unique_Visits"].min(),visit_pp_df["Unique_Visits"].max()],
"Revenue":
[fin_pp_df["Revenue"].mean(),fin_pp_df["Revenue"].median(),fin_pp_df["Revenue"].std(),fin_pp_df["
Revenue"].min(),fin_pp_df["Revenue"].max()],
"Profit":
[fin_pp_df["Profit"].mean(),fin_pp_df["Profit"].median(),fin_pp_df["Profit"].std(),fin_pp_df["Profit"].min()
,fin_pp_df["Profit"].max()],
"Lbs. Sold":
[fin_pp_df["Lbs_Sold"].mean(),fin_pp_df["Lbs_Sold"].median(),fin_pp_df["Lbs_Sold"].std(),fin_pp_df["
Lbs_Sold"].min(),fin_pp_df["Lbs_Sold"].max()],
}
df = pd.DataFrame(D, index = ["mean", "median", "std. dev.", "minimum", "maximum"])
df

Output: [Summary table for the Pre-Promotion period]

#Visit and Financial Summary Measures - Promotion Period

D={"Visits":
[visit_pr_df["Visits"].mean(),visit_pr_df["Visits"].median(),visit_pr_df["Visits"].std(),visit_pr_df["Visits"].
min(),visit_pr_df["Visits"].max()],
"Unique Visits":
[visit_pr_df["Unique_Visits"].mean(),visit_pr_df["Unique_Visits"].median(),visit_pr_df["Unique_Visits"]
.std(),visit_pr_df["Unique_Visits"].min(),visit_pr_df["Unique_Visits"].max()],
"Revenue":
[fin_pr_df["Revenue"].mean(),fin_pr_df["Revenue"].median(),fin_pr_df["Revenue"].std(),fin_pr_df["Re
venue"].min(),fin_pr_df["Revenue"].max()],
"Profit":
[fin_pr_df["Profit"].mean(),fin_pr_df["Profit"].median(),fin_pr_df["Profit"].std(),fin_pr_df["Profit"].min(),fi
n_pr_df["Profit"].max()],
"Lbs. Sold":
[fin_pr_df["Lbs_Sold"].mean(),fin_pr_df["Lbs_Sold"].median(),fin_pr_df["Lbs_Sold"].std(),fin_pr_df["L
bs_Sold"].min(),fin_pr_df["Lbs_Sold"].max()],
}
df = pd.DataFrame(D, index = ["mean", "median", "std. dev.", "minimum", "maximum"])
df

Output: [Summary table for the Promotion period]

#Visit and Financial Summary Measures - Post-Promotion Period

D={"Visits":
[visit_post_df["Visits"].mean(),visit_post_df["Visits"].median(),visit_post_df["Visits"].std(),visit_post_df[
"Visits"].min(),visit_post_df["Visits"].max()],
"Unique Visits":
[visit_post_df["Unique_Visits"].mean(),visit_post_df["Unique_Visits"].median(),visit_post_df["Unique_
Visits"].std(),visit_post_df["Unique_Visits"].min(),visit_post_df["Unique_Visits"].max()],
"Revenue":
[fin_post_df["Revenue"].mean(),fin_post_df["Revenue"].median(),fin_post_df["Revenue"].std(),fin_po
st_df["Revenue"].min(),fin_post_df["Revenue"].max()],
"Profit":
[fin_post_df["Profit"].mean(),fin_post_df["Profit"].median(),fin_post_df["Profit"].std(),fin_post_df["Profit
"].min(),fin_post_df["Profit"].max()],
"Lbs. Sold":
[fin_post_df["Lbs_Sold"].mean(),fin_post_df["Lbs_Sold"].median(),fin_post_df["Lbs_Sold"].std(),fin_p
ost_df["Lbs_Sold"].min(),fin_post_df["Lbs_Sold"].max()],
}
df = pd.DataFrame(D, index = ["mean", "median", "std. dev.", "minimum", "maximum"])
df

Output: [Summary table for the Post-Promotion period]
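As a side note, the same five summary measures can be computed more compactly with pandas .agg. A minimal sketch for the Initial period (the other three periods follow the same pattern):

#Sketch: one-period summary table built with Series.agg
stats = ["mean", "median", "std", "min", "max"]
summary = pd.DataFrame({
    "Visits": visit_in_df["Visits"].agg(stats),
    "Unique Visits": visit_in_df["Unique_Visits"].agg(stats),
    "Revenue": fin_in_df["Revenue"].agg(stats),
    "Profit": fin_in_df["Profit"].agg(stats),
    "Lbs. Sold": fin_in_df["Lbs_Sold"].agg(stats),
})
summary.index = ["mean", "median", "std. dev.", "minimum", "maximum"]
summary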

Ans. 3: Using the weekly data, we now prepare a Table for only the Mean figures of Weekly
Visits, Unique Visits, Revenue, Profit, and Pounds Sold for the 4 different periods altogether.

#Visit and Financial Mean Measures - Periodically

D={"Visits":
[visit_in_df["Visits"].mean(),visit_pp_df["Visits"].mean(),visit_pr_df["Visits"].mean(),visit_post_df["Visit
s"].mean()],
"Unique Visits":
[visit_in_df["Unique_Visits"].mean(),visit_pp_df["Unique_Visits"].mean(),visit_pr_df["Unique_Visits"].
mean(),visit_post_df["Unique_Visits"].mean()],
"Revenue":
[fin_in_df["Revenue"].mean(),fin_pp_df["Revenue"].mean(),fin_pr_df["Revenue"].mean(),fin_post_df[
"Revenue"].mean()],
"Profit":
[fin_in_df["Profit"].mean(),fin_pp_df["Profit"].mean(),fin_pr_df["Profit"].mean(),fin_post_df["Profit"].me
an()],
"Lbs. Sold":
[fin_in_df["Lbs_Sold"].mean(),fin_pp_df["Lbs_Sold"].mean(),fin_pr_df["Lbs_Sold"].mean(),fin_post_df
["Lbs_Sold"].mean()],
}

print("Chart of Means: ")


df = pd.DataFrame(D, index = ["Initial", "Pre-Promo", "Promotion", "Post-Promo"])
df

Output:
Chart of Means: [Table of mean Visits, Unique Visits, Revenue, Profit, and Lbs. Sold for the Initial, Pre-Promo, Promotion, and Post-Promo periods]

Ans. 4: Analyzing these tables, we notice that weekly visits and unique visits start at a healthy level in the Initial period but fall sharply during the Pre-Promotion phase, which may be one reason the company decided to promote the business. In the Promotion phase the number of visits rises again. However, with the added cost of promotion, Revenue and Profit both fall considerably in this phase, and the effect lingers into the Post-Promotion phase. Possibly because the promotional schemes are no longer running, customers appear to lose interest in the company: visits fall, and Revenue, Profit, and Pounds Sold all decline substantially.
Ans. 5: Creating a Scatterplot of Revenue and Pounds Sold and calculating Pearson’s
Correlation coefficient of the two variables.

#Scatterplot of Revenue and Pounds Sold

import scipy.stats as st
y = fin_df["Revenue"]
y
x = fin_df["Lbs_Sold"]
x

plt.scatter(x,y)
plt.title("Scatterplot of Revenue and Pounds Sold")
plt.xlabel("Pounds Sold")
plt.ylabel("Revenue")

corr, _ = st.pearsonr(x, y)
print('Pearsons correlation: %.5f' % corr)

Output:
Pearsons correlation: 0.86893

(Ans. 7: Remarks) Here we see a clear positive relationship between the two variables. The correlation coefficient is close to 1, so they are strongly correlated, and its positive sign indicates that as pounds sold increase, Revenue also increases. The scatterplot supports this: the points form an upward-sloping cluster with very few outliers.
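To make the positive association visible on the plot, a least-squares trendline can be overlaid on the scatterplot. A minimal sketch using np.polyfit, with the same x (Pounds Sold) and y (Revenue) as above:

#Sketch: overlay a linear trendline on the Revenue vs Pounds Sold scatter
slope, intercept = np.polyfit(x, y, 1)            #first-degree (linear) fit
x_line = np.linspace(x.min(), x.max(), 100)       #smooth grid for the line
plt.scatter(x, y)
plt.plot(x_line, slope * x_line + intercept, color="red")
plt.title("Revenue vs Pounds Sold with Linear Trendline")
plt.xlabel("Pounds Sold")
plt.ylabel("Revenue")
plt.show()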

Ans. 6: Creating a Scatterplot of Revenue and Weekly Visits and calculating Pearson's Correlation coefficient between the two variables. (Given our previous deductions, although visits rise and fall, Revenue does not appear to depend on them; it is driven mostly by the promotional strategy and the number of pounds sold.)

#Scatterplot of Revenue and Visits

import scipy.stats as st
y = fin_df["Revenue"]
y
x = visit_df["Visits"]
x
plt.scatter(x,y)
plt.title("Scatterplot of Revenue and Visits")
plt.xlabel("Visits")
plt.ylabel("Revenue")

corr, _ = st.pearsonr(x, y)
print('Pearsons correlation: %.5f' % corr)

Output:
Pearsons correlation: -0.05939

(Ans. 7: Remarks) This scatterplot paints an altogether different picture for the two variables, Revenue and Weekly Visits. Pearson's correlation coefficient is very close to zero and slightly negative, implying that the two variables are only very weakly related (also visible in the random scatter of points, with only a faint cluster near the bottom-left corner of the graph); the negative sign suggests that, if anything, Revenue tends to fall slightly as Visits increase.

#Scatterplot of Visits and Bounce Rate

import scipy.stats as st
y = visit_df["Visits"]
y
x = visit_df["Bounce_Rate"]
x

plt.scatter(x,y)
plt.title("Scatterplot of Visits and Bounce Rate")
plt.xlabel("Bounce Rate")
plt.ylabel("Visits")

corr, _ = st.pearsonr(x, y)
print('Pearsons correlation: %.5f' % corr)

Output:
Pearsons correlation: 0.85576

(Ans. 7: Remarks) Here we again see a clear positive relationship between the two variables, Bounce Rate and Visits. The correlation coefficient is close to 1, so they are strongly correlated, and its positive sign indicates that visits increase as the bounce rate increases. The scatterplot supports this, showing an upward-sloping cluster of points with very few outliers. The pattern is slightly curved, however, suggesting that the relationship between these two variables might be better fitted by a quadratic trendline.
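To check the curvature mentioned above, a second-degree polynomial can be fitted and overlaid on the scatterplot. A minimal sketch using np.polyfit, with the same x (Bounce Rate) and y (Visits) as above:

#Sketch: fit and plot a quadratic trendline for Visits vs Bounce Rate
coeffs = np.polyfit(x, y, 2)                      #quadratic coefficients
x_line = np.linspace(x.min(), x.max(), 100)       #smooth grid for the curve
plt.scatter(x, y)
plt.plot(x_line, np.polyval(coeffs, x_line), color="red")
plt.title("Visits vs Bounce Rate with Quadratic Trendline")
plt.xlabel("Bounce Rate")
plt.ylabel("Visits")
plt.show()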

Ans. 8: Calculating the Summary values of the Pounds Sold Data. Creating a Histogram of the
same across all the weeks. Commenting on the following Graph.

#Histogram for Pounds Sold across all weeks

LS_df=pd.read_excel("Quality_Alloy.xlsx", sheet_name='Lbs. Sold')["Lbs. Sold"]


print("Mean: ", LS_df.mean())
print("Median: ", LS_df.median())
print("Standard Deviation: ", LS_df.std())
print("Minimum: ", LS_df.min())
print("Maximum: ", LS_df.max())

plt.hist(LS_df, 29)
plt.ylabel("Relative Frequency")

Output:
Mean: 18681.55517241379
Median: 17673.0
Standard Deviation: 6840.507939520858
Minimum: 3826
Maximum: 44740
[Histogram: Pounds Sold across all weeks]

Remarks: This histogram shows the frequencies of the different values of the Pounds Sold variable. The frequencies rise gradually and then fall off, which is characteristic of a bell-shaped curve. However, the graph does not indicate complete normality: around the value 30,000 the frequency drops to zero, and the peak occurs not at the centre of the range but somewhat to the left of it. So although the histogram appears roughly bell-shaped, the distribution is positively skewed (the peak sits to the left with a longer right tail).

Ans. 8(d): Determining how far the Pounds sold data follows the Empirical rule.

#Empirical Rule Check

LS_df = pd.read_excel("Quality_Alloy.xlsx", sheet_name='Lbs. Sold')["Lbs. Sold"]

print("Mean: ", LS_df.mean())
print("Standard Deviation: ", LS_df.std())

def count1(list1):
    return len(list(x for x in list1 if LS_df.mean()-LS_df.std() <= x <= LS_df.mean()+LS_df.std()))

def count2(list1):
    return len(list(x for x in list1 if LS_df.mean()-2*LS_df.std() <= x <= LS_df.mean()+2*LS_df.std()))

def count3(list1):
    return len(list(x for x in list1 if LS_df.mean()-3*LS_df.std() <= x <= LS_df.mean()+3*LS_df.std()))

count1 = count1(LS_df)
count2 = count2(LS_df)
count3 = count3(LS_df)

D = {"Theoretical % of Data": ["68%", "95%", "99.7%"],
     "Theoretical No. Obs.": [0.68*len(LS_df), 0.95*len(LS_df), 0.997*len(LS_df)],
     "Actual No. Obs.": [count1, count2, count3]}

print("Empirical Rule")
df = pd.DataFrame(D, index=["Mean +/- Std. Dev.", "Mean +/- 2*Std. Dev.", "Mean +/- 3*Std. Dev."])
df

Output:
Mean: 18681.55517241379
Standard Deviation: 6840.507939520858
Empirical Rule
[Table: theoretical vs. actual number of observations within 1, 2 and 3 standard deviations of the mean]
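The same counts can also be obtained without the helper functions, using pandas Series.between. A minimal sketch:

#Sketch: count observations within k standard deviations of the mean
m, s = LS_df.mean(), LS_df.std()
for k in (1, 2, 3):
    n = LS_df.between(m - k * s, m + k * s).sum()
    print(f"Mean +/- {k}*Std. Dev.: {n} of {len(LS_df)} observations")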

Ans. 8(e): Determining how far the Pounds sold data follows the Empirical rule with a refined
method.

#Empirical Rule Check (Advanced)

LS_df = pd.read_excel("Quality_Alloy.xlsx", sheet_name='Lbs. Sold')["Lbs. Sold"]

print("Mean: ", LS_df.mean())
print("Standard Deviation: ", LS_df.std())

def count1(list1):
    return len(list(x for x in list1 if LS_df.mean() <= x <= LS_df.mean()+LS_df.std()))

def count2(list1):
    return len(list(x for x in list1 if LS_df.mean()-LS_df.std() <= x <= LS_df.mean()))

def count3(list1):
    return len(list(x for x in list1 if LS_df.mean()+1*LS_df.std() <= x <= LS_df.mean()+2*LS_df.std()))

def count4(list1):
    return len(list(x for x in list1 if LS_df.mean()-2*LS_df.std() <= x <= LS_df.mean()-1*LS_df.std()))

def count5(list1):
    return len(list(x for x in list1 if LS_df.mean()+2*LS_df.std() <= x <= LS_df.mean()+3*LS_df.std()))

def count6(list1):
    return len(list(x for x in list1 if LS_df.mean()-3*LS_df.std() <= x <= LS_df.mean()-2*LS_df.std()))

count1 = count1(LS_df)
count2 = count2(LS_df)
count3 = count3(LS_df)
count4 = count4(LS_df)
count5 = count5(LS_df)
count6 = count6(LS_df)

D = {"Theoretical % of Data": ["34%", "34%", "13.5%", "13.5%", "2%", "2%"],
     "Theoretical No. Obs.": [0.34*len(LS_df), 0.34*len(LS_df), 0.135*len(LS_df),
                              0.135*len(LS_df), 0.02*len(LS_df), 0.02*len(LS_df)],
     "Actual No. Obs.": [count1, count2, count3, count4, count5, count6]}

print("Empirical Rule")
df = pd.DataFrame(D, index=["Mean to Mean + Std. Dev.", "Mean - Std. Dev. to Mean",
                            "1*Std. Dev. to 2*Std. Dev.", "-2*Std. Dev. to -1*Std. Dev.",
                            "2*Std. Dev. to 3*Std. Dev.", "-3*Std. Dev. to -2*Std. Dev."])
df

Output:
Mean: 18681.55517241379
Standard Deviation: 6840.507939520858
Empirical Rule
[Table: theoretical vs. actual number of observations in the six one-standard-deviation bands around the mean]

Ans. 8(f): To assess how closely the Pounds Sold data follow the Empirical Rule, and hence a Normal (bell-shaped) distribution, we compare the theoretical and actual numbers of observations in the two tables above for each range defined by the mean and standard deviation. Under the first method the curve is divided into 3 ranges. For the range "Mean +/- Std. Dev." a perfectly Normal distribution would place 68% of the values (here, about 197.2 observations) in that range, whereas our data have 201 observations there. Since the difference is small, the bell-shaped feature appears to be followed reasonably well for this range, and the differences between theoretical and actual counts are similarly modest for the remaining two ranges (Mean +/- 2*Std. Dev. and Mean +/- 3*Std. Dev.).

In the second table, however, the curve is divided into 6 ranges. For the range "Mean to Mean + Std. Dev." a perfectly Normal distribution would place 34% of the values (about 98.6 observations) there, whereas our data have only 84. Similarly, for the range "Mean - Std. Dev. to Mean" the theoretical count is again about 98.6 observations, whereas our data have 117. These differences are quite large: to the right of the mean the frequencies are somewhat lower than a Normal distribution would predict, and to the left of the mean they are somewhat higher. The same pattern appears in the subsequent ranges of the second table. This table therefore indicates that the Pounds Sold data do not follow normality very closely.

Ans. 8(f) (continued): Applying a one-sample t-test to the Pounds Sold data.

#Distribution plot and one-sample t-test for the Pounds Sold data:

LS_df = pd.read_excel("Quality_Alloy.xlsx", sheet_name='Lbs. Sold')["Lbs. Sold"]

sn.distplot(LS_df, label="Pounds Sold")   #on newer seaborn versions use sn.histplot(LS_df, kde=True)
plt.legend()

#One-sample t-test of H0: population mean = 20,000 lbs
print(st.ttest_1samp(LS_df, 20000))

#p-value < 0.05, so reject the null hypothesis

Output:

Ttest_1sampResult(statistic=-3.2822571904938735, pvalue=0.0011562126592234148)

Remarks: Strictly speaking, a one-sample t-test checks a hypothesis about the mean rather than normality itself. Assuming a population mean of 20,000 lbs, the p-value of the test is below 0.05, so at the 95% confidence level we reject the null hypothesis that the mean of the Pounds Sold data equals 20,000 lbs. Together with the skewness noted above, this is consistent with our earlier observation that the data do not appear to follow a Normal distribution closely (although the result could also simply mean that the assumed population mean is not correct).
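Since the one-sample t-test addresses the mean rather than the shape of the distribution, a dedicated normality test can be applied as a cross-check. A minimal sketch using the Shapiro-Wilk test from scipy.stats:

#Sketch: direct normality check with the Shapiro-Wilk test
#H0: the Pounds Sold data come from a normal distribution
stat, p = st.shapiro(LS_df)
print("Shapiro-Wilk statistic = %.5f, p-value = %.5f" % (stat, p))
#p < 0.05 would reject normality; p >= 0.05 would fail to reject it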

Ans. 8(g): Determining the Skewness and Kurtosis of the Pounds Sold data.

#Calculating Skewness and Kurtosis:

LS_df=pd.read_excel("Quality_Alloy.xlsx", sheet_name='Lbs. Sold')["Lbs. Sold"]

#Calculate sample skewness


print("Skewness = ",st.skew(LS_df, bias=False))

#Calculate sample kurtosis


print("Kurtosis = ",st.kurtosis(LS_df, bias=False))

Output:
Skewness = 0.6323074848896508
Kurtosis = 0.5636614418064347

Remarks: The skewness of the Pounds Sold data is about 0.6323, which is greater than the skewness of a Normal distribution (0), so the data are positively skewed. The kurtosis reported by scipy is the excess (Fisher) kurtosis, for which a Normal distribution scores 0; the value of about 0.5637 is therefore slightly above the Normal benchmark, indicating a mildly leptokurtic (heavier-tailed) distribution rather than a platykurtic one. Both measures support our earlier conclusion that the data do not follow a Normal distribution particularly closely.
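Note that scipy.stats.kurtosis returns the excess (Fisher) kurtosis by default, for which a Normal distribution scores 0; setting fisher=False gives the Pearson kurtosis, for which the Normal benchmark is 3. A minimal sketch showing both conventions:

#Sketch: kurtosis under both conventions
print("Excess (Fisher) kurtosis =", st.kurtosis(LS_df, fisher=True, bias=False))   #normal = 0
print("Pearson kurtosis         =", st.kurtosis(LS_df, fisher=False, bias=False))  #normal = 3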

Ans. 9:

Remarks: This table gives an overall view of the daily visits data. The mean is greater than the median, which in turn is greater than the mode, suggesting that the data are positively skewed (consistent with the skewness of 2.1670 > 0). The standard deviation is quite high, showing that the values vary widely, which is also reflected in the range and in the minimum and maximum figures. The kurtosis (above 3) indicates a leptokurtic, heavy-tailed distribution. Overall, the Daily Visits data do not follow normality.
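The code behind this summary table is not shown above; a minimal sketch that would reproduce such measures is given below, assuming the Daily Visits sheet of Quality_Alloy.xlsx stores the counts in a column named "Visits" (the actual column name may differ):

#Sketch: summary measures for the Daily Visits data (column name "Visits" assumed)
dv = pd.read_excel("Quality_Alloy.xlsx", sheet_name="Daily Visits")["Visits"]
print("Mean:               ", dv.mean())
print("Median:             ", dv.median())
print("Mode:               ", dv.mode()[0])
print("Standard Deviation: ", dv.std())
print("Range:              ", dv.max() - dv.min())
print("Minimum:            ", dv.min())
print("Maximum:            ", dv.max())
print("Skewness:           ", st.skew(dv, bias=False))
print("Kurtosis:           ", st.kurtosis(dv, bias=False))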

Remarks: In the chart provided above, the histogram of the daily visits data has most of its mass concentrated on the left with a long right tail, indicating strong positive skewness. To assess how closely the Daily Visits data follow the Empirical Rule, and hence a Normal (bell-shaped) distribution, we compare the theoretical and actual numbers of observations in the two tables above for each range defined by the mean and standard deviation.

Under the first method the curve is divided into 3 ranges. For the range "Mean +/- Std. Dev." a perfectly Normal distribution would place 68% of the values (here, about 314 observations) in that range, whereas our data have 392 observations there. Since this difference is quite large, the bell-shaped feature is not followed well, and the differences between theoretical and actual counts are similarly large for the remaining two ranges (Mean +/- 2*Std. Dev. and Mean +/- 3*Std. Dev.). This again implies a lack of normality in these observations.

In the second table the curve is divided into 6 ranges. For the range "Mean to Mean + Std. Dev." a perfectly Normal distribution would place 34% of the values (about 157 observations) there, whereas our data have only 108. Similarly, for the range "Mean - Std. Dev. to Mean" the theoretical count is again about 157 observations, whereas our data have 284. These differences are very large: to the right of the mean the frequencies are lower than a Normal distribution would predict, and to the left of the mean they are higher. The same pattern appears in the subsequent ranges of the second table. This table therefore indicates that the Daily Visits data do not follow normality.

Furthermore, the skewness and kurtosis of the Daily Visits data depart from those of an ideal Normal distribution by much more than those of the Pounds Sold data. We may therefore conclude that the Pounds Sold data are somewhat closer to normality than the Daily Visits data.
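The daily-visits histogram and empirical-rule tables discussed above are likewise not reproduced here; a minimal sketch that would regenerate them, under the same column-name assumption as in the sketch under Ans. 9:

#Sketch: histogram and empirical-rule counts for the Daily Visits data
dv = pd.read_excel("Quality_Alloy.xlsx", sheet_name="Daily Visits")["Visits"]  #column name assumed

plt.hist(dv, 30)
plt.title("Daily Visits Histogram")
plt.xlabel("Visits per Day")
plt.ylabel("Frequency")
plt.show()

m, s = dv.mean(), dv.std()
for k in (1, 2, 3):
    inside = dv.between(m - k * s, m + k * s).sum()
    print(f"Mean +/- {k}*Std. Dev.: {inside} of {len(dv)} observations")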

Ans. 10: We have depicted all the observations and findings from our Analysis in this report and
also provided our insights over all our deductions from the charts, diagrams, tables and graphs.

The End
