Professional Documents
Culture Documents
In [1]:
In [2]:
# Loading Data
# Data is available in quality_alloys
# Sheet "QA" contains weekly data related to visits and financials across the periods: initial, pre-promo, promo and post-promo
# Sheet "demographics" contains visit related data
Out[3]:
May
25 -
0 1 1_Initial 1632 1509 3328 2.04 71
May
31
Jun 1 -
1 2 1_Initial 1580 1450 3097 1.96 56
Jun 7
Jun 8 -
2 3 1_Initial 1441 1306 3202 2.22 79
Jun 14
Jun 15
3 4 1_Initial - Jun 1452 1301 3170 2.18 81
21
Jun 22
4 5 1_Initial - Jun 1339 1255 2366 1.77 50
28
In [4]:
Out[4]:
Northern
1 pagead2.googlesyndication.com 8044 yahoo 1250 17509
America
South-Eastern
7 mu.com 344 bing 122 1968
Asia
# Quick structural overview: dimensions and column schemas of both
# working DataFrames (weekly web/financial data and visit demographics).
separator = "______________________________________"

print("Alloys_DF Dimensions")
print(alloys_df.shape)
print("")
print(separator)
print("")
print("Demographics_DF Dimensions")
print(demographics_df.shape)
print("")
print(separator)
print("")
print("Alloys_DF Information")
print("")
# .info() prints its report itself and returns None; the original wrapped it
# in print(), which emitted a stray "None" line after each report.
alloys_df.info()
print("")
print(separator)
print("")
print("Demographics_DF Information")
print("")
demographics_df.info()
Alloys_DF Dimensions
(66, 14)
______________________________________
Demographics_DF Dimensions
(10, 12)
______________________________________
Alloys_DF Information
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 66 entries, 0 to 65
Data columns (total 14 columns):
Week 66 non-null int64
Type 66 non-null object
Weeks 66 non-null object
Visits 66 non-null int64
Unique_Visits 66 non-null int64
Pageviews 66 non-null int64
Pages/Visit 66 non-null float64
Time_on_Site 66 non-null int64
Bounce_Rate_% 66 non-null float64
New_Visits_% 66 non-null float64
Revenue 66 non-null float64
Profit 66 non-null float64
Lbs_Sold 66 non-null float64
Inquiries 66 non-null int64
dtypes: float64(6), int64(6), object(2)
memory usage: 7.3+ KB
None
______________________________________
Demographics_DF Information
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 12 columns):
Top_10_Sites 10 non-null object
S_Visits 10 non-null int64
Top_10_Engines 10 non-null object
E_Visits 10 non-null int64
Top_10_Regions 10 non-null object
R_Visits 10 non-null int64
Top_10_Browsers 10 non-null object
B_Visits 10 non-null int64
Top_10_OS 10 non-null object
OS_Visits 10 non-null int64
Traffic_Source 4 non-null object
T_Visits 4 non-null float64
dtypes: float64(1), int64(5), object(6)
memory usage: 1.1+ KB
None
In [6]:
# Before running plotly, use the command prompt to install it: pip install plotly==4.8.2, or install it from Anaconda
import plotly.express as px
3500
3000
2500
Visits
2000
1500
In [8]:
3500
3000
2500
Unique_Visits
2000
1500
In [10]:
1M
0.8M
0.6M
Revenue
0.4M
Inference: 1. Visits and Unique visits show a similar pattern 2. Visits and Unique visits have increased during the
promotion period and then decreased 3. Visits and Unique visits in the post-promo period have settled at levels
higher than the pre-promo period 4. Sales decreased during the promo period. You may plot other parameters
like page views, time spent, pages per visit, quantity sold, profit etc.
In [12]:
alloys_df.describe(include=np.number)
Out[12]:
# (c) Describe the financials and behavior by Period - initial, pre-promo,
# promo and post-promo.
# Columns to summarise per period (the "Type" column).
values = ['Unique_Visits', 'Visits', 'Time_on_Site', 'Bounce_Rate_%',
          'New_Visits_%', 'Revenue', 'Profit', 'Lbs_Sold', 'Inquiries']
index = ['Type']
# Every column used the same aggregate, so a single 'mean' replaces the
# original per-column {col: np.mean} dict (passing np.mean as aggfunc is
# also deprecated in recent pandas; the string form is the supported
# spelling and produces identical results here).
result = pd.pivot_table(alloys_df, values=values, index=index,
                        aggfunc='mean', fill_value=0)
result = result.round(2)
result
Out[13]:
Type
2_Pre-
59.41 6.48 18440.77 83.88 159932.03 534313.52 9
promo
4_Post-
66.33 5.43 14577.79 86.34 111045.82 371728.02 7
Promo
In [14]:
# Revenue vs. quantity sold: an upward trend here would indicate that
# revenue is driven by sales volume.
quantity_sold = alloys_df['Lbs_Sold']
weekly_revenue = alloys_df['Revenue']
plt.scatter(quantity_sold, weekly_revenue);
plt.xlabel('Quantity Sold');
plt.ylabel('Revenues');
In [15]:
0.8689297128616138 3.2149183975865717e-21
In [16]:
# Revenue vs. number of site visits: checks whether web traffic
# translates into sales.
visit_counts = alloys_df['Visits']
weekly_revenue = alloys_df['Revenue']
plt.scatter(visit_counts, weekly_revenue);
plt.xlabel('Number of Visits');
plt.ylabel('Revenues');
# Inference - Revenue does not seem to be related with the number of visits
In [17]:
-0.05939183049878598 0.6357131002032045
In [18]:
# Histogram of weekly pounds sold.
# The original figure had no labels; a figure should stand alone when
# the notebook is skimmed.
plt.hist(alloys_df['Lbs_Sold']);
plt.xlabel('Lbs_Sold');
plt.ylabel('Frequency');
plt.title('Distribution of weekly Lbs_Sold');
In [19]:
# Density Plot of weekly pounds sold.
# sn.distplot is deprecated (removed in seaborn 0.14); histplot with
# kde=True and stat="density" reproduces its default histogram + KDE view.
sn.histplot(alloys_df['Lbs_Sold'], kde=True, stat="density", color='green');
In [20]:
# Confidence Interval
print("")
print("Confidence Interval")
from scipy import stats
# NOTE(review): scale is the sample std of the raw weekly values, so this
# 50% normal interval brackets individual weekly Lbs_Sold observations,
# not the mean (a CI for the mean would use std/sqrt(n)) — confirm intent.
print(stats.norm.interval(0.50,
                          loc=alloys_df.Lbs_Sold.mean(),
                          scale=alloys_df.Lbs_Sold.std()))
print("")
print("Original Values")
print(alloys_df.Lbs_Sold.to_numpy().round(2))
print("")
# Moving Average: window=3 rolling mean as a simple smoothed estimate
# (first two entries are NaN by construction).
print("Estimates using moving average")
print(alloys_df.Lbs_Sold.rolling(window=3).mean().to_numpy().round(2))
Mean: 17342.11
Median: 17215.73
Std_Dev: 6068.91
Skewness: 0.33
Kurtosis: -0.19
Confidence Interval
(13248.690144531636, 21435.52997668048)
Original Values
[16585.18 18906.38 28052.92 19382.31 24274.25 15308.72 8633.06 1721
6.34
17308.57 24571.17 14389.77 17230.83 13801.99 26652.7 12402.83 1169
5.44
26362.02 15771.65 31968.98 15531.27 19734.21 17192.88 22591.28 899
2.42
19104.31 21454.99 18783.76 14298.02 17215.12 27256.95 11292.47 2014
7.79
16453.81 12702.59 26303.49 22198.86 16535.15 7814.05 28041.31 3149
6.26
10181.38 9727.18 18323.31 17299.12 13862.04 19006.92 23283.78 1737
4.42
18194.93 9176.38 12880.99 15523.62 15406.28 14535.71 10397.18 1105
4.22
17854.91 7197.15 3825.75 22820.65 12758.08 22324.76 18565.9 1229
4.4
11292.5 23761.61]
In [21]:
(16585.18+18906.38+28052.92)/3
Out[21]:
21181.493333333332
In [22]:
# Histogram of weekly visit counts.
# The original figure had no labels; a figure should stand alone when
# the notebook is skimmed.
plt.hist(alloys_df['Visits']);
plt.xlabel('Visits');
plt.ylabel('Frequency');
plt.title('Distribution of weekly Visits');
In [23]:
# Density Plot of weekly visit counts.
# sn.distplot is deprecated (removed in seaborn 0.14); histplot with
# kde=True and stat="density" reproduces its default histogram + KDE view.
sn.histplot(alloys_df['Visits'], kde=True, stat="density", color='green');
In [24]:
# Point estimates for weekly Visits.
print("")
print("Original Values")
# Visits is int64 (see the .info() report above), so the original's
# .round(2) here was a copy-paste no-op from the Lbs_Sold cell; dropped.
print(alloys_df.Visits.to_numpy())
print("")
# Moving Average: window=3 rolling mean (produces floats, so keep round).
print("Estimates using moving average")
print(alloys_df.Visits.rolling(window=3).mean().to_numpy().round(2))
Mean: 1051.98
Median: 842.0
Std_Dev: 638.12
Skewness: 2.04
Kurtosis: 4.93
Original Values
[1632 1580 1441 1452 1339 892 797 744 1044 906 849 737 734 6
26
577 562 563 652 611 561 558 570 551 537 543 558 536 5
49
545 591 383 402 547 631 795 1000 1207 2317 2013 2324 3726 25
63
3006 1663 1779 1086 1231 1248 1674 1514 1302 1191 957 963 882 9
42
835 802 806 900 860 924 792 781 776 772]
# (h) Analyze the visits by traffic source, search engine, geographic region, browsers and OS used.
# We can use pie charts for this purpose
# Before running plotly, use the command prompt to install it: pip install plotly==4.8.2
import plotly.express as px
27%
52.4%
In [26]:
2.8
0.584%
4%
0.302%
0.125%
In [27]:
Visits by Region
26.7%
34.5%
In [28]:
Visits by Browser
1.35%
1.22%
1.14%
18.9%
0.689%
0.0677%
0.0447%
0.0346%
0.013%
In [29]:
1.71%
1.51%
0.0691%
0.0418%
Visits by OS
0.0288%
0.0259%
0.0115%
0.00576%
0.00432%
In [30]:
30.2%
55.8
In [ ]: