You are on page 1of 1

In 

[223]: import pandas as pd

import numpy as np

import os

import seaborn as sns

import matplotlib.pyplot as plt

sns.set(color_codes=True)

%matplotlib inline

In [224]: df = pd.read_csv("Sales_Data.csv")

In [225]: df.head()

Out[225]:
ORDERNUMBER QUANTITYORDERED PRICEEACH ORDERLINENUMBER SALES ORDERDATE DAYS_SINCE_LASTORDER

0 10107 30 95.70 2 2871.00 24-02-2018 828

1 10121 34 81.35 5 2765.90 07-05-2018 757

2 10134 41 94.74 2 3884.34 01-07-2018 703

3 10145 45 83.26 6 3746.70 25-08-2018 649

4 10168 36 96.66 1 3479.76 28-10-2018 586

In [226]: df.describe()

Out[226]:
ORDERNUMBER QUANTITYORDERED PRICEEACH ORDERLINENUMBER SALES DAYS_SINCE_LASTORDER

count 2747.000000 2747.000000 2747.000000 2747.000000 2747.000000 2747.000000 274

mean 10259.761558 35.103021 101.098952 6.491081 3553.047583 1757.085912 10

std 91.877521 9.762135 42.042549 4.230544 1838.953901 819.280576 4

min 10100.000000 6.000000 26.880000 1.000000 482.130000 42.000000 3

25% 10181.000000 27.000000 68.745000 3.000000 2204.350000 1077.000000 6

50% 10264.000000 35.000000 95.550000 6.000000 3184.800000 1761.000000 9

75% 10334.500000 43.000000 127.100000 9.000000 4503.095000 2436.500000 12

max 10425.000000 97.000000 252.870000 18.000000 14082.800000 3562.000000 21

In [227]: df.info()

<class 'pandas.core.frame.DataFrame'>

RangeIndex: 2747 entries, 0 to 2746

Data columns (total 20 columns):

# Column Non-Null Count Dtype

--- ------ -------------- -----

0 ORDERNUMBER 2747 non-null int64

1 QUANTITYORDERED 2747 non-null int64

2 PRICEEACH 2747 non-null float64

3 ORDERLINENUMBER 2747 non-null int64

4 SALES 2747 non-null float64

5 ORDERDATE 2747 non-null object

6 DAYS_SINCE_LASTORDER 2747 non-null int64

7 STATUS 2747 non-null object

8 PRODUCTLINE 2747 non-null object

9 MSRP 2747 non-null int64

10 PRODUCTCODE 2747 non-null object

11 CUSTOMERNAME 2747 non-null object

12 PHONE 2747 non-null object

13 ADDRESSLINE1 2747 non-null object

14 CITY 2747 non-null object

15 POSTALCODE 2747 non-null object

16 COUNTRY 2747 non-null object

17 CONTACTLASTNAME 2747 non-null object

18 CONTACTFIRSTNAME 2747 non-null object

19 DEALSIZE 2747 non-null object

dtypes: float64(2), int64(5), object(13)

memory usage: 429.3+ KB

In [228]: df.shape

Out[228]: (2747, 20)

In [229]: df['ORDERDATE'] = pd.to_datetime(df['ORDERDATE'], unit='ns')

In [230]: df.dtypes

Out[230]: ORDERNUMBER int64

QUANTITYORDERED int64

PRICEEACH float64

ORDERLINENUMBER int64

SALES float64

ORDERDATE datetime64[ns]

DAYS_SINCE_LASTORDER int64

STATUS object

PRODUCTLINE object

MSRP int64

PRODUCTCODE object

CUSTOMERNAME object

PHONE object

ADDRESSLINE1 object

CITY object

POSTALCODE object

COUNTRY object

CONTACTLASTNAME object

CONTACTFIRSTNAME object

DEALSIZE object

dtype: object

In [231]: df.isnull().sum()

Out[231]: ORDERNUMBER 0

QUANTITYORDERED 0

PRICEEACH 0

ORDERLINENUMBER 0

SALES 0

ORDERDATE 0

DAYS_SINCE_LASTORDER 0

STATUS 0

PRODUCTLINE 0

MSRP 0

PRODUCTCODE 0

CUSTOMERNAME 0

PHONE 0

ADDRESSLINE1 0

CITY 0

POSTALCODE 0

COUNTRY 0

CONTACTLASTNAME 0

CONTACTFIRSTNAME 0

DEALSIZE 0

dtype: int64

In [232]: sns.distplot(df['QUANTITYORDERED'],color='indigo',rug=True )

Out[232]: <matplotlib.axes._subplots.AxesSubplot at 0x23d8ed24f70>

In [233]: sns.boxplot(df['QUANTITYORDERED'],color='indigo')

Out[233]: <matplotlib.axes._subplots.AxesSubplot at 0x23d882b0d60>

In [234]: sns.distplot(df['PRICEEACH'],color='blue',rug=True )

Out[234]: <matplotlib.axes._subplots.AxesSubplot at 0x23d880b85e0>

In [235]: sns.boxplot(df['PRICEEACH'],color='blue')

Out[235]: <matplotlib.axes._subplots.AxesSubplot at 0x23d8f010610>

In [236]: sns.distplot(df['ORDERLINENUMBER'],color='green',rug=True )

Out[236]: <matplotlib.axes._subplots.AxesSubplot at 0x23d8f05c730>

In [237]: sns.boxplot(df['ORDERLINENUMBER'],color='GREEN')

Out[237]: <matplotlib.axes._subplots.AxesSubplot at 0x23d889f8340>

In [238]: sns.distplot(df['SALES'],color='orange',rug=True )

Out[238]: <matplotlib.axes._subplots.AxesSubplot at 0x23d8a8709d0>

In [239]: sns.boxplot(df['SALES'],color='ORANGE')

Out[239]: <matplotlib.axes._subplots.AxesSubplot at 0x23d8811cf40>

In [240]: sns.distplot(df['DAYS_SINCE_LASTORDER'],color='pink',rug=True )

Out[240]: <matplotlib.axes._subplots.AxesSubplot at 0x23d8ac5e8b0>

In [241]: sns.boxplot(df['DAYS_SINCE_LASTORDER'],color='PINK')

Out[241]: <matplotlib.axes._subplots.AxesSubplot at 0x23d8d6dc250>

In [242]: sns.distplot(df['MSRP'],color='red',rug=True )

Out[242]: <matplotlib.axes._subplots.AxesSubplot at 0x23d91de66d0>

In [243]: sns.boxplot(df['MSRP'],color='RED')

Out[243]: <matplotlib.axes._subplots.AxesSubplot at 0x23d8a6c8250>

In [244]: sns.pairplot(df)

plt.show()

In [245]: plt.figure(figsize=(10,8))

sns.heatmap(df.corr(), annot=True, cmap='rainbow' )

plt.show()

In [246]: from datetime import datetime

now = datetime.now()

In [254]: df['recency_days']= df['ORDERDATE'].apply(lambda x:now-x)

df['recency_days']= df['recency_days'].apply(lambda x:x.days)

In [255]: df_rfm=df.rename(columns={'SALES':'monetary','recency_days':'recency','QUANTITYORDERED': 'fr


equency'})

In [256]: df_rfm

Out[256]:
ORDERNUMBER frequency PRICEEACH ORDERLINENUMBER monetary ORDERDATE DAYS_SINCE_LASTORDER STA

0 10107 30 95.70 2 2871.00 2018-02-24 828 Shi

1 10121 34 81.35 5 2765.90 2018-07-05 757 Shi

2 10134 41 94.74 2 3884.34 2018-01-07 703 Shi

3 10145 45 83.26 6 3746.70 2018-08-25 649 Shi

4 10168 36 96.66 1 3479.76 2018-10-28 586 Shi

... ... ... ... ... ... ... ...

2742 10350 20 112.22 15 2244.40 2019-02-12 2924 Shi

2743 10373 29 137.19 1 3978.51 2020-01-31 2865 Shi

2744 10386 43 125.99 4 5417.57 2020-01-03 2836 Reso

2745 10397 34 62.24 1 2116.16 2020-03-28 2810 Shi

2746 10414 47 65.52 9 3079.44 2020-06-05 2772 On

2747 rows × 21 columns

In [260]: df_1=df_rfm[['ORDERNUMBER','CUSTOMERNAME','recency','frequency','monetary']]

In [261]: df_1.head()

Out[261]:
ORDERNUMBER CUSTOMERNAME recency frequency monetary

0 10107 Land of Toys Inc. 1275 30 2871.00

1 10121 Reims Collectables 1144 34 2765.90

2 10134 Lyon Souveniers 1323 41 3884.34

3 10145 Toys4GrownUps.com 1093 45 3746.70

4 10168 Technics Stores Inc. 1029 36 3479.76

In [267]: from crm_rfm_modeling import rfm

from crm_rfm_modeling.rfm import RFM

model = RFM(weights=(0.33, 0.33, 0.34))

In [268]: df_2=df_rfm[['recency','frequency','monetary']]

In [269]: model.fit(df_2)

In [270]: model.summary_statistics()

Out[270]:
recency_scores frequency_scores monetary_scores

1 0.201675 0.233710 0.200218

2 0.198398 0.192210 0.199854

3 0.205679 0.190754 0.199854

4 0.194030 0.214780 0.199854

5 0.200218 0.168548 0.200218

In [271]: pd.DataFrame(model.cutoffs)

Out[271]:
recency frequency monetary

0 (1326, 1075.0) (6, 26.0) (482.13, 2006.218)

1 (1075.0, 930.5999999999999) (26.0, 32.0) (2006.218, 2793.284)

2 (930.5999999999999, 736.0) (32.0, 38.0) (2793.284, 3608.7120000000004)

3 (736.0, 632.8000000000002) (38.0, 45.0) (3608.7120000000004, 4943.904000000002)

4 (632.8000000000002, 264) (45.0, 97) (4943.904000000002, 14082.8)

In [272]: df_fitted=model.fitted_data

df_fitted.head()

Out[272]:
recency frequency monetary recency_scores frequency_scores monetary_scores recency_weighted frequency_weighted

0 1275 30 2871.00 1 2 3 0.33 0.66

1 1144 34 2765.90 1 3 2 0.33 0.99

2 1323 41 3884.34 1 4 4 0.33 1.32

3 1093 45 3746.70 1 4 4 0.33 1.32

4 1029 36 3479.76 2 3 3 0.66 0.99

TOP 10 BEST CUSTOMERS

In [279]: df_fitted.sort_values(['recency_scores','frequency_scores','monetary_scores'],ascending=[Fal
se,False,False]).head(10)

Out[279]:
recency frequency monetary recency_scores frequency_scores monetary_scores recency_weighted frequency_weighte

23 466 66 7516.08 5 5 5 1.65 1.6

49 448 50 12001.00 5 5 5 1.65 1.6

96 383 66 11886.60 5 5 5 1.65 1.6

97 466 56 9218.16 5 5 5 1.65 1.6

121 596 64 9661.44 5 5 5 1.65 1.6

222 580 49 8470.14 5 5 5 1.65 1.6

225 448 49 7969.36 5 5 5 1.65 1.6

248 383 66 8648.64 5 5 5 1.65 1.6

274 474 47 8236.75 5 5 5 1.65 1.6

374 535 54 5951.34 5 5 5 1.65 1.6

CUSTOMERS WHO ARE ON THE VERGE OF CHURNING

In [291]: a= df_fitted.sort_values(['recency_scores','frequency_scores','monetary_scores'],ascending=[
True,True,True])

display(a.iloc[5:11])

recency frequency monetary recency_scores frequency_scores monetary_scores recency_weighted frequency_weight

659 1287 23 1643.12 1 1 1 0.33 0.

723 1301 22 1189.98 1 1 1 0.33 0.

874 1133 20 1424.00 1 1 1 0.33 0.

898 1212 21 1878.66 1 1 1 0.33 0.

924 1133 20 1801.20 1 1 1 0.33 0.

1048 1268 26 1777.10 1 1 1 0.33 0.

LOST CUSTOMER

In [285]: df_fitted.sort_values(['recency_scores','frequency_scores','monetary_scores'],ascending=[Tru
e,True,True]).head(5)

Out[285]:
recency frequency monetary recency_scores frequency_scores monetary_scores recency_weighted frequency_weighte

327 1292 24 1800.24 1 1 1 0.33 0.3

328 1201 21 1340.64 1 1 1 0.33 0.3

340 1230 22 1685.42 1 1 1 0.33 0.3

577 1229 20 1262.80 1 1 1 0.33 0.3

656 1199 26 1495.26 1 1 1 0.33 0.3

LOYAL CUSTOMERS

In [286]: df_fitted[df_fitted.monetary_scores==5].head(5)

Out[286]:
recency
This study source was downloaded by 100000828586097 from CourseHero.com on 03-13-2022 13:34:55 GMT frequency
-05:00 monetary recency_scores frequency_scores monetary_scores recency_weighted frequency_weighted

6 1008 48 5512.32 2 5 5 0.66 1.65


https://www.coursehero.com/file/103680063/MRA-Project-Milestone-1pdf/
18 637 41 7737.93 4 4 5 1.32 1.32

23 466 66 7516.08 5 5 5 1.65 1.65

24 1301 26 5404.62 1 1 5 0.33 0.33

25 1247 29 7209.11 1 2 5 0.33 0.66

In [ ]:

In [ ]:

Powered by TCPDF (www.tcpdf.org)

You might also like