You are on page 1 of 1

In 

[30]: import pandas as pd

import numpy as np

import os

import seaborn as sns

import matplotlib.pyplot as plt

from mlxtend.preprocessing import TransactionEncoder

from mlxtend.frequent_patterns import apriori

from mlxtend.frequent_patterns import association_rules

sns.set(color_codes=True)

%matplotlib inline

# In [31]:
# Read the transaction log from the CSV file in the working directory.
orders_csv = "dataset_group (1).csv"
df = pd.read_csv(orders_csv)

In [32]: df.head()

Out[32]:
Date Order_id Product

0 01-01-2018 1 yogurt

1 01-01-2018 1 pork

2 01-01-2018 1 sandwich bags

3 01-01-2018 1 lunch meat

4 01-01-2018 1 all- purpose

In [33]: df.describe().T

Out[33]:
count mean std min 25% 50% 75% max

Order_id 20641.0 575.986289 328.557078 1.0 292.0 581.0 862.0 1139.0

In [34]: df.info()

<class 'pandas.core.frame.DataFrame'>

RangeIndex: 20641 entries, 0 to 20640

Data columns (total 3 columns):

# Column Non-Null Count Dtype

--- ------ -------------- -----

0 Date 20641 non-null object

1 Order_id 20641 non-null int64

2 Product 20641 non-null object

dtypes: int64(1), object(2)

memory usage: 483.9+ KB

In [35]: df.isnull().sum()

Out[35]: Date 0

Order_id 0

Product 0

dtype: int64

In [36]: df.shape

Out[36]: (20641, 3)

In [37]: df.duplicated().sum()

Out[37]: 4730

# In [38]:
# Remove every row whose Product is the literal string 'none'.
is_placeholder = df.Product == 'none'
df = df.drop(df[is_placeholder].index)

In [39]: df

Out[39]:
Date Order_id Product

0 01-01-2018 1 yogurt

1 01-01-2018 1 pork

2 01-01-2018 1 sandwich bags

3 01-01-2018 1 lunch meat

4 01-01-2018 1 all- purpose

... ... ... ...

20636 25-02-2020 1138 soda

20637 25-02-2020 1138 paper towels

20638 26-02-2020 1139 soda

20639 26-02-2020 1139 laundry detergent

20640 26-02-2020 1139 shampoo

20641 rows × 3 columns

# In [40]:
# Keep only the first occurrence of each fully identical row
# (all columns compared; keep='first' is the pandas default, spelled out here).
df = df.drop_duplicates(keep='first')

In [41]: df

Out[41]:
Date Order_id Product

0 01-01-2018 1 yogurt

1 01-01-2018 1 pork

2 01-01-2018 1 sandwich bags

3 01-01-2018 1 lunch meat

4 01-01-2018 1 all- purpose

... ... ... ...

20634 25-02-2020 1138 soda

20637 25-02-2020 1138 paper towels

20638 26-02-2020 1139 soda

20639 26-02-2020 1139 laundry detergent

20640 26-02-2020 1139 shampoo

15911 rows × 3 columns

# In [12]:
# Bar chart of the 20 products that occur most often in df.
product_counts = df['Product'].value_counts().sort_values(ascending=False)
product_counts.head(20).plot(kind='bar', figsize=(12, 7), color='indigo');

# In [13]:
# Build the order x product matrix: one row per Order_id, one column per
# Product, each cell holding the number of rows in df that pair that order
# with that product (0 where the pair never occurs).
#
# The chain is wrapped in parentheses so it can span several lines safely;
# splitting it bare between `.fillna` and `(0)` leaves two unrelated statements.
# unstack() already leaves Order_id as the index, so the former
# reset_index() ... set_index('Order_id') round trip is not needed.
basket = (
    df.groupby(['Order_id', 'Product'])['Product']
    .count()
    .unstack()
    .fillna(0)
)

In [14]: basket.head()

Out[14]:
all- aluminum dinner dishwashing sandwich
Product bagels beef butter cereals cheeses coffee/tea ... shamp
purpose foil rolls liquid/detergent loaves

Order_id

1 1.0 1.0 0.0 1.0 1.0 0.0 0.0 0.0 1.0 0.0 ... 0.0

2 0.0 1.0 0.0 0.0 0.0 1.0 1.0 0.0 0.0 1.0 ... 0.0

3 0.0 0.0 1.0 0.0 0.0 1.0 1.0 0.0 1.0 0.0 ... 1.0

4 1.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 ... 0.0

5 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 ... 1.0

5 rows × 37 columns

# In [15]:
def encode_zero_one(x):
    """Collapse a numeric cell value to a 0/1 flag.

    Parameters
    ----------
    x : int or float
        The value to encode (a per-order product count when applied to the
        basket matrix).

    Returns
    -------
    int
        1 when ``x >= 1``, otherwise 0.

    Notes
    -----
    Every input maps to 0 or 1. Values strictly between 0 and 1, and NaN
    (for which every comparison is False), are encoded as 0 instead of
    falling through both conditions and implicitly returning None.
    """
    return 1 if x >= 1 else 0

# In [16]:
# Run encode_zero_one over every cell of the order x product matrix,
# turning the counts into 0/1 flags.
basket = basket.applymap(func=encode_zero_one)

# In [17]:
# Mine frequent itemsets with Apriori.
# - min_support=0.01 keeps itemsets whose support (fraction of basket rows
#   containing all their items) is at least 1%.
# - use_colnames=True reports product names instead of column positions.
# - The frame is cast to bool before mining: mlxtend asks for a boolean
#   one-hot frame and warns about slower processing for other dtypes.
#   The cast does not change which cells count as "present".
itemsets = apriori(
    basket.astype(bool),
    min_support=0.01,
    use_colnames=True,
    low_memory=True,
)

itemsets

Out[17]:
support itemsets

0 0.374890 (all- purpose)

1 0.384548 (aluminum foil)

2 0.385426 (bagels)

3 0.374890 (beef)

4 0.367867 (butter)

... ... ...

610567 0.010536 (soda, mixes, pork, ketchup, soap, spaghetti s...

610568 0.011414 (soda, mixes, soap, ketchup, spaghetti sauce, ...

610569 0.010536 (soda, lunch meat, laundry detergent, sandwich...

610570 0.011414 (milk, lunch meat, mixes, shampoo, sandwich ba...

610571 0.010536 (tortillas, milk, lunch meat, mixes, shampoo, ...

610572 rows × 2 columns

# In [18]:
# Bar chart of the 20 itemsets with the highest support.

# Sort once and reuse the result for both axes.
top_itemsets = itemsets.sort_values('support', ascending=False).head(20)

# Each itemset is a collection of product names; join them into one string
# so the tick labels are readable.
itemset_labels = top_itemsets['itemsets'].apply(
    lambda items: ', '.join(sorted(items))
)

plt.figure(figsize=(12, 7))

# x and y are passed by keyword: seaborn >= 0.12 no longer accepts them
# positionally.
ax = sns.barplot(x=itemset_labels, y=top_itemsets['support'])
ax.set(xlabel='itemset', ylabel='support', title='Top 20 itemsets by support')

plt.xticks(rotation=90)

plt.show()

# In [19]:
# Derive association rules from the frequent itemsets, scored by lift, and
# order them by lift and then confidence, both descending.
# The result is bound to `rules` rather than `basket`, so the one-hot
# `basket` matrix stays intact and the apriori cell can still be re-run.
rules = (
    association_rules(itemsets, metric="lift")
    .sort_values(['lift', 'confidence'], ascending=[False, False])
)

# In [20]:
rules.head(20)

Out[20]:
antecedent consequent
antecedents consequents support confidence lift leverage conviction
support support

(sandwich bags, (laundry


21916485 ketchup, sugar, detergent, soap, 0.023705 0.058824 0.011414 0.481481 8.185185 0.010019 1.815126
all- purpose) flour)

(laundry (sandwich bags,


21916496 detergent, soap, ketchup, sugar, 0.058824 0.023705 0.011414 0.194030 8.185185 0.010019 1.211329
flour) all- purpose)

(toilet paper,
(beef, butter,
21907038 coffee/tea, all- 0.022827 0.058824 0.010536 0.461538 7.846154 0.009193 1.747899
cereals)
purpose, fruits)

(toilet paper,
(beef, butter,
21907043 coffee/tea, all- 0.058824 0.022827 0.010536 0.179104 7.846154 0.009193 1.190374
cereals)
purpose, fruits)

(pork, individual
(sandwich loaves,
22026345 meals, milk, ice 0.020193 0.067603 0.010536 0.521739 7.717674 0.009170 1.949557
cereals, shampoo)
cream)

(sandwich loaves, (pork, individual


22026380 cereals, meals, milk, ice 0.067603 0.020193 0.010536 0.155844 7.717674 0.009170 1.160694
shampoo) cream)

(ketchup, milk,
(pork, coffee/tea,
22029870 cheeses, lunch 0.022827 0.065847 0.011414 0.500000 7.593333 0.009910 1.868306
soap)
meat)

(ketchup, milk,
(pork, coffee/tea,
22029911 cheeses, lunch 0.065847 0.022827 0.011414 0.173333 7.593333 0.009910 1.182064
soap)
meat)

(laundry
detergent, (flour, soap,
21916464 0.025461 0.059701 0.011414 0.448276 7.508621 0.009893 1.704291
ketchup, sugar, sandwich bags)
all- purpose)

(laundry
(flour, soap, detergent,
21916517 0.059701 0.025461 0.011414 0.191176 7.508621 0.009893 1.204885
sandwich bags) ketchup, sugar,
all- purpose)

(beef, pasta,
(spaghetti sauce,
22002778 sandwich bags, 0.026339 0.053556 0.010536 0.400000 7.468852 0.009125 1.577407
shampoo, fruits)
lunch meat)

(beef, pasta,
(spaghetti sauce,
22002823 sandwich bags, 0.053556 0.026339 0.010536 0.196721 7.468852 0.009125 1.212109
shampoo, fruits)
This study source was downloaded by 100000828586097 from CourseHero.com on 03-23-2022 08:37:24 GMT -05:00 lunch meat)

(laundry
https://www.coursehero.com/file/104748531/MRA-Project-Milestone2pdf/ detergent, soap, (ketchup, sugar,
21916467 0.027217 0.057068 0.011414 0.419355 7.348387 0.009860 1.623939
flour, sandwich all- purpose)
bags)

(laundry
(ketchup, sugar, detergent, soap,
21916514 0.057068 0.027217 0.011414 0.200000 7.348387 0.009860 1.215979
all- purpose) flour, sandwich
bags)

(pork, pasta, (soda, ketchup,


21986689 0.047410 0.030729 0.010536 0.222222 7.231746 0.009079 1.246206
soap) waffles, bagels)

(soda, ketchup, (pork, pasta,


21986656 0.030729 0.047410 0.010536 0.342857 7.231746 0.009079 1.449593
waffles, bagels) soap)

(flour, sandwich (laundry


21916481 bags, sugar, all- detergent, soap, 0.022827 0.069359 0.011414 0.500000 7.208861 0.009830 1.861282
purpose) ketchup)

(laundry (flour, sandwich


21916500 detergent, soap, bags, sugar, all- 0.069359 0.022827 0.011414 0.164557 7.208861 0.009830 1.169646
ketchup) purpose)

(soda, coffee/tea,
(soap, milk,
22045238 sandwich loaves, 0.026339 0.065847 0.012291 0.466667 7.087111 0.010557 1.751536
waffles)
lunch meat)

(soda, coffee/tea,
(soap, milk,
22045287 sandwich loaves, 0.065847 0.026339 0.012291 0.186667 7.087111 0.010557 1.197124
waffles)
lunch meat)

In [ ]:

Powered by TCPDF (www.tcpdf.org)

You might also like