Professional Documents
Culture Documents
import os

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
sns.set(color_codes=True)
%matplotlib inline
In [32]: df.head()
Out[32]:
Date Order_id Product
0 01-01-2018 1 yogurt
1 01-01-2018 1 pork
In [33]: df.describe().T
Out[33]:
count mean std min 25% 50% 75% max
In [34]: df.info()
<class 'pandas.core.frame.DataFrame'>
In [35]: df.isnull().sum()
Out[35]: Date 0
Order_id 0
Product 0
dtype: int64
In [36]: df.shape
Out[36]: (20641, 3)
In [37]: df.duplicated().sum()
Out[37]: 4730
In [39]: df
Out[39]:
Date Order_id Product
0 01-01-2018 1 yogurt
1 01-01-2018 1 pork
In [40]: df = df.drop_duplicates()
In [41]: df
Out[41]:
Date Order_id Product
0 01-01-2018 1 yogurt
1 01-01-2018 1 pork
In [12]: df['Product'].value_counts().sort_values(ascending=False).head(20).plot(kind='bar',figsize=(
12,7), color='indigo');
In [14]: basket.head()
Out[14]:
all- aluminum dinner dishwashing sandwich
Product bagels beef butter cereals cheeses coffee/tea ... shamp
purpose foil rolls liquid/detergent loaves
Order_id
1 1.0 1.0 0.0 1.0 1.0 0.0 0.0 0.0 1.0 0.0 ... 0.0
2 0.0 1.0 0.0 0.0 0.0 1.0 1.0 0.0 0.0 1.0 ... 0.0
3 0.0 0.0 1.0 0.0 0.0 1.0 1.0 0.0 1.0 0.0 ... 1.0
4 1.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 ... 0.0
5 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 ... 1.0
5 rows × 37 columns
def encode_zero_one(x):
    """Binarise a single basket cell.

    Parameters
    ----------
    x : number
        A cell value; it is only compared against 1.

    Returns
    -------
    int
        1 when ``x >= 1``, otherwise 0.

    Notes
    -----
    The result is always 0 or 1. Values strictly between 0 and 1, and NaN
    (for which every comparison is False), map to 0 instead of falling
    through and returning None.
    """
    return 1 if x >= 1 else 0
In [16]: basket=basket.applymap(encode_zero_one)
itemsets
Out[17]:
support itemsets
2 0.385426 (bagels)
3 0.374890 (beef)
4 0.367867 (butter)
In [18]: plt.figure(figsize=(12,7))
sns.barplot(itemsets.sort_values('support',ascending=False).iloc[0:20,1],
itemsets.sort_values('support',ascending=False).iloc[0:20,0])
plt.xticks(rotation=90)
plt.show()
In [20]: basket.head(20)
Out[20]:
antecedent consequent
antecedents consequents support confidence lift leverage conviction
support support
(toilet paper,
(beef, butter,
21907038 coffee/tea, all- 0.022827 0.058824 0.010536 0.461538 7.846154 0.009193 1.747899
cereals)
purpose, fruits)
(toilet paper,
(beef, butter,
21907043 coffee/tea, all- 0.058824 0.022827 0.010536 0.179104 7.846154 0.009193 1.190374
cereals)
purpose, fruits)
(pork, individual
(sandwich loaves,
22026345 meals, milk, ice 0.020193 0.067603 0.010536 0.521739 7.717674 0.009170 1.949557
cereals, shampoo)
cream)
(ketchup, milk,
(pork, coffee/tea,
22029870 cheeses, lunch 0.022827 0.065847 0.011414 0.500000 7.593333 0.009910 1.868306
soap)
meat)
(ketchup, milk,
(pork, coffee/tea,
22029911 cheeses, lunch 0.065847 0.022827 0.011414 0.173333 7.593333 0.009910 1.182064
soap)
meat)
(laundry
detergent, (flour, soap,
21916464 0.025461 0.059701 0.011414 0.448276 7.508621 0.009893 1.704291
ketchup, sugar, sandwich bags)
all- purpose)
(laundry
(flour, soap, detergent,
21916517 0.059701 0.025461 0.011414 0.191176 7.508621 0.009893 1.204885
sandwich bags) ketchup, sugar,
all- purpose)
(beef, pasta,
(spaghetti sauce,
22002778 sandwich bags, 0.026339 0.053556 0.010536 0.400000 7.468852 0.009125 1.577407
shampoo, fruits)
lunch meat)
(beef, pasta,
(spaghetti sauce,
22002823 sandwich bags, 0.053556 0.026339 0.010536 0.196721 7.468852 0.009125 1.212109
shampoo, fruits)
lunch meat)
(laundry
detergent, soap, (ketchup, sugar,
21916467 0.027217 0.057068 0.011414 0.419355 7.348387 0.009860 1.623939
flour, sandwich all- purpose)
bags)
(laundry
(ketchup, sugar, detergent, soap,
21916514 0.057068 0.027217 0.011414 0.200000 7.348387 0.009860 1.215979
all- purpose) flour, sandwich
bags)
(soda, coffee/tea,
(soap, milk,
22045238 sandwich loaves, 0.026339 0.065847 0.012291 0.466667 7.087111 0.010557 1.751536
waffles)
lunch meat)
(soda, coffee/tea,
(soap, milk,
22045287 sandwich loaves, 0.065847 0.026339 0.012291 0.186667 7.087111 0.010557 1.197124
waffles)
lunch meat)
In [ ]: