You are on page 1 of 18

Association rules

November 6, 2022

[6]: import pandas as pd


from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules
# Load the retail transaction workbook into a DataFrame.
# NOTE(review): this is a hard-coded absolute Windows path, so the cell only
# runs on a machine that has the file at exactly this location.
df = pd.read_excel('D:\\ML work\\Online_Retail.xlsx')
# Preview the first rows to check that the columns loaded as expected.
df.head()

[6]: InvoiceNo StockCode Description Quantity \


0 536365 85123A WHITE HANGING HEART T-LIGHT HOLDER 6
1 536365 71053 WHITE METAL LANTERN 6
2 536365 84406B CREAM CUPID HEARTS COAT HANGER 8
3 536365 84029G KNITTED UNION FLAG HOT WATER BOTTLE 6
4 536365 84029E RED WOOLLY HOTTIE WHITE HEART. 6

InvoiceDate UnitPrice CustomerID Country


0 2010-12-01 08:26:00 2.55 17850.0 United Kingdom
1 2010-12-01 08:26:00 3.39 17850.0 United Kingdom
2 2010-12-01 08:26:00 2.75 17850.0 United Kingdom
3 2010-12-01 08:26:00 3.39 17850.0 United Kingdom
4 2010-12-01 08:26:00 3.39 17850.0 United Kingdom

# Remove leading/trailing whitespace from every product description.
# NOTE(review): the same statement is repeated as the first line of cell [23].
[9]: df['Description'] = df['Description'].str.strip()

# Re-display the first rows after the whitespace clean-up.
[11]: df.head()

[11]: InvoiceNo StockCode Description Quantity \


0 536365 85123A WHITE HANGING HEART T-LIGHT HOLDER 6
1 536365 71053 WHITE METAL LANTERN 6
2 536365 84406B CREAM CUPID HEARTS COAT HANGER 8
3 536365 84029G KNITTED UNION FLAG HOT WATER BOTTLE 6
4 536365 84029E RED WOOLLY HOTTIE WHITE HEART. 6

InvoiceDate UnitPrice CustomerID Country


0 2010-12-01 08:26:00 2.55 17850.0 United Kingdom
1 2010-12-01 08:26:00 3.39 17850.0 United Kingdom
2 2010-12-01 08:26:00 2.75 17850.0 United Kingdom
3 2010-12-01 08:26:00 3.39 17850.0 United Kingdom
4 2010-12-01 08:26:00 3.39 17850.0 United Kingdom

1
[23]: df['Description'] = df['Description'].str.strip()
df.dropna(axis=0, subset=['InvoiceNo'], inplace=True)
df['InvoiceNo'] = df['InvoiceNo'].astype('str')
df = df[~df['InvoiceNo'].str.contains('C')]
df

[23]: InvoiceNo StockCode Description Quantity \


0 536365 85123A WHITE HANGING HEART T-LIGHT HOLDER 6
1 536365 71053 WHITE METAL LANTERN 6
2 536365 84406B CREAM CUPID HEARTS COAT HANGER 8
3 536365 84029G KNITTED UNION FLAG HOT WATER BOTTLE 6
4 536365 84029E RED WOOLLY HOTTIE WHITE HEART. 6
… … … … …
541904 581587 22613 PACK OF 20 SPACEBOY NAPKINS 12
541905 581587 22899 CHILDREN'S APRON DOLLY GIRL 6
541906 581587 23254 CHILDRENS CUTLERY DOLLY GIRL 4
541907 581587 23255 CHILDRENS CUTLERY CIRCUS PARADE 4
541908 581587 22138 BAKING SET 9 PIECE RETROSPOT 3

InvoiceDate UnitPrice CustomerID Country


0 2010-12-01 08:26:00 2.55 17850.0 United Kingdom
1 2010-12-01 08:26:00 3.39 17850.0 United Kingdom
2 2010-12-01 08:26:00 2.75 17850.0 United Kingdom
3 2010-12-01 08:26:00 3.39 17850.0 United Kingdom
4 2010-12-01 08:26:00 3.39 17850.0 United Kingdom
… … … … …
541904 2011-12-09 12:50:00 0.85 12680.0 France
541905 2011-12-09 12:50:00 2.10 12680.0 France
541906 2011-12-09 12:50:00 4.15 12680.0 France
541907 2011-12-09 12:50:00 4.15 12680.0 France
541908 2011-12-09 12:50:00 4.95 12680.0 France

[532621 rows x 8 columns]

[31]: f_filt = df['Country'] =="France"


df1 = df.loc[f_filt,["InvoiceNo"]]
set(df1['InvoiceNo'])

[31]: {'536370',
'536852',
'536974',
'537065',
'537463',
'537468',
'537693',
'537897',
'537967',

2
'538008',
'538093',
'538196',
'539050',
'539113',
'539407',
'539435',
'539551',
'539607',
'539688',
'539727',
'539829',
'540178',
'540239',
'540351',
'540365',
'540455',
'540463',
'540521',
'540642',
'540688',
'540789',
'540824',
'540835',
'540851',
'540972',
'540976',
'541120',
'541121',
'541138',
'541405',
'541567',
'541631',
'541857',
'541981',
'542132',
'542425',
'542433',
'542535',
'542629',
'542735',
'542904',
'542922',
'543030',
'543188',
'543459',
'543484',

3
'543625',
'544069',
'544115',
'544200',
'544355',
'544423',
'544470',
'544585',
'544817',
'544818',
'545051',
'545086',
'545105',
'545180',
'545181',
'545235',
'545301',
'545515',
'545517',
'545583',
'546373',
'546428',
'546479',
'546678',
'546765',
'546770',
'547087',
'547194',
'547196',
'547327',
'547492',
'547504',
'547508',
'547520',
'547722',
'547863',
'547892',
'548409',
'548410',
'548496',
'548553',
'548606',
'548725',
'548737',
'548913',
'549274',
'550145',

4
'550313',
'550338',
'550661',
'550821',
'550827',
'551163',
'551389',
'551539',
'551544',
'551545',
'551552',
'551649',
'551652',
'551887',
'552315',
'552325',
'552464',
'552631',
'552826',
'552851',
'552946',
'553044',
'553143',
'553161',
'553195',
'553208',
'553316',
'553357',
'553411',
'553566',
'553577',
'553666',
'553838',
'553843',
'553997',
'554097',
'554109',
'554134',
'554356',
'554555',
'554648',
'554650',
'554826',
'554840',
'554940',
'555096',
'555531',

5
'555547',
'555569',
'555637',
'555933',
'555936',
'555937',
'555941',
'556258',
'556259',
'556305',
'556463',
'556507',
'556526',
'556529',
'556816',
'556819',
'556820',
'556886',
'556924',
'557069',
'557247',
'557295',
'557483',
'557489',
'557627',
'557669',
'557734',
'558045',
'558124',
'558334',
'558687',
'558699',
'558813',
'559036',
'559134',
'559418',
'559422',
'559441',
'559607',
'559685',
'559699',
'559704',
'559782',
'559819',
'559889',
'560272',
'560363',

6
'560398',
'560452',
'561029',
'561470',
'561795',
'562039',
'562207',
'562275',
'562450',
'562520',
'562894',
'563189',
'563202',
'563288',
'563502',
'563662',
'563712',
'563753',
'563897',
'563926',
'564159',
'564173',
'564226',
'564314',
'564372',
'564471',
'564479',
'564531',
'564670',
'564862',
'564897',
'564906',
'564912',
'565239',
'565321',
'565322',
'565443',
'565587',
'565612',
'565801',
'565854',
'565865',
'565930',
'565931',
'566074',
'566115',
'566163',

7
'566256',
'566492',
'566736',
'566816',
'566873',
'566918',
'567191',
'567293',
'567296',
'567300',
'567380',
'567552',
'567618',
'567640',
'567657',
'567756',
'567793',
'567910',
'567915',
'568040',
'568137',
'568176',
'568518',
'568645',
'568798',
'568953',
'568988',
'568994',
'569097',
'569120',
'569122',
'569332',
'569402',
'569531',
'569568',
'569686',
'569699',
'569701',
'569921',
'570020',
'570030',
'570267',
'570409',
'570672',
'570724',
'570798',
'570823',

8
'570841',
'570851',
'570879',
'571240',
'571864',
'571923',
'572087',
'572925',
'572986',
'572992',
'573036',
'573077',
'573080',
'573153',
'573273',
'573316',
'573402',
'573428',
'573442',
'573867',
'573868',
'573891',
'574068',
'574093',
'574104',
'574252',
'574501',
'574506',
'574918',
'575049',
'575067',
'575193',
'575326',
'575519',
'575581',
'575584',
'575629',
'575661',
'575671',
'575692',
'575747',
'575759',
'575845',
'575880',
'575884',
'575916',
'576064',

9
'576087',
'576222',
'576245',
'576373',
'576399',
'576669',
'576672',
'576894',
'576927',
'576941',
'577121',
'577152',
'577295',
'577344',
'577502',
'577512',
'577687',
'577856',
'578033',
'578277',
'578287',
'578348',
'578541',
'578698',
'578736',
'578835',
'579153',
'579190',
'579193',
'579345',
'579492',
'579529',
'579634',
'579708',
'579792',
'579870',
'580120',
'580126',
'580144',
'580160',
'580285',
'580509',
'580534',
'580536',
'580705',
'580706',
'580736',

10
'580753',
'580756',
'580986',
'581001',
'581171',
'581279',
'581587'}

[33]: f_basket = df[f_filt].groupby(['InvoiceNo', 'Description'])

[43]: f_basket=f_basket['Quantity'].sum().unstack().reset_index().fillna(0).
,→set_index('InvoiceNo')

[45]: f_basket.head(10)

[45]: Description 10 COLOUR SPACEBOY PEN 12 COLOURED PARTY BALLOONS \


InvoiceNo
536370 0.0 0.0
536852 0.0 0.0
536974 0.0 0.0
537065 0.0 0.0
537463 0.0 0.0
537468 24.0 0.0
537693 0.0 0.0
537897 0.0 0.0
537967 0.0 0.0
538008 0.0 0.0

Description 12 EGG HOUSE PAINTED WOOD 12 MESSAGE CARDS WITH ENVELOPES \


InvoiceNo
536370 0.0 0.0
536852 0.0 0.0
536974 0.0 0.0
537065 0.0 0.0
537463 0.0 0.0
537468 0.0 0.0
537693 0.0 0.0
537897 0.0 0.0
537967 0.0 0.0
538008 0.0 0.0

Description 12 PENCIL SMALL TUBE WOODLAND \


InvoiceNo
536370 0.0
536852 0.0
536974 0.0
537065 0.0

11
537463 0.0
537468 0.0
537693 0.0
537897 0.0
537967 0.0
538008 0.0

Description 12 PENCILS SMALL TUBE RED RETROSPOT 12 PENCILS SMALL TUBE SKULL \
InvoiceNo
536370 0.0 0.0
536852 0.0 0.0
536974 0.0 0.0
537065 0.0 0.0
537463 0.0 0.0
537468 0.0 0.0
537693 0.0 0.0
537897 0.0 0.0
537967 0.0 0.0
538008 0.0 0.0

Description 12 PENCILS TALL TUBE POSY 12 PENCILS TALL TUBE RED RETROSPOT \
InvoiceNo
536370 0.0 0.0
536852 0.0 0.0
536974 0.0 0.0
537065 0.0 0.0
537463 0.0 0.0
537468 0.0 0.0
537693 0.0 0.0
537897 0.0 0.0
537967 0.0 0.0
538008 0.0 0.0

Description 12 PENCILS TALL TUBE WOODLAND … WRAP VINTAGE PETALS DESIGN \


InvoiceNo …
536370 0.0 … 0.0
536852 0.0 … 0.0
536974 0.0 … 0.0
537065 0.0 … 0.0
537463 0.0 … 0.0
537468 0.0 … 0.0
537693 0.0 … 0.0
537897 0.0 … 0.0
537967 0.0 … 0.0
538008 0.0 … 0.0

Description YELLOW COAT RACK PARIS FASHION YELLOW GIANT GARDEN THERMOMETER \

12
InvoiceNo
536370 0.0 0.0
536852 0.0 0.0
536974 0.0 0.0
537065 0.0 0.0
537463 0.0 0.0
537468 0.0 0.0
537693 0.0 0.0
537897 0.0 0.0
537967 0.0 0.0
538008 0.0 0.0

Description YELLOW SHARK HELICOPTER ZINC STAR T-LIGHT HOLDER \


InvoiceNo
536370 0.0 0.0
536852 0.0 0.0
536974 0.0 0.0
537065 0.0 0.0
537463 0.0 0.0
537468 0.0 0.0
537693 0.0 0.0
537897 0.0 0.0
537967 0.0 0.0
538008 0.0 0.0

Description ZINC FOLKART SLEIGH BELLS ZINC HERB GARDEN CONTAINER \


InvoiceNo
536370 0.0 0.0
536852 0.0 0.0
536974 0.0 0.0
537065 0.0 0.0
537463 0.0 0.0
537468 0.0 0.0
537693 0.0 0.0
537897 0.0 0.0
537967 0.0 0.0
538008 0.0 0.0

Description ZINC METAL HEART DECORATION ZINC T-LIGHT HOLDER STAR LARGE \
InvoiceNo
536370 0.0 0.0
536852 0.0 0.0
536974 0.0 0.0
537065 0.0 0.0
537463 0.0 0.0
537468 0.0 0.0
537693 0.0 0.0

13
537897 0.0 0.0
537967 0.0 0.0
538008 0.0 0.0

Description ZINC T-LIGHT HOLDER STARS SMALL


InvoiceNo
536370 0.0
536852 0.0
536974 0.0
537065 0.0
537463 0.0
537468 0.0
537693 0.0
537897 0.0
537967 0.0
538008 0.0

[10 rows x 1563 columns]

[56]: def encode_units(x):


if x <= 0:
return 0
else:
return 1
f_basket = f_basket.applymap(encode_units)
f_basket.drop('POSTAGE', inplace=True, axis=1)

[48]: f_basket.head(10)

[48]: Description 10 COLOUR SPACEBOY PEN 12 COLOURED PARTY BALLOONS \


InvoiceNo
536370 0 0
536852 0 0
536974 0 0
537065 0 0
537463 0 0
537468 1 0
537693 0 0
537897 0 0
537967 0 0
538008 0 0

Description 12 EGG HOUSE PAINTED WOOD 12 MESSAGE CARDS WITH ENVELOPES \


InvoiceNo
536370 0 0
536852 0 0
536974 0 0

14
537065 0 0
537463 0 0
537468 0 0
537693 0 0
537897 0 0
537967 0 0
538008 0 0

Description 12 PENCIL SMALL TUBE WOODLAND \


InvoiceNo
536370 0
536852 0
536974 0
537065 0
537463 0
537468 0
537693 0
537897 0
537967 0
538008 0

Description 12 PENCILS SMALL TUBE RED RETROSPOT 12 PENCILS SMALL TUBE SKULL \
InvoiceNo
536370 0 0
536852 0 0
536974 0 0
537065 0 0
537463 0 0
537468 0 0
537693 0 0
537897 0 0
537967 0 0
538008 0 0

Description 12 PENCILS TALL TUBE POSY 12 PENCILS TALL TUBE RED RETROSPOT \
InvoiceNo
536370 0 0
536852 0 0
536974 0 0
537065 0 0
537463 0 0
537468 0 0
537693 0 0
537897 0 0
537967 0 0
538008 0 0

15
Description 12 PENCILS TALL TUBE WOODLAND … WRAP VINTAGE PETALS DESIGN \
InvoiceNo …
536370 0 … 0
536852 0 … 0
536974 0 … 0
537065 0 … 0
537463 0 … 0
537468 0 … 0
537693 0 … 0
537897 0 … 0
537967 0 … 0
538008 0 … 0

Description YELLOW COAT RACK PARIS FASHION YELLOW GIANT GARDEN THERMOMETER \
InvoiceNo
536370 0 0
536852 0 0
536974 0 0
537065 0 0
537463 0 0
537468 0 0
537693 0 0
537897 0 0
537967 0 0
538008 0 0

Description YELLOW SHARK HELICOPTER ZINC STAR T-LIGHT HOLDER \


InvoiceNo
536370 0 0
536852 0 0
536974 0 0
537065 0 0
537463 0 0
537468 0 0
537693 0 0
537897 0 0
537967 0 0
538008 0 0

Description ZINC FOLKART SLEIGH BELLS ZINC HERB GARDEN CONTAINER \


InvoiceNo
536370 0 0
536852 0 0
536974 0 0
537065 0 0
537463 0 0
537468 0 0

16
537693 0 0
537897 0 0
537967 0 0
538008 0 0

Description ZINC METAL HEART DECORATION ZINC T-LIGHT HOLDER STAR LARGE \
InvoiceNo
536370 0 0
536852 0 0
536974 0 0
537065 0 0
537463 0 0
537468 0 0
537693 0 0
537897 0 0
537967 0 0
538008 0 0

Description ZINC T-LIGHT HOLDER STARS SMALL


InvoiceNo
536370 0
536852 0
536974 0
537065 0
537463 0
537468 0
537693 0
537897 0
537967 0
538008 0

[10 rows x 1563 columns]

[68]: frequent_itemsets = apriori(f_basket, min_support=0.07, use_colnames=True)


rules = association_rules(frequent_itemsets, metric="confidence",␣
,→min_threshold=.5)

rules.head()

C:\ProgramData\Anaconda3\lib\site-
packages\mlxtend\frequent_patterns\fpcommon.py:111: DeprecationWarning:
DataFrames with non-bool types result in worse computationalperformance and
their support might be discontinued in the future.Please use a DataFrame with
bool type
warnings.warn(

[68]: antecedents consequents \


0 (ALARM CLOCK BAKELIKE PINK) (ALARM CLOCK BAKELIKE GREEN)
1 (ALARM CLOCK BAKELIKE GREEN) (ALARM CLOCK BAKELIKE PINK)

17
2 (ALARM CLOCK BAKELIKE RED) (ALARM CLOCK BAKELIKE GREEN)
3 (ALARM CLOCK BAKELIKE GREEN) (ALARM CLOCK BAKELIKE RED)
4 (ALARM CLOCK BAKELIKE PINK) (ALARM CLOCK BAKELIKE RED)

antecedent support consequent support support confidence lift \


0 0.102041 0.096939 0.073980 0.725000 7.478947
1 0.096939 0.102041 0.073980 0.763158 7.478947
2 0.094388 0.096939 0.079082 0.837838 8.642959
3 0.096939 0.094388 0.079082 0.815789 8.642959
4 0.102041 0.094388 0.073980 0.725000 7.681081

leverage conviction
0 0.064088 3.283859
1 0.064088 3.791383
2 0.069932 5.568878
3 0.069932 4.916181
4 0.064348 3.293135

[ ]:

18

You might also like