You are on page 1 of 6

Ecommerce Data Analysis

In [1]:

# Importing Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [2]:

# Load the purchases dataset (relative path, resolved against the current
# working directory) and preview the first rows to sanity-check the parse.
purchases_csv = 'Ecommerce Purchases.csv'
ecom = pd.read_csv(purchases_csv)
ecom.head()

Out[2]:

AM CC CC
Browser CC
Address Lot or Company Credit Card Exp Security
Info Provider
PM Date Code

Opera/9.56.
16629 Pace Camp
(X11; Linux
Apt. 46 Martinez- JCB 16
0 PM x86_64; sl- 6011929061123406 02/20 900 pdunlap@yahoo.c
448\nAlexisborough, in Herman digit
SI)
NE 77...
Presto/2...

Opera/8.93.
Fletcher,
9374 Jasmine Spurs (Windows
28 Richards
1 Suite 508\nSouth PM 98; Win 9x 3337758169645356 11/18 561 Mastercard anthony41@reed.c
rn and
John, TN 8... 4.90; en-US)
Whitaker
Pr...

Mozilla/5.0
Unit 0065 Box (compatible; Simpson,
94 JCB 16 amymiller@mora
2 5052\nDPO AP PM MSIE 9.0; Williams 675957666125 08/19 699
vE digit harrison.c
27450 Windows and Pham
NT ...

Mozilla/5.0
Williams,
7780 Julia (Macintosh;
36 Marshall
3 Fords\nNew Stacy, PM Intel Mac 6011578504430710 02/24 384 Discover brent16@olson-robinson.
vm and
WA 45798 OS X 10_8_0
Buchanan
...

Opera/9.58.
Brown, Diners
23012 Munoz Drive (X11; Linux
20 Watson Club /
4 Suite 337\nNew AM x86_64; it- 6011456623207998 10/25 678 christopherwright@gmail.c
IE and Carte
Cynthia, TX 5... IT)
Andrews Blanche
Presto/2...

Q1. Display top 10 rows of the data

In [3]:
# Q1: first ten rows of the purchases frame.
ecom.head(10)
Out[3]:

AM CC CC
CC
Address Lot or Browser Info Company Credit Card Exp Security
Provider
PM Date Code

Opera/9.56.(X11;
16629 Pace Camp Apt. 46 Martinez- JCB 16
0 PM Linux x86_64; sl-SI) 6011929061123406 02/20 900
448\nAlexisborough, NE 77... in Herman digit
Presto/2...

Fletcher,
Opera/8.93.
9374 Jasmine Spurs Suite 28 Richards
1 PM (Windows 98; Win 3337758169645356 11/18 561 Mastercard
508\nSouth John, TN 8... rn and
9x 4.90; en-US) Pr...
Whitaker

Mozilla/5.0 Simpson,
Unit 0065 Box 5052\nDPO 94 JCB 16
2 PM (compatible; MSIE Williams 675957666125 08/19 699
AP 27450 vE digit
9.0; Windows NT ... and Pham

Williams,
Mozilla/5.0
7780 Julia Fords\nNew 36 Marshall
3 PM (Macintosh; Intel 6011578504430710 02/24 384 Discover brent16@
Stacy, WA 45798 vm and
Mac OS X 10_8_0 ...
Buchanan

Brown, Diners
Opera/9.58.(X11;
23012 Munoz Drive Suite 20 Watson Club /
4 AM Linux x86_64; it-IT) 6011456623207998 10/25 678 christophe
337\nNew Cynthia, TX 5... IE and Carte
Presto/2...
Andrews Blanche

Mozilla/5.0
7502 Powell Mission Apt. 21 Silva-
5 PM (Macintosh; U; PPC 30246185196287 07/25 7169 Discover
768\nTravisland, VA 3... XT Anderson
Mac OS X 10_8_5...

93971 Conway Mozilla/5.0


96 Gibson VISA 16
6 Causeway\nAndersonburgh, AM (compatible; MSIE 6011398782655569 07/24 714
Xt and Sons digit
AZ 75107 7.0; Windows NT ...

Mozilla/5.0 (X11;
260 Rachel Plains Suite 96 Marshall- VISA 13
7 PM Linux i686) 561252141909 06/25 256
366\nCastroberg, WV 24... pG Collins digit
AppleWebKit/5350...

Mozilla/5.0
2129 Dylan Burg\nNew 45 Galloway JCB 16
8 PM (Macintosh; U; Intel 180041795790001 04/24 899 kdav
Michelle, ME 28650 JN and Sons digit
Mac OS X 10_7...

Rivera,
3795 Dawson Mozilla/5.0 (X11;
15 Buchanan American
9 Extensions\nLake Tinafort, AM Linux i686; 4396283918371 01/17 931 qcolema
Ug and Express
ID 88739 rv:1.9.7.20) Gec...
Ramirez

Q2. Display last 10 rows of the data

In [4]:
# Q2: last ten rows of the purchases frame.
ecom.tail(10)
Out[4]:

AM CC CC
CC
Address Lot or Browser Info Company Credit Card Exp Security
Provider
PM Date Code

Mozilla/5.0 Pace,
75731 Molly
93 (Macintosh; Intel Vazquez JCB 15
9990 Springs\nWest Danielle, PM 869968197049750 04/24 877 andersonmicha
ty Mac OS X and digit
VT 96934-5102
10_7_4;... Richards

Mozilla/5.0
PSC 8165, Box
50 (compatible; MSIE Snyder
9991 8498\nAPO AP 60327- AM 4221582137197481 02/24 969 Voyager kkin
dA 8.0; Windows NT Inc
0346
...

Mozilla/5.0
885 Allen Mountains
40 (Macintosh; PPC
9992 Apt. 230\nWallhaven, PM Wells Ltd 4664825258997302 10/20 431 Discover bb
vH Mac OS X 10_6_5)
LA 16995 AM CC CC
A... CC
Address Lot or Browser Info Company Credit Card Exp Security
Provider
PM Mozilla/5.0 Date Code
7555 Larson Locks
72 (Macintosh; U; Colon
9993 Suite 229\nEllisburgh, PM 30025560104631 10/25 629 Maestro chelseawill
jg Intel Mac OS X and Sons
MA 34...
10_8...

6276 Rojas Opera/9.68.(X11;


93 Ritter-
9994 Hollow\nLake Louis, PM Linux x86_64; sl- 3112186784121077 01/25 1823 Maestro irobe
Ex Smith
WY 56410-7837 SI) Presto/2...

Mozilla/5.0
966 Castaneda
92 (Windows NT 5.1) Randall- JCB 15
9995 Locks\nWest Juliafurt, PM 342945015358701 03/22 838 iscott@wa
XI AppleWebKit/5352 Sloan digit
CO 96415
...

Mozilla/5.0 Hale,
832 Curtis Dam Suite
41 (compatible; MSIE Collins JCB 16
9996 785\nNorth AM 210033169205009 07/25 207 mary8
JY 9.0; Windows NT and digit
Edwardburgh, T...
... Wilson

Mozilla/5.0
Unit 4434 Box
74 (Macintosh; U; Anderson VISA 16
9997 6343\nDPO AE 28026- AM 6011539787356311 05/21 1 tyle
Zh Intel Mac OS X Ltd digit
0283
10_7...

Mozilla/5.0
0096 English
74 (Macintosh; Intel American
9998 Rest\nRoystad, IA PM Cook Inc 180003348082930 11/17 987 elizabeth
cL Mac OS X Express
12457
10_8_8;...

40674 Barrett Mozilla/5.0 (X11;


64 Greene JCB 15
9999 Stravenue\nGrimesville, AM Linux i686; 4139972901927273 02/19 302 rachelfor
Hr Inc digit
WI 79682 rv:1.9.5.20) Gec...

Q3. Check datatype of each column

In [5]:
ecom.dtypes # 2 int64 columns (Credit Card, CC Security Code), 1 float64 (Purchase Price); all others are object
Out[5]:
Address object
Lot object
AM or PM object
Browser Info object
Company object
Credit Card int64
CC Exp Date object
CC Security Code int64
CC Provider object
Email object
Job object
IP Address object
Language object
Purchase Price float64
dtype: object

Q4. How many null values are there in each column of this data?

In [6]:
ecom.isnull().sum() # per-column count of missing values; every column reports 0, so there are no nulls
Out[6]:
Address 0
Lot 0
AM or PM 0
Browser Info 0
Company 0
Credit Card 0
CC Exp Date 0
CC Security Code 0
CC Provider 0
Email 0
Job 0
IP Address 0
Language 0
Purchase Price 0
dtype: int64

Q5. How many rows and columns are there in this data?

In [7]:
# Q5: DataFrame.shape is a (rows, columns) tuple; unpack it for the report.
row_count, column_count = ecom.shape
print(f"Rows = {row_count} and Columns = {column_count}")

Rows = 10000 and Columns = 14

Q6. Highest and Lowest purchase prices ?

In [8]:
# Q6: report both extremes of the "Purchase Price" column.
# The message is built from two adjacent f-strings (joined by the parser) so
# the statement can span lines without a raw line break inside a string
# literal, which is a syntax error for single-quoted strings.
price = ecom['Purchase Price']
print(
    f"Highest Purchase Price = {price.max()}$ \n Lowest Purchase Price "
    f"= {price.min()}$"
)

Highest Purchase Price = 99.99$


Lowest Purchase Price = 0.0$

Q7. What is the Average purchase price ?

In [9]:
# Q7: mean of "Purchase Price", rounded to two decimals for display.
average_price = round(ecom['Purchase Price'].mean(), 2)
print(f"Average Purchase Price = {average_price}$")

Average Purchase Price = 50.35$

Q8. How many people have "French" as their language?

In [10]:
# Q8: count rows whose Language code is "fr" (1097 in the recorded output).
is_french = ecom["Language"] == "fr"
int(is_french.sum())
Out[10]:
1097

Q9. How many people have "Engineer" in their job title?

In [11]:
# Q9: count job titles containing "engineer", ignoring case (984 in the
# recorded output).
# na=False makes a missing job title count as a non-match; without it a NaN
# in the column would yield a mask containing NaN that cannot be used to
# filter or be summed as booleans.
is_engineer = ecom["Job"].str.contains('engineer', case=False, na=False)
int(is_engineer.sum())
Out[11]:
984

Q10. Find The Email of the person with the following IP Address: 132.207.160.22 ?

In [12]:
# Q10: select the Email value(s) of the row(s) with this IP address, using a
# single .loc lookup (row mask, column label).
target_ip = "132.207.160.22"
ecom.loc[ecom["IP Address"] == target_ip, "Email"]
Out[12]:
2 amymiller@morales-harrison.com
Name: Email, dtype: object

Q11. How many People have Mastercard as their Credit Card Provider and made a purchase above 50?

In [13]:
# Q11: rows where the provider is Mastercard AND the purchase price exceeds 50
# (405 in the recorded output).
is_mastercard = ecom["CC Provider"] == "Mastercard"
is_above_50 = ecom["Purchase Price"] > 50
int((is_mastercard & is_above_50).sum())
Out[13]:
405

Q12. Find the email of the person with the following Credit Card Number: 4664825258997302

In [14]:
# Q12: select the Email value(s) for this card number. The comparison uses an
# integer because the "Credit Card" column is int64.
target_card = 4664825258997302
ecom.loc[ecom["Credit Card"] == target_card, "Email"]
Out[14]:
9992 bberry@wright.net
Name: Email, dtype: object

Q13. How many people purchase during the AM and how many people purchase during PM?

In [15]:
# Q13: tally the "AM or PM" column once, then report each half of the day.
# .get(..., 0) keeps the report at 0 if a label never occurs.
period_counts = ecom["AM or PM"].value_counts()
print("Number of peoples purchased during AM = ", period_counts.get("AM", 0))
print("Number of peoples purchased during PM = ", period_counts.get("PM", 0))
# In the recorded output, more purchases fall in "PM" than in "AM".
Number of peoples purchased during AM = 4932
Number of peoples purchased during PM = 5068

Q14. How many people have a credit card that expires in 2020?

In [16]:
def CC_exp_2020(dates=None, year="20"):
    """Count credit-card expiry dates that fall in a given year.

    Each date is split on '/' and its second field is compared with ``year``,
    so dates are expected to look like ``"MM/YY"``.

    Parameters
    ----------
    dates : iterable of str, optional
        Expiry dates to scan. When omitted, the notebook-global
        ``ecom["CC Exp Date"]`` column is used, as in the original
        zero-argument call.
    year : str or int, optional
        Year field to match, compared as text. Defaults to ``"20"``.

    Returns
    -------
    int
        Number of matching dates. The count is also printed, so existing
        calls that rely on the printed output keep working.

    Raises
    ------
    IndexError
        If a date contains no '/' separator.
    """
    if dates is None:
        # Fall back to the notebook-global frame for the no-argument call.
        dates = ecom["CC Exp Date"]
    wanted = str(year)
    count = sum(1 for date in dates if date.split('/')[1] == wanted)
    print(count)
    return count

In [17]:
CC_exp_2020() # 988 people have a credit card that expires in 2020
988

Q15. What are the top 5 most popular email providers (e.g. gmail.com, yahoo.com, etc.)?

In [18]:
# Q15 (step 1): pull the provider out of every e-mail address, i.e. the
# segment that follows the first "@".
host = [email.split("@")[1] for email in ecom["Email"]]

In [19]:
# Q15 (step 2): store the extracted providers as a new "Host" column, then
# preview it. Bracket access is used for reading so the column is addressed
# the same way it was created.
ecom["Host"] = host
ecom["Host"].head()

Out[19]:
0 yahoo.com
1 reed.com
2 morales-harrison.com
3 olson-robinson.info
4 gmail.com
Name: Host, dtype: object

In [20]:
# Q15 (step 3): value_counts() already sorts from most to least frequent, so
# the first five entries are the five most common providers.
ecom["Host"].value_counts().head(5)
Out[20]:
hotmail.com 1638
yahoo.com 1616
gmail.com 1605
smith.com 42
williams.com 37
Name: Host, dtype: int64

You might also like