You are on page 1 of 22

ASSIGNMENT 3

Practice

In [1]:

# Plotting and numeric libraries used by every cell below.
import matplotlib.pyplot as plt
import numpy as np

Graph

In [2]:

# Sample data for the first line plots: five x positions and their y values.
x = [1, 2, 3, 4, 5]
y = [6, 7, 8, 9, 10]
In [3]:

# Basic line plot of y against x with axis labels and a title.
plt.plot(x, y)
plt.xlabel('Tree')
plt.ylabel('Mountain')
plt.title('Graph')
plt.show()
In [4]:

# Same data, restyled: star markers on a thick dashed lime line.
# NOTE(review): the original line was cut off after "markerfac..." when the
# notebook was extracted; the remaining marker-styling arguments could not be
# recovered and are omitted here - restore them from the original notebook.
plt.plot(x, y, marker = '*', color = 'lime', linestyle = 'dashed', linewidth = 3)
plt.show()

Bar Chart

In [5]:

# Vertical bar chart; the two-colour list is cycled across the five bars.
x = [1, 2, 3, 4, 5]
y = [10, 20, 30, 40, 50]

plt.xlabel('x - axis')
plt.ylabel('y - axis')
plt.title('Vertical Bar Chart')
plt.bar(x, y, width = 0.9, color = ['yellow', 'cyan'])
plt.show()
In [6]:

# Horizontal bar chart: categories on the vertical axis, values as bar length.
x = np.array(["A", "B", "C", "D"])
y = np.array([1, 2, 3, 4])

plt.xlabel('x - axis')
plt.ylabel('y - axis')
plt.title('Horizontal Bar Chart')
plt.barh(x, y, color = ['fuchsia', 'teal'])
plt.show()

Histogram
In [7]:

# Histogram of 200 samples drawn from a normal distribution (mean 180, std 5).
x = np.random.normal(180, 5, 200)
plt.hist(x)

plt.xlabel('x_axis')
plt.ylabel('y-axis')
plt.title('Histogram')
plt.show()

In [8]:

# Histogram with explicit bin edges (four bins of width 25 covering 0-100).
x = np.array([87, 87, 5, 89, 56, 73, 34, 54, 31, 17, 51, 5, 79, 21])
bins = [0, 25, 50, 75, 100]

# NOTE(review): the original line was cut off inside the colour name
# ("'lightbl..."); it is completed here as 'lightblue'. Any arguments that
# followed it could not be recovered.
plt.hist(x, bins, range = None, orientation='vertical', histtype = 'bar', color = 'lightblue')

plt.xlabel('x-axis')
plt.ylabel('y-axis')
plt.title('Histogram')
plt.show()

Scatter Plot
In [9]:

# Scatter plot of ten points with uniformly random coordinates in [0, 1).
x = np.random.rand(10)
y = np.random.rand(10)

plt.scatter(x, y)
plt.show()

In [10]:

# Scatter plot of 1000 points; each coordinate is drawn independently from a
# normal (Gaussian) distribution given as (mean, standard deviation, count).
x = np.random.normal(5.0, 1.0, 1000)
y = np.random.normal(10.0, 2.0, 1000)

plt.scatter(x, y)
plt.show()

Pie Chart
In [11]:

# Pie chart of the composition of the universe (percent shares).
values = np.array([4.6, 24, 71.4])
names = ['Atoms', 'Dark Matter', 'Dark Energy']

# explode : fraction of the radius by which each wedge is offset from the centre
# autopct : format string used to label each wedge with its percentage
# NOTE(review): the original call was cut off after "explode = (0.05"; the
# remaining explode entries and the autopct format below are a reconstruction
# (one offset per wedge) - confirm against the original notebook.
plt.pie(values, labels = names, radius = 1.5, startangle = 90, shadow=True,
        explode = (0.05, 0.05, 0.05), autopct = '%1.1f%%')

plt.legend(title = 'Content of the Universe') # adds a key explaining each wedge
plt.show()

Curves
In [12]:

# Parabola y = x^2 sampled on 50000 evenly spaced points in [-100, 100].
x = np.linspace(-100, 100, 50000) # generates linearly spaced values
y = x**2

plt.plot(x, y, linestyle = 'dashdot', linewidth = 2, label = 'y = x^2')
plt.legend()
plt.show()

In [13]:

# Sine and cosine drawn on the same axes over [0, 5π).
x = np.arange(0, 5*np.pi, 0.1) # start, stop, step
y = np.sin(x)
z = np.cos(x)

# One plot() call with two (x, y) pairs draws two lines; the legend entries
# are matched to them in the same order.
plt.plot(x, y, x, z)
plt.title('Plot of sin and cos from 0 to 5π')
plt.legend(['sin(x)', 'cos(x)'])
plt.show()
In [14]:

# Cosine wave damped by a Gaussian envelope, sampled at 41 points on [-2, 2].
x = np.linspace(-2, 2, 41)
y = np.exp(-x**2) * np.cos(2*np.pi*x)

# NOTE(review): the original first line of this call was cut off after
# "linestyle = 'dashed', l" - most likely a line-width keyword whose value
# could not be recovered, so matplotlib's default line width is used here.
plt.plot(x, y, alpha = 0.4, label = 'Decaying Cosine', color = 'c', linestyle = 'dashed',
         marker = 'o', markersize = 5, markerfacecolor = 'g', markeredgecolor = 'b')
plt.ylim([-3, 3])

plt.legend()
plt.show()

Pandas

In [15]:

import pandas as pd
In [16]:

# Importing the 'titanic' dataset
# The path is a raw string so that backslashes (notably "\t") are not read as
# escape sequences.
# NOTE(review): the file name was garbled in extraction and is reconstructed
# as 'train.csv' - confirm against the original notebook.
df = pd.read_csv(r'D:\CO327_ML_Lab\Lab_3\train.csv')
In [17]:

# Summary statistics (count, mean, std, quartiles) for the numeric columns.
df.describe()
Out[17]:

PassengerId Survived Pclass Age SibSp Parch Fare

count 891.000000 891.000000 891.000000 714.000000 891.000000 891.000000 891.000000

mean 446.000000 0.383838 2.308642 29.699118 0.523008 0.381594 32.204208

std 257.353842 0.486592 0.836071 14.526497 1.102743 0.806057 49.693429

min 1.000000 0.000000 1.000000 0.420000 0.000000 0.000000 0.000000

25% 223.500000 0.000000 2.000000 20.125000 0.000000 0.000000 7.910400

50% 446.000000 0.000000 3.000000 28.000000 0.000000 0.000000 14.454200

75% 668.500000 1.000000 3.000000 38.000000 1.000000 0.000000 31.000000

max 891.000000 1.000000 3.000000 80.000000 8.000000 6.000000 512.329200

In [18]:

# Column names, non-null counts and dtypes.
df.info()
<class 'pandas.core.frame.DataFrame'>

RangeIndex: 891 entries, 0 to 890

Data columns (total 12 columns):

# Column Non-Null Count Dtype

--- ------ -------------- -----

0 PassengerId 891 non-null int64

1 Survived 891 non-null int64

2 Pclass 891 non-null int64

3 Name 891 non-null object

4 Sex 891 non-null object

5 Age 714 non-null float64

6 SibSp 891 non-null int64

7 Parch 891 non-null int64

8 Ticket 891 non-null object

9 Fare 891 non-null float64

10 Cabin 204 non-null object

11 Embarked 889 non-null object

dtypes: float64(2), int64(5), object(5)

memory usage: 83.7+ KB

In [19]:

# First five rows.
df.head()
Out[19]:

PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare C

Braund,
0 1 0 3 Mr. Owen male 22.0 1 0 A/5 21171 7.2500
Harris

Cumings,
Mrs. John
Bradley
1 2 1 1 female 38.0 1 0 PC 17599 71.2833
(Florence
Briggs
Th...

Heikkinen,
STON/O2.
2 3 1 3 Miss. female 26.0 0 0 7.9250
3101282
Laina

Futrelle,
Mrs.
Jacques
3 4 1 1 female 35.0 1 0 113803 53.1000
Heath
(Lily May
Peel)

Allen, Mr.
4 5 0 3 William male 35.0 0 0 373450 8.0500
Henry

In [20]:

# Last five rows.
df.tail()
Out[20]:

PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare Cab

Montvila,
886 887 0 2 Rev. male 27.0 0 0 211536 13.00 Na
Juozas

Graham,
Miss.
887 888 1 1 female 19.0 0 0 112053 30.00 B4
Margaret
Edith

Johnston,
Miss.
W./C.
888 889 0 3 Catherine female NaN 1 2 23.45 Na
6607
Helen
"Carrie"

Behr, Mr.
889 890 1 1 Karl male 26.0 0 0 111369 30.00 C14
Howell

Dooley,
890 891 0 3 Mr. male 32.0 0 0 370376 7.75 Na
Patrick
In [21]:

# Sort by ticket in descending order. The result is kept under its own name
# so the originally loaded frame (and the outputs shown above) stay valid if
# cells are re-run.
df_by_ticket = df.sort_values('Ticket', ascending = False)
df_by_ticket.head()
Out[21]:

PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare Cabi

Crosby,
Capt. WE/P
745 746 0 1 male 70.0 1 1 71.000 B2
Edward 5735
Gifford

Crosby,
WE/P
540 541 1 1 Miss. female 36.0 0 2 71.000 B2
5735
Harriet R

Harris,
W/C
219 220 0 2 Mr. male 30.0 0 0 10.500 Na
14208
Walter

Chaffee,
Mr. W.E.P.
92 93 0 1 male 46.0 1 0 61.175 E3
Herbert 5734
Fuller

Harknett,
Miss. W./C.
235 236 0 3 female NaN 0 0 7.550 Na
Alice 6609
Phoebe

In [ ]:

Assignment

In [22]:

# Load the company sales dataset. A relative path is used so this cell agrees
# with the later cells that read 'company_sales_data.csv' from the working
# directory.
df = pd.read_csv('company_sales_data.csv')


In [23]:

# Summary statistics for every (numeric) column of the sales data.
df.describe()
Out[23]:

month_number facecream facewash toothpaste bathingsoap shampoo moi

count 12.000000 12.000000 12.000000 12.000000 12.000000 12.000000 12

mean 6.500000 2873.333333 1542.916667 5825.833333 9500.833333 2117.500000 1542

std 3.605551 584.595172 316.733745 1242.032486 2348.095779 617.724931 316

min 1.000000 1990.000000 1120.000000 4550.000000 6100.000000 1200.000000 1120

25% 3.750000 2460.000000 1305.000000 4862.500000 8015.000000 1795.000000 1305

50% 6.500000 2830.000000 1527.500000 5530.000000 9090.000000 1995.000000 1527

75% 9.250000 3435.000000 1765.000000 6400.000000 10045.000000 2325.000000 1765

max 12.000000 3700.000000 2100.000000 8300.000000 14400.000000 3550.000000 2100


In [24]:

# Column names, non-null counts and dtypes.
df.info()
<class 'pandas.core.frame.DataFrame'>

RangeIndex: 12 entries, 0 to 11

Data columns (total 9 columns):

# Column Non-Null Count Dtype

--- ------ -------------- -----

0 month_number 12 non-null int64

1 facecream 12 non-null int64

2 facewash 12 non-null int64

3 toothpaste 12 non-null int64

4 bathingsoap 12 non-null int64

5 shampoo 12 non-null int64

6 moisturizer 12 non-null int64

7 total_units 12 non-null int64

8 total_profit 12 non-null int64

dtypes: int64(9)

memory usage: 992.0 bytes

In [25]:

# First five months.
df.head()
Out[25]:

month_number facecream facewash toothpaste bathingsoap shampoo moisturizer total_u

0 1 2500 1500 5200 9200 1200 1500 2

1 2 2630 1200 5100 6100 2100 1200 1

2 3 2140 1340 4550 9550 3550 1340 2

3 4 3400 1130 5870 8870 1870 1130 2

4 5 3600 1740 4560 7760 1560 1740 2

In [26]:

# Last five months.
df.tail()
Out[26]:

month_number facecream facewash toothpaste bathingsoap shampoo moisturizer total_

7 8 3700 1400 5860 9960 2860 1400

8 9 3540 1780 6100 8100 2100 1780

9 10 1990 1890 8300 10300 2300 1890

10 11 2340 2100 7300 13300 2400 2100

11 12 2900 1760 7400 14400 1800 1760

1. Read Total profit of all months and show it using a line plot
Total profit data provided for each month. Generated line plot must include the following properties: –

X label name = Month Number


Y label name = Total profit

In [27]:

# Method 1
# Reading a CSV file with specific columns: the file is parsed once and only
# the two columns needed for the plot are loaded.
sales = pd.read_csv('company_sales_data.csv', usecols = ['month_number', 'total_profit'])
x = sales['month_number']
y = sales['total_profit']

plt.plot(x, y)
plt.xlabel('Month Number')
plt.ylabel('Total Profit')
plt.title('Company profit per month')
plt.show()
In [28]:

# Method 2
# Load the whole file and select the columns from the DataFrame.
df = pd.read_csv("company_sales_data.csv")
y = df['total_profit']
x = df['month_number']

plt.plot(x, y)
plt.xlabel('Month Number')
plt.ylabel('Total Profit')  # the series plotted is total profit, as the task requires
plt.xticks(x)               # one tick per month
plt.yticks([100000, 200000, 300000, 400000, 500000])
plt.title('Company profit per month')
plt.show()

2. Get total profit of all months and show line plot with the following Style
properties
Generated line plot must include following Style properties: –

Line Style dotted and Line-color should be red


Show legend at the lower right location.

X label name = Month Number

Y label name = Sold units number


Add a circle marker.

Line marker color as black


Line width should be 3

In [29]:

# Styled profit line: dotted red line of width 3 with black circle markers.
# NOTE(review): the original call was cut off after "mark..."; the trailing
# arguments are reconstructed from the task text (black marker colour, line
# width 3) - confirm against the original notebook.
plt.plot(x, y, label = 'Profit Data', linestyle = 'dotted', color = 'r', marker = 'o',
         markerfacecolor = 'k', linewidth = 3)
plt.xlabel('Month Number')
plt.ylabel('Sold Units Number')
plt.xticks(x)
plt.yticks([100000, 200000, 300000, 400000, 500000])
plt.title('Company Sales data of last year')
plt.legend(loc = 'lower right')
plt.show()

3. Read all product sales data and show it using a multiline plot
Display the number of units sold per month for each product using multiline plots. (i.e., Separate Plotline for
each product ).
In [30]:

# Monthly units sold per product. These names are kept at cell level because
# the stack-plot cell further down reuses them.
facecream = df['facecream']
facewash = df['facewash']
toothpaste = df['toothpaste']
bathingsoap = df['bathingsoap']
shampoo = df['shampoo']
moisturizer = df['moisturizer']

# One line per product, drawn in a loop instead of six copy-pasted calls.
product_lines = [
    (facecream, 'Facecream sales data'),
    (facewash, 'Facewash sales data'),
    (toothpaste, 'Toothpaste sales data'),
    (bathingsoap, 'Bathing soap sales data'),
    (shampoo, 'Shampoo sales data'),
    (moisturizer, 'Moisturizer sales data'),
]
for units, line_label in product_lines:
    plt.plot(x, units, label = line_label, marker = 'o')

plt.xlabel('Month Number')
plt.ylabel('Sales units in number')
plt.xticks(x)
plt.yticks([1000, 2000, 4000, 6000, 8000, 10000, 12000, 15000, 18000])
plt.legend()
plt.title('Sales data')
plt.show()

4. Read toothpaste sales data of each month and show it using a scatter
plot
Also, add a grid to the plot. The gridline style should be dashed (“--”).
In [31]:

# Scatter plot of monthly toothpaste sales with a dashed grid.
tp = df['toothpaste']
plt.scatter(x, tp, label = 'Toothpaste sales data')
plt.legend()
plt.xlabel('Month Number')
plt.ylabel('Number of units sold')
plt.xticks(x)
plt.grid(linestyle = '--')
plt.title('Toothpaste sales data')
plt.show()

5. Read face cream and facewash product sales data and show it using the bar
chart
The bar chart should display the number of units sold per month for each product. Add a separate bar for each
product in the same chart.
In [32]:

# Side-by-side bars per month: with align = 'edge' a bar starts at x, so a
# negative width draws to the left of the tick and a positive width to the right.
fc = df['facecream']
fw = df['facewash']
plt.bar(x, fc, width = -0.25, label = 'Facecream sales data', align = 'edge') # bar goes from x - 0.25 to x
plt.bar(x, fw, width = 0.25, label = 'Facewash sales data', align = 'edge')   # bar goes from x to x + 0.25
plt.legend()
plt.xlabel('Month Number')
plt.ylabel('Sales units in number')
plt.xticks(x)
plt.grid(linestyle = '--')
plt.title('Facewash and facecream sales data')
plt.show()

6. Read sales data of bathing soap of all months and show it using a bar chart.
Save this plot to your hard disk
In [33]:

# Bar chart of monthly bathing soap sales, saved to disk as a PNG.
bs = df['bathingsoap'].tolist()
plt.bar(x, bs)
plt.xlabel('Month Number')
plt.ylabel('Sales units in number')
plt.xticks(x)
plt.grid(linestyle = '--')
plt.title('Bathingsoap sales data')
# The original target was a non-raw Windows path ('\C' and '\L' are invalid
# escape sequences) that named a folder rather than a file. Save to an
# explicit file name relative to the working directory instead, and do it
# before plt.show() so the figure is written while it is still current.
plt.savefig('bathingsoap_sales.png', dpi = 150)
plt.show()

7. Read the total profit of each month and show it using the histogram to see the
most common profit ranges

In [34]:

# Histogram of monthly total profit over hand-picked profit ranges.
# Descriptive names are used instead of `range` (which shadowed the Python
# builtin for the rest of the session) and `tp` (already used for toothpaste).
total_profit = df['total_profit']
profit_bins = [150000, 175000, 200000, 225000, 250000, 300000, 350000]

plt.hist(total_profit, profit_bins, label = 'Profit data')

plt.xlabel('Profit range (in $)')
plt.ylabel('Number of months')  # a histogram's height is a count, not a profit
plt.xticks(profit_bins)
plt.legend(loc = 'upper left')
plt.title('Profit data')
plt.show()
8. Calculate total sale data for last year for each product and show it using a Pie
chart
Note: In Pie chart display Number of units sold per year for each product in percentage.

In [35]:

# Yearly units sold per product, shown as percentage shares of the total.
product_columns = ['facecream', 'facewash', 'toothpaste', 'bathingsoap',
                   'shampoo', 'moisturizer']
data = [df[column].sum() for column in product_columns]
names = ['Facecream', 'Facewash', 'Toothpaste', 'Bathingsoap', 'Shampoo', 'Moisturizer']

# autopct formats each wedge's share as a percentage with one decimal place.
plt.pie(data, labels = names, autopct = '%1.1f%%', radius = 1.5)

plt.legend()
plt.title('Sales data\n\n\n')  # trailing newlines lift the title above the enlarged pie
plt.show()

9. Read bathing soap and facewash sales data of all months and display them using
subplots
In [36]:

bs = df['bathingsoap']
fw = df['facewash']

# plt.subplots(nrows, ncols) returns a tuple (fig, ax):
#   fig: the container for all the subplots.
#   ax:  a single Axes object if there is only one plot, or an array of Axes
#        objects if there are several, as specified by nrows and ncols.
# sharex = True makes the two stacked subplots share one x-axis.
fig, ax = plt.subplots(2, sharex = True)

ax[0].plot(x, bs, color = 'k', marker = 'o', linewidth = 2)
ax[0].set_title('Sales data of bathingsoap')
ax[0].set_ylabel('Sales units in number')

ax[1].plot(x, fw, color = 'r', marker = 'o', linewidth = 2)
ax[1].set_title('Sales data of facewash')
ax[1].set_ylabel('Sales units in number')

# Label through the Axes objects: plt.xlabel / plt.ylabel act only on the
# current (last created) axes, which left the top subplot without a y label.
ax[1].set_xticks(x)
ax[1].set_xlabel('Month Number')
fig.tight_layout()  # keep the lower title clear of the upper subplot
plt.show()

10. Read all product sales data and show it using the stack plot
In [37]:

# Stack plot of monthly units sold for every product.
# Columns are read from df directly, so this cell does not depend on the
# per-product variables created in an earlier cell.
stack_columns = ['facecream', 'facewash', 'toothpaste', 'bathingsoap',
                 'shampoo', 'moisturizer']
stack_labels = ['Facecream', 'Facewash', 'Toothpaste', 'Bathingsoap',
                'Shampoo', 'Moisturizer']
# NOTE(review): the original `colors` argument was cut off in extraction; the
# list below reuses the colours of the original legend entries, in the same order.
stack_colors = ['m', 'c', 'r', 'k', 'g', 'y']

# stackplot accepts `labels` directly, so the empty plt.plot([]) calls that
# existed only to feed the legend are no longer needed.
plt.stackplot(x, *[df[column] for column in stack_columns],
              labels = stack_labels, colors = stack_colors)

plt.xlabel('Month Number')
plt.ylabel('Sales units in Number')
plt.xticks(x)
plt.legend(loc = 'upper left')
plt.title('All product sales data using stack plot')
plt.show()

You might also like