Lab Assignment 2

ASSIGNMENT 3
Practice
In [1]:
import matplotlib.pyplot as plt

import numpy as np
Graph
In [2]:
# Sample data for the first line plot: x positions and the matching y values.
x = [1, 2, 3, 4, 5]
y = [6, 7, 8, 9, 10]
In [3]:
# Basic line plot of y against x with axis labels and a title.
plt.plot(x, y)
plt.xlabel('Tree')
plt.ylabel('Mountain')
plt.title('Graph')
plt.show()
In [4]:
plt.plot(x, y, marker = '*', color = 'lime', linestyle = 'dashed', linewidth = 3, markerfac

lt h ()
Bar Chart
In [5]:
# Vertical bar chart: one bar per x position, height taken from y.
x = [1, 2, 3, 4, 5]
y = [10, 20, 30, 40, 50]
plt.xlabel('x - axis')
plt.ylabel('y - axis')
plt.title('Vertical Bar Chart')
# Only two colours are given for five bars, so they alternate along the axis.
plt.bar(x, y, width = 0.9, color = ['yellow', 'cyan'])
plt.show()
In [6]:
# Horizontal bar chart: categories on the vertical axis, values as bar lengths.
x = np.array(["A", "B", "C", "D"])
y = np.array([1, 2, 3, 4])
plt.xlabel('x - axis')
plt.ylabel('y - axis')
plt.title('Horizontal Bar Chart')
# Two colours for four bars, so they alternate.
plt.barh(x, y, color = ['fuchsia', 'teal'])
plt.show()
Histogram
In [7]:
# Histogram of 200 samples drawn from a normal distribution (mean 180, std 5).
# No seed is set, so the exact bar heights differ from run to run.
x = np.random.normal(180, 5, 200)
plt.hist(x)
plt.xlabel('x_axis')
plt.ylabel('y-axis')
plt.title('Histogram')
plt.show()
In [8]:
x = np.array([87,87,5,89,56,73,34,54,31,17,51,5,79,21])
bins = ([0,25,50,75,100])
plt.hist(x, bins, range = None, orientation='vertical', histtype = 'bar', color = 'lightbl
plt.xlabel('x-axis')
plt.ylabel('y-axis')
plt.title('Histogram')
lt h ()
Scatter Plot
In [9]:
# Scatter plot of ten points with both coordinates drawn uniformly from [0, 1).
x = np.random.rand(10)
y = np.random.rand(10)
plt.scatter(x, y)
plt.show()
In [10]:
# Scatter plot of 1000 points whose coordinates are independent normal samples.
x = np.random.normal(5.0, 1.0, 1000)   # mean 5, std 1
y = np.random.normal(10.0, 2.0, 1000)  # mean 10, std 2
plt.scatter(x, y)
plt.show()
Pie Chart
In [11]:
values = np.array([4.6, 24, 71.4])

names = ['Atoms', 'Dark Matter', 'Dark Energy']
plt.pie(values, labels = names, radius = 1.5, startangle = 90, shadow=True, explode = (0.05
# explode : specifies the fraction of the radius with which to offset each wedge, autopct :
plt.legend(title = 'Content of the Universe') # to add a list of explanation for each wedge
lt h ()
Curves
In [12]:
# Parabola y = x^2 sampled at 50000 evenly spaced points on [-100, 100].
x = np.linspace(-100, 100, 50000)
y = x**2
plt.plot(x, y, linestyle = 'dashdot', linewidth = 2, label = 'y = x^2')
plt.legend()
plt.show()
In [13]:
# Sine and cosine on [0, 5π) sampled every 0.1, drawn on the same axes.
x = np.arange(0, 5*np.pi, 0.1)  # start, stop (exclusive), step
y = np.sin(x)
z = np.cos(x)
plt.plot(x, y, x, z)
plt.title('Plot of sin and cos from 0 to 5π')
# Legend entries follow the order the curves were passed to plt.plot.
plt.legend(['sin(x)', 'cos(x)'])
plt.show()
In [14]:
x = np.linspace(-2,2,41)
y = np.exp(-x**2) * np.cos(2*np.pi*x)
plt.plot(x, y, alpha = 0.4, label = 'Decaying Cosine', color = 'c', linestyle = 'dashed', l
marker = 'o', markersize = 5, markerfacecolor = 'g', markeredgecolor = 'b')
plt.ylim([-3 ,3])
plt.legend()
lt h ()
Pandas
In [15]:
import pandas as pd
In [16]:
# Importing the 'titanic' dataset
# Raw string: in a normal string literal '\t' would be read as a TAB and corrupt the path.
# NOTE(review): the file name is assumed to be 'titanic.csv' — confirm against the lab folder.
df = pd.read_csv(r'D:\CO327_ML_Lab\Lab_3\titanic.csv')
In [17]:
df.describe()
Out[17]:
PassengerId Survived Pclass Age SibSp Parch Fare
count 891.000000 891.000000 891.000000 714.000000 891.000000 891.000000 891.000000
mean 446.000000 0.383838 2.308642 29.699118 0.523008 0.381594 32.204208
std 257.353842 0.486592 0.836071 14.526497 1.102743 0.806057 49.693429
min 1.000000 0.000000 1.000000 0.420000 0.000000 0.000000 0.000000
25% 223.500000 0.000000 2.000000 20.125000 0.000000 0.000000 7.910400
50% 446.000000 0.000000 3.000000 28.000000 0.000000 0.000000 14.454200
75% 668.500000 1.000000 3.000000 38.000000 1.000000 0.000000 31.000000
max 891.000000 1.000000 3.000000 80.000000 8.000000 6.000000 512.329200
In [18]:
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 12 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 PassengerId 891 non-null int64
1 Survived 891 non-null int64
2 Pclass 891 non-null int64
3 Name 891 non-null object
4 Sex 891 non-null object
5 Age 714 non-null float64
6 SibSp 891 non-null int64
7 Parch 891 non-null int64
8 Ticket 891 non-null object
9 Fare 891 non-null float64
10 Cabin 204 non-null object
11 Embarked 889 non-null object
dtypes: float64(2), int64(5), object(5)
memory usage: 83.7+ KB
In [19]:
df.head()
Out[19]:
PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare C
Braund,
0 1 0 3 Mr. Owen male 22.0 1 0 A/5 21171 7.2500
Harris
Cumings,
Mrs. John
Bradley
1 2 1 1 female 38.0 1 0 PC 17599 71.2833
(Florence
Briggs
Th...
Heikkinen,
STON/O2.
2 3 1 3 Miss. female 26.0 0 0 7.9250
3101282
Laina
Futrelle,
Mrs.
Jacques
3 4 1 1 female 35.0 1 0 113803 53.1000
Heath
(Lily May
Peel)
Allen, Mr.
4 5 0 3 William male 35.0 0 0 373450 8.0500
Henry
In [20]:
df.tail()
Out[20]:
PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare Cab
Montvila,
886 887 0 2 Rev. male 27.0 0 0 211536 13.00 Na
Juozas
Graham,
Miss.
887 888 1 1 female 19.0 0 0 112053 30.00 B4
Margaret
Edith
Johnston,
Miss.
W./C.
888 889 0 3 Catherine female NaN 1 2 23.45 Na
6607
Helen
"Carrie"
Behr, Mr.
889 890 1 1 Karl male 26.0 0 0 111369 30.00 C14
Howell
Dooley,
890 891 0 3 Mr. male 32.0 0 0 370376 7.75 Na
Patrick
In [21]:
# Order passengers by ticket string, descending, then preview the first rows.
# Note: this rebinds df, so later cells see the sorted frame (original index labels are kept).
df = df.sort_values('Ticket', ascending = False)
df.head()
Out[21]:
PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare Cabi
Crosby,
Capt. WE/P
745 746 0 1 male 70.0 1 1 71.000 B2
Edward 5735
Gifford
Crosby,
WE/P
540 541 1 1 Miss. female 36.0 0 2 71.000 B2
5735
Harriet R
Harris,
W/C
219 220 0 2 Mr. male 30.0 0 0 10.500 Na
14208
Walter
Chaffee,
Mr. W.E.P.
92 93 0 1 male 46.0 1 0 61.175 E3
Herbert 5734
Fuller
Harknett,
Miss. W./C.
235 236 0 3 female NaN 0 0 7.550 Na
Alice 6609
Phoebe
In [ ]:
Assignment
In [22]:
# Load the company sales data; raw string keeps the Windows backslashes literal.
df = pd.read_csv(r'D:\CO327_ML_Lab\Lab_3\company_sales_data.csv')

In [23]:
df.describe()
Out[23]:
month_number facecream facewash toothpaste bathingsoap shampoo moi
count 12.000000 12.000000 12.000000 12.000000 12.000000 12.000000 12
mean 6.500000 2873.333333 1542.916667 5825.833333 9500.833333 2117.500000 1542
std 3.605551 584.595172 316.733745 1242.032486 2348.095779 617.724931 316
min 1.000000 1990.000000 1120.000000 4550.000000 6100.000000 1200.000000 1120
25% 3.750000 2460.000000 1305.000000 4862.500000 8015.000000 1795.000000 1305
50% 6.500000 2830.000000 1527.500000 5530.000000 9090.000000 1995.000000 1527
75% 9.250000 3435.000000 1765.000000 6400.000000 10045.000000 2325.000000 1765
max 12.000000 3700.000000 2100.000000 8300.000000 14400.000000 3550.000000 2100

In [24]:
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12 entries, 0 to 11
Data columns (total 9 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 month_number 12 non-null int64
1 facecream 12 non-null int64
2 facewash 12 non-null int64
3 toothpaste 12 non-null int64
4 bathingsoap 12 non-null int64
5 shampoo 12 non-null int64
6 moisturizer 12 non-null int64
7 total_units 12 non-null int64
8 total_profit 12 non-null int64
dtypes: int64(9)
memory usage: 992.0 bytes
In [25]:
df.head()
Out[25]:
month_number facecream facewash toothpaste bathingsoap shampoo moisturizer total_u
0 1 2500 1500 5200 9200 1200 1500 2
1 2 2630 1200 5100 6100 2100 1200 1
2 3 2140 1340 4550 9550 3550 1340 2
3 4 3400 1130 5870 8870 1870 1130 2
4 5 3600 1740 4560 7760 1560 1740 2
In [26]:
df.tail()
Out[26]:
month_number facecream facewash toothpaste bathingsoap shampoo moisturizer total_
7 8 3700 1400 5860 9960 2860 1400
8 9 3540 1780 6100 8100 2100 1780
9 10 1990 1890 8300 10300 2300 1890
10 11 2340 2100 7300 13300 2400 2100
11 12 2900 1760 7400 14400 1800 1760
1. Read Total profit of all months and show it using a line plot
Total profit data is provided for each month. The generated line plot must include the following properties:

X label name = Month Number

Y label name = Total profit
In [27]:
# Method 1
# Reading a CSV file with specific columns: usecols limits parsing to the two
# columns needed, and a single read replaces the two separate passes over the file.
sales = pd.read_csv('company_sales_data.csv', usecols = ['month_number', 'total_profit'])
x = sales[['month_number']]   # kept as one-column DataFrames, as separate usecols reads would give
y = sales[['total_profit']]
plt.plot(x, y)
plt.xlabel('Month Number')
plt.ylabel('Total Profit')
plt.title('Company profit per month')
plt.show()
In [28]:
# Method 2
# Load the whole table once and pick the two columns by name.
df = pd.read_csv("company_sales_data.csv")
y = df['total_profit']
x = df['month_number']
plt.plot(x, y)
# The task asks for X label "Month Number" and Y label "Total profit".
plt.xlabel('Month Number')
plt.ylabel('Total profit')
plt.xticks(x)  # one tick per month
plt.yticks([100000, 200000, 300000, 400000, 500000])
plt.title('Company profit per month')
plt.show()
2. Get total profit of all months and show line plot with the following Style
properties
The generated line plot must include the following style properties:

Line Style dotted and Line-color should be red

Show legend at the lower right location.
X label name = Month Number
Y label name = Sold units number

Add a circle marker.
Line marker color as black

Line width should be 3
In [29]:
# Styled profit line per the task: dotted red line of width 3, circle markers
# filled black, legend in the lower-right corner.
plt.plot(x, y, label = 'Profit Data', linestyle = 'dotted', color = 'r', marker = 'o',
         markerfacecolor = 'k', linewidth = 3)
plt.xlabel('Month Number')
plt.ylabel('Sold Units Number')
plt.xticks(x)
plt.yticks([100000, 200000, 300000, 400000, 500000])
plt.title('Company Sales data of last year')
plt.legend(loc = 'lower right')
plt.show()
3. Read all product sales data and show it using a multiline plot
Display the number of units sold per month for each product using multiline plots. (i.e., Separate Plotline for
each product ).
In [30]:
# Units sold per month for each product, one line per product.
# The per-product names are kept as variables so later cells can reuse them.
facecream = df['facecream']
facewash = df['facewash']
toothpaste = df['toothpaste']
bathingsoap = df['bathingsoap']
shampoo = df['shampoo']
moisturizer = df['moisturizer']

# (legend label, series) pairs in drawing order; one loop replaces six near-identical calls.
product_lines = [
    ('Facecream sales data', facecream),
    ('Facewash sales data', facewash),
    ('Toothpaste sales data', toothpaste),
    ('Bathing soap sales data', bathingsoap),
    ('Shampoo sales data', shampoo),
    ('Moisturizer sales data', moisturizer),
]
for line_label, units_sold in product_lines:
    plt.plot(x, units_sold, label = line_label, marker = 'o')

plt.xlabel('Month Number')
plt.ylabel('Sales units in number')
plt.xticks(x)
plt.yticks([1000, 2000, 4000, 6000, 8000, 10000, 12000, 15000, 18000])
plt.legend()
plt.title('Sales data')
plt.show()
4. Read toothpaste sales data of each month and show it using a scatter
plot
Also, add a grid to the plot. The gridline style should be "--" (dashed).
In [31]:
# Toothpaste units sold per month as a scatter plot with a dashed grid.
tp = df['toothpaste']
plt.scatter(x, tp, label = 'Toothpaste sales data')
plt.legend()
plt.xlabel('Month Number')
plt.ylabel('Number of units sold')
plt.xticks(x)
plt.grid(linestyle = '--')
plt.title('Toothpaste sales data')
plt.show()
5. Read face cream and facewash product sales data and show it using the bar
chart
The bar chart should display the number of units sold per month for each product. Add a separate bar for each
product in the same chart.
In [32]:
# Side-by-side bars per month: face cream on the left of each tick, facewash on the right.
fc = df['facecream']
fw = df['facewash']
# With align='edge', a negative width draws the bar from x-0.25 to x ...
plt.bar(x, fc, width = -0.25, label = 'Facecream sales data', align = 'edge')
# ... and a positive width draws it from x to x+0.25, so the two bars do not overlap.
plt.bar(x, fw, width = 0.25, label = 'Facewash sales data', align = 'edge')
plt.legend()
plt.xlabel('Month Number')
plt.ylabel('Number of units sold')
plt.xticks(x)
plt.title('Facewash and facecream sales data')
plt.show()
6. Read sales data of bathing soap of all months and show it using a bar chart.
Save this plot to your hard disk
In [33]:
# Bathing soap units sold per month as a bar chart, also written to disk.
bs = df['bathingsoap'].tolist()
plt.bar(x, bs)
plt.xlabel('Month Number')
plt.ylabel('Number of units sold')
plt.xticks(x)
plt.title('Bathingsoap sales data')
# Save before plt.show(): the figure is saved while it is still the current one.
# An explicit file name relative to the working directory (where the CSV is read
# from) replaces the previous absolute 'D:\...' literal, which used unescaped
# backslashes and named no file.
plt.savefig('bathingsoap_sales_data.png')
plt.show()
7. Read the total profit of each month and show it using the histogram to see the
most common profit ranges
In [34]:
# Histogram of monthly total profit over fixed profit ranges.
tp = df['total_profit']
# Named profit_bins rather than `range`, so the built-in range() is not shadowed
# for every cell that runs after this one.
profit_bins = [150000, 175000, 200000, 225000, 250000, 300000, 350000]
plt.hist(tp, profit_bins, label = 'Profit data')
plt.xlabel('Profit range (in $)')
plt.ylabel('Number of months')  # bar height is a count of months, not a dollar amount
plt.xticks(profit_bins)
plt.legend(loc = 'upper left')
plt.title('Profit data')
plt.show()
8. Calculate total sale data for last year for each product and show it using a Pie
chart
Note: In Pie chart display Number of units sold per year for each product in percentage.
In [35]:
# Yearly units sold per product, shown as percentage shares in a pie chart.
product_columns = ['facecream', 'facewash', 'toothpaste', 'bathingsoap', 'shampoo', 'moisturizer']
data = [df[column].sum() for column in product_columns]  # yearly total per product
# Wedge labels, in the same order as product_columns.
names = ['Facecream', 'Facewash', 'Toothpaste', 'Bathingsoap', 'Shampoo', 'Moisturizer']
plt.pie(data, labels = names, autopct = '%1.1f%%', radius = 1.5)
plt.legend()
# Trailing newlines push the title above the enlarged (radius 1.5) pie.
plt.title('Sales data\n\n\n')
plt.show()
9. Read bathing soap and facewash sales data of all months and display them using
subplots
In [36]:
# Bathing soap and facewash monthly sales in two stacked panels.
bs = df['bathingsoap']
fw = df['facewash']
# plt.subplots(nrows) returns (fig, ax): fig is the container for all panels and
# ax is an array of Axes when more than one panel is requested.
# sharex=True makes both panels use the same x-axis.
fig, ax = plt.subplots(2, sharex = True)
ax[0].plot(x, bs, color = 'k', marker = 'o', linewidth = 2)
ax[0].set_title('Sales data of bathingsoap')
ax[1].plot(x, fw, color = 'r', marker = 'o', linewidth = 2)
ax[1].set_title('Sales data of facewash')
plt.xticks(x)  # applied to the current (last) axes
plt.show()
10. Read all product sales data and show it using the stack plot
In [37]:
# Stacked area chart of monthly units sold for every product.
# Columns are read straight from df so this cell does not depend on variables
# created by an earlier plotting cell.
product_columns = ['facecream', 'facewash', 'toothpaste', 'bathingsoap', 'shampoo', 'moisturizer']
stack_labels = ['Facecream', 'Facewash', 'Toothpaste', 'Bathingsoap', 'Shampoo', 'Moisturizer']
stack_colors = ['m', 'c', 'r', 'k', 'g', 'y']  # one colour per product, same order as the labels
# labels= gives stackplot its own legend entries, so no empty proxy lines are needed.
plt.stackplot(x, *[df[column] for column in product_columns],
              labels = stack_labels, colors = stack_colors)
plt.xlabel('Month Number')
plt.ylabel('Sales units in Number')
plt.xticks(x)
plt.legend(loc = 'upper left')
plt.title('All product sales data using stack plot')
plt.show()

Lab Assignment 2

Uploaded by

Document Information

Original Description:

Copyright

Available Formats

Share this document

Share or Embed Document

Sharing Options

Did you find this document useful?

Is this content inappropriate?

Copyright:

Available Formats

Lab Assignment 2

Uploaded by

Copyright:

Available Formats

ASSIGNMENT 3

import matplotlib.pyplot as plt

plt.plot(x, y, marker = '*', color = 'lime', linestyle = 'dashed', linewidth = 3, markerfac

x = np.array(["A", "B", "C", "D"])

plt.hist(x, bins, range = None, orientation='vertical', histtype = 'bar', color = 'lightbl

x = np.random.normal(5.0, 1.0, 1000) # random samples from a normal (Gaussian) distribution

values = np.array([4.6, 24, 71.4])

plt.plot(x, y, linestyle = 'dashdot', linewidth = 2, label = 'y = x^2')

x = np.arange(0, 5*np.pi, 0.1) # start, stop, step

plt.title('Plot of sin and cos from 0 to 5π')

# Importing the 'titanic' dataset

PassengerId Survived Pclass Age SibSp Parch Fare

count 891.000000 891.000000 891.000000 714.000000 891.000000 891.000000 891.000000

mean 446.000000 0.383838 2.308642 29.699118 0.523008 0.381594 32.204208

std 257.353842 0.486592 0.836071 14.526497 1.102743 0.806057 49.693429

min 1.000000 0.000000 1.000000 0.420000 0.000000 0.000000 0.000000

25% 223.500000 0.000000 2.000000 20.125000 0.000000 0.000000 7.910400

50% 446.000000 0.000000 3.000000 28.000000 0.000000 0.000000 14.454200

75% 668.500000 1.000000 3.000000 38.000000 1.000000 0.000000 31.000000

max 891.000000 1.000000 3.000000 80.000000 8.000000 6.000000 512.329200

RangeIndex: 891 entries, 0 to 890

Data columns (total 12 columns):

# Column Non-Null Count Dtype

--- ------ -------------- -----

0 PassengerId 891 non-null int64

1 Survived 891 non-null int64

2 Pclass 891 non-null int64

3 Name 891 non-null object

4 Sex 891 non-null object

5 Age 714 non-null float64

6 SibSp 891 non-null int64

7 Parch 891 non-null int64

8 Ticket 891 non-null object

9 Fare 891 non-null float64

10 Cabin 204 non-null object

11 Embarked 889 non-null object

dtypes: float64(2), int64(5), object(5)

memory usage: 83.7+ KB

df = df.sort_values('Ticket', ascending = False)

df d d ( 'D \CO327 ML L b\L b 3\ l d t ')

month_number facecream facewash toothpaste bathingsoap shampoo moi

count 12.000000 12.000000 12.000000 12.000000 12.000000 12.000000 12

mean 6.500000 2873.333333 1542.916667 5825.833333 9500.833333 2117.500000 1542

std 3.605551 584.595172 316.733745 1242.032486 2348.095779 617.724931 316

min 1.000000 1990.000000 1120.000000 4550.000000 6100.000000 1200.000000 1120

25% 3.750000 2460.000000 1305.000000 4862.500000 8015.000000 1795.000000 1305

50% 6.500000 2830.000000 1527.500000 5530.000000 9090.000000 1995.000000 1527

75% 9.250000 3435.000000 1765.000000 6400.000000 10045.000000 2325.000000 1765

max 12.000000 3700.000000 2100.000000 8300.000000 14400.000000 3550.000000 2100

Data columns (total 9 columns):

# Column Non-Null Count Dtype

--- ------ -------------- -----

0 month_number 12 non-null int64

1 facecream 12 non-null int64

2 facewash 12 non-null int64

3 toothpaste 12 non-null int64

4 bathingsoap 12 non-null int64

5 shampoo 12 non-null int64

6 moisturizer 12 non-null int64

7 total_units 12 non-null int64

8 total_profit 12 non-null int64

memory usage: 992.0 bytes

month_number facecream facewash toothpaste bathingsoap shampoo moisturizer total_u