Professional Documents
Culture Documents
1. Mengimpor Library
In [1]:
import pandas as pd # Transformasi data
import math
2. Memanggil Data
In [44]:
# Load the dataset from 'automobileEDA.csv' into the DataFrame `df`
df = pd.read_csv('automobileEDA.csv')
Keterangan
In [45]:
# Display the whole DataFrame (the notebook renders a truncated view of it)
df
Out[45]: num-
normalized- body- drive- engine- wheel-
symboling make aspiration of- le
losses style wheels location base
doors
alfa-
0 3 122 std two convertible rwd front 88.6 0.8
romero
alfa-
1 3 122 std two convertible rwd front 88.6 0.8
romero
alfa-
2 1 122 std two hatchback rwd front 94.5 0.82
romero
... ... ... ... ... ... ... ... ... ...
4. Visualisasi Data
Pie Chart
# Draw a pie chart of `votes`, one wedge per entry in `flavors`.
# NOTE(review): votes, flavors, colors and explode are not defined in this
# cell; presumably they come from an earlier cell — confirm before running.
plt.pie(
    votes,
    labels=flavors,
    autopct='%1.1f%%',  # print each wedge's share as a percentage, one decimal
    colors=colors,
    explode=explode,
    shadow=True,
)  # closing parenthesis was missing, which made the cell a syntax error
plt.show()
Bar Chart
In [33]:
# Bar chart cell: country names on the x axis, y axis labelled 'Population (Millions)'.
# NOTE(review): this cell is incomplete in this copy and will not run as-is:
#   - the string literal starting with 'Pe is cut off mid-line,
#   - the line ending in 28258770) closes a tuple whose opening is not visible,
#   - x_coords is computed but no bar-drawing call is visible.
# Restore the missing lines from the original notebook before executing.
countries = ('Argentina', 'Bolivia', 'Brazil', 'Chile', 'Colombia','Ecuador',
'Falkland Islands', 'French Guiana', 'Guyana','Paraguay', 'Pe
'Suriname', 'Uruguay', 'Venezuela')
3470475,28258770)
x_coords = np.arange(len(countries))
plt.xticks(rotation=90)
plt.ylabel('Population (Millions)')
plt.show()
In [48]:
# Bar chart cell, sorted variant: sorts `df1` by its 'Population' column and
# recolours the second-to-last entry of `colors` red before plotting.
# NOTE(review): this cell is incomplete in this copy and will not run as-is:
#   - the string literal starting with 'Pe is cut off mid-line,
#   - the line ending in 28258770) closes a tuple whose opening is not visible,
#   - df1 and colors are used but never defined in the visible lines,
#   - the line `color=colors)` is the tail of a call whose start is not visible.
# Restore the missing lines from the original notebook before executing.
countries = ('Argentina', 'Bolivia', 'Brazil', 'Chile', 'Colombia','Ecuador',
'Falkland Islands', 'French Guiana', 'Guyana','Paraguay', 'Pe
'Suriname', 'Uruguay', 'Venezuela')
3470475,28258770)
df1.sort_values(by='Population', inplace=True)
x_coords = np.arange(len(df1))
colors[-2] = '#FF0000'
plt.figure(figsize=(20,10))
color=colors)
plt.xticks(rotation=90)
plt.ylabel('Population (Millions)')
plt.show()
Line Chart
In [36]:
# Sample data: one temperature reading (Celsius) for every second hour, 0 through 24.
hour = list(range(0, 25, 2))
temperature_c = [2, 1, 0, 0, 1, 5, 8, 9, 8, 5, 3, 2, 2]

# Plain line chart of temperature against hour.
plt.plot(hour, temperature_c)
plt.xlabel('Hour')
plt.ylabel('Temperature Celsius')
plt.show()
In [37]:
# Same temperature series as a line chart, with an 'x' marker drawn at every data point.
hour = list(range(0, 25, 2))
temperature_c = [2, 1, 0, 0, 1, 5, 8, 9, 8, 5, 3, 2, 2]

plt.plot(hour, temperature_c, marker='x')
plt.xlabel('Hour')
plt.ylabel('Temperature Celsius')
plt.show()
In [38]:
# Actual and predicted temperatures (Celsius) for every second hour, 0 through 24.
temperature_c_actual = [2, 1, 0, 0, 1, 5, 8, 9, 8, 5, 3, 2, 2]
temperature_c_predicted = [2, 2, 1, 0, 1, 3, 7, 8, 8, 6, 4, 3, 3]
hour = [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24]

# Plot both series on the same axes. The predicted series was defined but
# never drawn before, so the chart could not show the comparison.
plt.plot(hour, temperature_c_actual, label='Actual')
plt.plot(hour, temperature_c_predicted, linestyle='--', label='Predicted')
plt.xlabel('Hour')
plt.ylabel('Temperature Celsius')
plt.legend()  # needed to tell the two lines apart
plt.show()
5. Korelasi Data
Hubungan Positif
In [41]:
# Positive relationship demo: y is the element-wise square of x, so y rises as x rises.
x = [1, 2, 3, 4, 5, 6, 7, 8]
x1 = np.asarray(x)
y = x1 ** 2

plt.plot(x1, y)
plt.xlabel('Nilai X')
plt.ylabel('Nilai Y')
plt.title('Hubungan Positif')
plt.show()
Hubungan Negatif
In [42]:
# Negative relationship demo: as curah_hujan (rainfall) goes down,
# penjualan_es (ice sales) goes up.
penjualan_es = [2,4,6,8,10,12,14,16]
curah_hujan = [20,18,16,14,12,10,8,6]
pe = np.array(penjualan_es)
ch = np.array(curah_hujan)

# Rainfall on the x axis, ice sales on the y axis.
plt.plot(ch, pe)
plt.title('Hubungan Negatif')
# Axis labels added so the figure stands alone, matching the
# 'Hubungan Positif' cell which labels both axes.
plt.xlabel('Curah Hujan')
plt.ylabel('Penjualan Es')
plt.show()
6. Koefisien Korelasi
Kita akan menggunakan data 'automobileEDA.csv' untuk melihat korelasi antarvariabel.
In [49]:
# Preview the first rows of the DataFrame
df.head()
Out[49]: num-
normalized- body- drive- engine- wheel-
symboling make aspiration of- leng
losses style wheels location base
doors
alfa-
0 3 122 std two convertible rwd front 88.6 0.8111
romero
num-
normalized- body- drive- engine- wheel-
symboling make aspiration of- leng
losses style wheels location base
doors
alfa-
1 3 122 std two convertible rwd front 88.6 0.8111
romero
alfa-
2 1 122 std two hatchback rwd front 94.5 0.8226
romero
5 rows × 29 columns
In [52]:
df.columns # List the column labels (this shows the names, not the count; len(df.columns) gives the count)
dtype='object')
In [55]:
df.describe(include='all') # Summary statistics for both numeric and non-numeric columns
Out[55]: num-
normalized- body- drive- engine- wheel-
symboling make aspiration of-
losses style wheels location base
doors
count 201.000000 201.00000 201 201 201 201 201 201 201.000000 2
top NaN NaN toyota std four sedan fwd front NaN
mean 0.840796 122.00000 NaN NaN NaN NaN NaN NaN 98.797015
std 1.254802 31.99625 NaN NaN NaN NaN NaN NaN 6.066366
min -2.000000 65.00000 NaN NaN NaN NaN NaN NaN 86.600000
25% 0.000000 101.00000 NaN NaN NaN NaN NaN NaN 94.500000
50% 1.000000 122.00000 NaN NaN NaN NaN NaN NaN 97.000000
75% 2.000000 137.00000 NaN NaN NaN NaN NaN NaN 102.400000
max 3.000000 256.00000 NaN NaN NaN NaN NaN NaN 120.900000
11 rows × 29 columns
In [54]:
# Pairwise correlation between the numeric columns up to and including 'price'.
# The slice also contains string columns (e.g. make, aspiration), and
# DataFrame.corr() no longer drops those silently in pandas 2.x — it raises.
# Selecting the numeric columns first keeps this working on old and new pandas.
df.loc[:, :'price'].select_dtypes(include='number').corr()
normalized-
0.466264 1.000000 -0.056661 0.019424 0.086802 -0.373737 0.099404 0.11
losses
compression-
-0.182196 -0.114713 0.250313 0.159733 0.189867 0.259737 0.156433 0.02
ratio