You are on page 1 of 11

a

September 5, 2023

[1]: import pandas as pd


import matplotlib.pyplot as plt
import numpy as np

Đọc dữ liệu
[2]: df1= pd.read_csv("bank_data.csv")  # full dataset; NOTE(review): underscore in this file name vs. hyphens in the others - confirm
df1_1 = pd.read_csv("bank-data-1.1-6thuoctinh.csv")  # subset 1.1 ("6thuoctinh" = 6 attributes)
df1_2 = pd.read_csv("bank-data-1.2-7thuoctinh.csv")  # subset 1.2 (7 attributes)
df2 = pd.read_csv("bank-data-2-12thuoctinh.csv")  # subset 2 (12 attributes)

[3]: print('Kích thước tập dữ liệu',df1.shape)  # shape is a (rows, columns) tuple

Kích thước tập dữ liệu (600, 12)

[4]: print("Kích thước tập dữ liệu 1.1 ",df1_1.shape)


print("Kích thước tập dữ liệu 1.2 ",df1_2.shape)
print("Kích thước tập dữ liệu 2 ",df2.shape)

Kích thước tập dữ liệu 1.1 (300, 6)


Kích thước tập dữ liệu 1.2 (300, 7)
Kích thước tập dữ liệu 2 (300, 12)
Đọc 10 dòng dữ liệu đầu tiên của df1_1
[5]: df1_1.head(10)  # preview the first 10 rows of subset 1.1

[5]: ID age sex region income married


0 1 48 FEMALE INNER_CITY 17546.0 NO
1 2 40 MALE TOWN 30085.1 YES
2 3 51 FEMALE INNER_CITY 16575.4 YES
3 4 23 FEMALE TOWN 20375.4 YES
4 5 57 FEMALE RURAL 50576.3 YES
5 6 57 FEMALE TOWN 37869.6 YES
6 7 22 NaN RURAL NaN NO
7 8 58 MALE TOWN 24946.6 YES
8 9 37 FEMALE SUBURBAN 25304.3 YES
9 10 200 MALE TOWN 24212.1 YES

1
[6]: df1_1Na= df1_1.count()
print(df1_1Na)

ID 300
age 300
sex 297
region 300
income 298
married 300
dtype: int64
Trường giới tính (‘sex’) có chứa giá trị NaN
[7]: df1_1Na= df1_1['sex'].value_counts(dropna=False)  # dropna=False keeps NaN as its own bucket in the tally
print(df1_1Na)

MALE 154
FEMALE 143
NaN 3
Name: sex, dtype: int64

[8]: df1_1Na= df1_1['sex'].count()


print(df1_1Na)

297

[9]: df1_1Na= df1_1['sex'].isnull()


print(df1_1Na)

0 False
1 False
2 False
3 False
4 False

295 False
296 False
297 False
298 False
299 False
Name: sex, Length: 300, dtype: bool

[10]: df1_1Na=df1_1.isnull()
print(df1_1Na)

ID age sex region income married


0 False False False False False False
1 False False False False False False
2 False False False False False False

2
3 False False False False False False
4 False False False False False False
.. … … … … … …
295 False False False False False False
296 False False False False False False
297 False False False False False False
298 False False False False False False
299 False False False False False False

[300 rows x 6 columns]

[11]: print(df1_1.to_string())  # prints every row untruncated; NOTE(review): consider head()/sample() to keep the output short

ID age sex region income married


0 1 48 FEMALE INNER_CITY 17546.00 NO
1 2 40 MALE TOWN 30085.10 YES
2 3 51 FEMALE INNER_CITY 16575.40 YES
3 4 23 FEMALE TOWN 20375.40 YES
4 5 57 FEMALE RURAL 50576.30 YES
5 6 57 FEMALE TOWN 37869.60 YES
6 7 22 NaN RURAL NaN NO
7 8 58 MALE TOWN 24946.60 YES
8 9 37 FEMALE SUBURBAN 25304.30 YES
9 10 200 MALE TOWN 24212.10 YES
10 11 66 FEMALE TOWN 59803.90 YES
11 12 52 FEMALE INNER_CITY 26658.80 NO
12 13 44 FEMALE TOWN NaN YES
13 14 66 NaN TOWN 55204.70 YES
14 15 36 MALE RURAL 19474.60 YES
15 16 38 FEMALE INNER_CITY 22342.10 YES
16 17 37 NaN TOWN 17729.80 YES
17 18 46 FEMALE SUBURBAN 41016.00 YES
18 19 62 FEMALE INNER_CITY 26909.20 YES
19 20 31 MALE TOWN 22522.80 YES
20 21 61 MALE INNER_CITY 57880.70 YES
21 22 50 MALE TOWN 16497.30 YES
22 23 54 MALE INNER_CITY 38446.60 YES
23 24 27 FEMALE TOWN 15538.80 NO
24 25 22 MALE INNER_CITY 12640.30 NO
25 26 56 MALE INNER_CITY 41034.00 YES
26 27 45 MALE INNER_CITY 20809.70 YES
27 28 39 FEMALE TOWN 20114.00 YES
28 29 39 FEMALE INNER_CITY 29359.10 NO
29 30 61 MALE RURAL 24270.10 YES
30 31 61 FEMALE RURAL 22942.90 YES
31 32 20 FEMALE TOWN 16325.80 YES
32 33 45 MALE SUBURBAN 23443.20 YES
33 34 33 FEMALE INNER_CITY 29921.30 NO

3
34 35 43 MALE SUBURBAN 37521.90 NO
35 36 27 FEMALE INNER_CITY 19868.00 YES
36 37 19 MALE RURAL 10953.00 YES
37 38 36 FEMALE RURAL 13381.00 NO
38 39 43 FEMALE TOWN 18504.30 YES
39 40 66 FEMALE SUBURBAN 25391.50 NO
40 41 55 MALE TOWN 26774.20 YES
41 42 47 FEMALE INNER_CITY 26952.60 YES
42 43 67 MALE TOWN 55716.50 NO
43 44 32 FEMALE TOWN 27571.50 YES
44 45 20 MALE INNER_CITY 13740.00 NO
45 46 64 MALE INNER_CITY 52670.60 YES
46 47 50 FEMALE INNER_CITY 13283.90 NO
47 48 29 MALE INNER_CITY 13106.60 NO
48 49 52 MALE INNER_CITY 39547.80 NO
49 50 47 FEMALE RURAL 17867.30 YES
50 51 24 MALE TOWN 14309.70 NO
51 52 36 MALE TOWN 23894.80 YES
52 53 43 MALE TOWN 16259.70 YES
53 54 48 MALE SUBURBAN 29794.10 NO
54 55 63 MALE TOWN 56842.50 YES
55 56 52 FEMALE RURAL 47835.80 NO
56 57 58 FEMALE INNER_CITY 24977.50 NO
57 58 28 MALE INNER_CITY 23124.90 YES
58 59 29 FEMALE INNER_CITY 15143.80 YES
59 60 34 MALE INNER_CITY 25334.30 NO
60 61 42 FEMALE INNER_CITY 24763.30 YES
61 62 65 FEMALE INNER_CITY 36589.00 NO
62 63 47 MALE INNER_CITY 27022.60 YES
63 64 20 MALE INNER_CITY 11700.40 YES
64 65 21 MALE TOWN 5014.21 NO
65 66 42 MALE INNER_CITY 17390.10 YES
66 67 19 MALE TOWN 10861.00 NO
67 68 41 FEMALE TOWN 34892.90 NO
68 69 30 MALE TOWN 19403.10 NO
69 70 31 FEMALE RURAL 10441.90 YES
70 71 25 MALE INNER_CITY 14064.90 YES
71 72 21 MALE INNER_CITY 8062.73 NO
72 73 36 MALE INNER_CITY 31982.00 YES
73 74 58 FEMALE INNER_CITY 23197.50 YES
74 75 64 FEMALE INNER_CITY 52674.00 NO
75 76 59 FEMALE RURAL 35610.50 NO
76 77 45 FEMALE TOWN 26948.00 NO
77 78 61 MALE INNER_CITY 49456.70 YES
78 79 30 FEMALE INNER_CITY 14724.50 YES
79 80 58 FEMALE TOWN 34524.90 YES
80 81 50 FEMALE TOWN 22052.10 NO
81 82 30 MALE INNER_CITY 27808.10 NO

4
82 83 29 FEMALE INNER_CITY 12591.40 YES
83 84 35 MALE INNER_CITY 16394.40 YES
84 85 62 MALE INNER_CITY 24026.10 YES
85 86 36 MALE INNER_CITY 31683.10 YES
86 87 25 FEMALE INNER_CITY 15525.00 YES
87 88 66 FEMALE TOWN 22562.20 NO
88 89 30 MALE SUBURBAN 15848.70 YES
89 90 54 FEMALE INNER_CITY 31095.60 YES
90 91 37 MALE TOWN 24814.50 YES
91 92 28 FEMALE INNER_CITY 25429.30 NO
92 93 53 FEMALE RURAL 34866.50 NO
93 94 61 MALE INNER_CITY 42579.10 YES
94 95 61 FEMALE INNER_CITY 41127.40 YES
95 96 18 FEMALE INNER_CITY 9990.11 YES
96 97 22 MALE INNER_CITY 7948.62 YES
97 98 34 MALE TOWN 30870.80 YES
98 99 35 FEMALE INNER_CITY 12125.80 NO
99 100 18 FEMALE RURAL 15348.90 YES
100 101 54 MALE INNER_CITY 26707.90 YES
101 102 27 FEMALE INNER_CITY 11604.40 YES
102 103 42 MALE INNER_CITY 15499.90 YES
103 104 43 MALE TOWN 33088.50 NO
104 105 64 FEMALE INNER_CITY 34513.60 YES
105 106 43 MALE TOWN 32395.50 YES
106 107 49 MALE RURAL 46633.00 YES
107 108 23 MALE INNER_CITY 13039.90 YES
108 109 23 MALE INNER_CITY 12681.90 NO
109 110 30 FEMALE INNER_CITY 24031.50 YES
110 111 36 MALE TOWN 37330.50 NO
111 112 34 MALE INNER_CITY 25333.20 YES
112 113 51 FEMALE INNER_CITY 37094.20 YES
113 114 36 MALE TOWN 33630.60 NO
114 115 56 MALE INNER_CITY 43228.20 YES
115 116 54 FEMALE INNER_CITY 47796.80 YES
116 117 56 FEMALE TOWN 21730.30 YES
117 118 26 MALE INNER_CITY 10044.10 YES
118 119 39 MALE TOWN 17270.10 NO
119 120 64 FEMALE RURAL 45765.00 YES
120 121 46 MALE RURAL 29525.50 NO
121 122 62 FEMALE RURAL 54863.80 YES
122 123 36 FEMALE TOWN 20799.00 YES
123 124 35 FEMALE RURAL 33028.30 NO
124 125 47 MALE RURAL 45031.90 NO
125 126 47 MALE INNER_CITY 39010.80 YES
126 127 37 FEMALE TOWN 25257.70 YES
127 128 48 FEMALE INNER_CITY 42603.90 YES
128 129 41 MALE TOWN 14092.70 YES
129 130 27 FEMALE RURAL 21350.30 NO

5
130 131 43 MALE INNER_CITY 23246.40 NO
131 132 61 MALE RURAL 41609.50 YES
132 133 52 FEMALE SUBURBAN 16716.10 NO
133 134 64 FEMALE SUBURBAN 36436.40 YES
134 135 66 FEMALE TOWN 59503.80 YES
135 136 53 FEMALE TOWN 31334.80 YES
136 137 20 FEMALE INNER_CITY 14048.90 YES
137 138 57 FEMALE INNER_CITY 39205.30 NO
138 139 65 FEMALE RURAL 42173.90 YES
139 140 64 FEMALE INNER_CITY 55263.00 NO
140 141 52 MALE INNER_CITY 37095.20 YES
141 142 47 FEMALE INNER_CITY 22791.40 YES
142 143 28 FEMALE TOWN 17240.60 YES
143 144 64 MALE TOWN 48974.80 YES
144 145 25 MALE INNER_CITY 18923.00 YES
145 146 58 MALE SUBURBAN 51204.20 YES
146 147 34 MALE TOWN 20236.20 YES
147 148 20 FEMALE INNER_CITY 18860.30 NO
148 149 63 MALE RURAL 25732.50 YES
149 150 30 FEMALE SUBURBAN 28240.40 YES
150 151 53 MALE RURAL 28193.60 YES
151 152 43 MALE TOWN 36432.80 NO
152 153 63 MALE TOWN 54618.80 YES
153 154 33 MALE INNER_CITY 24760.80 YES
154 155 41 MALE RURAL 23356.10 NO
155 156 20 FEMALE SUBURBAN 8143.75 YES
156 157 50 MALE TOWN 26462.50 YES
157 158 24 MALE RURAL 20467.30 YES
158 159 60 FEMALE TOWN 21506.20 YES
159 160 44 FEMALE TOWN 15315.30 YES
160 161 23 MALE INNER_CITY 18875.70 YES
161 162 40 FEMALE INNER_CITY 12977.20 YES
162 163 49 FEMALE TOWN 20708.50 NO
163 164 21 FEMALE TOWN 7549.38 NO
164 165 40 FEMALE INNER_CITY 24904.00 YES
165 166 26 MALE RURAL 24071.80 YES
166 167 20 MALE TOWN 9589.91 NO
167 168 24 MALE INNER_CITY 8562.86 NO
168 169 37 FEMALE TOWN 26707.50 NO
169 170 56 MALE INNER_CITY 34020.50 YES
170 171 52 MALE INNER_CITY 49175.70 YES
171 172 22 MALE INNER_CITY 19726.30 YES
172 173 35 MALE INNER_CITY 24346.60 YES
173 174 34 MALE RURAL 26999.40 YES
174 175 67 FEMALE TOWN 41558.10 YES
175 176 58 FEMALE INNER_CITY 56340.30 NO
176 177 40 MALE TOWN 37558.50 YES
177 178 41 FEMALE INNER_CITY 30099.30 YES

6
178 179 43 MALE INNER_CITY 15254.80 YES
179 180 63 MALE INNER_CITY 36086.10 YES
180 181 22 FEMALE INNER_CITY 17655.00 YES
181 182 60 MALE RURAL 56658.90 NO
182 183 65 FEMALE INNER_CITY 37706.50 NO
183 184 48 FEMALE INNER_CITY 18516.00 YES
184 185 38 FEMALE INNER_CITY 29622.00 NO
185 186 49 MALE RURAL 32669.90 YES
186 187 20 FEMALE INNER_CITY 18275.50 YES
187 188 48 FEMALE TOWN 34410.00 YES
188 189 38 MALE INNER_CITY 34866.90 YES
189 190 41 FEMALE INNER_CITY 21796.60 YES
190 191 67 FEMALE SUBURBAN 63130.10 YES
191 192 39 MALE INNER_CITY 14996.40 YES
192 193 64 FEMALE RURAL 49024.90 YES
193 194 41 MALE INNER_CITY 16249.80 YES
194 195 55 MALE SUBURBAN 36192.10 YES
195 196 52 MALE INNER_CITY 17839.90 YES
196 197 30 FEMALE INNER_CITY 18802.40 NO
197 198 52 MALE INNER_CITY 48720.30 YES
198 199 26 MALE INNER_CITY 14585.90 NO
199 200 26 FEMALE INNER_CITY 20819.00 YES
200 201 46 MALE TOWN 26077.80 YES
201 202 46 FEMALE TOWN 41627.10 YES
202 203 52 MALE INNER_CITY 16977.30 YES
203 204 37 MALE INNER_CITY 19012.80 NO
204 205 22 MALE INNER_CITY 12764.80 YES
205 206 18 MALE INNER_CITY 14388.60 NO
206 207 63 MALE INNER_CITY 59409.10 NO
207 208 25 FEMALE INNER_CITY 14960.20 YES
208 209 67 MALE INNER_CITY 39666.60 YES
209 210 27 MALE INNER_CITY 20771.90 NO
210 211 61 MALE INNER_CITY 24474.10 NO
211 212 58 MALE TOWN 33123.70 YES
212 213 22 MALE INNER_CITY 14433.40 YES
213 214 28 MALE TOWN 13175.50 NO
214 215 23 MALE INNER_CITY 9824.37 YES
215 216 27 MALE SUBURBAN 17610.30 YES
216 217 27 FEMALE SUBURBAN 15156.20 YES
217 218 40 FEMALE INNER_CITY 31774.10 YES
218 219 39 MALE TOWN 31693.50 NO
219 220 35 FEMALE INNER_CITY 28598.70 YES
220 221 37 FEMALE INNER_CITY 26261.70 NO
221 222 47 MALE INNER_CITY 42124.10 YES
222 223 42 FEMALE INNER_CITY 39308.70 YES
223 224 67 FEMALE INNER_CITY 43530.00 YES
224 225 57 MALE RURAL 49874.40 YES
225 226 47 FEMALE RURAL 27434.80 NO

7
226 227 67 MALE INNER_CITY 50474.60 YES
227 228 56 MALE TOWN 24888.20 NO
228 229 37 MALE RURAL 28021.60 NO
229 230 27 MALE INNER_CITY 12279.50 NO
230 231 59 FEMALE INNER_CITY 30189.40 YES
231 232 31 MALE INNER_CITY 28969.40 NO
232 233 31 MALE SUBURBAN 14058.50 YES
233 234 32 FEMALE TOWN 30404.30 YES
234 235 57 FEMALE RURAL 41438.20 NO
235 236 49 FEMALE INNER_CITY 16711.30 NO
236 237 65 MALE TOWN 52255.90 NO
237 238 22 FEMALE INNER_CITY 17866.90 YES
238 239 26 FEMALE RURAL 18067.50 YES
239 240 23 MALE INNER_CITY 12823.70 YES
240 241 26 FEMALE RURAL 11299.30 YES
241 242 59 FEMALE INNER_CITY 56031.10 NO
242 243 67 MALE INNER_CITY 35263.50 YES
243 244 34 FEMALE INNER_CITY 19968.10 YES
244 245 50 MALE RURAL 27825.50 YES
245 246 46 MALE SUBURBAN 37773.90 NO
246 247 23 FEMALE INNER_CITY 7606.25 NO
247 248 26 MALE RURAL 21384.40 YES
248 249 40 MALE TOWN 20347.00 YES
249 250 36 MALE TOWN 21332.30 YES
250 251 65 MALE INNER_CITY 57671.70 NO
251 252 45 FEMALE TOWN 36057.80 YES
252 253 23 MALE INNER_CITY 14290.50 YES
253 254 42 FEMALE TOWN 17882.90 YES
254 255 21 FEMALE RURAL 10629.10 NO
255 256 62 FEMALE INNER_CITY 24262.80 NO
256 257 49 FEMALE SUBURBAN 26097.90 NO
257 258 28 FEMALE TOWN 23371.00 YES
258 259 38 FEMALE TOWN 21495.60 NO
259 260 36 MALE TOWN 12166.90 NO
260 261 22 MALE SUBURBAN 17180.20 YES
261 262 40 FEMALE TOWN 28882.30 YES
262 263 40 FEMALE TOWN 21612.20 YES
263 264 60 FEMALE INNER_CITY 46358.40 YES
264 265 23 MALE INNER_CITY 19166.00 NO
265 266 21 MALE INNER_CITY 17921.80 YES
266 267 58 MALE TOWN 33229.00 NO
267 268 48 FEMALE SUBURBAN 30396.10 NO
268 269 63 FEMALE TOWN 34625.20 YES
269 270 20 MALE TOWN 16672.80 NO
270 271 67 FEMALE SUBURBAN 60747.50 NO
271 272 62 FEMALE INNER_CITY 56394.30 NO
272 273 36 MALE TOWN 13236.40 YES
273 274 31 MALE INNER_CITY 28409.40 YES

8
274 275 42 FEMALE INNER_CITY 27056.50 YES
275 276 18 MALE RURAL 9362.58 YES
276 277 46 FEMALE SUBURBAN 28702.70 NO
277 278 25 MALE TOWN 22366.10 YES
278 279 65 FEMALE RURAL 24477.50 NO
279 280 40 MALE TOWN 36972.40 YES
280 281 32 MALE INNER_CITY 22327.80 YES
281 282 18 FEMALE INNER_CITY 15610.20 YES
282 283 64 MALE INNER_CITY 54314.50 YES
283 284 43 FEMALE INNER_CITY 39175.80 YES
284 285 22 FEMALE INNER_CITY 13739.00 YES
285 286 25 MALE TOWN 9485.84 YES
286 287 39 MALE INNER_CITY 24675.70 YES
287 288 58 FEMALE INNER_CITY 28253.60 YES
288 289 33 MALE INNER_CITY 14136.50 YES
289 290 52 FEMALE RURAL 37162.10 YES
290 291 23 MALE INNER_CITY 13519.20 NO
291 292 44 FEMALE INNER_CITY 39253.60 NO
292 293 51 MALE RURAL 46323.80 YES
293 294 26 FEMALE TOWN 20950.70 YES
294 295 42 MALE TOWN 22495.70 YES
295 296 34 MALE TOWN 32548.90 YES
296 297 54 FEMALE RURAL 24583.40 NO
297 298 18 MALE RURAL 8639.24 YES
298 299 47 FEMALE INNER_CITY 17139.50 NO
299 300 24 FEMALE INNER_CITY 13667.70 YES
Xóa dòng dữ liệu khuyết thiếu NaN
[12]: drop_df1_1 = df1_1.dropna()
print(drop_df1_1)

ID age sex region income married


0 1 48 FEMALE INNER_CITY 17546.00 NO
1 2 40 MALE TOWN 30085.10 YES
2 3 51 FEMALE INNER_CITY 16575.40 YES
3 4 23 FEMALE TOWN 20375.40 YES
4 5 57 FEMALE RURAL 50576.30 YES
.. … … … … … …
295 296 34 MALE TOWN 32548.90 YES
296 297 54 FEMALE RURAL 24583.40 NO
297 298 18 MALE RURAL 8639.24 YES
298 299 47 FEMALE INNER_CITY 17139.50 NO
299 300 24 FEMALE INNER_CITY 13667.70 YES

[296 rows x 6 columns]

[13]: df1_1.head(10)  # the source frame still shows its NaN row: dropna() returned a copy

9
[13]: ID age sex region income married
0 1 48 FEMALE INNER_CITY 17546.0 NO
1 2 40 MALE TOWN 30085.1 YES
2 3 51 FEMALE INNER_CITY 16575.4 YES
3 4 23 FEMALE TOWN 20375.4 YES
4 5 57 FEMALE RURAL 50576.3 YES
5 6 57 FEMALE TOWN 37869.6 YES
6 7 22 NaN RURAL NaN NO
7 8 58 MALE TOWN 24946.6 YES
8 9 37 FEMALE SUBURBAN 25304.3 YES
9 10 200 MALE TOWN 24212.1 YES

[14]: drop_df1_1 .head(10)

[14]: ID age sex region income married


0 1 48 FEMALE INNER_CITY 17546.0 NO
1 2 40 MALE TOWN 30085.1 YES
2 3 51 FEMALE INNER_CITY 16575.4 YES
3 4 23 FEMALE TOWN 20375.4 YES
4 5 57 FEMALE RURAL 50576.3 YES
5 6 57 FEMALE TOWN 37869.6 YES
7 8 58 MALE TOWN 24946.6 YES
8 9 37 FEMALE SUBURBAN 25304.3 YES
9 10 200 MALE TOWN 24212.1 YES
10 11 66 FEMALE TOWN 59803.9 YES

[15]: df1_1['sex'] = df1_1['sex'].fillna('LGBT')  # overwrites the 'sex' column of df1_1 in place; later cells see the filled values

Thay thế giá trị khuyết thiếu của trường “sex” thành “LGBT”
[16]: df1_1.head(10)  # check the first 10 rows after the 'sex' fill

[16]: ID age sex region income married


0 1 48 FEMALE INNER_CITY 17546.0 NO
1 2 40 MALE TOWN 30085.1 YES
2 3 51 FEMALE INNER_CITY 16575.4 YES
3 4 23 FEMALE TOWN 20375.4 YES
4 5 57 FEMALE RURAL 50576.3 YES
5 6 57 FEMALE TOWN 37869.6 YES
6 7 22 LGBT RURAL NaN NO
7 8 58 MALE TOWN 24946.6 YES
8 9 37 FEMALE SUBURBAN 25304.3 YES
9 10 200 MALE TOWN 24212.1 YES

Số dòng mà trường ‘income’ có giá trị (khác NaN)


[17]: df1_1_GT= df1_1['income'].count()
print(df1_1_GT)

10
298
Xóa các dòng mà trường ‘income’ có giá trị null
[18]: drop_df1_GT = df1_1.dropna()
print(drop_df1_GT)

ID age sex region income married


0 1 48 FEMALE INNER_CITY 17546.00 NO
1 2 40 MALE TOWN 30085.10 YES
2 3 51 FEMALE INNER_CITY 16575.40 YES
3 4 23 FEMALE TOWN 20375.40 YES
4 5 57 FEMALE RURAL 50576.30 YES
.. … … … … … …
295 296 34 MALE TOWN 32548.90 YES
296 297 54 FEMALE RURAL 24583.40 NO
297 298 18 MALE RURAL 8639.24 YES
298 299 47 FEMALE INNER_CITY 17139.50 NO
299 300 24 FEMALE INNER_CITY 13667.70 YES

[298 rows x 6 columns]

[ ]:

11

You might also like