Professional Documents
Culture Documents
Leer Los Datos: Import As Import As Import As From Import From Import
Leer Los Datos: Import As Import As Import As From Import From Import
In [1]:
import numpy as np
import pandas as pd
df2=pd.read_csv('UCI_Credit_Card_PayATM.csv',sep=',')
df3=pd.read_csv('UCI_Credit_Card_Pay.csv',sep=',')
df4=pd.read_csv('UCI_Credit_Card_BillATM.csv',sep=',')
In [3]:
df1.head()
0 0 1 2 2.0 1 24.0 1
1 1 2 2 2.0 2 26.0 1
2 2 3 2 2.0 2 34.0 0
3 3 4 2 NaN 1 37.0 0
4 4 5 1 2.0 1 57.0 0
In [4]:
df2.head()
Out[4]: Unnamed:
ID LIMIT_BAL EDUCATION PAY_AMT1 PAY_AMT2 PAY_AMT3 PAY_AMT4 PAY_AMT5
0
In [5]:
df3.head()
Out[5]: Unnamed: 0 ID LIMIT_BAL AGE PAY_0 PAY_2 PAY_3 PAY_4 PAY_5 PAY_6
0 0 1 20000.0 No info 2 2 -1 -1 -2 -2
1 1 2 120000.0 26 years -1 2 0 0 0 2
2 2 3 90000.0 34 years 0 0 0 0 0 0
3 3 4 50000.0 37 years 0 0 0 0 0 0
4 4 5 50000.0 57 years -1 0 -1 0 0 0
In [6]:
df4.head()
Out[6]: Unnamed:
ID LIMIT_BAL SEX BILL_AMT1 BILL_AMT2 BILL_AMT3 BILL_AMT4 BILL_AMT5 BILL
0
ID int64
SEX object
EDUCATION float64
MARRIAGE int64
AGE float64
default.payment.next.month int64
dtype: object
In [8]:
df2.dtypes
ID int64
LIMIT_BAL object
EDUCATION object
PAY_AMT1 float64
PAY_AMT2 float64
PAY_AMT3 float64
PAY_AMT4 float64
PAY_AMT5 float64
PAY_AMT6 float64
dtype: object
In [9]:
df3.dtypes
ID int64
LIMIT_BAL object
AGE object
PAY_0 int64
PAY_2 int64
PAY_3 int64
PAY_4 int64
PAY_5 int64
PAY_6 int64
dtype: object
In [10]:
df4.dtypes
ID int64
LIMIT_BAL object
SEX object
BILL_AMT1 float64
BILL_AMT2 float64
BILL_AMT3 float64
BILL_AMT4 float64
BILL_AMT5 float64
BILL_AMT6 float64
dtype: object
Out[11]: SEX
count 50
unique 3
top 2
freq 24
In [12]:
df2.describe(include=['object'])
count 52 52
unique 25 5
top 50000.0 1
freq 8 21
In [13]:
df3.describe(include=['object'])
count 52 52
unique 24 26
LIMIT_BAL AGE
freq 9 8
In [14]:
df4.describe(include=['object'])
count 52 45
unique 27 2
freq 11 23
In [16]:
df2.describe(include=np.number)
Out[16]: Unnamed:
ID PAY_AMT1 PAY_AMT2 PAY_AMT3 PAY_AMT4 PAY_AMT5
0
Unnamed:
ID PAY_AMT1 PAY_AMT2 PAY_AMT3 PAY_AMT4 PAY_AMT5
0
In [17]:
df3.describe(include=np.number)
In [18]:
df4.describe(include=np.number)
Out[18]: Unnamed:
ID BILL_AMT1 BILL_AMT2 BILL_AMT3 BILL_AMT4 BILL_AMT
0
D1=pd.merge(df1,df2,left_on='ID',right_on='ID')
D2=pd.merge(D1,df3,left_on='ID',right_on='ID')
D3.head()
0 0 1 2 2.0 1 24.0 1 0
1 0 1 2 2.0 1 24.0 1 0
2 0 1 2 2.0 1 24.0 1 0
3 0 1 2 2.0 1 24.0 1 0
4 0 1 2 2.0 1 24.0 1 35
5 rows × 34 columns
In [20]:
#Eliminar columnas Unnamed 0_x y Unnamed 0_y
DF_1.head()
5 rows × 30 columns
In [21]:
#Eliminar filas duplicadas
DF_1=DF_1.drop_duplicates()
DF_1.head()
10 4 2 NaN 1 37.0 0 -
5 rows × 30 columns
DF_1['SEX_x']=DF_1['SEX_x'].replace('2','female')
DF_1['SEX_x']=DF_1['SEX_x'].replace('1','male')
DF_1.head()
5 rows × 30 columns
In [23]:
#Unir las columnas SEX_x y SEX_y y se crea la columna Sex
#Justificación: Los datos Nan se reemplazarán por el dato existente ya sea en la column
#No se eliminará al cliente.
ynan=np.where(DF_1['SEX_x']=='Nan',DF_1['SEX_y'],DF_1['SEX_x'])
DF_1['Sex']=np.where(DF_1['SEX_x']==DF_1['SEX_y'],DF_1['SEX_x'],ynan)
DF_1.head()
5 rows × 31 columns
In [24]:
#Se eliminan las columnas SEX_x y SEX_y
DF_1=DF_1.drop(labels=['SEX_x','SEX_y'],axis=1)
DF_1.head()
10 4 NaN 1 37.0 0 - 2
5 rows × 29 columns
In [25]:
#Trabajar Columna Limit Balance, Unir las columnas LIMIT_BAL_x y LIMIT_BAL y se crea la
#Justificación: Los datos Nan se reemplazarán por el dato existente ya sea en las colum
#No se eliminará al cliente.
ynan2=np.where(DF_1['LIMIT_BAL_x']=='-',DF_1['LIMIT_BAL'],DF_1['LIMIT_BAL_x'])
DF_1['Limit Balance']=np.where(DF_1['LIMIT_BAL_x']==DF_1['LIMIT_BAL'],DF_1['LIMIT_BAL_x
DF_1.head()
Out[25]:
ID EDUCATION_x MARRIAGE AGE_x default.payment.next.month LIMIT_BAL_x EDUCATION_y PA
10 4 NaN 1 37.0 0 - 2
5 rows × 30 columns
In [26]:
#Se eliminan las columnas LIMIT_BAL_x y LIMIT_BAL
DF_1=DF_1.drop(labels=['LIMIT_BAL_x','LIMIT_BAL'],axis=1)
DF_1.head()
Out[26]:
ID EDUCATION_x MARRIAGE AGE_x default.payment.next.month EDUCATION_y PAY_AMT1 PAY
5 rows × 28 columns
In [27]:
#Se trabaja la columna Education
#Se reemplazan los nulos por un 0 y se transforma la columna EDUCATION_x de Float a Int
DF_1.EDUCATION_x=DF_1.EDUCATION_x.fillna(0)
DF_1.EDUCATION_x=DF_1.EDUCATION_x.astype(int)
DF_1.head()
Out[27]:
ID EDUCATION_x MARRIAGE AGE_x default.payment.next.month EDUCATION_y PAY_AMT1 PAY
0 1 2 1 24.0 1 2 0.0
8 2 2 2 26.0 1 2 0.0
10 4 0 1 37.0 0 2 2000.0
12 5 2 1 57.0 0 2 2000.0 3
5 rows × 28 columns
In [28]:
#Se unen las columnas EDUCATION_x y EDUCATION_y y se crea la columna Education
#Justificación: Los datos Nan se reemplazarán por el dato existente ya sea en las colum
#No se eliminará al cliente.
ynan3=np.where(DF_1['EDUCATION_x']==0,DF_1['EDUCATION_y'],DF_1['EDUCATION_x'])
DF_1['Education']=np.where(DF_1['EDUCATION_x']==DF_1['EDUCATION_y'],DF_1['EDUCATION_x']
DF_1.head()
Out[28]:
ID EDUCATION_x MARRIAGE AGE_x default.payment.next.month EDUCATION_y PAY_AMT1 PAY
0 1 2 1 24.0 1 2 0.0
8 2 2 2 26.0 1 2 0.0
10 4 0 1 37.0 0 2 2000.0
12 5 2 1 57.0 0 2 2000.0 3
5 rows × 29 columns
In [29]:
#Se eliminan las columnas EDUCATION_x y EDUCATION_y
DF_1=DF_1.drop(labels=['EDUCATION_x','EDUCATION_y'],axis=1)
DF_1.head()
Out[29]:
ID MARRIAGE AGE_x default.payment.next.month PAY_AMT1 PAY_AMT2 PAY_AMT3 PAY_AMT4
5 rows × 27 columns
In [30]:
#Se trabaja la columna Age, Se transforma la columna AGE_x a tipo str y se eliminan los
DF_1['AGE_x']=DF_1['AGE_x'].astype(str).str[:-2]
DF_1.head()
Out[30]:
ID MARRIAGE AGE_x default.payment.next.month PAY_AMT1 PAY_AMT2 PAY_AMT3 PAY_AMT4
5 rows × 27 columns
In [36]:
#Se reemplazan los valores que tienen un "n" en la columna AGE_x por un espacio vacio "
#Justificación: No eliminaré los clientes que no posean datos en la columna AGE_x, ya q
DF_1.head()
Out[36]:
ID MARRIAGE AGE_x default.payment.next.month PAY_AMT1 PAY_AMT2 PAY_AMT3 PAY_AMT4
24
0 1 1 1 0.0 689.0 0.0 0.0
years
26
8 2 2 1 0.0 1000.0 1000.0 1000.0
years
34
9 3 2 0 1518.0 1500.0 1000.0 1000.0
years
37
10 4 1 0 2000.0 2019.0 1200.0 1100.0
years
57
12 5 1 0 2000.0 36681.0 10000.0 9000.0
years
5 rows × 27 columns
In [38]:
localhost:8888/nbconvert/html/Desktop/Miniproyecto 1/Untitled Folder/Miniproyecto 1.2.ipynb?download=false 10/14
09-08-2021 Miniproyecto 1.2
Out[38]: default
PAY PAY PAY PAY
pay PAY PAY PAY BILL
ID Marriage Age AMT AMT AMT AMT ...
next AMT 2 AMT 3 6 AMT 1 A
1 4 5 6
month
24
0 1 1 1 0.0 689.0 0.0 0.0 0.0 0.0 ... -2 3913.0 3
years
26
8 2 2 1 0.0 1000.0 1000.0 1000.0 0.0 2000.0 ... 2 2682.0 1
years
34
9 3 2 0 1518.0 1500.0 1000.0 1000.0 1000.0 5000.0 ... 0 29239.0 14
years
37
10 4 1 0 2000.0 2019.0 1200.0 1100.0 1069.0 1000.0 ... 0 46990.0 48
years
57
12 5 1 0 2000.0 36681.0 10000.0 9000.0 689.0 679.0 ... 0 8617.0 5
years
5 rows × 27 columns
In [39]:
#Se ordenan las columnas
Out[39]:
Limit PAY PAY PAY PAY BILL BILL BIL
ID Sex Education Marriage Age ...
Balance 0 2 3 4 AMT 4 AMT 5 AMT
24
0 1 20000.0 female 2 1 2 2 -1 -1 ... 0.0 0.0 0
years
26
8 2 120000.0 female 2 2 -1 2 0 0 ... 3272.0 3455.0 3261
years
34
9 3 90000.0 female 2 2 0 0 0 0 ... 14331.0 14948.0 15549
years
37
10 4 50000.0 female 2 1 0 0 0 0 ... 28314.0 28959.0 29547
years
57
12 5 50000.0 male 2 1 -1 0 -1 0 ... 20940.0 19146.0 19131
years
5 rows × 25 columns
In [40]:
localhost:8888/nbconvert/html/Desktop/Miniproyecto 1/Untitled Folder/Miniproyecto 1.2.ipynb?download=false 11/14
09-08-2021 Miniproyecto 1.2
DF_1.to_csv('cliente2.csv',sep=',')
In [42]:
#Leer nuevo archivo CSV
DF_2=pd.read_csv('cliente2.csv',sep=',')
DF_2
Out[42]:
Unnamed: Limit PAY PAY PAY BILL BILL
ID Sex Education Marriage Age ...
0 Balance 0 2 3 AMT 4 AMT 5
24
0 0 1 20000.0 female 2 1 2 2 -1 ... 0.0 0.0
years
26
1 8 2 120000.0 female 2 2 -1 2 0 ... 3272.0 3455.0
years
34
2 9 3 90000.0 female 2 2 0 0 0 ... 14331.0 14948.0
years
37
3 10 4 50000.0 female 2 1 0 0 0 ... 28314.0 28959.0
years
57
4 12 5 50000.0 male 2 1 -1 0 -1 ... 20940.0 19146.0
years
37
5 13 6 50000.0 male 1 2 0 0 0 ... 19394.0 19619.0
years
29
6 15 7 500000.0 male 1 2 0 0 0 ... 542653.0 483003.0
years
23
7 16 8 100000.0 female 2 2 0 -1 -1 ... 221.0 -159.0
years
28
8 17 9 140000.0 female 3 1 0 0 2 ... 12211.0 11793.0
years
35
9 18 10 20000.0 male 3 2 -2 -2 -2 ... 0.0 13007.0
years
51
11 20 12 260000.0 female 1 2 -1 -1 -1 ... 8517.0 22287.0
years
41
12 21 13 630000.0 female 2 2 -1 0 -1 ... 6500.0 6500.0
years
30
13 22 14 70000.0 male 2 2 1 2 2 ... 66782.0 36137.0
years
29
14 23 15 250000.0 male 1 2 0 0 0 ... 59696.0 56875.0
years
23
15 24 16 50000.0 female 3 3 1 2 0 ... 28771.0 29531.0
years
24
16 25 17 20000.0 male 1 2 0 0 2 ... 18338.0 17905.0
years
49
17 26 18 320000.0 male 1 1 0 0 0 ... 70074.0 5856.0
years
49
18 27 19 360000.0 female 1 1 1 -2 -2 ... 0.0 0.0
years
29
19 28 20 180000.0 female 1 2 1 -2 -2 ... 0.0 0.0
years
39
20 29 21 130000.0 female 3 2 0 0 0 ... 20616.0 11802.0
years
39
21 30 22 120000.0 female 2 1 -1 -1 -1 ... 0.0 632.0
years
26
22 31 23 70000.0 female 2 2 2 0 0 ... 44006.0 46905.0
years
40
23 32 24 450000.0 female 1 1 -2 -2 -2 ... 560.0 0.0
years
23
24 33 25 90000.0 male 1 2 0 0 0 ... 5398.0 6360.0
years
27
26 36 27 60000.0 male 1 2 1 -2 -1 ... -57.0 127.0
years
30
27 37 28 50000.0 female 3 2 0 0 0 ... 17878.0 18931.0
years
47
28 38 29 50000.0 female 3 1 -1 -1 -1 ... 2040.0 30430.0
years
26
29 39 30 50000.0 male 1 2 0 0 0 ... 17907.0 18375.0
years
33
31 41 32 50000.0 male 2 2 2 0 0 ... 22734.0 23217.0
years
32
32 42 33 100000.0 male 1 2 0 0 0 ... 80958.0 78703.0
years
58
34 44 35 500000.0 male 1 1 -2 -2 -2 ... 3180.0 0.0
years
30
35 45 36 160000.0 male 1 2 -1 -1 -2 ... -923.0 -1488.0
years
40
36 46 37 280000.0 male 2 1 0 0 0 ... 170410.0 173901.0
years
22
37 47 38 60000.0 female 2 2 0 0 0 ... 6026.0 -28335.0
years
25
38 48 39 50000.0 male 1 2 1 -1 -1 ... 0.0 0.0
years
31
39 49 40 280000.0 male 1 2 -1 -1 2 ... 9976.0 17976.0
years
33
40 50 41 360000.0 male 1 2 0 0 0 ... 628699.0 195969.0
years
25
41 51 42 70000.0 female 1 2 0 0 0 ... 63699.0 64718.0
years
30
44 54 45 40000.0 female 1 2 0 0 0 ... 25209.0 26636.0
years
29
45 55 46 210000.0 male 1 2 -2 -2 -2 ... 0.0 0.0
years
22
46 56 47 20000.0 female 1 2 0 0 2 ... 16341.0 16675.0
years
46
47 57 48 150000.0 female 5 2 0 0 -1 ... 1170.0 0.0
years
32
48 58 49 380000.0 male 2 2 -1 -1 -1 ... 32018.0 11849.0
years
24
49 59 50 20000.0 male 1 2 0 0 0 ... 19865.0 20480.0
years
50 rows × 26 columns
In [ ]: