Professional Documents
Culture Documents
Data Preprocessing
Data Preprocessing
csv")
# --- Preprocessing of df_2: drop columns, drop missing rows, one-hot encode ---
# `df_2` is bound by the load statement that ends just above this block.
# Bare expressions such as `df_2` are notebook-style displays; they have no
# visible effect when the file is run as a plain script.
df_2
df_2.info(0)
df_2.describe()

# Remove the columns that are not used in the rest of the preprocessing.
cols = ['Name', 'Ticket', 'Cabin']
df_2 = df_2.drop(cols, axis=1)
df_2.info()

# Drop every row that still contains a missing value.
# `axis` is passed by keyword: DataFrame.dropna accepts keyword arguments
# only from pandas 2.0 on, so the positional form dropna(0) raises TypeError.
df_2 = df_2.dropna(axis=0)
df_2.info()

# Inspect the categorical columns and what their indicator columns look like.
df_2['Sex']
df_2['Pclass']
df_2['Embarked']
pd.get_dummies(df_2['Pclass'])
pd.get_dummies(df_2['Sex'])

# Build one indicator (dummy) frame per categorical column.
dummies = []
cols = ['Pclass', 'Sex', 'Embarked']
for col in cols:
    dummies.append(pd.get_dummies(df_2[col]))
# NOTE(review): the list is printed once, after the loop; if the original
# notebook printed it on every iteration, move this line into the loop body.
print(dummies)

# Join the indicator frames side by side, then attach them to the data.
titanic_dummies = pd.concat(dummies, axis=1)
titanic_dummies
df_2 = pd.concat((df_2, titanic_dummies), axis=1)
df_2

# Display only: the result is not assigned, so df_2 keeps the row labelled 0.
df_2.drop(0)

# The original categorical columns are now redundant with their indicators.
df_2 = df_2.drop(['Sex', 'Embarked', 'Pclass'], axis=1)
df_2.info()
df_2.isnull()

# Rows with missing values were already removed above, so this interpolation
# is expected to leave 'Age' unchanged; it is kept as a safeguard.
df_2['Age'] = df_2['Age'].interpolate()
print(df_2)
# --- Feature scaling demo: MinMaxScaler, then StandardScaler ---
# NOTE(review): `data` is read on the next lines, but its only visible
# assignment comes further down in this block. Presumably an earlier cell
# (not visible here) binds `data` first - confirm, otherwise this raises
# NameError when the file is run top to bottom.
scaler =MinMaxScaler()
scaler.fit(data)
print(scaler.transform(data))
# Notebook-style display of the per-column minimum and maximum recorded by fit.
scaler.data_min_
scaler.data_max_
# Sample matrix: five rows, two columns whose magnitudes differ widely.
data=asarray([[100,0.001],
[50,0.05],
[50,0.05],
[88,0.07],
[4,0.1]])
print(data)
# `scaler` is rebound to a StandardScaler; fit_transform fits it on `data`
# and returns the standardized copy in one call.
scaler=StandardScaler()
scaled=scaler.fit_transform(data)
print(scaled)
# --- Stacked bar chart of scores per group, split by gender ---
# Number of groups, followed by the mean score and the error-bar size for
# each group.
N = 5
menMeans = (22, 30, 35, 35, 26)
womenMeans = (25, 32, 30, 35, 29)
menStd = (4, 3, 4, 1, 5)
womenStd = (3, 5, 2, 3, 3)
# the x locations for the groups
ind = np.arange(N)
# the width of the bars
width = 0.35

# Draw the two bar series. `p1` and `p2` are needed by plt.legend below;
# without these calls the script fails with NameError and the data above is
# never plotted.
# NOTE(review): the women's bars are stacked on top of the men's (bottom=)
# because the y-axis ticks run to 80, which fits the stacked totals; confirm
# that a stacked rather than side-by-side layout is intended.
p1 = plt.bar(ind, menMeans, width, yerr=menStd)
p2 = plt.bar(ind, womenMeans, width, bottom=menMeans, yerr=womenStd)

# Axis labels, two-line title, tick labels and legend.
plt.ylabel('Scores')
plt.xlabel('Groups')
plt.title('Scores by group\n' + 'and gender')
plt.xticks(ind, ('Group1', 'Group2', 'Group3', 'Group4', 'Group5'))
plt.yticks(np.arange(0, 81, 10))
# One handle per series: the first bar of each container carries its colour.
plt.legend((p1[0], p2[0]), ('Men', 'Women'))
plt.show()