You are on page 1 of 4

Reading the CSV files

% Load both CSV files into tables. readtable's default naming rule rewrites
% column headers into valid MATLAB identifiers (see the warning printed below).
df = readtable('superConElements.csv');
df2 = readtable('superConFeatures.csv');

Warning: Column headers from the file were modified to make them valid MATLAB identifiers before creating variable names for the table. The original
column headers are saved in the VariableDescriptions property.
Set 'VariableNamingRule' to 'preserve' to use the original column headers as table variable names.

Removing missing values

% rmmissing returns a new table and leaves its input unchanged, so the result
% must be assigned back for later steps to work on the cleaned data.
% No trailing semicolon: the cleaned table is still displayed.
df = rmmissing(df)

ans = 21263×88 table


H He Li Be B C N
1 0 0 0 0 0 0

2 0 0 0 0 0 0

3 0 0 0 0 0 0

4 0 0 0 0 0 0

5 0 0 0 0 0 0

6 0 0 0 0 0 0

7 0 0 0 0 0 0

8 0 0 0 0 0 0

9 0 0 0 0 0 0

10 0 0 0 0 0 0

11 0 0 0 0 0 0

12 0 0 0 0 0 0

13 0 0 0 0 0 0

14 0 0 0 0 0 0

% rmmissing returns a new table and leaves its input unchanged, so the result
% must be assigned back; otherwise the rows with missing values stay in df2
% and flow into every later step.
% No trailing semicolon: the cleaned table is still displayed.
df2 = rmmissing(df2)

ans = 14333×83 table


Material__ NumberOfElements__ MeanAtomicMass_AMU_
1 'Hg0.66Pb0.34Ba2Ca1.98Cu2.9O8.4' 6 110.79

2 'Bi1.6Pb0.4Sb0.1Sr2Ca2Cu2O' 7 106.45

3 'Bi1.8Pb0.2Sb0.1Sr2Ca2Cu2O' 7 106.45

4 'Hg0.75Ba2.07Ca2.07Cu3.11O8.208' 5 91.50

5 'Hg0.75Ba2.07Ca2.07Cu3.11O8.187' 5 91.50

6 'Hg1Ba2Ca2Cu3O' 5 91.50

7 'Hg1Ba2Ca2Cu3FO8' 6 79.42

8 'Hg0.9Re0.1Ba2Ca2Cu3O' 6 107.29

9 'Hg1Ba2Ca2Cu3O8.27' 5 91.50

10 'Hg1Ba2Ca2Cu3O8.29' 5 91.50
Material__ NumberOfElements__ MeanAtomicMass_AMU_
11 'Hg0.75Ba2.07Ca2.07Cu3.11O8.15' 5 91.50

12 'Hg0.75Ba2.07Ca2.07Cu3.11O8.16' 5 91.50

13 'Tl0.5Hg0.5Ba2Ca2Cu3O8' 6 110.32

14 'Tl0.3Hg0.7Ba2Ca2Cu3O8' 6 110.32

Removing Non-numeric columns

% Drop the 'material' variable from df (presumably its only text column — confirm).
new_df = removevars(df, 'material');


% Drop the 'Material__' variable (the chemical-formula strings) from df2.
new_df2 = removevars(df2, 'Material__');

Plotting a histogram of each variable in df

% Plot one histogram per variable of new_df.
% Each histogram gets its own figure: repeated histogram calls on the same
% axes replace the previous plot, so only the last one would remain visible.
varNames = new_df.Properties.VariableNames;
for i = 1:width(new_df)   % follow the table's actual width instead of a fixed 87
    figure;
    histogram(new_df.(varNames{i}))
    % Label the plot with its variable; 'none' shows the name literally
    % instead of interpreting characters such as '_' as TeX markup.
    title(varNames{i}, 'Interpreter', 'none')
end

Plotting a histogram of each variable in df2

% Plot one histogram per variable of new_df2.
% Each histogram gets its own figure: repeated histogram calls on the same
% axes replace the previous plot, so only the last one would remain visible.
varNames2 = new_df2.Properties.VariableNames;
for i = 1:width(new_df2)   % follow the table's actual width instead of a fixed 82
    figure;
    histogram(new_df2.(varNames2{i}))
    % Variable names here contain underscores; 'none' prevents them from
    % being rendered as TeX subscripts in the title.
    title(varNames2{i}, 'Interpreter', 'none')
end
Normalizing Values

% Z-score every variable of new_df and keep the result in norm_df; the bare
% call only displayed it as ans, leaving nothing for later steps to use.
% NOTE: a variable with zero standard deviation comes out as NaN (0/0); the
% He column in the displayed output appears to be such a case. Drop or
% zero-fill such columns before modelling.
% No trailing semicolon: the normalized table is still displayed.
norm_df = normalize(new_df, "zscore")

ans = 21263×87 table


H He Li Be B C N
1 -0.0662 NaN -0.0936 -0.0408 -0.1365 -0.0873 -0.0

2 -0.0662 NaN -0.0936 -0.0408 -0.1365 -0.0873 -0.0

3 -0.0662 NaN -0.0936 -0.0408 -0.1365 -0.0873 -0.0

4 -0.0662 NaN -0.0936 -0.0408 -0.1365 -0.0873 -0.0

5 -0.0662 NaN -0.0936 -0.0408 -0.1365 -0.0873 -0.0

6 -0.0662 NaN -0.0936 -0.0408 -0.1365 -0.0873 -0.0

7 -0.0662 NaN -0.0936 -0.0408 -0.1365 -0.0873 -0.0

8 -0.0662 NaN -0.0936 -0.0408 -0.1365 -0.0873 -0.0

9 -0.0662 NaN -0.0936 -0.0408 -0.1365 -0.0873 -0.0

10 -0.0662 NaN -0.0936 -0.0408 -0.1365 -0.0873 -0.0

11 -0.0662 NaN -0.0936 -0.0408 -0.1365 -0.0873 -0.0

12 -0.0662 NaN -0.0936 -0.0408 -0.1365 -0.0873 -0.0

13 -0.0662 NaN -0.0936 -0.0408 -0.1365 -0.0873 -0.0

14 -0.0662 NaN -0.0936 -0.0408 -0.1365 -0.0873 -0.0


% Z-score every variable of new_df2 and keep the result in norm_df2; the bare
% call only displayed it as ans, leaving nothing for later steps to use.
% No trailing semicolon: the normalized table is still displayed.
norm_df2 = normalize(new_df2, "zscore")

ans = 15542×82 table

NumberOfElements__ MeanAtomicMass_AMU_ WtdMeanAtomicMass_AMU_


1 1.2008 0.7380 -0.6024

2 1.2008 0.7222 -0.6390

3 1.8827 0.5920 0.4910


NumberOfElements__ MeanAtomicMass_AMU_ WtdMeanAtomicMass_AMU_
4 1.8827 0.5920 0.492

5 0.5188 0.0886 -0.6478

6 0.5188 0.0886 -0.6464

7 0.5188 0.0886 0.3055

8 1.2008 -0.3184 -0.639

9 1.2008 0.6202 0.3008

10 0.5188 0.0886 -0.5963

11 0.5188 0.0886 -0.5977

12 0.5188 0.0886 -0.6440

13 0.5188 0.0886 -0.6446

14 1.2008 0.7222 -0.5740


You might also like