% You are on page 1 of 2  (navigation text from the source web page; not part of the script)

%%% Start from a clean state: remove all workspace variables and clear the
%%% Command Window.

clearvars
clc

%% Step 1 - Importing the Data

%%% Data source: https://archive.ics.uci.edu/ml/datasets/airfoil+self-noise#
%%% Load the CSV file into a table; any text columns are imported as strings.
dataset = readtable('AirfoilSelfNoise.csv', "TextType", "string");

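% Notice that the column headings of the file are available as the names of
% the table variables.
%
% Meaning of each heading:
%   "f"          : frequency
%   "alpha"      : attack_angle
%   "c"          : chord_length
%   "u_infinity" : free_stream
%   "delta"      : suction_side_displacement_thickness
%   "sspl"       : scale_sound_pressure_level
%
% Table variable names are case-sensitive. The code below reads these columns
% as dataset.SSPL and dataset.delta, so the headings in the CSV file must use
% exactly that spelling.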

%% Step 2 - Exploring the Dataset

%%% Preview the first rows of the table (no semicolon: output is displayed)
head(dataset)

%%% Number of data points and attributes, displayed as [rows columns]
size(dataset)

%%% Print a summary of every table variable
summary(dataset)

%%% Descriptive statistics of one specific attribute (here: SSPL).
%%% NOTE: datastats is provided by Curve Fitting Toolbox; this line fails if
%%% that toolbox is not installed.
SSPL_statistics = datastats(dataset.SSPL);
% The original script stored this result under the misleading name
% MPG_statistics (the values describe SSPL, not MPG). The old name is kept as
% an alias so that any later code referring to it keeps working.
MPG_statistics = SSPL_statistics;

%%% Inspect the distribution of the attribute with a histogram
figure;
histogram(dataset.SSPL)

%% Step 3 - Handling Missing Values

%%% Check for missing values
% Logical mask with one entry per table element: true where the value is missing.
missingElements = ismissing(dataset);
% Number of missing values per attribute. Summing explicitly along dimension 1
% keeps the result per-column even when the table has a single row (a plain
% sum(...) would collapse a one-row mask into a single scalar).
totalMissing = sum(missingElements, 1);

%%% Remove missing values: every row containing at least one missing entry is
%%% deleted. (Rows are dropped, not filled in with the mean.)
dataset = rmmissing(dataset);

%% Step 4 - Removing Duplicate Rows

%%% Keep a single copy of each distinct row.
%%% NOTE: per the unique documentation, the rows come back in sorted order
%%% unless the 'stable' option is passed.
dataset = unique(dataset);

%%% To remove one or more attributes, build a new table without them;
%%% dataset itself keeps all of its columns.
dataset_remove_var = dataset;
dataset_remove_var(:, {'SSPL', 'delta'}) = [];

%% Step 5 - Handling Outliers

%%% Remove the rows that rmoutliers flags as outliers using the "mean"
%%% detection method, and overwrite dataset with the result.
%%% NOTE(review): per the rmoutliers documentation, "mean" flags values more
%%% than three standard deviations from the mean - confirm this suits the data.
dataset= rmoutliers(dataset,"mean");

%% Step 6 - Normalise data

%%% Normalise every variable with the 'range' method (rescaling), and store the
%%% result in a separate table; dataset itself is not modified.
%%% NOTE(review): an earlier comment here said "zscore", but the call below
%%% requests 'range'. If z-scores are what is wanted, the method argument
%%% would have to be 'zscore' instead - confirm the intended method.

dataset_normalized=normalize(dataset,'range');

%% Step 7 - Visualising the data

%%% Show the relationship between two attributes: SSPL on the x-axis against
%%% delta on the y-axis. The '*r' specification draws each data point as a
%%% red star marker.
%%% Note: this plots the values in dataset, not dataset_normalized.
figure;
plot(dataset{:, 'SSPL'}, dataset{:, 'delta'}, '*r');

%%% Label both axes
xlabel('SSPL'); ylabel('Delta');

% You might also like  (navigation text from the source web page; not part of the script)