# You are on page 1 of 3

# MARKETING ANALYTICS - data - "usedcarsales.csv"

# Load the data set. file.choose() opens an interactive file picker,
# so pick the used car sales CSV file when prompted.
usedcarsales <- read.csv(file.choose())

head(usedcarsales) # First 6 rows of data
head(usedcarsales, 10) # First 10 rows of data
tail(usedcarsales) # Last 6 rows of data

class(usedcarsales) # Data structure of the object
# Data frame - by default rows are observations
# and columns are variables

nrow(usedcarsales) # Number of rows or observations
ncol(usedcarsales) # Number of columns or variables
colnames(usedcarsales) # Names of columns or variables

str(usedcarsales) # Data structure, number of observations
# and number of variables, plus the data type of each variable:
#   Numeric types - int, num
#   String types  - chr
#   Date types    - date format

# EXPLORATORY DATA ANALYSIS (EDA)- DESCRIPTIVE STATISTICS

# + DATA VISUALIZATION

# DESCRIPTIVE STATISTICS - UNIVARIATE STATISTICS

# MEASURES OF CENTRAL TENDENCY - Mean, Median & Mode

# Measures of Dispersion - Range, Variance, Standard

# Deviation, Quartiles, percentiles & Deciles

# Measures of Asymmetry - Skewness & Kurtosis

summary(usedcarsales$Price) # Dependent variable

# The mean and median should be close to each other. If there is a
# large difference between them, the mean is getting distorted.

sd(usedcarsales$Price) # Standard deviation of Price

# Install "psych" only when it is missing, so that re-running the
# script does not reinstall the package (or fail offline) every time.
if (!requireNamespace("psych", quietly = TRUE)) {
  install.packages("psych")
}
library(psych) # Attach the package so describe() can be used

describe(usedcarsales$Price)
# describe() reports: n, mean, sd, median, trimmed mean,
# median absolute deviation (mad), min, max, range (max - min),
# skew, kurtosis and standard error (se)

# Skewness
#   Positive - peak of the curve on the left side
#   Negative - peak of the curve on the right side
#   Zero     - Normal Distribution, Bell Curve or Gaussian Distribution

# Kurtosis
#   Positive - narrow and tall peak
#   Negative - flat and wide peak
#   Zero     - Normal Distribution or Bell Curve

# Descriptive statistics of variable "KM"
describe(usedcarsales$KM)

# Data Visualizations - line plot, bar plot, pie chart, stacked
# bar plot, etc.

# 3 most important plots in AI/ML/Data Science

# 1. Histogram - based on the frequency distribution table
#    Frequency distribution table - class interval (LL-UL), frequency
hist(usedcarsales$Price) # Shows skewness & kurtosis

# 2. Boxplot - based on quartiles: Q1, Q2 or median, Q3,
#    InterQuartileRange (IQR) = Q3 - Q1
#    Identifies outliers - extreme values that fall outside the
#    normal range. Skewness can also be identified.
#    Lower limit = Q1 - 1.5 x IQR
#    Upper limit = Q3 + 1.5 x IQR
# TRUE is spelled out because T is an ordinary variable that can be
# reassigned, whereas TRUE is a reserved word.
boxplot(usedcarsales$Price, horizontal = TRUE)

# 3. Density curve or kernel density curve - a smoothed estimate of
#    the distribution. Shows skewness & kurtosis.
plot(density(usedcarsales$Price))
# Correction strategies for skewness
#   Positive skewness - logarithmic transformation, square
#                       root transformation
#   Negative skewness - exponential transformation, power
#                       transformation

# Redraw the Price boxplot and density curve on the log scale to see
# the effect of the logarithmic transformation.
# NOTE(review): log() needs strictly positive values - presumably
# Price is always > 0; confirm against the data.
boxplot(log(usedcarsales$Price), horizontal = TRUE)

plot(density(log(usedcarsales$Price)))

# Histogram, boxplot & density plot of variable "KM"
hist(usedcarsales$KM)

# TRUE rather than T: T is a plain variable and can be overwritten.
boxplot(usedcarsales$KM, horizontal = TRUE)

plot(density(usedcarsales$KM))

# Non-numeric (categorical) data is summarised with frequency counts
table(usedcarsales[["FuelType"]])
table(usedcarsales[["Automatic"]]) # 0 - Manual, 1 - Automatic
table(usedcarsales[["MetColor"]]) # 0 - No, 1 - Yes
table(usedcarsales[["Doors"]])

# Cross tabulation: a joint frequency table of 2 or more
# non-numeric categorical variables (here FuelType by Doors)
table(usedcarsales[["FuelType"]], usedcarsales[["Doors"]])

# You might also like