You are on page 1of 17

CSE3505 – FOUNDATIONS OF DATA ANALYTICS

EXPT NO: 9

DATE:30/09/2022

NAME:ACHANTA SAMPATH MIHIR

REG. NO.:20BEC1351

Reading Datasets and Viewing Details

AIM

Understand the following operations/functions on ‘mtcars’ dataset and perform


similar operations on one other dataset (say ‘Credit.csv’).

R PROGRAM

# Experiment 9: read Credit.csv, plot the Cdur column, and build a
# correlation heatmap of the Cdur, Camt and age columns.
#
# The original script began with rm(list = ls()); it was dropped because
# every object used below is assigned before it is read, and wiping the
# caller's workspace is an unwanted side effect of running a script.

# install.packages("ggplot2")
# install.packages("reshape2")
library(ggplot2)
library(dplyr)
library(reshape2)

# NOTE(review): absolute, machine-specific path -- adjust before running
# on another computer.
data <- read.csv("C:\\Users\\dsp\\Desktop\\20be1351\\Credit.csv")

data

# Index plot of Cdur. seq_along() also copes with a zero-length column,
# where seq(1, 0, by = 1) would raise an error, and frame.plot is spelled
# out instead of relying on partial argument matching of `frame`.
plot(
  seq_along(data$Cdur), data$Cdur,
  main = "CDUR", xlab = "Index", ylab = "Cdur",
  pch = 19, frame.plot = TRUE
)

# Scatter plot of Camt against Cdur.
ggplot(data, aes(x = Cdur, y = Camt)) +
  geom_point()

# Correlation test between Cdur and Camt.
cor.test(data$Cdur, data$Camt)

# Keep only the three columns that go into the correlation matrix.
data <- select(data, c(Cdur, Camt, age))

data

# Pairwise correlations, rounded to four decimals.
cmatrix <- round(cor(data), 4)

cmatrix

# melt() turns the matrix into long form (Var1, Var2, value), which is
# the layout geom_tile() needs for the heatmap.
cmatrix_melted <- melt(cmatrix)

cmatrix_melted

ggplot(cmatrix_melted, aes(x = Var1, y = Var2, fill = value)) +
  geom_tile()

OUTPUT
CSE3505 – FOUNDATIONS OF DATA ANALYTICS

EXPT NO: 10

DATE:07/10/2022

NAME:ACHANTA SAMPATH MIHIR

REG. NO.: 20BEC1351

LINEAR REGRESSION

AIM

Understand the following operations/functions on ‘mtcars’ dataset and perform


similar operations on ‘flights’ dataset.

R PROGRAM

# Experiment 10: simple linear regression of `charges` on `bmi` using
# insurance.csv.
#
# The original script began with rm(list = ls()); it was dropped because
# every object used below is assigned before it is read.

# NOTE(review): absolute, machine-specific path -- adjust before running
# on another computer.
data <- read.csv("C:\\Users\\dsp\\Desktop\\20be1351\\insurance.csv")
data

# Regression. Fitting through the `data` argument (instead of detached
# x / y vectors) makes the coefficient table, the plot axes and the
# predict() input carry the real column names.
reg <- lm(charges ~ bmi, data = data)
reg
summary(reg)

# Plotting: scatter of the raw data with the fitted line on top.
plot(charges ~ bmi, data = data)
abline(reg)

# Predicted charges for a single new observation with bmi = 30.
a <- data.frame(bmi = 30)
predict(reg, a)
OUTPUT
CSE3505 – FOUNDATIONS OF DATA ANALYTICS

EXPT NO: 11

DATE:21/10/2022

NAME:ACHANTA SAMPATH MIHIR

REG. NO.:20BEC1351

MULTIPLE LINEAR REGRESSION

AIM

Understand the following operations/functions on ‘mtcars’ dataset and perform


similar operations on ‘Credit.csv’ dataset based on given instructions.

R PROGRAM

# Experiment 11: simple and multiple linear regression on a 20-row sample
# of Credit.csv, with residual diagnostics for each model.
#
# The original script began with rm(list = ls()); it was dropped because
# every object used below is assigned before it is read.

library(dplyr)
library(ggplot2)

# NOTE(review): absolute, machine-specific path -- adjust before running
# on another computer.
data <- read.csv("C:\\Users\\dsp\\Downloads\\Credit (2).csv")

# Random 20-row subsample. No seed is set, so the rows (and every result
# below) change from run to run; call set.seed() first if the output has
# to be reproducible.
data <- sample_n(data, 20)
data

ggplot(data, aes(x = Cdur, y = Camt)) +
  geom_point()
cor.test(data$Cdur, data$Camt)

# Simple linear regression: Camt ~ Cdur ----
slr <- lm(Camt ~ Cdur, data = data)
summary(slr)

# residuals() replaces slr$resid, which only worked through partial
# matching of the `residuals` component name.
slr_resid <- residuals(slr)
plot(slr_resid, ylab = "Residuals")

# Residuals against Cdur. The original plotted the residuals against
# data$Cdur[order(data$Cdur)], i.e. a sorted copy of Cdur, which paired
# each residual with the wrong observation. model.frame() returns the
# rows actually used in the fit, in the same order as the residuals.
plot(
  model.frame(slr)$Cdur, slr_resid,
  main = "Cdur vs Residuals - Simple Linear Regression",
  xlab = "Cdur", ylab = "Residuals"
)

# The residuals are on the x axis of a histogram; the y axis is the
# bin frequency, so it keeps its default label.
hist(slr_resid, main = "Histogram of Residuals", xlab = "Residuals")

qqnorm(slr_resid) # Q-Q plot
qqline(slr_resid)

plot(slr, which = 2)

# Multiple linear regression: Cdur ~ Camt + age ----
# NOTE(review): the response here is Cdur, whereas the simple model above
# uses Camt as the response. If the multiple model is meant to extend the
# simple one, the formula should probably be Camt ~ Cdur + age -- confirm
# against the lab instructions. The formula is kept as originally written.
mlr <- lm(Cdur ~ Camt + age, data = data)
summary(mlr)

mlr_resid <- residuals(mlr)
plot(mlr_resid, ylab = "Residuals")

# Same alignment fix as for the simple model: take Cdur from the model
# frame so each value lines up with its own residual.
plot(
  model.frame(mlr)$Cdur, mlr_resid,
  main = "Cdur vs Residuals - Multiple Linear Regression",
  xlab = "Cdur", ylab = "Residuals"
)

hist(mlr_resid, main = "Histogram of Residuals", xlab = "Residuals")

qqnorm(mlr_resid) # Q-Q plot
qqline(mlr_resid)

plot(mlr, which = 2)
OUTPUT
CSE3505 – FOUNDATIONS OF DATA ANALYTICS

EXPT NO: 12

DATE:04-11-2022

NAME:ACHANTA SAMPATH MIHIR

REG. NO.:20BEC1351

Week-12: Structured Query Language (SQL) in R


AIM

Understand the following operations/functions on ‘mtcars’ and ‘iris’ datasets and


perform similar operations on Credit.csv dataset.

R PROGRAM

# Experiment 12: run SQL queries against a 100-row sample of Credit.csv
# through sqldf. Each result is stored in df1 ... df9.
#
# The original script began with rm(list = ls()); it was dropped because
# every object used below is assigned before it is read.

# install.packages("sqldf")
library(dplyr)
library(sqldf)

# NOTE(review): absolute, machine-specific path -- adjust before running
# on another computer.
data_a <- read.csv("C:\\Users\\asmih\\Desktop\\R prog\\Credit.csv")

# Random 100-row subsample; sqldf looks the data frame up by the name
# used in the FROM clause, so this object must stay named `data_credit`.
data_credit <- sample_n(data_a, 100)

df1 <- sqldf("Select * from data_credit")
df2 <- sqldf("select camt from data_credit") # Select one column
df3 <- sqldf('select "age" from data_credit') # Select, quoted identifier
df4 <- sqldf("select * from data_credit LIMIT 5") # Head
df5 <- sqldf("select * from data_credit ORDER BY cdur desc") # Sorting
df6 <- sqldf("select age from data_credit WHERE age > 30") # Filtering
# Filtering - exclude a set of values (NOT IN is not a range test)
df7 <- sqldf("select cdur from data_credit WHERE cdur not in (11,13)")
# Filtering - category. The pattern is a SQL string literal and therefore
# goes in single quotes; double quotes denote an identifier and are only
# accepted as a string through a legacy SQLite fallback.
df8 <- sqldf("select * from data_credit WHERE prop LIKE 'real estate'")
# Aggregation - mean cdur and camt per age, for ages above 30
df9 <- sqldf("select avg(cdur) as avg_cdur, avg(camt) as avg_cmt, age from data_credit WHERE
age > 30 GROUP BY age")
OUTPUT

You might also like