You are on page 1 of 20

CSE3505 – FOUNDATIONS OF DATA ANALYTICS

EXPT NO: 5

DATE:19/08/2022

NAME:ACHANTA SAMPATH MIHIR

REG. NO.:20BEC1351

Data Manipulation I

AIM

Understand the following manipulations on the ‘mtcars’ dataset and perform
similar operations on the ‘Credit.csv’ dataset as per the instructions given.
R PROGRAM

# Experiment 5: column selection, renaming, derived columns, sorting and
# aggregation on a 6-row random sample of the Credit dataset.

# Clear the workspace. The previous form `rm=(list=ls())` was an assignment:
# it created variables named `rm` and `list` and removed nothing.
rm(list = ls())
library(dplyr)

data <- read.csv("C:\\Users\\dsp\\Desktop\\20be1351\\Credit.csv")
View(data)

# Work on 6 randomly chosen rows; results differ from run to run unless a
# seed is set beforehand.
data <- sample_n(data, 6)
View(data)

# Keep only the two named columns.
df1 <- select(data, c(Cdur, Cpur))
View(df1)

# Camt plus the contiguous run of columns from Prop through age.
df2 <- select(data, Camt, Prop:age)
View(df2)

# Everything except the two named columns.
df3 <- select(data, -c(Cpur, creditScore))
View(df3)

# Columns whose names begin with "at" (printed rather than opened in the viewer).
df4 <- select(data, starts_with("at"))
df4

# Drop columns whose names end with "at".
df5 <- select(data, -ends_with("at"))
View(df5)

# Move Cpur to the front, keeping all other columns in their existing order.
df6 <- select(data, Cpur, everything())
View(df6)

# Rename Camt to creditscore.
# NOTE(review): the data also appears to have a `creditScore` column (see df3),
# so the two names differ only by case -- confirm this is intended.
df7 <- rename(data, creditscore = Camt)
View(df7)

# Add a derived column: credit duration divided by age.
df8 <- mutate(data, new = Cdur / age)
View(df8)

# Sort by Camt descending, breaking ties by Cdur ascending.
df9 <- arrange(data, desc(Camt), Cdur)
View(df9)

# Mean Cdur for each distinct value of Camt.
v1 <- tapply(data$Cdur, data$Camt, mean)
v1

# Column means. vapply() works column by column and guarantees one numeric
# value per column, instead of coercing the data frame to a matrix as apply()
# does.
k <- vapply(data[c("Cdur", "Camt", "age")], mean, numeric(1))
k

OUTPUT
CSE3505 – FOUNDATIONS OF DATA ANALYTICS

EXPT NO: 6

DATE:26/08/2022

NAME:ACHANTA SAMPATH MIHIR

REG. NO.:20BEC1351

Data Manipulation II
AIM

Understand the following manipulations on the ‘mtcars’ dataset and perform the
tasks given in page 2.

Task 1

Choose 10 random observations from the ‘mtcars’ dataset and find the average weight
(‘wt’) of those cars for which the number of forward gears (‘gear’) is 3 or 4 and the
miles per gallon (‘mpg’) lies in the range 18 to 22 (inclusive).

R PROGRAM

# Experiment 6, Task 1: from 10 random mtcars rows, compute the average `wt`
# of the cars that have 3 or 4 forward gears AND an mpg within [18, 22].
rm(list = ls())
library(dplyr)

data <- mtcars
data1 <- sample_n(data, 10)

# Both conditions must hold for the same car, so they go into a single filter
# on the sample. Previously the gear filter ran on the sample while the mpg
# filter ran on the full dataset, giving two unrelated averages.
df1 <- filter(data1, gear %in% c(3, 4), mpg >= 18, mpg <= 22)
df1

# If no sampled car satisfies both conditions, mean() of the empty vector
# is NaN.
avg1 <- mean(df1$wt)
avg1
OUTPUT
Task 2

Choose 10 random observations from ‘mtcars’ dataset and find the average ‘mpg’
and average quarter mile time (‘qsec’) of those cars for which the ‘gear’ is 4 and
the ‘wt’ is less than 3.

R PROGRAM

# Task 2: draw 10 random rows, keep the 4-gear cars lighter than 3 (wt),
# then report their mean mpg and mean quarter-mile time.
data2 <- data %>% sample_n(10)
df2 <- data2 %>% filter(gear == 4, wt < 3)
df2

avg2 <- mean(df2[["mpg"]])
avg2

avg3 <- mean(df2[["qsec"]])
avg3

OUTPUT
Task 3

Choose 2 sets of 4 random observations from ‘mtcars’ dataset and test the JOIN
functions.

R PROGRAM

# Task 3: two independent 4-row samples, joined on `gear` three different ways.
data_1 <- data %>% sample_n(4)
data_2 <- data %>% sample_n(4)

# Rows whose gear value occurs in both samples (data_2 columns come first).
df3 <- data_2 %>% inner_join(data_1, by = "gear")
df3

# Every row of data_1, with matching data_2 columns where available.
df4 <- data_1 %>% left_join(data_2, by = "gear")
df4

# Every row of data_2, with matching data_1 columns where available.
df5 <- data_1 %>% right_join(data_2, by = "gear")
df5

OUTPUT
Task 4

Choose 2 sets of 12 random observations from ‘mtcars’ dataset and test the
union(), intersect(), and setdiff() functions.

R PROGRAM

# Task 4: two independent 12-row samples, combined with merge() and the
# row-wise set operations.
data_3 <- data %>% sample_n(12)
data_4 <- data %>% sample_n(12)

# Base-R merge keyed on `gear`.
df6 <- merge(x = data_3, y = data_4, by = "gear")
df6

# Rows appearing in either sample.
df7 <- data_3 %>% union(data_4)
df7

# Rows appearing in both samples.
df8 <- data_3 %>% intersect(data_4)
df8

# Rows of data_3 that are not in data_4.
df9 <- data_3 %>% setdiff(data_4)
df9

OUTPUT
CSE3505 – FOUNDATIONS OF DATA ANALYTICS

EXPT NO: 7

DATE:09/09/2022

NAME:ACHANTA SAMPATH MIHIR

REG. NO.:20BEC1351

SOFTWARE USED: R

Task 1

Find the average ‘air_time’ and average ‘distance’ for all flights from ‘JFK’ to
‘SFO’ in the year 2013.

CODE:

# Experiment 7 setup and Task 1: average air_time and distance for
# JFK -> SFO flights in 2013.
library(nycflights13)
library(dplyr)

# Read the full table into `data_a`, then sample from it. Previously the CSV
# was read into `data` and the sample was drawn from an undefined `data_a`.
data_a <- read.csv("D:\\20bec1060\\flights.csv")

# NOTE(review): the tasks ask about *all* flights, but only 30 random rows are
# analysed from here on; use `data_a` instead of `data` for the full answer.
data <- sample_n(data_a, 30)

#TASK1

# The pipe operator must end a line, not start the next one: a line that
# begins with `%>%` after a complete expression is a parse error.
d1 <- data %>%
  filter(origin == "JFK", dest == "SFO", year == 2013) %>%
  select(origin, dest, air_time, distance)

# mean() returns NaN when no row matches the filter.
avg_air_time <- mean(d1$air_time, na.rm = TRUE)
avg_distance <- mean(d1$distance, na.rm = TRUE)

d1

avg_air_time

avg_distance

Task 2

Find the month-wise average ‘arr_delay’ and maximum ‘arr_delay’ for all flights
that landed at ‘SFO’ in the year 2013.

CODE:

# Task 2: per-month average and maximum arrival delay for flights into SFO
# in 2013. Each `%>%` ends its line; a line starting with `%>%` after a
# complete expression does not parse.
d2 <- data %>%
  filter(dest == "SFO", year == 2013) %>%
  group_by(month) %>%
  summarise(
    avg_arr_delay = mean(arr_delay, na.rm = TRUE),
    # max() over a month whose delays are all NA gives -Inf with a warning.
    max_arr_delay = max(arr_delay, na.rm = TRUE)
  )

d2
Task 3

Find the month-wise average ‘dep_delay’ and maximum ‘dep_delay’ for all flights
that departed from ‘JFK’ in the year 2013.

CODE:

#TASK3

# Per-month average and maximum departure delay for flights out of JFK in
# 2013. Each `%>%` ends its line; a line starting with `%>%` after a complete
# expression does not parse.
d3 <- data %>%
  filter(origin == "JFK", year == 2013) %>%
  group_by(month) %>%
  summarise(
    # Named avg_dep_delay (was `dep_arr_delay`) because it is the mean of
    # dep_delay, matching the avg_/max_ naming used for the arrival figures.
    avg_dep_delay = mean(dep_delay, na.rm = TRUE),
    # max() over a month whose delays are all NA gives -Inf with a warning.
    max_dep_delay = max(dep_delay, na.rm = TRUE)
  )

d3
Task 4

Find the average ‘air_time’ and average ‘distance’ for all ‘UA’ flights departed
from ‘JFK’ in the year 2013.

CODE:

#TASK4

# Mean distance and mean air time over the UA flights that left JFK in 2013,
# returned as a one-row summary.
d4 <- summarise(
  filter(data, origin == "JFK", year == 2013, carrier == "UA"),
  avg_dist = mean(distance, na.rm = TRUE),
  avg_air_time = mean(air_time, na.rm = TRUE)
)

d4

Task 5

On your birthday in 2013, how many flights departed from ‘JFK’ between 9:00 AM
and 9:59 AM?

CODE:

# Task 5: flights that departed JFK on 31 May 2013 between 9:00 and 9:59.
# The time window applies to the departure time at both ends; the upper bound
# previously tested arr_time, which selected flights that had already
# *arrived* by 9:59 instead.
df5 <- data %>%
  filter(
    origin == "JFK",
    year == 2013,
    month == 5,
    day == 31,
    between(dep_time, 900, 959)  # dep_time is compared as an HHMM number
  )

df5

# Number of matching flights.
nrow(df5)
CSE3505 – FOUNDATIONS OF DATA ANALYTICS

EXPT NO: 8

DATE: 16/09/2022

NAME:ACHANTA SAMPATH MIHIR

REG. NO.:20BEC1351

Week-8: Summary Statistics

AIM

Perform Descriptive statistics in R with summary functions.

R PROGRAM

# Experiment 8, part 1: descriptive statistics on 10 random mtcars rows.
rm(list = ls())

data_a <- mtcars
data_a

library(dplyr)

data <- data_a %>% sample_n(10)
data

# Five-number summary plus mean for every column.
data %>% summary()

# Missing-value count per column, then non-missing count per column.
sapply(data, function(column) sum(is.na(column)))
colSums(!is.na(data))

# Per-column total, range (min/max), variance and standard deviation,
# ignoring missing values.
data %>% sapply(sum, na.rm = TRUE)
data %>% sapply(range, na.rm = TRUE)
data %>% sapply(var, na.rm = TRUE)
data %>% sapply(sd, na.rm = TRUE)
OUTPUT
R PROGRAM

# Experiment 8, part 2: descriptive statistics on 10 random rows of the
# nycflights13 `flights` table.
rm(list = ls())

library(nycflights13)
data_a <- flights
data_a

library(dplyr)

data <- data_a %>% sample_n(10)
data

# Per-column summary of the sample.
data %>% summary()

# Missing-value count per column, then non-missing count per column.
sapply(data, function(column) sum(is.na(column)))
colSums(!is.na(data))

# Drop the five named columns before the numeric statistics below.
dropped_columns <- c("tailnum", "origin", "dest", "carrier",
                     "time_hour")
data <- data[, !(names(data) %in% dropped_columns)]

# Per-column total, range (min/max), variance and standard deviation,
# ignoring missing values.
data %>% sapply(sum, na.rm = TRUE)
data %>% sapply(range, na.rm = TRUE)
data %>% sapply(var, na.rm = TRUE)
data %>% sapply(sd, na.rm = TRUE)

OUTPUT:

You might also like