Exp5 To Exp8 20BEC1351

CSE3505 – FOUNDATIONS OF DATA ANALYTICS
EXPT NO: 5
DATE:19/08/2022
NAME:ACHANTA SAMPATH MIHIR
REG. NO.:20BEC1351
Data Manipulation I
AIM
Understand the following manipulations on the ‘mtcars’ dataset and perform
similar operations on the ‘Credit.csv’ dataset as per the instructions given.
R PROGRAM
# Experiment 5: column selection, renaming, derived columns, sorting and
# aggregation on a random 6-row sample of Credit.csv.

# Clear the workspace. The earlier form `rm=(list=ls())` was an assignment
# that created variables named `rm` and `list` and removed nothing.
rm(list = ls())

library(dplyr)

data <- read.csv("C:\\Users\\dsp\\Desktop\\20be1351\\Credit.csv")
View(data)

# Keep a random sample of 6 rows; every step below works on this sample.
data <- sample_n(data, 6)
View(data)

# Select two columns by name.
df1 <- select(data, c(Cdur, Cpur))
View(df1)

# Select Camt plus the contiguous run of columns from Prop through age.
df2 <- select(data, Camt, Prop:age)
View(df2)

# Drop two columns by name.
df3 <- select(data, -c(Cpur, creditScore))
View(df3)

# Keep only the columns whose name starts with "at".
df4 <- select(data, starts_with("at"))
df4

# Drop the columns whose name ends with "at".
df5 <- select(data, -ends_with("at"))
View(df5)

# Move Cpur to the front and keep every other column after it.
df6 <- select(data, Cpur, everything())
View(df6)

# rename() takes new_name = old_name: Camt becomes creditscore.
df7 <- rename(data, creditscore = Camt)
View(df7)

# Add a derived column `new` = Cdur / age.
df8 <- transform(data, new = Cdur / age)
View(df8)

# Sort by Camt descending, breaking ties by Cdur ascending.
df9 <- arrange(data, desc(Camt), Cdur)
View(df9)

# Mean of Cdur within each distinct value of Camt.
v1 <- tapply(data$Cdur, data$Camt, mean)
(v1)

# Column means of three columns. colMeans() avoids the data-frame-to-matrix
# coercion that apply(..., 2, mean) performs and returns the same named vector.
k <- colMeans(data[c("Cdur", "Camt", "age")])
(k)
OUTPUT
EXPT NO: 6
DATE:26/08/2022
REG. NO.:20BEC1351
Data Manipulation II
AIM
Understand the following manipulations on the ‘mtcars’ dataset and perform the
tasks given in page 2.
Task 1
Choose 10 random observations from ‘mtcars’ dataset and find the average weight
(‘wt’) of those cars for which the number of forward gears (‘gear’) is 3 or 4 and the
miles per gallon (‘mpg’) is in the range [18, 22].
R PROGRAM
# Experiment 6, Task 1: average weight of the sampled cars that have 3 or 4
# forward gears AND an mpg within [18, 22].
rm(list = ls())
library(dplyr)

data <- mtcars

# Random sample of 10 cars; both conditions are evaluated on this sample.
data1 <- sample_n(data, 10)

# Step 1: cars in the sample with 3 or 4 forward gears.
df1 <- filter(data1, gear %in% c(3, 4))
df1
avg1 <- mean(df1$wt)
avg1

# Step 2: narrow the gear-filtered sample to mpg in [18, 22]. The mpg filter
# previously ran on the full `data`, so it ignored both the sample and the
# gear condition. mean() returns NaN when no sampled car satisfies both.
df2 <- filter(df1, mpg >= 18 & mpg <= 22)
df2
avg4 <- mean(df2$wt)
avg4
OUTPUT
Task 2
Choose 10 random observations from ‘mtcars’ dataset and find the average ‘mpg’
and average quarter mile time (‘qsec’) of those cars for which the ‘gear’ is 4 and
the ‘wt’ is less than 3.
R PROGRAM
# Experiment 6, Task 2: mean mpg and mean quarter-mile time (qsec) for the
# sampled cars that have 4 gears and weigh less than 3.
data2 <- sample_n(data, 10)

df2 <- data2 %>%
  filter(gear == 4, wt < 3)
df2

avg2 <- with(df2, mean(mpg))
avg2

avg3 <- with(df2, mean(qsec))
avg3
OUTPUT
Task 3
Choose 2 sets of 4 random observations from ‘mtcars’ dataset and test the JOIN
functions.
R PROGRAM
# Experiment 6, Task 3: try the dplyr join functions on two independent
# 4-row samples of `data`, matching rows on the `gear` column.
data_1 <- sample_n(data, 4)
# The second sample was missing, so every join below failed with
# "object 'data_2' not found".
data_2 <- sample_n(data, 4)

# Rows whose gear value occurs in both samples.
df3 <- inner_join(data_2, data_1, by = "gear")
df3

# Every row of data_1, with matching data_2 columns where gear matches.
df4 <- left_join(data_1, data_2, by = "gear")
df4

# Every row of data_2, with matching data_1 columns where gear matches.
df5 <- right_join(data_1, data_2, by = "gear")
df5
OUTPUT
Task 4
Choose 2 sets of 12 random observations from ‘mtcars’ dataset and test the
union(), intersect(), and setdiff() functions.
R PROGRAM

# Experiment 6, Task 4: merge and set operations on two independent 12-row
# samples of `data`. Both samples were missing, so every call below failed
# with "object not found".
data_3 <- sample_n(data, 12)
data_4 <- sample_n(data, 12)

# Base-R merge on the shared `gear` column.
df6 <- merge(data_3, data_4, by = "gear")
df6

# With dplyr attached, the set operations below compare whole rows of the
# two data frames.
df7 <- union(data_3, data_4)
df7

df8 <- intersect(data_3, data_4)
df8

df9 <- setdiff(data_3, data_4)
df9
OUTPUT
EXPT NO: 7
DATE:09/09/2022
REG. NO.:20BEC1351
SOFTWARE USED: R
Task 1
Find the average ‘air_time’ and average ‘distance’ for all flights from ‘JFK’ to
‘SFO’ in the year 2013.
CODE:
# Experiment 7, Task 1: average air_time and distance for JFK -> SFO flights.
library(nycflights13)
library(dplyr)

# Load the flights table, then keep a random sample of 30 rows. `data_a` was
# previously undefined at the sample_n() call.
# NOTE(review): the task asks about all flights, but every result below
# describes only this 30-row sample -- confirm the sampling is intended.
data_a <- read.csv('D:\\20bec1060\\flights.csv')
data <- sample_n(data_a, 30)

#TASK1
# The pipe operator must end a line, not start the next one; a line that
# begins with `%>%` is a parse error in R.
d1 <- data %>%
  filter(origin == 'JFK', dest == 'SFO') %>%
  select(origin, dest, air_time, distance)

# na.rm = TRUE skips missing air_time values.
avg_air_time <- mean(d1$air_time, na.rm = TRUE)
avg_distance <- mean(d1$distance)
d1
avg_air_time
avg_distance
Task 2
Find the month-wise average ‘arr_delay’ and maximum ‘arr_delay’ for all flights
that landed at ‘SFO’ in the year 2013.
CODE:
# Experiment 7, Task 2: per-month mean and maximum arrival delay for flights
# to SFO in 2013. Each `%>%` ends its line; the continuation line previously
# began with `%>%`, which is a parse error in R.
d2 <- data %>%
  filter(dest == 'SFO' & year == 2013) %>%
  group_by(month) %>%
  summarise(
    avg_arr_delay = mean(arr_delay, na.rm = TRUE),
    max_arr_delay = max(arr_delay, na.rm = TRUE)
  )
d2
Task 3
Find the month-wise, average ‘dep_delay’ and maximum ‘dep_delay’ for all flights
departed from ‘JFK’ in the year 2013.
CODE:
#TASK3
# Per-month mean and maximum departure delay for flights leaving JFK in 2013.
# Each `%>%` ends its line; the continuation line previously began with
# `%>%`, which is a parse error in R. The mean column is named avg_dep_delay
# (previously the misleading `dep_arr_delay`).
d3 <- data %>%
  filter(origin == 'JFK' & year == 2013) %>%
  group_by(month) %>%
  summarise(
    avg_dep_delay = mean(dep_delay, na.rm = TRUE),
    max_dep_delay = max(dep_delay, na.rm = TRUE)
  )
d3
Task 4
Find the average ‘air_time’ and average ‘distance’ for all ‘UA’ flights departed
from ‘JFK’ in the year 2013.
CODE:
#TASK4
# Mean distance and mean air time of UA flights that left JFK in 2013,
# ignoring missing values.
d4 <- data %>%
  filter(origin == 'JFK', year == 2013, carrier == 'UA') %>%
  summarise(
    avg_dist = mean(distance, na.rm = TRUE),
    avg_air_time = mean(air_time, na.rm = TRUE)
  )
d4
Task 5
On your birthday in 2013, how many flights departed from ‘JFK’ between 9:00 AM and
9:59 AM?
CODE:
# Experiment 7, Task 5: flights that left JFK on 31 May 2013 with a departure
# time from 900 to 959 inclusive. Both bounds apply to dep_time; the upper
# bound previously tested arr_time, which excluded nearly every flight.
# Rows with a missing dep_time are dropped by filter().
df5 <- data %>%
  filter(
    origin == 'JFK' & year == 2013 & month == 5 & day == 31 &
      dep_time >= 900 & dep_time <= 959
  )
df5
nrow(df5)
EXPT NO: 8
DATE: 16/09/2022
REG. NO.:20BEC1351
Week-8: Summary Statistics
AIM
Perform Descriptive statistics in R with summary functions.
R PROGRAM
# Experiment 8 (part 1): descriptive statistics on a 10-row sample of mtcars.
rm(list = ls())

data_a <- mtcars
data_a

library(dplyr)

data <- sample_n(data_a, 10)
data

# Per-column five-number summary and mean.
summary(data)

# Number of missing values in each column.
vapply(data, function(column) sum(is.na(column)), integer(1))

# Number of non-missing values in each column.
colSums(!is.na(data))

# Column-wise total, (min, max) pair, variance and standard deviation,
# each computed with missing values removed.
vapply(data, sum, numeric(1), na.rm = TRUE)
vapply(data, range, numeric(2), na.rm = TRUE)
vapply(data, var, numeric(1), na.rm = TRUE)
vapply(data, sd, numeric(1), na.rm = TRUE)
OUTPUT
R PROGRAM
# Experiment 8 (part 2): descriptive statistics on a 10-row sample of the
# nycflights13 `flights` table.
rm(list = ls())

library(nycflights13)

data_a <- flights
data_a

library(dplyr)

data <- sample_n(data_a, 10)
data

summary(data)

# Number of missing values in each column.
sapply(data, function(column) sum(is.na(column)))

# Number of non-missing values in each column.
colSums(!is.na(data))

# Drop these five columns before computing the numeric statistics below.
data <- data[, setdiff(
  names(data),
  c("tailnum", "origin", "dest", "carrier", "time_hour")
)]

# Column-wise total, (min, max) pair, variance and standard deviation,
# each computed with missing values removed.
sapply(data, function(column) sum(column, na.rm = TRUE))
sapply(data, function(column) range(column, na.rm = TRUE))
sapply(data, function(column) var(column, na.rm = TRUE))
sapply(data, function(column) sd(column, na.rm = TRUE))
OUTPUT:

Exp5 To Exp8 20BEC1351

Uploaded by

Document Information

Original Description:

Original Title

Copyright

Available Formats

Share this document

Share or Embed Document

Sharing Options

Did you find this document useful?

Is this content inappropriate?

Copyright:

Available Formats

Exp5 To Exp8 20BEC1351

Uploaded by

Copyright:

Available Formats

CSE3505 – FOUNDATIONS OF DATA ANALYTICS

NAME:ACHANTA SAMPATH MIHIR

Understand the following manipulations on the ‘mtcars’ dataset and perform

NAME:ACHANTA SAMPATH MIHIR

data_1 <- sample_n(data,4)

data_3 <- sample_n(data,12)

NAME:ACHANTA SAMPATH MIHIR

d1<-data %>% group_by(origin,dest) %>% select(origin, dest, air_time, distance)

d2<- data %>% group_by(month) %>% filter(dest=='SFO'& year==2013)

d3<- data %>% group_by(month) %>% filter(origin=='JFK'& year==2013)

d4<- data %>% filter(origin=='JFK'& year==2013 & carrier=='UA') %>%

NAME:ACHANTA SAMPATH MIHIR

Week-8: Summary Statistics

Perform Descriptive statistics in R with summary functions.

You might also like