Professional Documents
Culture Documents
EXPT NO: 5
DATE:19/08/2022
REG. NO.:20BEC1351
Data Manipulation I
AIM
# Experiment 5: basic dplyr column/row manipulations on the Credit data set.

# Clear the workspace. The earlier form `rm=(list=ls())` was an assignment
# that created variables named `rm` and `list` and removed nothing.
rm(list = ls())
library(dplyr)

data <- read.csv("C:\\Users\\dsp\\Desktop\\20be1351\\Credit.csv")
View(data)

# Work on a random sample of 6 rows from here on.
data <- sample_n(data, 6)
View(data)

# Keep only the Cdur and Cpur columns.
df1 <- select(data, c(Cdur, Cpur))
View(df1)

# Keep Camt plus the contiguous column range Prop through age.
df2 <- select(data, Camt, Prop:age)
View(df2)

# Drop the Cpur and creditScore columns.
df3 <- select(data, -c(Cpur, creditScore))
View(df3)

# Columns whose name starts with "at".
df4 <- select(data, starts_with("at"))
df4

# Every column except those whose name ends with "at".
df5 <- select(data, -ends_with("at"))
View(df5)

# Move Cpur to the front, keep all remaining columns in their order.
df6 <- select(data, Cpur, everything())
View(df6)

# Rename Camt to creditscore (new_name = old_name).
df7 <- rename(data, creditscore = Camt)
View(df7)

# Add a derived column `new` = Cdur / age.
df8 <- transform(data, new = Cdur / age)
View(df8)

# Sort by Camt descending, breaking ties by Cdur ascending.
df9 <- arrange(data, desc(Camt), Cdur)
View(df9)

# Mean Cdur within each distinct Camt value.
v1 <- tapply(data$Cdur, data$Camt, mean)
v1

# Column means of the three columns Cdur, Camt and age.
k <- colMeans(data[c("Cdur", "Camt", "age")])
k
OUTPUT
CSE3505 – FOUNDATIONS OF DATA ANALYTICS
EXPT NO: 6
DATE:26/08/2022
REG. NO.:20BEC1351
Data Manipulation II
AIM
Understand the following manipulations on the ‘mtcars’ dataset and perform the
tasks given in page 2.
Task 1
Choose 10 random observations from the ‘mtcars’ dataset and find the average weight
(‘wt’) of those cars for which the number of forward gears (‘gear’) is 3 or 4 and the
miles per gallon (‘mpg’) lies in the range [18, 22].
R PROGRAM
# Task 1: mean weight (wt) of the sampled cars that have 3 or 4 forward
# gears AND an mpg within [18, 22].
rm(list = ls())
library(dplyr)
data <- mtcars

# Draw the 10 random observations the task asks for.
data1 <- sample_n(data, 10)

# Both conditions must hold on the same sampled rows. Previously the gear
# filter and the mpg filter were applied separately (the mpg one on the full
# data set instead of the sample), giving two unrelated averages.
df1 <- filter(data1, gear %in% c(3, 4), between(mpg, 18, 22))
df1

# mean() of an empty vector is NaN, i.e. no sampled car met both conditions.
avg1 <- mean(df1$wt)
avg1
OUTPUT
Task 2
Choose 10 random observations from ‘mtcars’ dataset and find the average ‘mpg’
and average quarter mile time (‘qsec’) of those cars for which the ‘gear’ is 4 and
the ‘wt’ is less than 3.
R PROGRAM
# Task 2: from 10 randomly sampled cars, keep those with gear equal to 4 and
# wt below 3, then report their mean mpg and mean qsec.
data2 <- data %>% sample_n(10)
df2 <- data2 %>% filter(gear == 4, wt < 3)
df2

avg2 <- mean(df2[["mpg"]])
avg2

avg3 <- mean(df2[["qsec"]])
avg3
OUTPUT
Task 3
Choose 2 sets of 4 random observations from ‘mtcars’ dataset and test the JOIN
functions.
R PROGRAM
OUTPUT
Task 4
Choose 2 sets of 12 random observations from ‘mtcars’ dataset and test the
union(), intersect(), and setdiff() functions.
R PROGRAM
OUTPUT
CSE3505 – FOUNDATIONS OF DATA ANALYTICS
EXPT NO: 7
DATE:09/09/2022
REG. NO.:20BEC1351
SOFTWARE USED: R
Task 1
Find the average ‘air_time’ and average ‘distance’ for all flights from ‘JFK’ to
‘SFO’ in the year 2013.
CODE:
# Task 1: average air_time and distance of all 2013 flights from JFK to SFO.
library(nycflights13)
library(dplyr)

# Full flights table. The earlier code read the file into `data` and then
# sampled from `data_a`, which was never defined.
# NOTE(review): assumes the CSV has the same columns as nycflights13::flights.
data_a <- read.csv('D:\\20bec1060\\flights.csv')

# The task is stated over *all* flights, so every row is kept; a 30-row
# random sample would almost never contain a JFK -> SFO flight.
data <- data_a

#TASK1
# `d1` was used below without ever being built; this is the missing filter.
d1 <- filter(data, origin == "JFK", dest == "SFO", year == 2013)

# air_time is missing for some flights, so NAs are dropped from the averages.
avg_air_time <- mean(d1$air_time, na.rm = TRUE)
avg_distance <- mean(d1$distance, na.rm = TRUE)
d1
avg_air_time
avg_distance
Task 2
Find the month-wise average ‘arr_delay’ and maximum ‘arr_delay’ for all flights
that landed at ‘SFO’ in the year 2013.
CODE:
#TASK2
# Month-wise mean and maximum arrival delay of 2013 flights that landed at SFO.
# NOTE(review): the statement that built `d2` was missing from this file; it
# is reconstructed here from the task description. Confirm against the
# original submission.
d2 <- data %>%
  filter(dest == "SFO", year == 2013) %>%
  group_by(month) %>%
  summarise(
    avg_arr_delay = mean(arr_delay, na.rm = TRUE),
    max_arr_delay = max(arr_delay, na.rm = TRUE)
  )
d2
Task 3
Find the month-wise, average ‘dep_delay’ and maximum ‘dep_delay’ for all flights
departed from ‘JFK’ in the year 2013.
CODE:
#TASK3
# Month-wise mean and maximum departure delay of 2013 flights leaving JFK.
# NOTE(review): the statement that built `d3` was missing from this file; it
# is reconstructed here from the task description. Confirm against the
# original submission.
d3 <- data %>%
  filter(origin == "JFK", year == 2013) %>%
  group_by(month) %>%
  summarise(
    avg_dep_delay = mean(dep_delay, na.rm = TRUE),
    max_dep_delay = max(dep_delay, na.rm = TRUE)
  )
d3
Task 4
Find the average ‘air_time’ and average ‘distance’ for all ‘UA’ flights departed
from ‘JFK’ in the year 2013.
CODE:
#TASK4
# Mean air_time and mean distance of 2013 'UA' flights that departed from JFK.
# NOTE(review): the statement that built `d4` was missing from this file; it
# is reconstructed here from the task description. Confirm against the
# original submission.
d4 <- data %>%
  filter(carrier == "UA", origin == "JFK", year == 2013) %>%
  summarise(
    avg_air_time = mean(air_time, na.rm = TRUE),
    avg_distance = mean(distance, na.rm = TRUE)
  )
d4
Task 5
On your birthday in 2013, how many flights departed from ‘JFK’ between 9 AM and
9:59 AM?
CODE:
#TASK5
# Flights that left JFK on 31 May 2013 with a departure time from 9:00 to 9:59.
# Both bounds must be on dep_time; the earlier upper bound tested arr_time,
# which selected flights that *arrived* by 9:59 instead.
# NOTE(review): assumes dep_time is stored as an HHMM integer (900 = 9:00 AM),
# as in nycflights13::flights - confirm for the CSV in use.
df5 <- data %>%
  filter(
    origin == "JFK", year == 2013, month == 5, day == 31,
    between(dep_time, 900, 959)
  )
df5

# Number of matching flights.
nrow(df5)
CSE3505 – FOUNDATIONS OF DATA ANALYTICS
EXPT NO: 8
DATE: 16/09/2022
REG. NO.:20BEC1351
AIM
R PROGRAM
# Descriptive statistics for a 10-row random sample of mtcars.
rm(list = ls())
library(dplyr)

data_a <- mtcars
data_a

data <- sample_n(data_a, 10)
data

# Per-column five-number summary and mean.
summary(data)

# Missing and non-missing value counts per column.
vapply(data, function(column) sum(is.na(column)), integer(1))
colSums(!is.na(data))

# Per-column total, (min, max) pair, variance and standard deviation,
# each ignoring NAs.
vapply(data, sum, numeric(1), na.rm = TRUE)
vapply(data, range, numeric(2), na.rm = TRUE)
vapply(data, var, numeric(1), na.rm = TRUE)
vapply(data, sd, numeric(1), na.rm = TRUE)
OUTPUT
R PROGRAM
# Descriptive statistics for a 10-row random sample of nycflights13::flights.
rm(list = ls())
library(nycflights13)
library(dplyr)

data_a <- flights
data_a

data <- sample_n(data_a, 10)
data

# Per-column summary of the sample.
summary(data)

# Missing and non-missing value counts per column.
vapply(data, function(column) sum(is.na(column)), integer(1))
colSums(!is.na(data))

# Drop these five columns before the numeric statistics below.
data <- data[, setdiff(
  names(data),
  c("tailnum", "origin", "dest", "carrier", "time_hour")
)]

# Per-column total, (min, max) pair, variance and standard deviation,
# each ignoring NAs.
vapply(data, sum, numeric(1), na.rm = TRUE)
vapply(data, range, numeric(2), na.rm = TRUE)
vapply(data, var, numeric(1), na.rm = TRUE)
vapply(data, sd, numeric(1), na.rm = TRUE)
OUTPUT: