You are on page 1 of 12

List Code Project Assessment using R

Membuat Data frame ....................................................................................................................................................... 2


Membuat Vector dan Index Position ................................................................................................................................ 2
Mengisi Syntax Factor ....................................................................................................................................................... 2
Menggunakan Function .................................................................................................................................................... 2
Mengganti Missing Value .................................................................................................................................................. 2
Problem 1 : Langkah Awal ................................................................................................................................................. 3
Mengecek Struktur Data ................................................................................................................................................... 4
Shapiro Test ...................................................................................................................................................................... 7
Visualisasi Sederhana ........................................................................................................................................................ 7
Mencari Hubungan ........................................................................................................................................................... 7
Analisa Efek Pemberian Obat Tidur .................................................................................................................................. 8
Menghasilkan Grafik - Boxplot .......................................................................................................................................... 9
Membuat Model Sederhana ........................................................................................................................................... 10
Training dan Testing ........................................................................................................................................................ 10
Model Decision Tree ....................................................................................................................................................... 11
Subbab Kode
# Build a small data frame with three numeric columns: a, b and c.
df <- data.frame(
  a = c(1, 2, 3),
  b = c(4, 5, 6),
  c = c(7, 8, 9)
)

# `[[` extracts a column as a plain vector; chaining a second `[[` picks a
# single element out of that vector.
df[[2]]      # whole second column
df[[1]][[1]] # first element of the first column
df[[2]][[3]] # third element of the second column
> df <- data.frame(a = c(1, 2, 3), b = c(4, 5, 6), c = c(7, 8, 9))
Membuat
> df[[2]]
Data frame [1] 4 5 6

> df[[1]][[1]]
[1] 1

> df[[2]][[3]]
[1] 6
# Character vector of six colour names.
x <- c("red", "blue", "yellow", "orange", "green", "purple")

# Keep the elements at index positions 2 through 4.
y <- x[2:4]
y
Membuat > x <- c("red","blue","yellow","orange","green","purple")
Vector dan
Index Position > y <- x[c(2,3,4)]
> y
[1] "blue" "yellow" "orange"
# Build a factor from six labels; every distinct label becomes a level.
x <- factor(
  c("grape", "apples", "pear", "cherry", "mango", "panda")
)
x

# Overwrite the sixth element with an already existing level. The "panda"
# level stays in the level set even though no element uses it afterwards.
x[6L] <- "apples"
x
> x <- factor(c("grape","apples","pear","cherry","mango","panda"))

> x
Mengisi
[1] grape apples pear cherry mango panda
Syntax Factor Levels: apples cherry grape mango panda pear

> x[6] <- "apples"

> x
[1] grape apples pear cherry mango apples
Levels: apples cherry grape mango panda pear
# Return `x + y`. Anything that `+` accepts (scalars, vectors) works here.
add_numbers <- function(x, y) {
  x + y
}

add_numbers(3, 3)
Menggunakan > add_numbers <- function(x,y){x+y}
Function
> add_numbers(3,3)
[1] 6
# Numeric vector containing two missing values.
df <- c(1, 2, 3, 4, 5, 6, NA, 7, 8, 9, NA)
df

# Replace every NA in `x` with the mean of the non-missing values of `x`
# and return the filled vector.
mean_replace <- function(x) {
  replace(x, is.na(x), mean(x, na.rm = TRUE))
}

df <- mean_replace(df)
df
> df <- c(1,2,3,4,5,6,NA,7,8,9,NA)

Mengganti > df
Missing Value [1] 1 2 3 4 5 6 NA 7 8 9 NA

> mean_replace <- function(x){ x[is.na(x)] <- mean(x, na.rm = TRUE); x}

> df <- mean_replace(df)

> df
[1] 1 2 3 4 5 6 5 7 8 9 5
# Load readr and read trees.csv (relative to the working directory).
library(readr)
trees_df <- read_csv(file = "trees.csv")
Subbab Kode
> library(readr)
Problem 1 :
Langkah Awal > trees_df <- read_csv("trees.csv")
Subbab Kode
# Section: "Mengecek Struktur Data" (checking the data structure).
# The section label used to be fused into the code lines, which made the
# snippet unparseable; it now lives in this comment.

# Inspect the column names and the structure of the loaded data.
names(trees_df)
str(trees_df)

# Rename the first column to "Diameter".
names(trees_df)[1] <- "Diameter"

# Derived column: Diameter scaled by 0.08333.
# NOTE(review): 0.08333 looks like a rounded 1/12 (inches -> feet) - confirm.
trees_df$diameter_ft <- trees_df$Diameter*0.08333

# First rows, per-column summary statistics, and a cell-by-cell NA check.
head(trees_df)
summary(trees_df)
is.na(trees_df)
> names(trees_df)
[1] "Girth" "Height" "Volume"

> str(trees_df)
Classes ‘spec_tbl_df’, ‘tbl_df’, ‘tbl’ and 'data.frame': 31 obs. of 3
variables:
$ Girth : num 8.3 8.6 8.8 10.5 10.7 10.8 11 11 11.1 11.2 ...
$ Height: num 70 65 63 72 81 83 66 75 80 75 ...
$ Volume: num 10.3 10.3 10.2 16.4 18.8 19.7 15.6 18.2 22.6 19.9 ...
- attr(*, "spec")=List of 3
..$ cols :List of 3
.. ..$ Girth : list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
.. ..$ Height: list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
.. ..$ Volume: list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
..$ default: list()
.. ..- attr(*, "class")= chr [1:2] "collector_guess" "collector"
..$ skip : num 1
..- attr(*, "class")= chr "col_spec"

> names(trees_df)[1] <- "Diameter"

> trees_df$diameter_ft <- trees_df$Diameter*0.08333

> head(trees_df)
Diameter Height Volume diameter_ft
1 8.3 70 10.3 0.691639
2 8.6 65 10.3 0.716638
3 8.8 63 10.2 0.733304
4 10.5 72 16.4 0.874965
5 10.7 81 18.8 0.891631
6 10.8 83 19.7 0.899964

> summary(trees_df)
Diameter Height Volume diameter_ft
Min. : 8.30 Min. :63 Min. :10.20 Min. :0.6916
1st Qu.:11.05 1st Qu.:72 1st Qu.:19.40 1st Qu.:0.9208
Median :12.90 Median :76 Median :24.20 Median :1.0750
Mean :13.25 Mean :76 Mean :30.17 Mean :1.1040
3rd Qu.:15.25 3rd Qu.:80 3rd Qu.:37.30 3rd Qu.:1.2708
Max. :20.60 Max. :87 Max. :77.00 Max. :1.7166

> is.na(trees_df)
Diameter Height Volume diameter_ft
[1,] FALSE FALSE FALSE FALSE
[2,] FALSE FALSE FALSE FALSE
[3,] FALSE FALSE FALSE FALSE
[4,] FALSE FALSE FALSE FALSE
[5,] FALSE FALSE FALSE FALSE
[6,] FALSE FALSE FALSE FALSE
[7,] FALSE FALSE FALSE FALSE
[8,] FALSE FALSE FALSE FALSE
[9,] FALSE FALSE FALSE FALSE
[10,] FALSE FALSE FALSE FALSE
[11,] FALSE FALSE FALSE FALSE
[12,] FALSE FALSE FALSE FALSE
[13,] FALSE FALSE FALSE FALSE
[14,] FALSE FALSE FALSE FALSE
[15,] FALSE FALSE FALSE FALSE
[16,] FALSE FALSE FALSE FALSE
[17,] FALSE FALSE FALSE FALSE
[18,] FALSE FALSE FALSE FALSE
[19,] FALSE FALSE FALSE FALSE
Subbab Kode
[20,] FALSE FALSE FALSE FALSE
[21,] FALSE FALSE FALSE FALSE
[22,] FALSE FALSE FALSE FALSE
[23,] FALSE FALSE FALSE FALSE
[24,] FALSE FALSE FALSE FALSE
[25,] FALSE FALSE FALSE FALSE
[26,] FALSE FALSE FALSE FALSE
[27,] FALSE FALSE FALSE FALSE
[28,] FALSE FALSE FALSE FALSE
[29,] FALSE FALSE FALSE FALSE
[30,] FALSE FALSE FALSE FALSE
[31,] FALSE FALSE FALSE FALSE
Subbab Kode
# Shapiro-Wilk normality test for each of the three measurement columns.
# The arguments are kept as literal `trees_df$...` expressions because the
# "data:" line of each printed report echoes the expression that was passed.
shapiro.test(trees_df$diameter_ft)
shapiro.test(trees_df$Height)
shapiro.test(trees_df$Volume)
> shapiro.test(trees_df$diameter_ft)

Shapiro-Wilk normality test

data: trees_df$diameter_ft
W = 0.94117, p-value = 0.08893

> shapiro.test(trees_df$Height)
Shapiro Test
Shapiro-Wilk normality test

data: trees_df$Height
W = 0.96545, p-value = 0.4034

> shapiro.test(trees_df$Volume)

Shapiro-Wilk normality test

data: trees_df$Volume
W = 0.88757, p-value = 0.003579
# Plot the density estimate of the Volume column.
plot(density(trees_df$Volume))

Visualisasi
Sederhana

# Section: "Mencari Hubungan" (looking for relationships).
# The section label used to be fused into the code lines, which made the
# snippet unparseable; it now lives in this comment.

# Linear model of Volume on Height and diameter_ft (auto-printed).
lm(formula = Volume ~ Height + diameter_ft, data = trees_df)

# Scatter plots of Volume against each of the two predictors.
plot(trees_df$diameter_ft, trees_df$Volume)
plot(trees_df$Height, trees_df$Volume)
Subbab Kode

library(readr) #pre-defined
library(dplyr) #pre-defined

sleep_df <- read_csv('sleep.csv') #pre-defined

# Save the data in two different dataframe/vector
# (one subset per value of the `group` column; the column is looked up
# inside `sleep_df` by filter() itself).
group1 <- filter(sleep_df, group == 1)
group2 <- filter(sleep_df, group == 2)

# Compute t-test
# Two-sample comparison of `extra` between the two subsets. The argument
# expressions are kept as-is because the report's "data:" line echoes them.
t_test <- t.test(group1$extra, group2$extra)
t_test
> library(readr) #pre-defined

> library(dplyr) #pre-defined

> sleep_df <- read_csv('sleep.csv') #pre-defined


Analisa Efek
> # Save the data in two different dataframe/vector
Pemberian
> group1 <- filter(sleep_df, sleep_df$group == 1)
Obat Tidur
> group2 <- filter(sleep_df, sleep_df$group == 2)

> # Compute t-test


> t_test <- t.test(group1$extra, group2$extra)

> t_test

Welch Two Sample t-test

data: group1$extra and group2$extra


t = -1.8608, df = 17.776, p-value = 0.07939
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-3.3654832 0.2054832
sample estimates:
mean of x mean of y
0.75 2.33
library(ggplot2)

# Box plot of `extra` for each `group`, with the boxes filled per group.
# `group` is converted to character so it is treated as a discrete variable.
#
# The `+` has to END a line: a line that STARTS with `+` is parsed as a new
# statement (unary plus applied to geom_boxplot()), so the layer would never
# be added to the plot.
ggplot(
  sleep_df,
  aes(x = as.character(group), y = extra, fill = as.character(group))
) +
  geom_boxplot()
Subbab Kode

Menghasilkan
Grafik -
Boxplot
Subbab Kode
# Read the electric bill data and fit a linear model of the amount paid on
# the number of people and the house area.
library(readr)
electric_bill <- read_csv("electric_bill.csv")
# The call is written out literally: printing the model echoes it under "Call:".
model <- lm(amount_paid ~ num_people + housearea, data = electric_bill)

# Auto-print the fitted model (call and coefficients).
model
> library(readr)

> electric_bill <- read_csv("electric_bill.csv")


Membuat
Model > model <- lm(amount_paid ~ num_people + housearea, data = electric_bill)
Sederhana
> model

Call:
lm(formula = amount_paid ~ num_people + housearea, data = electric_bill)

Coefficients:
(Intercept) num_people housearea
482.920 4.834 0.118
library(readr)
library(caret)

# Fix the RNG seed so the random partition is reproducible.
set.seed(123)
iris <- read_csv("iris.csv")

# Row positions for an 80% training sample drawn with respect to Species.
# With list=FALSE the result is a one-column matrix rather than a vector.
trainIndex <- createDataPartition(iris$Species, p=0.8, list=FALSE)

# read_csv() returns a tibble, and indexing tibble rows with a matrix is
# deprecated (tibble >= 3.0.0), so the index is flattened to a plain vector.
# Negating the vector selects the rows that were not sampled.
training_set <- iris[as.vector(trainIndex), ]
testing_set <- iris[-as.vector(trainIndex), ]

# Report rows x columns of each split.
dim(training_set)
dim(testing_set)
> library(readr)

> library(caret)
Training dan
Testing > set.seed(123)

> iris <- read_csv("iris.csv")

> trainIndex <- createDataPartition(iris$Species, p=0.8, list=FALSE)

> training_set <- iris[trainIndex, ]

> testing_set <- iris[-trainIndex, ]

> dim(training_set)
[1] 120 5

> dim(testing_set)
[1] 30 5
Subbab Kode
# Section: "Model Decision Tree".
# The section label used to be fused into the code lines, which made the
# snippet unparseable; it now lives in this comment.
library(readr)
library(caret) #pre-defined
library(rpart) #pre-defined
set.seed(123) #pre-defined
suv_data <- read_csv("suv_data.csv") #pre-defined

#split data to training & testing set
# With list=FALSE createDataPartition() returns a one-column matrix.
# read_csv() returns a tibble, and indexing tibble rows with a matrix is
# deprecated (tibble >= 3.0.0), so the index is flattened to a plain vector.
trainIndex <- createDataPartition(suv_data$Purchased, p=0.8, list=FALSE)
training_set <- suv_data[as.vector(trainIndex), ]
testing_set <- suv_data[-as.vector(trainIndex), ]

#build model with decision tree
# Classification tree of Purchased on all remaining columns, then class
# predictions for the held-out rows.
model_dt <- rpart(Purchased ~.,data=training_set, method="class")
predictions_dt <- predict(model_dt, newdata=testing_set, type = "class")

#evaluate performance with new data/ testing_set
# confusionMatrix() compares factors, so the reference labels are converted.
testing_purchased <- factor(testing_set$Purchased) #pre-defined

#show the evaluation result
evaluation_result <- confusionMatrix(predictions_dt,testing_purchased)
evaluation_result
Subbab Kode
> library(readr)

> library(caret) #pre-defined

> library(rpart) #pre-defined

> set.seed(123) #pre-defined

> suv_data <- read_csv("suv_data.csv") #pre-defined

> #split data to training & testing set


> trainIndex <- createDataPartition(suv_data$Purchased, p=0.8, list=FALSE)

> training_set <- suv_data[trainIndex, ]

> testing_set <- suv_data[-trainIndex, ]

> #build model with decision tree


> model_dt <- rpart(Purchased ~.,data=training_set, method="class")

> predictions_dt <- predict(model_dt, newdata=testing_set, type = "class")

> #evaluate performance with new data/ testing_set


> testing_purchased <- factor(testing_set$Purchased) #pre-defined

> #show the evaluation result


> evaluation_result <- confusionMatrix(predictions_dt,testing_purchased)

> evaluation_result
Confusion Matrix and Statistics

Reference
Prediction 0 1
0 53 1
1 5 21

Accuracy : 0.925
95% CI : (0.8439, 0.972)
No Information Rate : 0.725
P-Value [Acc > NIR] : 7.53e-06

Kappa : 0.822

Mcnemar's Test P-Value : 0.2207

Sensitivity : 0.9138
Specificity : 0.9545
Pos Pred Value : 0.9815
Neg Pred Value : 0.8077
Prevalence : 0.7250
Detection Rate : 0.6625
Detection Prevalence : 0.6750
Balanced Accuracy : 0.9342

'Positive' Class : 0

You might also like