Professional Documents
Culture Documents
# Build the three-column data frame that the lookups below index into.
# Without this definition `df` resolves to the base function stats::df
# and `df[[2]]` fails.
df <- data.frame(a = c(1, 2, 3), b = c(4, 5, 6), c = c(7, 8, 9))
# `[[` with a single position extracts that column as a plain vector.
df[[2]]
# Chaining `[[` picks one element out of the extracted column.
df[[1]][[1]]
df[[2]][[3]]
> df <- data.frame(a = c(1, 2, 3), b = c(4, 5, 6), c = c(7, 8, 9))
Membuat Data frame
> df[[2]]
[1] 4 5 6
> df[[1]][[1]]
[1] 1
> df[[2]][[3]]
[1] 6
# Character vector of six colour names.
x <- c("red", "blue", "yellow", "orange", "green", "purple")
# Select the elements at index positions 2, 3 and 4.
y <- x[c(2, 3, 4)]
print(y)
Membuat Vector dan Index Position
> x <- c("red","blue","yellow","orange","green","purple")
> y <- x[c(2,3,4)]
> y
[1] "blue" "yellow" "orange"
# Create a factor from six strings.
x <- factor(c("grape", "apples", "pear", "cherry", "mango", "panda"))
print(x)
# Overwrite the sixth element with "apples", which is already one of the
# factor's levels.
x[6] <- "apples"
print(x)
Mengisi Syntax Factor
> x <- factor(c("grape","apples","pear","cherry","mango","panda"))
> x
[1] grape apples pear cherry mango panda
Levels: apples cherry grape mango panda pear
> x[6] <- "apples"
> x
[1] grape apples pear cherry mango apples
Levels: apples cherry grape mango panda pear
# Return the sum of `x` and `y`.
add_numbers <- function(x, y) {
  x + y
}

# Example call.
add_numbers(3, 3)
Menggunakan Function
> add_numbers <- function(x,y){x+y}
> add_numbers(3,3)
[1] 6
# Replace every NA in `x` with the mean of its non-missing values.
#
# x: a vector that may contain NA.
# Returns `x` with each NA position overwritten by mean(x, na.rm = TRUE).
mean_replace <- function(x) {
  na_mask <- is.na(x)
  x[na_mask] <- mean(x, na.rm = TRUE)
  x
}

# Sample vector with two missing entries.
df <- c(1, 2, 3, 4, 5, 6, NA, 7, 8, 9, NA)
print(df)

# Fill the gaps and show the result.
df <- mean_replace(df)
print(df)
Mengganti Missing Value
> df <- c(1,2,3,4,5,6,NA,7,8,9,NA)
> df
[1] 1 2 3 4 5 6 NA 7 8 9 NA
> mean_replace <- function(x){ x[is.na(x)] <- mean(x, na.rm = TRUE); x}
> df <- mean_replace(df)
> df
[1] 1 2 3 4 5 6 5 7 8 9 5
library(readr)
trees_df <- read_csv("trees.csv")
Subbab Kode
Problem 1 : Langkah Awal
> library(readr)
> trees_df <- read_csv("trees.csv")
Subbab Kode
names(trees_df)
str(trees_df)
names(trees_df)[1] <- "Diameter"
Mengecek Struktur Data
trees_df$diameter_ft <- trees_df$Diameter*0.08333
head(trees_df)
summary(trees_df)
is.na(trees_df)
> names(trees_df)
[1] "Girth" "Height" "Volume"
> str(trees_df)
Classes ‘spec_tbl_df’, ‘tbl_df’, ‘tbl’ and 'data.frame': 31 obs. of 3
variables:
$ Girth : num 8.3 8.6 8.8 10.5 10.7 10.8 11 11 11.1 11.2 ...
$ Height: num 70 65 63 72 81 83 66 75 80 75 ...
$ Volume: num 10.3 10.3 10.2 16.4 18.8 19.7 15.6 18.2 22.6 19.9 ...
- attr(*, "spec")=List of 3
..$ cols :List of 3
.. ..$ Girth : list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
.. ..$ Height: list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
.. ..$ Volume: list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
..$ default: list()
.. ..- attr(*, "class")= chr [1:2] "collector_guess" "collector"
..$ skip : num 1
..- attr(*, "class")= chr "col_spec"
> names(trees_df)[1] <- "Diameter"
> trees_df$diameter_ft <- trees_df$Diameter*0.08333
> head(trees_df)
Diameter Height Volume diameter_ft
1 8.3 70 10.3 0.691639
2 8.6 65 10.3 0.716638
3 8.8 63 10.2 0.733304
4 10.5 72 16.4 0.874965
5 10.7 81 18.8 0.891631
6 10.8 83 19.7 0.899964
> summary(trees_df)
Diameter Height Volume diameter_ft
Min. : 8.30 Min. :63 Min. :10.20 Min. :0.6916
1st Qu.:11.05 1st Qu.:72 1st Qu.:19.40 1st Qu.:0.9208
Median :12.90 Median :76 Median :24.20 Median :1.0750
Mean :13.25 Mean :76 Mean :30.17 Mean :1.1040
3rd Qu.:15.25 3rd Qu.:80 3rd Qu.:37.30 3rd Qu.:1.2708
Max. :20.60 Max. :87 Max. :77.00 Max. :1.7166
> is.na(trees_df)
Diameter Height Volume diameter_ft
[1,] FALSE FALSE FALSE FALSE
[2,] FALSE FALSE FALSE FALSE
[3,] FALSE FALSE FALSE FALSE
[4,] FALSE FALSE FALSE FALSE
[5,] FALSE FALSE FALSE FALSE
[6,] FALSE FALSE FALSE FALSE
[7,] FALSE FALSE FALSE FALSE
[8,] FALSE FALSE FALSE FALSE
[9,] FALSE FALSE FALSE FALSE
[10,] FALSE FALSE FALSE FALSE
[11,] FALSE FALSE FALSE FALSE
[12,] FALSE FALSE FALSE FALSE
[13,] FALSE FALSE FALSE FALSE
[14,] FALSE FALSE FALSE FALSE
[15,] FALSE FALSE FALSE FALSE
[16,] FALSE FALSE FALSE FALSE
[17,] FALSE FALSE FALSE FALSE
[18,] FALSE FALSE FALSE FALSE
[19,] FALSE FALSE FALSE FALSE
Subbab Kode
[20,] FALSE FALSE FALSE FALSE
[21,] FALSE FALSE FALSE FALSE
[22,] FALSE FALSE FALSE FALSE
[23,] FALSE FALSE FALSE FALSE
[24,] FALSE FALSE FALSE FALSE
[25,] FALSE FALSE FALSE FALSE
[26,] FALSE FALSE FALSE FALSE
[27,] FALSE FALSE FALSE FALSE
[28,] FALSE FALSE FALSE FALSE
[29,] FALSE FALSE FALSE FALSE
[30,] FALSE FALSE FALSE FALSE
[31,] FALSE FALSE FALSE FALSE
Subbab Kode
shapiro.test(trees_df$diameter_ft)
shapiro.test(trees_df$Height)
shapiro.test(trees_df$Volume)
Shapiro Test
> shapiro.test(trees_df$diameter_ft)
Shapiro-Wilk normality test
data: trees_df$diameter_ft
W = 0.94117, p-value = 0.08893
> shapiro.test(trees_df$Height)
Shapiro-Wilk normality test
data: trees_df$Height
W = 0.96545, p-value = 0.4034
> shapiro.test(trees_df$Volume)
Shapiro-Wilk normality test
data: trees_df$Volume
W = 0.88757, p-value = 0.003579
plot(density(trees_df$Volume))
Visualisasi
Sederhana
library(readr) #pre-defined
library(dplyr) #pre-defined
# Compute t-test
t_test <- t.test(group1$extra, group2$extra)
t_test
> library(readr) #pre-defined
> t_test
Menghasilkan
Grafik -
Boxplot
Subbab Kode
library(readr)
electric_bill <- read_csv("electric_bill.csv")
model <- lm(amount_paid ~ num_people + housearea, data = electric_bill)
model
> library(readr)
Call:
lm(formula = amount_paid ~ num_people + housearea, data = electric_bill)
Coefficients:
(Intercept) num_people housearea
482.920 4.834 0.118
library(readr)
library(caret)
# Fix the RNG seed so the random split below is reproducible.
set.seed(123)
iris <- read_csv("iris.csv")
# Split the rows into a training and a testing set. Without this step
# `training_set` and `testing_set` are never created and the dim() calls fail.
# NOTE(review): assumes the class column in iris.csv is named `Species` and
# that an 80/20 split is intended — confirm against the data file.
# `[, 1]` turns the one-column index matrix into a plain vector, which
# tibble row subsetting requires.
train_index <- createDataPartition(iris$Species, p = 0.8, list = FALSE)[, 1]
training_set <- iris[train_index, ]
testing_set <- iris[-train_index, ]
dim(training_set)
dim(testing_set)
> library(readr)
> library(caret)
Training dan Testing
> set.seed(123)
> dim(training_set)
[1] 120 5
> dim(testing_set)
[1] 30 5
Subbab Kode
library(readr)
library(caret) #pre-defined
library(rpart) #pre-defined
set.seed(123) #pre-defined
suv_data <- read_csv("suv_data.csv") #pre-defined
> evaluation_result
Confusion Matrix and Statistics
Reference
Prediction 0 1
0 53 1
1 5 21
Accuracy : 0.925
95% CI : (0.8439, 0.972)
No Information Rate : 0.725
P-Value [Acc > NIR] : 7.53e-06
Kappa : 0.822
Sensitivity : 0.9138
Specificity : 0.9545
Pos Pred Value : 0.9815
Neg Pred Value : 0.8077
Prevalence : 0.7250
Detection Rate : 0.6625
Detection Prevalence : 0.6750
Balanced Accuracy : 0.9342
'Positive' Class : 0