Professional Documents
Culture Documents
Officially: Hello ! Formation R - https://thinkr.fr 3 / 470
A sentence per bullet-point:
For the ones who cannot remember names Formation R - https://thinkr.fr 7 / 470
Our Goal: Making You Independent
.Rproj
(10 + 2) * 5
<-
a <- 15
a
n <- 10 + 2
n
n <- 3 * 2
n
a <- 3
a
#> [1] 3
A <- 9
A
#> [1] 9
#> [1] 3
a <- 5
b <- a * 4
a <- B
a a
b B
runif()
runif()
runif(n = 1)
runif(n = 1, min = -5)
runif(n = 3, max = 5)
runif(n = 1, min = -5, max = 5)
runif(n = 3, min = -5, max = 5)
{proustr}
{proustr}
{proustr}
library(proustr)
data()
{proustr} albertinedisparue
alombredesjeunesfillesenfleurs ducotedechezswann laprisonniere
lecotedeguermantes letemprepreve proust_char sodomeetgomorrhe
stop_words
data(stop_words)
draw()
bmi
consumed_quantity
age_class
food_type
#> # A tibble: 6 × 4
#> bmi age_class food_type consumed_quanti…
#> <dbl> <fct> <chr> <dbl>
#> 1 13 7-10 years Sweets and chocolate 84.4
#> 2 13 7-10 years Sandwiches, Pizzas, Pies, Pastries and Sav… 135.
#> 3 13 7-10 years Viennese pastries, cakes and sweet cookies 166.
#> 4 13 11-14 years Sweets and chocolate 23.0
#> 5 13 11-14 years Sandwiches, Pizzas, Pies, Pastries and Sav… 115.
#> 6 13 11-14 years Viennese pastries, cakes and sweet cookies 188.
ggplot(data = ...) +
aes(x = ..., y = ...) +
geom_...()
data.frame
bmi
age_class
food_type
consumed_quantity
#> # A tibble: 6 × 4
#> bmi age_class food_type consumed_quanti…
#> <dbl> <fct> <chr> <dbl>
#> 1 13 7-10 years Sweets and chocolate 84.4
#> 2 13 7-10 years Sandwiches, Pizzas, Pies, Pastries and Sav… 135.
#> 3 13 7-10 years Viennese pastries, cakes and sweet cookies 166.
#> 4 13 11-14 years Sweets and chocolate 23.0
#> 5 13 11-14 years Sandwiches, Pizzas, Pies, Pastries and Sav… 115.
#> 6 13 11-14 years Viennese pastries, cakes and sweet cookies 188.
ggplot(data = data_plot_target) +
aes(x = bmi, y = consumed_quantity) +
geom_point()
ggplot(data = data_plot_target) +
aes(x = bmi, y = consumed_quantity) +
geom_point()
ggplot(data = ...)
aes(...)
geom_...()
aes()
color
x fill
y shape
size
alpha
geom_*()
geom_histogram()
geom_point()
geom_boxplot()
geom_density()
geom_violin()
geom_col()
geom_label()
geom_*()
geom_*()
bmi
age_class
food_type
consumed_quantity
bmi
consumed_quantity
ggplot(data = data_plot_target) +
aes(
x = bmi,
y = consumed_quantity
) +
geom_point()
bmi
consumed_quantity
ggplot(data = data_plot_target) +
aes(
x = bmi,
y = consumed_quantity
) +
geom_point()
bmi
consumed_quantity
food_type
ggplot(data = data_plot_target) +
aes(
x = bmi,
y = consumed_quantity,
color = food_type
) +
geom_point()
bmi
age_class
ggplot(data = data_plot_target) +
aes(
x = bmi,
fill = age_class
) +
geom_density()
bmi
age_class
ggplot(data = data_plot_target) +
aes(
x = bmi,
fill = age_class
) +
geom_density()
#> # A tibble: 3 × 5
#> amount_sugar amount_vit_c amount_water time location
#> <dbl> <dbl> <dbl> <chr> <chr>
#> 1 9.58 24.7 88.2 Lunch home
#> 2 0 0 0 Lunch home
#> 3 0.57 19 48.7 Lunch home
as.numeric View
icannotreadthistext,ithurtsmyeyes,don'tyouthink?
a<-1
# a <- 1
# a < -1
resultat=mean(1:10+26,na.rm=T)
class
class(1) class("mummy")
class(TRUE)
class(class)
c()
x <- c(1, 2, 3, 4)
x2 <- c("dad", "mom")
x3 <- c(TRUE, FALSE)
1:10
#> [1] 1 2 3 4 5 6 7 8 9 10
seq.int()
seq.int(from = 1, to = 30, by = 2)
#> [1] 1 3 5 7 9 11 13 15 17 19 21 23 25 27 29
height + 1
!test_height
height != 1.20
vect_1 vect_2
vect_2 vect_2
is.na()
!is.na(height)
as.character() character
as.character(height)
as.numeric() numeric
#> [1] 1 1 0 1 1
length(a)
sum(a)
min(a)
max(a)
mean(a)
median(a)
na.rm = TRUE
sum(height)
#> [1] NA
length(height)
min(height, na.rm = TRUE)
max(height, na.rm = TRUE)
mean(height, na.rm = TRUE)
median(height, na.rm = TRUE)
NA
TRUE
numeric TRUE 1 FALSE 0
sum(as.numeric(is.na(height)))
#> [1] 2
#> [1] 2
result result
result result
iris
a_photo.jpg
a_message.txt
a_music.mp3
/ /
/ /
C:/a_file/a_sub_file/pictures/a_pictures.jpg
pictures/a_pictures.jpg
"C:/project/study/analyse.Rmd"
"C:/project/study/analyse"
"analysis.Rmd"
"analysis"
library(readxl)
conso_complement_alimentaire <- read_excel(path = "data/conso_ca_prod.xlsx")
conso_complement_alimentaire
#> # A tibble: 37 × 12
#> POPULATION NOIND periode_reference num_ligne_CA num_prod type_prod
#> <chr> <dbl> <chr> <dbl> <dbl> <chr>
#> 1 Pop1 Individu 119403801 12 mois 5711 1 Complément a…
#> 2 Pop1 Individu 121303701 12 mois 6351 1 Médicament
#> 3 Pop1 Individu 123200801 12 mois 7401 1 Complément a…
#> 4 Pop1 Individu 127510001 1 mois 10641 1 Médicament
#> 5 Pop1 Individu 212503101 12 mois 12731 1 Médicament
#> 6 Pop1 Individu 213102601 12 mois 13061 1 Complément a…
#> 7 Pop1 Individu 213102601 12 mois 13062 2 Non identifié
#> 8 Pop1 Individu 213102601 12 mois 13063 3 Complément a…
#> 9 Pop1 Individu 219400101 12 mois 17491 1 Complément a…
#> 10 Pop1 Individu 219400101 12 mois 17492 2 Médicament
#> # … with 27 more rows, and 6 more variables: classif_reg_prod <chr>,
#> # classif_prod <chr>, pres_prod <chr>, nb_unit_prod <chr>,
#> # mode_conso_prod <chr>, nb_jours_an <dbl>
Import an xls/xlsx file with {readxl} Formation R - https://thinkr.fr 172 / 470
Quiz
read_excel(path = "data/consumption.xlsx")
read_excel(path = "data/consumption.csv")
read_csv(path = "data/consumption.xls")
read_sas(path = "data/consumption.xlsx")
, ;
read_csv read_csv2
library(readr)
product <- read_csv(file = "data/conso_ca_prod.csv") # comma
indiv <- read_csv2(file = "data/conso_ca_indiv.csv") # semicolon
Import a dataset with the import button
# A tibble: 100 x 1
`POPULATION;NOIND;periode_reference;conso_ca;conso_ca_regl;co~
<chr>
1 Pop1 Individu;110100101;12 mois;Non;Non;NA;NA;NA;NA;NA;NA;NA;~
2 Pop1 Individu;113307301;12 mois;Non;Non;NA;NA;NA;NA;NA;NA;NA;~
3 Pop1 Individu;114902101;12 mois;Non;Non;NA;NA;NA;NA;NA;NA;NA;~
All good
Answer D
# A tibble: 86 x 1
`PK\003\004\024`
<chr>
1 "\xa1\xa6"
2 "B\xa8\x10\xaaf\x91\x97\xed9\xe7\xcc\xf1d<\xbb\\9[=AB\x13|#\x8e\xa~
3 "\xcc"
All good
Answer D
fill
scale_fill()
#> scale_fill_viridis_b
#> scale_color_continuous
#> scale_color_gradient2
color
#> scale_fill_gradient
#> scale_colour_continuous
scale_color_*()
#> scale_colour_viridis_d
#> scale_color_viridis_b
fill scale_fill_*()
#> scale_color_viridis_c
#> scale_discrete_manual
#> scale_colour_manual
#> scale_colour_viridis_c
#> scale_size_continuous
#> scale_shape_manual
#> scale_fill_viridis_d
#> scale_alpha_manual
#> scale_fill_viridis_c
#> scale_fill_gradient2
#> scale_fill_continuous
scale_color/fill_grey() scale_color/fill_manual()
scale_color/fill_viridis_d()
ggplot(data = data_plot_target) +
aes(x = bmi, fill = age_class) +
geom_density()
ggplot(data = data_plot_target) +
aes(x = bmi, fill = age_class) +
geom_density() +
scale_fill_grey()
ggplot(data = data_plot_target) +
aes(x = bmi, y = consumed_quantity,
color = food_type) +
geom_point()
ggplot(data = data_plot_target) +
aes(x = bmi, y = consumed_quantity,
color = food_type) +
geom_point() +
scale_color_manual(
values = c("#20B8D6", "#FF9300",
"#7176B8")
)
scale_color_viridis_d()
scale_size_viridis_d()
scale_fill_viridis_d()
color
fill
alpha
size
ggplot(data) +
aes(...) +
geom_...(color = ...)
ggplot(data = data_plot_target) +
aes(x = bmi, fill = age_class) +
geom_density() +
scale_fill_grey()
ggplot(data = data_plot_target) +
aes(x = bmi, fill = age_class) +
geom_density(alpha = 0.8) +
scale_fill_grey()
ggplot(data = data_plot_target) +
aes(x = bmi, fill = age_class) +
geom_density(alpha = 0.8) +
scale_fill_grey()
alpha
aes()
geom_density()
ggplot(data = data_plot_target) +
aes(x = bmi, y = age_class, color = age_class) +
geom_boxplot() +
geom_point()
facet_grid()
rows = vars(...)
cols = vars(...)
vars()
# Scatter plot of consumed quantity against BMI, coloured by food type and
# split into one row of panels per value of the facetting variable.
# NOTE(review): `time` is not among the columns of `data_plot_target` printed
# earlier in this document (bmi, age_class, food_type, consumed_quantity) —
# confirm the facetting variable.
ggplot(data = data_plot_target) +
  aes(
    x = bmi, y = consumed_quantity,
    color = food_type
  ) +
  geom_point() +
  scale_color_manual(
    values = c("#20B8D6", "#FF9300", "#7176B8")
  ) + # this `+` was missing, so facet_grid() was never added to the plot
  facet_grid(rows = vars(time))
labs()
title
subtitle
caption
ggplot(data = data_plot_target) +
aes(x = bmi, fill = age_class) +
geom_density(alpha = 0.8) +
scale_fill_grey()
ggplot(data = data_plot_target) +
aes(x = bmi, fill = age_class) +
geom_density(alpha = 0.8) +
scale_fill_grey() +
labs(
title = "Children BMI by age class",
x = "BMI",
y = "Density",
fill = "Age class"
)
ggplot(data = data_plot_target) +
aes(x = bmi, y = consumed_quantity,
color = food_type) +
geom_point() +
scale_color_manual(values =
c("#20B8D6", "#FF9300", "#7176B8"))
ggplot(data = data_plot_target) +
aes(x = bmi, y = consumed_quantity,
color = food_type) +
geom_point() +
scale_color_manual(values =
c("#20B8D6", "#FF9300", "#7176B8")) +
labs(
title = "Consumption of fat/sweet
foods according to children BMI",
x = "BMI",
y = "Average consumption by\nchild
during study (in g)",
color = "Food type"
)
\n y = "..."
ggplot(data = data_plot_target) +
aes(x = bmi, y = consumed_quantity,
color = food_type) +
geom_point() +
scale_color_manual(values =
c("#20B8D6", "#FF9300", "#7176B8")) +
facet_grid(cols = vars(age_class))
# Scatter plot of consumed quantity against BMI, one panel column per age
# class, with explicit title, axis, legend and caption labels.
# The y label and caption are string literals that span two source lines;
# their continuation lines are kept unindented so that no extra whitespace
# ends up inside the labels.
ggplot(data = data_plot_target) +
aes(x = bmi, y = consumed_quantity,
color = food_type) +
geom_point() +
scale_color_manual(values =
c("#20B8D6", "#FF9300", "#7176B8")) +
facet_grid(cols = vars(age_class)) +
labs(
title = "",
subtitle = "By age class",
x = "BMI",
y = "Average consumption by\nchild
during study (in g)",
color = "Food type",
caption = "NB: BMI (Body Mass Index)
= weight / (height ^ 2)"
)
Play with the coordinate system to flip your graph upside down Formation R - https://thinkr.fr 225 / 470
The coord_flip() function
Play with the coordinate system to flip your graph upside down Formation R - https://thinkr.fr 226 / 470
The theme_*() functions
ggplot(data = data_plot_target) +
aes(x = bmi, fill = age_class) +
geom_density(alpha = 0.8) +
scale_fill_grey() +
labs(title = "Children BMI by age
class",
x = "BMI",
y = "Density",
fill = "Age class")
# Density plot of BMI filled by age class, with grey fill scale, custom
# labels and the theme_few() theme applied as the last layer.
ggplot(data = data_plot_target) +
aes(x = bmi, fill = age_class) +
geom_density(alpha = 0.8) +
scale_fill_grey() +
labs(title = "Children BMI by age
class",
x = "BMI",
y = "Density",
fill = "Age class") +
theme_few() # from the {ggthemes} package
ggplot(data = data_plot_target) +
aes(x = bmi, y = consumed_quantity,
color = food_type) +
geom_point() +
scale_color_manual(values =
c("#20B8D6", "#FF9300", "#7176B8")) +
labs(title = "Fat/sweet food
consumption vs children's BMI",
x = "BMI",
y = "Average consumption by\nchild
during study (in g)",
color = "Food type")
ggplot(data = data_plot_target) +
aes(x = bmi, y = consumed_quantity,
color = food_type) +
geom_point() +
scale_color_manual(values =
c("#20B8D6", "#FF9300", "#7176B8")) +
labs(title = "Fat/sweet food
consumption vs children's BMI",
x = "BMI",
y = "Average consumption by\nchild
during study (in g)",
color = "Food type")+
theme_few()
# Scatter plot of consumed quantity against BMI, one panel column per age
# class, fully labelled and finished with theme_few().
# The y label and caption are string literals that span two source lines;
# their continuation lines are kept unindented so that no extra whitespace
# ends up inside the labels.
ggplot(data = data_plot_target) +
aes(x = bmi, y = consumed_quantity,
color = food_type) +
geom_point() +
scale_color_manual(values =
c("#20B8D6", "#FF9300", "#7176B8")) +
facet_grid(cols = vars(age_class)) +
labs(
title = "",
subtitle = "By age class",
x = "BMI",
y = "Average consumption by\nchild
during study (in g)",
color = "Food type",
caption = "NB: BMI (Body Mass Index)
= weight / (height ^ 2)"
) +
theme_few()
theme_*()
theme(legend.position = "bottom")
ggplot(data = data_plot_target) +
aes(x = bmi, y = consumed_quantity,
color = food_type) +
geom_point() +
scale_color_manual(values =
c("#20B8D6", "#FF9300", "#7176B8")) +
labs(title = "Fat/sweet food
consumption vs children's BMI",
x = "BMI",
y = "Average consumption by\nchild
during study (in g)",
color = "Food type") +
theme_few()
ggplot(data = data_plot_target) +
aes(x = bmi, y = consumed_quantity,
color = food_type) +
geom_point() +
scale_color_manual(values =
c("#20B8D6", "#FF9300", "#7176B8")) +
labs(title = "Fat/sweet food
consumption vs children's BMI",
x = "BMI",
y = "Average consumption by\nchild
during study (in g)",
color = "Food type") +
theme_few() +
theme(legend.position = "bottom")
ggplot(data = data_plot_target) +
aes(x = bmi, y = consumed_quantity,
color = food_type) +
geom_point() +
scale_color_manual(values =
c("#20B8D6", "#FF9300", "#7176B8")) +
labs(title = "Fat/sweet food
consumption vs children's BMI",
x = "BMI",
y = "Average consumption by\nchild
during study (in g)",
color = "Food type") +
theme_few() +
theme(legend.position = "bottom") +
guides(color = guide_legend(ncol = 1))
filename
plot
plot_alim_bmi
plot_alim_bmi
Wrangle data within tidyverse using {dplyr} Formation R - https://thinkr.fr 247 / 470
Manipulate a data.frame
data_food
#> # A tibble: 6 × 3
#> occasion_type food amount
#> <chr> <chr> <dbl>
#> 1 Aperitif before lunch tap water 148.
#> 2 Lunch fruit juice 100% pure juice 500
#> 3 Lunch n.s. cooking fat 3.22
#> 4 Lunch potato fries 107
#> 5 Lunch chicken nugget 161
#> 6 Lunch n.s. fat 12.8
occasion_type
food
amount
Wrangle data within tidyverse using {dplyr} Formation R - https://thinkr.fr 248 / 470
Manipulate a data.frame
data_food
#> # A tibble: 6 × 3
#> occasion_type food amount
#> <chr> <chr> <dbl>
#> 1 Aperitif before lunch tap water 148.
#> 2 Lunch fruit juice 100% pure juice 500
#> 3 Lunch n.s. cooking fat 3.22
#> 4 Lunch potato fries 107
#> 5 Lunch chicken nugget 161
#> 6 Lunch n.s. fat 12.8
Wrangle data within tidyverse using {dplyr} Formation R - https://thinkr.fr 249 / 470
Manipulate a data.frame
data_food
#> # A tibble: 6 × 3
#> occasion_type food amount
#> <chr> <chr> <dbl>
#> 1 Aperitif before lunch tap water 148.
#> 2 Lunch fruit juice 100% pure juice 500
#> 3 Lunch n.s. cooking fat 3.22
#> 4 Lunch potato fries 107
#> 5 Lunch chicken nugget 161
#> 6 Lunch n.s. fat 12.8
data_food
occasion_type "Lunch"
amount_kg amount
food
quantite_consommee_kg
Wrangle data within tidyverse using {dplyr} Formation R - https://thinkr.fr 250 / 470
Chain operations in {dplyr}
%>%
Wrangle data within tidyverse using {dplyr} Formation R - https://thinkr.fr 251 / 470
Chain operations in {dplyr}
data_food
#> # A tibble: 6 × 3
#> occasion_type food amount
#> <chr> <chr> <dbl>
#> 1 Aperitif before lunch tap water 148.
#> 2 Lunch fruit juice 100% pure juice 500
#> 3 Lunch n.s. cooking fat 3.22
#> 4 Lunch potato fries 107
#> 5 Lunch chicken nugget 161
#> 6 Lunch n.s. fat 12.8
# in pseudo code
data_food %>%
filter_lunch %>%
create_column_amount_kg %>%
group_by_food %>%
create_mean_amount_kg
Wrangle data within tidyverse using {dplyr} Formation R - https://thinkr.fr 252 / 470
Exercise
data_food
#> # A tibble: 6 × 3
#> occasion_type food amount
#> <chr> <chr> <dbl>
#> 1 Aperitif before lunch tap water 148.
#> 2 Lunch fruit juice 100% pure juice 500
#> 3 Lunch n.s. cooking fat 3.22
#> 4 Lunch potato fries 107
#> 5 Lunch chicken nugget 161
#> 6 Lunch n.s. fat 12.8
Wrangle data within tidyverse using {dplyr} Formation R - https://thinkr.fr 253 / 470
Explore rows of a dataset
data_food
#> # A tibble: 6 × 3
#> occasion_type food amount
#> <chr> <chr> <dbl>
#> 1 Aperitif before lunch tap water 148.
#> 2 Lunch fruit juice 100% pure juice 500
#> 3 Lunch n.s. cooking fat 3.22
#> 4 Lunch potato fries 107
#> 5 Lunch chicken nugget 161
#> 6 Lunch n.s. fat 12.8
#> # A tibble: 6 × 3
#> occasion_type food amount
#> <chr> <chr> <dbl>
#> 1 Aperitif before lunch tap water 148.
#> 2 Lunch fruit juice 100% pure juice 500
#> 3 Lunch n.s. cooking fat 3.22
#> 4 Lunch potato fries 107
#> 5 Lunch chicken nugget 161
#> 6 Lunch n.s. fat 12.8
"Lunch" occasion_type
food
filter()
count()
desc()
your_dataframe %>%
arrange(sorting_variable_1, sorting_variable_2)
#> # A tibble: 15 × 3
#> occasion_type food amount
#> <chr> <chr> <dbl>
#> 1 Breakfast stevia 0.025
#> 2 In the morning aspartame 0.0300
#> 3 Lunch aspartame 0.0300
#> 4 Snack aspartame 0.0300
#> 5 Dinner aspartame 0.0300
#> 6 In the evening/night aspartame 0.0300
#> 7 In the morning aspartame 0.0300
#> 8 Lunch aspartame 0.0300
#> 9 Snack aspartame 0.0300
#> 10 In the evening/night aspartame 0.0300
#> 11 Dinner olive oil 0.0448
#> 12 Lunch olive oil 0.048
#> 13 Lunch olive oil 0.0500
#> 14 Lunch stevia 0.0500
#> 15 Snack stevia 0.0500
#> # A tibble: 15 × 3
#> occasion_type food amount
#> <chr> <chr> <dbl>
#> 1 Dinner blond beer -2% 1768.
#> 2 Aperitif before dinner beer with peach sirup 1496.
#> 3 Lunch meat soup 1481.
#> 4 Lunch pistou soup 1481.
#> 5 Lunch meat stock 1467.
#> 6 Dinner meat stock 1467.
#> 7 Dinner croque madame 1443.
#> 8 In the afternoon (excluding snacks) fruit punch 1354.
#> 9 Lunch chinese soup 1185.
#> 10 Dinner meat stock 1173.
#> 11 Dinner stew stock 1173.
#> 12 Dinner vegetable stock 1167.
#> 13 Dinner fajita 1100
#> 14 Dinner vegetable soup 1050.
#> 15 Dinner diluted fruit juice 1024.
#> # A tibble: 15 × 3
#> occasion_type food amount
#> <chr> <chr> <dbl>
#> 1 Aperitif before dinner beer with peach sirup 1496.
#> 2 Aperitif before dinner wine-based cocktail 1014.
#> 3 Aperitif before dinner blond beer 2-4.9% 1010
#> 4 Aperitif before dinner n.s. blond beer 1010
#> 5 Aperitif before dinner blond beer 2-4.9% 1010
#> 6 Aperitif before dinner non-aromatised still water 1000
#> 7 Aperitif before dinner tap water 1000
#> 8 Aperitif before dinner tap water 1000
#> 9 Aperitif before dinner tap water 1000
#> 10 Aperitif before dinner tap water 1000
#> 11 Aperitif before dinner n.s. still water 1000
#> 12 Aperitif before dinner non-aromatised still water 1000
#> 13 Aperitif before dinner fruit punch 903.
#> 14 Aperitif before dinner blond beer 5-7.9% 864.
#> 15 Aperitif before dinner non-aromatised still water 855
your_dataframe %>%
filter(condition)
#> # A tibble: 15 × 3
#> occasion_type food amount
#> <chr> <chr> <dbl>
#> 1 Aperitif before lunch tap water 148.
#> 2 Aperitif before lunch tap water 333
#> 3 Aperitif before lunch soda with lemoin extract like sprite 178.
#> 4 Aperitif before lunch non-aromatised still water 125
#> 5 Aperitif before lunch tap water 142.
#> 6 Aperitif before lunch tap water 105
#> 7 Aperitif before lunch tap water 120
#> 8 Aperitif before lunch tap water 221.
#> 9 Aperitif before lunch champagne brut 135
#> 10 Aperitif before lunch tap water 258.
#> 11 Aperitif before lunch grilled peanut 100
#> 12 Aperitif before lunch olive n.s. 18
#> 13 Aperitif before lunch pastis 285
#> 14 Aperitif before lunch potato chips 24
#> 15 Aperitif before lunch green olive 12
#> # A tibble: 15 × 3
#> occasion_type food amount
#> <chr> <chr> <dbl>
#> 1 Dinner tap water 221.
#> 2 Dinner tap water 221.
#> 3 Dinner tap water 258.
#> 4 Dinner tap water 210
#> 5 Dinner tap water 210
#> 6 Dinner tap water 105
#> 7 Dinner tap water 120
#> 8 Dinner tap water 50
#> 9 Dinner tap water 62.5
#> 10 Dinner tap water 140
#> 11 Dinner tap water 267.
#> 12 Dinner tap water 315
#> 13 Dinner tap water 140
#> 14 Dinner tap water 120
#> 15 Dinner tap water 120
#> # A tibble: 15 × 3
#> occasion_type food amount
#> <chr> <chr> <dbl>
#> 1 Aperitif before lunch tap water 148.
#> 2 Dinner fruit yoghurt 125
#> 3 Dinner tomato sauce 102.
#> 4 Dinner dow 300
#> 5 Dinner white bread 29.4
#> 6 Dinner tap water 221.
#> 7 In the evening/night tap water 148.
#> 8 In the evening/night tap water 148.
#> 9 Lunch tap water 360
#> 10 Dinner red cabbage 65
#> 11 Dinner white salt 1
#> 12 Dinner beef bifteck 153
#> 13 Dinner salad dressing with wine vinegar 4.28
#> 14 Dinner green beans 30
#> 15 Dinner white bread 31.5
data_food %>%
filter(occasion_type %in% c("Lunch", "Dinner"))
data_food %>%
filter(occasion_type == "Lunch" | occasion_type == "Dinner")
data_food %>%
filter(occasion_type == "Lunch") %>%
arrange(desc(amount)) %>%
head()
#> # A tibble: 6 × 3
#> occasion_type food amount
#> <chr> <chr> <dbl>
#> 1 Lunch meat soup 1481.
#> 2 Lunch pistou soup 1481.
#> 3 Lunch meat stock 1467.
#> 4 Lunch chinese soup 1185.
#> 5 Lunch non-aromatised still water 1000
#> 6 Lunch tap water 1000
data_food %>%
distinct()
data_food %>%
distinct()
distinct()
data_food %>%
distinct(occasion_type)
#> # A tibble: 10 × 1
#> occasion_type
#> <chr>
#> 1 Aperitif before lunch
#> 2 Lunch
#> 3 In the afternoon (excluding snacks)
#> 4 Dinner
#> 5 In the evening/night
#> 6 Breakfast
#> 7 Aperitif before dinner
#> 8 In the morning
#> 9 Snack
#> 10 Before breakfast
data_food %>%
slice_sample(n = 10)
#> # A tibble: 10 × 3
#> occasion_type food amount
#> <chr> <chr> <dbl>
#> 1 Dinner n.s. cooking fat 5
#> 2 Dinner yaourt avec fruits 125
#> 3 Breakfast tartine craquante au froment (classique) type cracotte 42
#> 4 Lunch compote (de fruits) 90
#> 5 Lunch cuisse de canard 122.
#> 6 Lunch vin rouge 120
#> 7 Dinner salade batavia 15
#> 8 Dinner non-aromatised still water 162.
#> 9 Dinner nem au porc 30
#> 10 Dinner tap water 157
data_food %>%
slice_sample(prop = 0.05) # sample 5% of all rows
data_food %>%
slice_max(amount, n = 2)
#> # A tibble: 2 × 3
#> occasion_type food amount
#> <chr> <chr> <dbl>
#> 1 Dinner blond beer -2% 1768.
#> 2 Aperitif before dinner beer with peach sirup 1496.
data_food %>%
slice_min(amount, n = 2)
#> # A tibble: 10 × 3
#> occasion_type food amount
#> <chr> <chr> <dbl>
#> 1 Breakfast stevia 0.025
#> 2 In the morning aspartame 0.0300
#> 3 Lunch aspartame 0.0300
#> 4 Snack aspartame 0.0300
#> 5 Dinner aspartame 0.0300
#> 6 In the evening/night aspartame 0.0300
#> 7 In the morning aspartame 0.0300
#> 8 Lunch aspartame 0.0300
#> 9 Snack aspartame 0.0300
#> 10 In the evening/night aspartame 0.0300
#> # A tibble: 1 × 1
#> n
#> <int>
#> 1 256301
count()
#> # A tibble: 10 × 2
#> occasion_type n
#> <chr> <int>
#> 1 Aperitif before dinner 3967
#> 2 Aperitif before lunch 2181
#> 3 Before breakfast 2165
#> 4 Breakfast 40195
#> 5 Dinner 73760
#> 6 In the afternoon (excluding snacks) 12906
#> 7 In the evening/night 8501
#> 8 In the morning 10443
#> 9 Lunch 85188
#> 10 Snack 16995
data_food %>%
count(occasion_type, name = "number")
#> # A tibble: 10 × 2
#> occasion_type number
#> <chr> <int>
#> 1 Aperitif before dinner 3967
#> 2 Aperitif before lunch 2181
#> 3 Before breakfast 2165
#> 4 Breakfast 40195
#> 5 Dinner 73760
#> 6 In the afternoon (excluding snacks) 12906
#> 7 In the evening/night 8501
#> 8 In the morning 10443
#> 9 Lunch 85188
#> 10 Snack 16995
data_food
#> # A tibble: 6 × 3
#> occasion_type food amount
#> <chr> <chr> <dbl>
#> 1 Aperitif before lunch tap water 148.
#> 2 Lunch fruit juice 100% pure juice 500
#> 3 Lunch n.s. cooking fat 3.22
#> 4 Lunch potato fries 107
#> 5 Lunch chicken nugget 161
#> 6 Lunch n.s. fat 12.8
data_food %>%
filter(occasion_type == "Lunch") %>%
count(food, name = "number") %>%
arrange(desc(number)) %>%
head()
#> # A tibble: 6 × 2
#> food number
#> <chr> <int>
#> 1 tap water 6022
#> 2 white bread 4599
#> 3 non-aromatised still water 2597
#> 4 olive oil 1668
#> 5 dow 1462
#> 6 white salt 1355
#> # A tibble: 6 × 5
#> occasion_type occasion_location food_type food amount
#> <chr> <chr> <chr> <chr> <dbl>
#> 1 Aperitif before lunch Home water tap … 148.
#> 2 Lunch Home vegetable and fruit juice frui… 500
#> 3 Lunch Home animal fat n.s.… 3.22
#> 4 Lunch Home potatoes and other tubers pota… 107
#> 5 Lunch Home Meet based dish chic… 161
#> 6 Lunch Home plant-based fat n.s.… 12.8
occasion_type
occasion_location
food_type
food
amount
#> # A tibble: 6 × 5
#> occasion_type occasion_location food_type food amount
#> <chr> <chr> <chr> <chr> <dbl>
#> 1 Aperitif before lunch Home water tap … 148.
#> 2 Lunch Home vegetable and fruit juice frui… 500
#> 3 Lunch Home animal fat n.s.… 3.22
#> 4 Lunch Home potatoes and other tubers pota… 107
#> 5 Lunch Home Meet based dish chic… 161
#> 6 Lunch Home plant-based fat n.s.… 12.8
mutate()
rename()
your_dataframe %>%
select(variable_to_keep_1, variable_to_keep_2, ...)
data_food %>%
select(food, amount)
#> # A tibble: 6 × 2
#> food amount
#> <chr> <dbl>
#> 1 tap water 148.
#> 2 fruit juice 100% pure juice 500
#> 3 n.s. cooking fat 3.22
#> 4 potato fries 107
#> 5 chicken nugget 161
#> 6 n.s. fat 12.8
starts_with()
ends_with()
contains()
everything()
data_food %>%
select(-occasion_type, -occasion_location)
data_food %>%
select(starts_with("occasion"))
data_food %>%
select(-starts_with("occasion"))
data_food %>%
select(-ends_with("type"))
data_food %>%
select(-contains("occasion"))
data_food %>%
select(food, everything())
data_food %>%
starts_with("occasion")
data_food %>%
select(food) %>%
select(amount)
data_food food
your_dataframe %>%
mutate(new_variable_1 = operations(existing_variable_2),
new_variable_3 = operations(existing_variable_4),
...
)
{dplyr}
lag() lead()
cumsum() cumprod()
ifelse() case_when()
tibble(
hour = 12:18,
food_intake = c(280, 25, 0, 0, 100, 50, 200)
) %>%
mutate(
lag_hour = lag(hour),
diff_intake = food_intake - lag(food_intake),
cum_intake = cumsum(food_intake)
)
#> # A tibble: 7 × 5
#> hour food_intake lag_hour diff_intake cum_intake
#> <int> <dbl> <int> <dbl> <dbl>
#> 1 12 280 NA NA 280
#> 2 13 25 12 -255 305
#> 3 14 0 13 -25 305
#> 4 15 0 14 0 305
#> 5 16 100 15 100 405
#> 6 17 50 16 -50 455
#> 7 18 200 17 150 655
# Derive `amount_kg` by dividing `amount` by 1000, round it to two decimals,
# then keep only that column and preview the first rows.
data_food %>%
  mutate(
    amount_kg = amount / 1000,
    amount_kg = round(amount_kg, digits = 2)
  ) %>%
  # the column was listed twice; naming it once gives the same
  # single-column result shown in the printed output
  select(amount_kg) %>%
  head()
#> # A tibble: 6 × 1
#> amount_kg
#> <dbl>
#> 1 0.15
#> 2 0.5
#> 3 0
#> 4 0.11
#> 5 0.16
#> 6 0.01
case_when()
condition ~ result
your_dataframe %>%
mutate(
variable = case_when(
condition_1 ~ value_1,
condition_2 ~ value_2,
...
))
mutate() variable
case_when()
data_food %>%
mutate(
amount_chr = case_when(
amount > 400 ~ "a gigantic quantity",
amount > 100 ~ "a lot",
amount >= 0 ~ "a small amount"
)
) %>%
select(amount, amount_chr)
#> # A tibble: 7 × 2
#> amount amount_chr
#> <dbl> <chr>
#> 1 148. a lot
#> 2 500 a gigantic quantity
#> 3 3.22 a small amount
#> 4 107 a lot
#> 5 161 a lot
#> 6 12.8 a small amount
#> 7 106 a lot
your_dataframe %>%
rename(new_name = old_name)
amount quantity
data_food %>%
select(amount) %>%
class()
data_food %>%
pull(amount) %>%
class()
mean() median()
n() summarise()
var() sd()
min() max()
your_dataframe %>%
summarise(
var1_summary = function_1(variable_1),
var2_summary = function_2(variable_2),
...
)
#> # A tibble: 6 × 3
#> occasion_type food amount
#> <chr> <chr> <dbl>
#> 1 Aperitif before lunch tap water 148.
#> 2 Lunch fruit juice 100% pure juice 500
#> 3 Lunch n.s. cooking fat 3.22
#> 4 Lunch potato fries 107
#> 5 Lunch chicken nugget 161
#> 6 Lunch n.s. fat 12.8
data_food %>%
summarise(
mean_summary = mean(amount),
variance_summary = var(amount),
number_summary = n()
)
#> # A tibble: 1 × 3
#> mean_summary variance_summary number_summary
#> <dbl> <dbl> <int>
#> 1 NA NA 256301
data_food %>%
summarise(
mean_summary = mean(amount, na.rm = TRUE),
variance_summary = var(amount, na.rm = TRUE),
number_summary = n()
)
#> # A tibble: 1 × 3
#> mean_summary variance_summary number_summary
#> <dbl> <dbl> <int>
#> 1 118. 17515. 256301
your_dataframe %>%
group_by(grouping_variable_1, grouping_variable_2, ...)
group_by() summarise()
#> # A tibble: 8 × 4
#> occasion_type mean_amount variance_amount number
#> <chr> <dbl> <dbl> <int>
#> 1 Aperitif before dinner 132. 20376. 3967
#> 2 Aperitif before lunch 121. 16260. 2181
#> 3 Before breakfast 159. 15585. 2165
#> 4 Breakfast 127. 22502. 40195
#> 5 Dinner 115. 18683. 73760
#> 6 In the afternoon (excluding snacks) 153. 18741. 12906
#> 7 In the evening/night 162. 18286. 8501
#> 8 In the morning 142. 16345. 10443
.groups = "drop"
#> # A tibble: 50 × 3
#> region gender time_physical_activity_hours
#> <chr> <chr> <dbl>
#> 1 Aquitaine autre 2.41
#> 2 Brittany M 4.40
#> 3 Normandy M 5.78
#> 4 Aquitaine F 0.353
#> 5 Burgondy M 1.86
#> 6 Burgondy autre 3.33
#> 7 Normandy F 1.23
#> 8 Brittany M 1.69
#> 9 Normandy M 5.04
#> 10 Brittany M 3.31
#> # … with 40 more rows
# Mean (rounded) physical-activity time per gender for a single region,
# sorted from the lowest to the highest mean.
data_physical_activity %>%
  # the printed dataset spells the region "Brittany"; the French spelling
  # "Bretagne" matched no rows
  filter(region == "Brittany") %>%
  select(-region) %>%
  mutate(
    time_physical_activity_hours = round(time_physical_activity_hours)
  ) %>%
  group_by(gender) %>%
  summarise(
    mean_time_phys_act_hours = mean(time_physical_activity_hours),
    .groups = "drop"
  ) %>%
  arrange(mean_time_phys_act_hours)
data_physical_activity %>%
group_by(gender) %>%
slice_sample(n = 10) %>%
mutate(time_physical_activity_minutes = time_physical_activity_hours * 60) %>%
slice_max(time_physical_activity_minutes, n = 3) %>%
summarise(
mean_time_h = mean(time_physical_activity_hours),
median_time_h = median(time_physical_activity_hours),
mean_time_min = mean(time_physical_activity_minutes),
median_time_min = median(time_physical_activity_minutes),
.groups = "drop"
)
#> # A tibble: 8 × 2
#> age gender
#> <dbl> <chr>
#> 1 25 male
#> 2 45 male
#> 3 31 female
#> 4 10 male
#> 5 23 male
#> 6 43 male
#> 7 45 female
#> 8 12 male
#> # A tibble: 5 × 5
#> age_class water carbs lipids proteins
#> <chr> <dbl> <dbl> <dbl> <dbl>
#> 1 7-10 year 759. 390. 422. 275.
#> 2 11-17 year 942. 417. 435. 143.
#> 3 18-35 year 1026. 529. 537. 188.
#> 4 36-55 year 734. 635. 577. 151.
#> 5 56 year + 883. 590. 368. 258.
data_a
data_b
data_c
food_intake
#> # A tibble: 6 × 3
#> id food_type amount
#> <dbl> <chr> <dbl>
#> 1 110100101 Water 1633.
#> 2 110100101 Juice 1421.
#> 3 110100101 Milk 251.
#> 4 110100701 Water 3082.
#> 5 110100801 Water 1500
#> 6 110100801 Milk 458.
pivot_wider()
# NOTE(review): reconstructed from a two-column slide layout that the text
# extraction interleaved; confirm against the original slide.

# Long-format table, printed as-is.
food_intake

# Spread the values of `food_type` into one column each, filled with the
# matching `amount`.
food_intake %>%
  pivot_wider(
    names_from = food_type,
    values_from = amount
  )
values_fill
values_fill values_fill
# NOTE(review): reconstructed from two side-by-side snippets that the text
# extraction interleaved line by line; confirm against the original slide.

# Without `values_fill`: combinations absent from the long table become NA.
food_intake %>%
  pivot_wider(
    names_from = food_type,
    values_from = amount
  )

# With `values_fill`: absent combinations are filled with 0 for `amount`.
food_intake %>%
  pivot_wider(
    names_from = food_type,
    values_from = amount,
    values_fill = list(
      amount = 0
    )
  )
# Same reshaping, picking the columns with tidyselect helpers instead of
# bare names. The patterns must match the column names of `food_intake`
# (id, food_type, amount); the French patterns "groupe" / "quantite"
# matched none of them.
food_intake %>%
  pivot_wider(
    names_from = contains("type"),
    values_from = contains("amount")
  )
intake_vitamins
#> # A tibble: 6 × 4
#> id gender vitamin_c vitamin_d
#> <dbl> <chr> <dbl> <dbl>
#> 1 110100101 Man 506. 9.7
#> 2 110100701 Woman 458. 19.6
#> 3 110100801 Man 193. 10.2
#> 4 110101201 Man 604. 20.5
#> 5 110101401 Woman 161. 12.8
#> 6 110300301 Man 91.7 10
pivot_longer()
# NOTE(review): reconstructed from a two-column slide layout that the text
# extraction interleaved; confirm against the original slide.

# Wide-format table, printed as-is.
intake_vitamins

# Stack the two vitamin columns: their names go to `vitamin`, their values
# to `amount`.
intake_vitamins %>%
  pivot_longer(
    cols = c(vitamin_c, vitamin_d),
    names_to = "vitamin",
    values_to = "amount"
  )
#> # A tibble: 6 × 6
#> age occasion_type occasion_location food_type food amount
#> <dbl> <chr> <chr> <chr> <chr> <dbl>
#> 1 26 Aperitif before lunch Home water tap water 148.
#> 2 43 Lunch Home vegetable and… fruit jui… 500
#> 3 56 Lunch Home animal fat n.s. cook… 3.22
#> 4 49 Lunch Home potatoes and … potato fr… 107
#> 5 53 Lunch Home Meet based di… chicken n… 161
#> 6 36 Lunch Home plant-based f… n.s. fat 12.8
age
occasion_type
occasion_location
food_type
food
amount
mutate_all()
mutate_at()
mutate_if()
mutate_if()
your_dataframe %>%
mutate_if(
condition,
function_to_apply
)
mutate_if()
data_food %>%
mutate_if(
is.numeric,
as.character
)
#> # A tibble: 6 × 6
#> age occasion_type occasion_location food_type food amount
#> <chr> <chr> <chr> <chr> <chr> <chr>
#> 1 26 Aperitif before lunch Home water tap water 147.5
#> 2 43 Lunch Home vegetable and… fruit jui… 500
#> 3 56 Lunch Home animal fat n.s. cook… 3.22
#> 4 49 Lunch Home potatoes and … potato fr… 107
#> 5 53 Lunch Home Meet based di… chicken n… 161
#> 6 36 Lunch Home plant-based f… n.s. fat 12.84
mutate_if()
data_food %>%
mutate_if(
is.numeric,
list("chr" = as.character)
)
#> # A tibble: 6 × 8
#> age occasion_type occasion_locati… food_type food amount age_chr amount_chr
#> <dbl> <chr> <chr> <chr> <chr> <dbl> <chr> <chr>
#> 1 26 Aperitif bef… Home water tap … 148. 26 147.5
#> 2 43 Lunch Home vegetabl… frui… 500 43 500
#> 3 56 Lunch Home animal f… n.s.… 3.22 56 3.22
#> 4 49 Lunch Home potatoes… pota… 107 49 107
#> 5 53 Lunch Home Meet bas… chic… 161 53 161
#> 6 36 Lunch Home plant-ba… n.s.… 12.8 36 12.84
mutate_at()
# Template: apply `functions_to_apply` to the columns designated by
# `variables_to_transform` (both names are placeholders).
your_dataframe %>%
  mutate_at(
    variables_to_transform,
    functions_to_apply
  )
mutate_at()
data_food %>%
mutate_at(
c("occasion_type", "amount"),
as.factor
)
#> # A tibble: 6 × 6
#> age occasion_type occasion_location food_type food amount
#> <dbl> <fct> <chr> <chr> <chr> <fct>
#> 1 26 Aperitif before lunch Home water tap water 147.5
#> 2 43 Lunch Home vegetable and… fruit jui… 500
#> 3 56 Lunch Home animal fat n.s. cook… 3.22
#> 4 49 Lunch Home potatoes and … potato fr… 107
#> 5 53 Lunch Home Meet based di… chicken n… 161
#> 6 36 Lunch Home plant-based f… n.s. fat 12.84
data_food %>%
mutate_at(
c("occasion_type", "amount"),
as.factor
)
#> # A tibble: 6 × 6
#> age occasion_type occasion_location food_type food amount
#> <dbl> <fct> <chr> <chr> <chr> <fct>
#> 1 26 Aperitif before lunch Home water tap water 147.5
#> 2 43 Lunch Home vegetable and… fruit jui… 500
#> 3 56 Lunch Home animal fat n.s. cook… 3.22
#> 4 49 Lunch Home potatoes and … potato fr… 107
#> 5 53 Lunch Home Meet based di… chicken n… 161
#> 6 36 Lunch Home plant-based f… n.s. fat 12.84
data_food %>%
mutate_at(
vars(occasion_type, amount),
as.factor
)
#> # A tibble: 6 × 6
#> age occasion_type occasion_location food_type food amount
#> <dbl> <fct> <chr> <chr> <chr> <fct>
#> 1 26 Aperitif before lunch Home water tap water 147.5
#> 2 43 Lunch Home vegetable and… fruit jui… 500
#> 3 56 Lunch Home animal fat n.s. cook… 3.22
#> 4 49 Lunch Home potatoes and … potato fr… 107
#> 5 53 Lunch Home Meet based di… chicken n… 161
#> 6 36 Lunch Home plant-based f… n.s. fat 12.84
data_food %>%
mutate_at(
vars(ends_with("food")),
as.factor
)
#> # A tibble: 6 × 6
#> age occasion_type occasion_location food_type food amount
#> <dbl> <chr> <chr> <chr> <fct> <dbl>
#> 1 26 Aperitif before lunch Home water tap water 148.
#> 2 43 Lunch Home vegetable and… fruit jui… 500
#> 3 56 Lunch Home animal fat n.s. cook… 3.22
#> 4 49 Lunch Home potatoes and … potato fr… 107
#> 5 53 Lunch Home Meet based di… chicken n… 161
#> 6 36 Lunch Home plant-based f… n.s. fat 12.8
summarise_all()
summarise_at()
summarise_if()
summarise_at()
data_food %>%
summarise_at(
vars(age, amount),
mean
)
#> # A tibble: 1 × 2
#> age amount
#> <dbl> <dbl>
#> 1 37.5 NA
amount NA
na.rm = TRUE
#> # A tibble: 1 × 2
#> age amount
#> <dbl> <dbl>
#> 1 37.5 118.
~ function(.x)
summarise_at()
data_food %>%
summarise_at(
vars(age, amount),
list(
"var" = ~ var(.x, na.rm = TRUE),
"median" = ~ median(.x, na.rm = TRUE)
)
)
#> # A tibble: 1 × 4
#> age_var amount_var age_median amount_median
#> <dbl> <dbl> <dbl> <dbl>
#> 1 169. 17515. 37 79.3
summarise_if()
data_food %>%
summarise_if(
is.numeric,
~ mean(.x, na.rm = TRUE)
)
#> # A tibble: 1 × 2
#> age amount
#> <dbl> <dbl>
#> 1 37.5 118.
summarise_if()
data_food %>%
summarise_if(
is.numeric,
list(
"mean" = ~ mean(.x, na.rm = TRUE),
"var" = ~ var(.x, na.rm = TRUE),
"max" = ~ max(.x, na.rm = TRUE))
)
#> # A tibble: 1 × 6
#> age_mean amount_mean age_var amount_var age_max amount_max
#> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 37.5 118. 169. 17515. 60 1768.
select_if()
select_at()
groups()
group_by_all()
group_by_at()
group_by_if()
group_split()
group_nest()
item
data_age data_phys_act
age
activity_profile
NOMEN
NOIND
item
data_a data_b
NOIND
by
by
data_a %>%
inner_join(data_b, by = "NOIND")
#> # A tibble: 3 × 3
#> NOIND gender reads_nutri_label
#> <chr> <chr> <chr>
#> 1 087 Male Never
#> 2 078 Male Never
#> 3 016 Female Never
data_a %>%
left_join(data_b, by = "NOIND")
#> # A tibble: 6 × 3
#> NOIND gender reads_nutri_label
#> <chr> <chr> <chr>
#> 1 087 Male Never
#> 2 049 Female <NA>
#> 3 054 Male <NA>
#> 4 078 Male Never
#> 5 064 Female <NA>
#> 6 016 Female Never
data_a %>%
full_join(data_b, by = "NOIND")
#> # A tibble: 9 × 3
#> NOIND gender reads_nutri_label
#> <chr> <chr> <chr>
#> 1 087 Male Never
#> 2 049 Female <NA>
#> 3 054 Male <NA>
#> 4 078 Male Never
#> 5 064 Female <NA>
#> 6 016 Female Never
#> 7 013 <NA> Never
#> 8 029 <NA> Sometimes
#> 9 044 <NA> Always
data_a %>%
anti_join(data_b, by = "NOIND")
#> # A tibble: 3 × 2
#> NOIND gender
#> <chr> <chr>
#> 1 049 Female
#> 2 054 Male
#> 3 064 Female
data_age data_phys_act
data_age_act_phys
#> # A tibble: 10 × 4
#> individual detail weight height
#> <int> <chr> <int> <int>
#> 1 1 60-M 96 166
#> 2 2 42-M 96 157
#> 3 3 32-I 96 161
#> 4 4 26-M 90 157
#> 5 5 56-F 86 170
#> 6 1 59-I 95 166
#> 7 2 38-M 85 171
#> 8 3 48-F 97 180
#> 9 4 24-M 88 155
#> 10 5 31-M 85 161
#> # A tibble: 10 × 5
#> indiv year month day obs
#> <int> <chr> <chr> <chr> <chr>
#> 1 1 2019 01 01 j
#> 2 2 2019 01 01 u
#> 3 3 2019 01 01 q
#> 4 4 2019 01 01 k
#> 5 5 2019 01 01 g
#> 6 1 2019 02 01 n
#> 7 2 2019 02 01 h
#> 8 3 2019 02 01 w
#> 9 4 2019 02 01 i
#> 10 5 2019 02 01 p
{tidyr}
separate()
unite()
col into
data %>%
separate(
col = column_ab,
into = c("a", "b"),
sep = "-" # make separator explicit
)
remove = FALSE
data %>%
separate(
col = column_ab,
into = c("a", "b"),
sep = "-",
remove = FALSE
)
data_indiv
#> # A tibble: 10 × 4
#> individual detail weight height
#> <int> <chr> <int> <int>
#> 1 1 60-M 96 166
#> 2 2 42-M 96 157
#> 3 3 32-I 96 161
#> 4 4 26-M 90 157
#> 5 5 56-F 86 170
#> 6 1 59-I 95 166
#> 7 2 38-M 85 171
#> 8 3 48-F 97 180
#> 9 4 24-M 88 155
#> 10 5 31-M 85 161
data_indiv %>%
separate(
col = detail,
sep = "-",
into = c("age", "gender")
) %>%
mutate(age = as.numeric(age))
#> # A tibble: 10 × 5
#> individual age gender weight height
#> <int> <dbl> <chr> <int> <int>
#> 1 1 60 M 96 166
#> 2 2 42 M 96 157
#> 3 3 32 I 96 161
#> 4 4 26 M 90 157
#> 5 5 56 F 86 170
#> 6 1 59 I 95 166
#> 7 2 38 M 85 171
#> 8 3 48 F 97 180
#> 9 4 24 M 88 155
#> 10 5 31 M 85 161
separate()
col
data %>%
unite(
col = "column_ab",
column_a, column_b,
sep = "/"
)
remove = FALSE
data %>%
unite(
col = "column_ab",
column_a, column_b,
sep = "/",
remove = FALSE
)
data_obs
data_obs %>%
unite(
col = "date",
year, month, day,
sep = "/"
)
#> # A tibble: 8 × 4
#> id height_weight unite_weight unite_height
#> <int> <chr> <chr> <chr>
#> 1 1 187_83 kg cm
#> 2 2 166_69 kg cm
#> 3 3 175_86 kg cm
#> 4 4 164_70 kg cm
#> 5 5 183_81 kg cm
#> 6 6 177_88 kg cm
#> 7 7 160_68 kg cm
#> 8 8 179_79 kg cm
NA
#> # A tibble: 8 × 4
#> year individual weight height
#> <chr> <int> <int> <int>
#> 1 2019 1 NA 180
#> 2 <NA> 2 96 187
#> 3 <NA> 3 95 184
#> 4 <NA> 4 89 189
#> 5 2020 1 85 182
#> 6 <NA> 2 85 176
#> 7 <NA> 3 86 180
#> 8 <NA> 4 NA 170
NA
#> # A tibble: 4 × 3
#> year month weight
#> <chr> <chr> <dbl>
#> 1 2017 01 86
#> 2 2018 02 95
#> 3 2019 01 90
#> 4 2019 02 92
{tidyr}
fill()
drop_na()
replace_na()
complete()
data %>%
fill(column_a, column_b)
data %>%
fill(column_a, column_b, .direction = "up")
# original dataset
data_indiv
#> # A tibble: 8 × 4
#> year individual weight height
#> <chr> <int> <int> <int>
#> 1 2019 1 NA 180
#> 2 <NA> 2 96 187
#> 3 <NA> 3 95 184
#> 4 <NA> 4 89 189
#> 5 2020 1 85 182
#> 6 <NA> 2 85 176
#> 7 <NA> 3 86 180
#> 8 <NA> 4 NA 170
data_indiv %>%
fill(year)
#> # A tibble: 8 × 4
#> year individual weight height
#> <chr> <int> <int> <int>
#> 1 2019 1 NA 180
#> 2 2019 2 96 187
#> 3 2019 3 95 184
#> 4 2019 4 89 189
#> 5 2020 1 85 182
#> 6 2020 2 85 176
#> 7 2020 3 86 180
#> 8 2020 4 NA 170
# original dataset
data_indiv
#> # A tibble: 8 × 4
#> year individual weight height
#> <chr> <int> <int> <int>
#> 1 2019 1 NA 180
#> 2 <NA> 2 96 187
#> 3 <NA> 3 95 184
#> 4 <NA> 4 89 189
#> 5 2020 1 85 182
#> 6 <NA> 2 85 176
#> 7 <NA> 3 86 180
#> 8 <NA> 4 NA 170
data_indiv %>%
fill(year, .direction = "up")
#> # A tibble: 8 × 4
#> year individual weight height
#> <chr> <int> <int> <int>
#> 1 2019 1 NA 180
#> 2 2020 2 96 187
#> 3 2020 3 95 184
#> 4 2020 4 89 189
#> 5 2020 1 85 182
#> 6 <NA> 2 85 176
#> 7 <NA> 3 86 180
#> 8 <NA> 4 NA 170
data %>%
drop_na()
data %>%
drop_na(column_a, column_b)
data %>%
replace_na(
replace = list(
column_a = "value_a",
column_b = "value_b"
)
)
data_indiv
data_indiv %>%
replace_na(
replace = list(year = "00")
)
NA
data %>%
complete(column_a, column_b)
column_a column_b
NA
data %>%
fill(
contains("encoded")
)
data %>%
drop_na(
starts_with("comment")
)
data
library(lubridate)
now()
today()
mdy()
month - day - year
dmy(the_dates)
mdy(the_dates)
"1986/02/15 20h05"
ymd_hm("1986/02/15 20h05")
tz= OlsonNames()
#> [1] NA
year(present_moment) hour(present_moment)
month(present_moment) minute(present_moment)
day(present_moment) second(present_moment)
wday(present_moment)
#> [1] 5
tribble(
~name, ~date_of_birth,
"Sébastien", "26 juillet 83",
"Diane", "1er janvier 1985",
"Vincent", "11/02/1986",
"Colin", "22111988",
"Margot", "17 septembre 1991",
"Cervan", "22-octobre-91"
) %>%
mutate(date_of_birth = .....(date_of_birth)
) %>%
filter(.....(date_of_birth) == 9)
ymd("1986/11/02") %>%
wday(label = TRUE, abbr = FALSE)
today() + days(3)
tomorrow(tomorrow())
today() + days(2)
today() + hours(48)
today() + day(2)
sep collapse
str_c("one", "two", sep = " ") str_c(c("one", "two"), collapse = " ")
lastname_firstname
phonebook %>%
mutate(lastname_firstname = str_c(lastname, firstname, sep = "_")) %>%
select(-coord)
#> # A tibble: 5 × 4
#> id firstname lastname lastname_firstname
#> <int> <chr> <chr> <chr>
#> 1 1 Steven "DIXON" "DIXON_Steven"
#> 2 2 ERIN "FLORES " "FLORES _ERIN"
#> 3 3 Marie-Louise "guillaumin " "guillaumin _Marie-Louise"
#> 4 4 Layla "BRYANT" "BRYANT_Layla"
#> 5 5 Mitchell " Berry" " Berry_Mitchell"
phonebook %>%
mutate_all(str_trim) %>%
mutate(lastname_firstname = str_c(lastname, firstname, sep = "_")) %>%
select(-coord)
#> # A tibble: 5 × 4
#> id firstname lastname lastname_firstname
#> <chr> <chr> <chr> <chr>
#> 1 1 Steven DIXON DIXON_Steven
#> 2 2 ERIN FLORES FLORES_ERIN
#> 3 3 Marie-Louise guillaumin guillaumin_Marie-Louise
#> 4 4 Layla BRYANT BRYANT_Layla
#> 5 5 Mitchell Berry Berry_Mitchell
phonebook %>%
mutate_all(str_trim) %>%
mutate(
firstname = str_to_title(firstname),
lastname = str_to_upper(lastname),
lastname_firstname = str_c(lastname, firstname, sep = "_")
) %>%
select(-coord)
#> # A tibble: 5 × 4
#> id firstname lastname lastname_firstname
#> <chr> <chr> <chr> <chr>
#> 1 1 Steven DIXON DIXON_Steven
#> 2 2 Erin FLORES FLORES_Erin
#> 3 3 Marie-Louise GUILLAUMIN GUILLAUMIN_Marie-Louise
#> 4 4 Layla BRYANT BRYANT_Layla
#> 5 5 Mitchell BERRY BERRY_Mitchell
filter()
phonebook %>%
filter(str_detect(coord, "Dallas")) %>%
select(-firstname, -lastname)
#> # A tibble: 1 × 2
#> id coord
#> <int> <chr>
#> 1 3 9464 Preston Rd, Dallas, TX 75225
phonebook %>%
mutate(
coord_new =
str_replace_all(coord, pattern = "msn.com", replacement = "hotmail.com")
) %>%
select(starts_with("coord"))
#> # A tibble: 5 × 2
#> coord coord_new
#> <chr> <chr>
#> 1 " 6804 Preston Rd (563)-300-2113" " 6804 Preston Rd (563)-300-2113"
#> 2 "flores@mail.com (617)-990-5931 " "flores@mail.com (617)-990-5931 "
#> 3 "9464 Preston Rd, Dallas, TX 75225" "9464 Preston Rd, Dallas, TX 75225"
#> 4 "8046 Forest Ln, Humble, TX 77338" "8046 Forest Ln, Humble, TX 77338"
#> 5 "(089).225.6911 berry@msn.com" "(089).225.6911 berry@hotmail.com"
$ [:digit:]
^ [:upper:]
. [:punct:]
phonebook %>%
filter(str_detect(firstname, "^M")) %>%
select(-coord)
#> # A tibble: 2 × 3
#> id firstname lastname
#> <int> <chr> <chr>
#> 1 3 Marie-Louise "guillaumin "
#> 2 5 Mitchell " Berry"
phonebook %>%
mutate(email = coord %>% str_extract("[:alnum:]+@[:alnum:]+\\.[:alnum:]+")) %>%
select(id, email) %>%
filter(!is.na(email))
#> # A tibble: 2 × 2
#> id email
#> <int> <chr>
#> 1 2 flores@mail.com
#> 2 5 berry@msn.com
phonebook %>%
mutate(
... = str_extract(coord, "[:alnum:]+@[:alnum:]+\\.[:alnum:]+"),
... = str_extract(coord, "([:digit:]|[:punct:]){10,14}+"),
address = str_...(coord),
address = case_when(
is.na(email) & is.na(phone) ~ address,
is.na(email) ~ ...(address, fixed(phone)),
is.na(phone) ~ str_remove_all(address, ...),
TRUE ~ address %>%
str_remove_all(...) %>%
str_remove_all(fixed(phone))
),
phone = str_remove_all(phone, ...)
) %>%
select(id, email, phone, address)
#> # A tibble: 5 × 4
#> id firstname lastname coord
#> <int> <chr> <chr> <chr>
#> 1 1 Steven "DIXON" " 6804 Preston Rd (563)-300-2113"
#> 2 2 ERIN "FLORES " "flores@mail.com (617)-990-5931 "
#> 3 3 Marie-Louise "guillaumin " "9464 Preston Rd, Dallas, TX 75225"
#> 4 4 Layla "BRYANT" "8046 Forest Ln, Humble, TX 77338"
#> 5 5 Mitchell " Berry" "(089).225.6911 berry@msn.com"
#> # A tibble: 5 × 4
#> id email phone address
#> <int> <chr> <chr> <chr>
#> 1 1 <NA> 5633002113 "6804 Preston Rd "
#> 2 2 flores@mail.com 6179905931 " "
#> 3 3 <NA> <NA> "9464 Preston Rd, Dallas, TX 75225"
#> 4 4 <NA> <NA> "8046 Forest Ln, Humble, TX 77338"
#> 5 5 berry@msn.com 0892256911 " "
i <- 1
glue::glue("the value of i is {i} it's little")
i <- 1
stringr::str_c("the value of i is ", i, " it's little")
x <- 1:4
y <- c("little", "not much", "not bad", "a lot")
glue::glue("the value of i is {x} it's {y}")
people %>%
mutate(bmi = round(weight / (height)**2, digits = 2),
text = glue::glue("the BMI of {firstname} is {bmi}")
)
#> # A tibble: 3 × 5
#> firstname weight height bmi text
#> <chr> <dbl> <dbl> <dbl> <glue>
#> 1 Teddy 131 2.04 31.5 the BMI of Teddy is 31.48
#> 2 Tom 0.1 0.5 0.4 the BMI of Tom is 0.4
#> 3 Carla 75 1.75 24.5 the BMI of Carla is 24.49
So? What did you think about it? Formation R - https://thinkr.fr 466 / 470
Satisfaction
So? What did you think about it? Formation R - https://thinkr.fr 467 / 470
Training Review
Resources