You are on page 1of 5

Lista 4

Nivaldo Torres

06/11/2020

R Markdown
This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and
MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the
output of any embedded R code chunks within the document. You can embed an R code chunk like this:
Questão 1 a)
# Load the per-name proportions and the yearly birth totals from the working
# directory. stringsAsFactors = FALSE keeps text columns as character on every
# R version (the default only became FALSE in R 4.0.0).
bnames <- read.csv("bnames.csv", stringsAsFactors = FALSE)
births <- read.csv("births.csv", stringsAsFactors = FALSE)
# Preview the first rows of the names table.
head(bnames)

## year name prop sex soundex


## 1 1880 John 0.081541 boy J500
## 2 1880 William 0.080511 boy W450
## 3 1880 James 0.050057 boy J520
## 4 1880 Charles 0.045167 boy C642
## 5 1880 George 0.043292 boy G620
## 6 1880 Frank 0.027380 boy F652
# Preview the first six rows of the yearly birth totals.
head(births, n = 6L)

## year sex births


## 1 1880 boy 118405
## 2 1881 boy 108290
## 3 1882 boy 122034
## 4 1883 boy 112487
## 5 1884 boy 122745
## 6 1885 boy 115948
b) Os 10 nomes mais populares de meninas em 2003
library(dplyr)

##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union

1
# Attach the yearly birth totals to every name record (matched on year and
# sex) and estimate the number of babies per name as round(prop * births).
bnames2 <- left_join(bnames, births, by = c("year", "sex"))
bnames2 <- mutate(bnames2, n = round(prop * births))
# year prints as a number in the data, so compare it with a number rather
# than with the string "2003".
meninas_2003 <- filter(bnames2, sex == "girl", year == 2003)
# Sort by count explicitly so the top 10 does not depend on the row order of
# the CSV file.
slice(arrange(meninas_2003, desc(n)), 1:10)

## year name prop sex soundex births n


## 1 2003 Emily 0.012819 girl E540 2003252 25680
## 2 2003 Emma 0.011327 girl E500 2003252 22691
## 3 2003 Madison 0.010077 girl M325 2003252 20187
## 4 2003 Hannah 0.008792 girl H500 2003252 17613
## 5 2003 Olivia 0.008055 girl O410 2003252 16136
## 6 2003 Abigail 0.007944 girl A124 2003252 15914
## 7 2003 Alexis 0.007407 girl A422 2003252 14838
## 8 2003 Ashley 0.007241 girl A240 2003252 14506
## 9 2003 Elizabeth 0.007025 girl E421 2003252 14073
## 10 2003 Samantha 0.006918 girl S553 2003252 13858
c) Nome1=Emily, Nome2=Madison
library(ggplot2)
# Yearly counts for the two chosen girl names, stacked into one data frame.
meninas_1 <- filter(bnames2, sex == "girl", name == "Emily")
meninas_2 <- filter(bnames2, sex == "girl", name == "Madison")
meninas_3 <- rbind(meninas_1, meninas_2)
# One line per name over the years. ggplot() + geom_line() replaces qplot(),
# which is deprecated in current ggplot2 releases.
ggplot(meninas_3, aes(x = year, y = n, color = name)) +
  geom_line()

20000

name
Emily
n

Madison
10000

1880 1920 1960 2000


year
d) Combinar os bancos de dados bnames e births e adicionar uma nova coluna n que mostre o número total de bebês
em cada ano para cada nome

2
library(dplyr)
# Attach the yearly birth totals to every name record (matched on year and
# sex) and add n, the estimated number of babies: round(prop * births).
bnames2 <- left_join(bnames, births, by = c("year", "sex"))
bnames2 <- mutate(bnames2, n = round(prop * births))
# View() opens a data viewer window, which may not be available when the
# document is knitted, so only call it in an interactive session.
if (interactive()) {
  View(bnames2)
}

e) Descreva sucintamente como usar a função “str_sub” do pacote “stringr” (library(stringr)). Esta função
extrai partes de uma string. Além da string, str_sub() recebe os argumentos de início (start) e fim (end), que
fornecem as posições (inclusivas) da substring.
library(stringr)
# Example: take characters 1 through 3 of each word.
x <- c("Apple", "Banana", "Pear")
str_sub(x, start = 1, end = 3)

## [1] "App" "Ban" "Pea"


f) Usando a função str_sub, adicione uma coluna no banco de dados criado na letra d) contendo a inicial
(primeira letra) de cada nome.
library(stringr)
# Add an `initial` column holding the first character of each name.
bnames3 <- bnames2 %>%
  mutate(initial = str_sub(name, start = 1, end = 1))

g) Inicial mais popular para meninos e para meninas considerando os dados de todos os anos?
library(stringr)
library(reshape2)
library(dplyr)
inicial_masculino <- bnames3 %>% select (year,sex,n, initial) %>% filter(sex=="boy") %>% group_by(initi

## `summarise()` ungrouping output (override with `.groups` argument)


inicial_feminino <- bnames3 %>% select (year,sex,n, initial) %>% filter(sex=="girl") %>% group_by(initi

## `summarise()` ungrouping output (override with `.groups` argument)


# Row-bind the boys' and girls' summaries into one object for plotting.
inicialmascfem <- rbind(inicial_masculino,inicial_feminino)

h) representar graficamente as informações da letra g - letra J para menino e letra M para menina
library(ggplot2)
# Frequency of each initial, coloured by sex, with point size mapped to the
# frequency. ggplot() replaces the deprecated qplot(), and the points are
# drawn in a single layer instead of being plotted twice.
fig1 <- ggplot(
  inicialmascfem,
  aes(x = initial, y = frequencia, color = sexo)
) +
  geom_point(aes(size = frequencia))
fig1

3
frequencia
2e+07
5.0e+06
1.0e+07
1.5e+07
frequencia

2.0e+07
2.5e+07

1e+07
sexo
feminino
masculino

0e+00

A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
initial

options(scipen=999)
fig2 <- ggplot(data= inicialmascfem, aes(x=initial,y= frequencia, fill= sexo)) + geom_bar(stat="identity
fig2

4
20000000
frequencia

sexo
feminino
masculino
10000000

A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
initial

You might also like