You are on page 1of 12

SQL in R: SQLDF Tutorial

Jasmine Dumas
August 19, 2016
General Queries (file:///Users/User/Downloads/sqldf_tutorial%20(4).html#general-queries)
Aggregate Queries (file:///Users/User/Downloads/sqldf_tutorial%20(4).html#aggregate-queries)
Wild card match Queries (file:///Users/User/Downloads/sqldf_tutorial%20(4).html#wild-card-match-queries)
Manipulation & Nested Queries
(file:///Users/User/Downloads/sqldf_tutorial%20(4).html#manipulation-nested-queries)
Join Queries (file:///Users/User/Downloads/sqldf_tutorial%20(4).html#join-queries)
Resources (file:///Users/User/Downloads/sqldf_tutorial%20(4).html#resources)

Example Data

# libraries
library(sqldf)

## Loading required package: gsubfn

## Loading required package: proto

## Loading required package: RSQLite

# with RH2 loaded, sqldf runs its queries on the H2 backend instead of SQLite
library(RH2)

## Loading required package: chron

## Loading required package: RJDBC

## Loading required package: DBI

## Loading required package: rJava


data("UCBAdmissions")

# must be a data frame
ucb <- as.data.frame(UCBAdmissions)

sqldf("select * from ucb")

## Admit Gender Dept Freq


## 1 Admitted Male A 512
## 2 Rejected Male A 313
## 3 Admitted Female A 89
## 4 Rejected Female A 19
## 5 Admitted Male B 353
## 6 Rejected Male B 207
## 7 Admitted Female B 17
## 8 Rejected Female B 8
## 9 Admitted Male C 120
## 10 Rejected Male C 205
## 11 Admitted Female C 202
## 12 Rejected Female C 391
## 13 Admitted Male D 138
## 14 Rejected Male D 279
## 15 Admitted Female D 131
## 16 Rejected Female D 244
## 17 Admitted Male E 53
## 18 Rejected Male E 138
## 19 Admitted Female E 94
## 20 Rejected Female E 299
## 21 Admitted Male F 22
## 22 Rejected Male F 351
## 23 Admitted Female F 24
## 24 Rejected Female F 317

# Faculty is drawn with runif() and no seed is set, so the values differ between runs
majors <- data.frame(
  major = c("math", "biology", "engineering", "computer science", "history", "architecture"),
  Dept = c(LETTERS[1:5], "Other"),
  Faculty = round(runif(6, min = 10, max = 30))
)

sqldf("select * from majors")
## major Dept Faculty
## 1 math A 12
## 2 biology B 26
## 3 engineering C 26
## 4 computer science D 16
## 5 history E 23
## 6 architecture Other 11

General Queries
# Return the female students' admission results
sqldf("select * from ucb where Gender = 'Female'")

## Admit Gender Dept Freq


## 1 Admitted Female A 89
## 2 Rejected Female A 19
## 3 Admitted Female B 17
## 4 Rejected Female B 8
## 5 Admitted Female C 202
## 6 Rejected Female C 391
## 7 Admitted Female D 131
## 8 Rejected Female D 244
## 9 Admitted Female E 94
## 10 Rejected Female E 299
## 11 Admitted Female F 24
## 12 Rejected Female F 317

# Return the admitted students
sqldf("select * from ucb where Admit = 'Admitted'")
## Admit Gender Dept Freq
## 1 Admitted Male A 512
## 2 Admitted Female A 89
## 3 Admitted Male B 353
## 4 Admitted Female B 17
## 5 Admitted Male C 120
## 6 Admitted Female C 202
## 7 Admitted Male D 138
## 8 Admitted Female D 131
## 9 Admitted Male E 53
## 10 Admitted Female E 94
## 11 Admitted Male F 22
## 12 Admitted Female F 24

# admitted students, ordered by frequency from highest to lowest
sqldf("select * from ucb where Admit = 'Admitted' order by Freq DESC")

## Admit Gender Dept Freq


## 1 Admitted Male A 512
## 2 Admitted Male B 353
## 3 Admitted Female C 202
## 4 Admitted Male D 138
## 5 Admitted Female D 131
## 6 Admitted Male C 120
## 7 Admitted Female E 94
## 8 Admitted Female A 89
## 9 Admitted Male E 53
## 10 Admitted Female F 24
## 11 Admitted Male F 22
## 12 Admitted Female B 17

# how many departments are in this table
sqldf("select distinct Dept from ucb")

## Dept
## 1 A
## 2 B
## 3 C
## 4 D
## 5 E
## 6 F
Aggregate Queries
# total admitted students
sqldf("select sum(Freq) from ucb where Admit = 'Admitted'")

## SUM("Freq")
## 1 1755

# total rejected students
sqldf("select sum(Freq) from ucb where Admit = 'Rejected'")

## SUM("Freq")
## 1 2771

# return total admitted males
sqldf("select sum(Freq) as total_dudes from ucb where Admit = 'Admitted' AND Gender = 'Male'")

## total_dudes
## 1 1198

# return total rejected females
sqldf("select sum(Freq) as total_ladies from ucb where Admit = 'Rejected' AND Gender = 'Female'")

## total_ladies
## 1 1278

# average (mean) number of admitted students by department
sqldf("select Dept, avg(Freq) as average_admitted from ucb where Admit = 'Admitted' group by Dept")
## Dept average_admitted
## 1 A 300.5
## 2 B 185.0
## 3 C 161.0
## 4 D 134.5
## 5 E 73.5
## 6 F 23.0

# how many majors are there
sqldf("select count(major) from majors")

## COUNT("major")
## 1 6

# minimum number of students rejected
sqldf("select min(Freq) from ucb where Admit = 'Rejected'")

## MIN("Freq")
## 1 8

Wild card match Queries


sqldf("select * from ucb where Freq between 20 AND 100")

## Admit Gender Dept Freq


## 1 Admitted Female A 89
## 2 Admitted Male E 53
## 3 Admitted Female E 94
## 4 Admitted Male F 22
## 5 Admitted Female F 24

sqldf("select * from ucb where Gender Like 'Fe%'")
## Admit Gender Dept Freq
## 1 Admitted Female A 89
## 2 Rejected Female A 19
## 3 Admitted Female B 17
## 4 Rejected Female B 8
## 5 Admitted Female C 202
## 6 Rejected Female C 391
## 7 Admitted Female D 131
## 8 Rejected Female D 244
## 9 Admitted Female E 94
## 10 Rejected Female E 299
## 11 Admitted Female F 24
## 12 Rejected Female F 317

# LIKE is case-sensitive on this backend (H2), so '%male%' matches 'Female' but not 'Male'
sqldf("select * from ucb where Gender Like '%male%'")

## Admit Gender Dept Freq


## 1 Admitted Female A 89
## 2 Rejected Female A 19
## 3 Admitted Female B 17
## 4 Rejected Female B 8
## 5 Admitted Female C 202
## 6 Rejected Female C 391
## 7 Admitted Female D 131
## 8 Rejected Female D 244
## 9 Admitted Female E 94
## 10 Rejected Female E 299
## 11 Admitted Female F 24
## 12 Rejected Female F 317

sqldf("select * from ucb where Gender Like 'Ma%'")
## Admit Gender Dept Freq
## 1 Admitted Male A 512
## 2 Rejected Male A 313
## 3 Admitted Male B 353
## 4 Rejected Male B 207
## 5 Admitted Male C 120
## 6 Rejected Male C 205
## 7 Admitted Male D 138
## 8 Rejected Male D 279
## 9 Admitted Male E 53
## 10 Rejected Male E 138
## 11 Admitted Male F 22
## 12 Rejected Male F 351

sqldf("select * from ucb where Gender = 'Female' AND Freq >= 100 ")

## Admit Gender Dept Freq


## 1 Admitted Female C 202
## 2 Rejected Female C 391
## 3 Admitted Female D 131
## 4 Rejected Female D 244
## 5 Rejected Female E 299
## 6 Rejected Female F 317

sqldf("select * from ucb where Gender Like '_ale'")

## Admit Gender Dept Freq


## 1 Admitted Male A 512
## 2 Rejected Male A 313
## 3 Admitted Male B 353
## 4 Rejected Male B 207
## 5 Admitted Male C 120
## 6 Rejected Male C 205
## 7 Admitted Male D 138
## 8 Rejected Male D 279
## 9 Admitted Male E 53
## 10 Rejected Male E 138
## 11 Admitted Male F 22
## 12 Rejected Male F 351

sqldf("select * from ucb where Gender NOT Like 'M_l_'")
## Admit Gender Dept Freq
## 1 Admitted Female A 89
## 2 Rejected Female A 19
## 3 Admitted Female B 17
## 4 Rejected Female B 8
## 5 Admitted Female C 202
## 6 Rejected Female C 391
## 7 Admitted Female D 131
## 8 Rejected Female D 244
## 9 Admitted Female E 94
## 10 Rejected Female E 299
## 11 Admitted Female F 24
## 12 Rejected Female F 317

Manipulation & Nested Queries


# Which department had the most admitted students = A
sqldf("select Dept from ucb where Freq = (select max(Freq) from ucb where Admit = 'Admitted')")

## Dept
## 1 A

# which department had the most admitted Female students = C
# (the subquery must also filter on Admit; with Gender alone, max(Freq) is the 391 rejected females in C)
sqldf("select Dept from ucb where Freq = (select max(Freq) from ucb where Gender = 'Female' AND Admit = 'Admitted')")

## Dept
## 1 C

# department with most faculty
sqldf("select Dept from majors where Faculty = (select max(Faculty) from majors)")

## Dept
## 1 B
## 2 C
Join Queries
# join the two tables together by the common key
sqldf("select * from ucb
       inner join majors on ucb.Dept = majors.Dept")

## Admit Gender Dept Freq major Dept Faculty


## 1 Admitted Male A 512 math A 12
## 2 Rejected Male A 313 math A 12
## 3 Admitted Female A 89 math A 12
## 4 Rejected Female A 19 math A 12
## 5 Admitted Male B 353 biology B 26
## 6 Rejected Male B 207 biology B 26
## 7 Admitted Female B 17 biology B 26
## 8 Rejected Female B 8 biology B 26
## 9 Admitted Male C 120 engineering C 26
## 10 Rejected Male C 205 engineering C 26
## 11 Admitted Female C 202 engineering C 26
## 12 Rejected Female C 391 engineering C 26
## 13 Admitted Male D 138 computer science D 16
## 14 Rejected Male D 279 computer science D 16
## 15 Admitted Female D 131 computer science D 16
## 16 Rejected Female D 244 computer science D 16
## 17 Admitted Male E 53 history E 23
## 18 Rejected Male E 138 history E 23
## 19 Admitted Female E 94 history E 23
## 20 Rejected Female E 299 history E 23

# left join: keep every row of the left table (ucb), with nulls where the right table (majors) has no match
sqldf("select * from ucb left join majors on ucb.Dept = majors.Dept")
## Admit Gender Dept Freq major Dept Faculty
## 1 Admitted Male A 512 math A 12
## 2 Rejected Male A 313 math A 12
## 3 Admitted Female A 89 math A 12
## 4 Rejected Female A 19 math A 12
## 5 Admitted Male B 353 biology B 26
## 6 Rejected Male B 207 biology B 26
## 7 Admitted Female B 17 biology B 26
## 8 Rejected Female B 8 biology B 26
## 9 Admitted Male C 120 engineering C 26
## 10 Rejected Male C 205 engineering C 26
## 11 Admitted Female C 202 engineering C 26
## 12 Rejected Female C 391 engineering C 26
## 13 Admitted Male D 138 computer science D 16
## 14 Rejected Male D 279 computer science D 16
## 15 Admitted Female D 131 computer science D 16
## 16 Rejected Female D 244 computer science D 16
## 17 Admitted Male E 53 history E 23
## 18 Rejected Male E 138 history E 23
## 19 Admitted Female E 94 history E 23
## 20 Rejected Female E 299 history E 23
## 21 Admitted Male F 22 <NA> <NA> NA
## 22 Rejected Male F 351 <NA> <NA> NA
## 23 Admitted Female F 24 <NA> <NA> NA
## 24 Rejected Female F 317 <NA> <NA> NA

# right join: keep every row of the right table (majors), with nulls where the left table (ucb) has no match
sqldf("select * from ucb right join majors on ucb.Dept = majors.Dept")
## major Dept Faculty Admit Gender Dept Freq
## 1 math A 12 Admitted Male A 512
## 2 math A 12 Rejected Male A 313
## 3 math A 12 Admitted Female A 89
## 4 math A 12 Rejected Female A 19
## 5 biology B 26 Admitted Male B 353
## 6 biology B 26 Rejected Male B 207
## 7 biology B 26 Admitted Female B 17
## 8 biology B 26 Rejected Female B 8
## 9 engineering C 26 Admitted Male C 120
## 10 engineering C 26 Rejected Male C 205
## 11 engineering C 26 Admitted Female C 202
## 12 engineering C 26 Rejected Female C 391
## 13 computer science D 16 Admitted Male D 138
## 14 computer science D 16 Rejected Male D 279
## 15 computer science D 16 Admitted Female D 131
## 16 computer science D 16 Rejected Female D 244
## 17 history E 23 Admitted Male E 53
## 18 history E 23 Rejected Male E 138
## 19 history E 23 Admitted Female E 94
## 20 history E 23 Rejected Female E 299
## 21 architecture Other 11 <NA> <NA> <NA> NA

Resources
https://cran.r-project.org/web/packages/sqldf/sqldf.pdf (https://cran.r-project.org/web/packages/sqldf/sqldf.pdf)
https://github.com/ggrothendieck/sqldf (https://github.com/ggrothendieck/sqldf)
http://www.w3schools.com/sql/default.asp (http://www.w3schools.com/sql/default.asp)

fin.

You might also like