Professional Documents
Culture Documents
DeepDive R
DeepDive R
D e e p
D i v e
R
R . 4,400 18,000 R , 80 R
. R R
. R R
.
PDF IDG Korea , .
IDG Korea PDF , .
part
R
Sharon Machlis | ComputerWorld
.
R ,
R ? R
. 2009
R
.
R
R . R(RStudio)
.
.
/(Reinhart/Rogoff) .
.
.
, .
.
R .
R ? R
. R .
D. 10
R
R
.
. R
R
1 IT World
. ,
1 | R
,
.
R
R
R . r-project.org
(www.r-project.org) .
OS X
iOS .
R (Integrated Development Environment)
R
(RStudio) . R
(www.rstudio.com/ide)
R R .
. R 4
, , ,
R .
. R
R
. R
. .
,
. () .
, ,
.
R R 3
.
- . . R
.
.
- + () ( + () ). .
.
. .
- + ( + ).
. / +
.
IT World 2
R R
(www.rstudio.com/ide/docs) .
setwd() .
setwd("~/mydirectory")
(\) (/)
.
setwd("C:/Sharon/Documents/RProjects")
R
R,
(Install Packages) .
.
. .
(Revolution Analytics)
installed.packages()
R .
- .
.
- .
R
.
.
500 R
.
library("thepackagename")
- . R .
- .
R .
- . R .
.
.
3 IT World
update.packages()
.
remove.packages("thepackagename")
.
?functionName
.
help(functionName)
help() ?
?functionName .
.
example(functionName)
, . (args)
args(functionName) . R
.
help.search("your search term")
.
??("my search term")
IT World 4
part
R
Sharon Machlis | ComputerWorld
R . R
.
. R .
R
(Data Set) . R
.
data()
( )
(Plotting) .
.
mtcars
. (
360 .
)
, .
mtcars
. R
print() R .
R (CSV, Comma-Separated Value)
. filename.txt CSV
mydata R .
5 IT World
<-
. R (Assignment Operator). R
.
5 .
(Object) ?
mydata (Data Frame)
(Data Type) .
.
read.csv (Header Row)
, 1 . Header Row
header = FALSE .
mydata <- read.csv("filename.txt", header=FALSE)
, R (Column Header)
( Column Header . ).
, R
read.table . (Tab) ,
.
mydata <- read.table("filename.txt", sep="\t", header=TRUE)
.
?
R , , R
.(Good)
,(Average)(Poor)
.
. , read.table
stringsAsFactors=FALSE .
mydata <- read.table("filename.txt", sep="\t",
header=TRUE, stringsAsFactors=FALSE)
IT World 6
, R
. , R
(RStudio) ,
URL .
, R R
. , -
.
(Snippet)
, +C
R . Header Row
, , (x) .
x <- read.table(file = "clipboard", sep="\t", header=TRUE)
, SPSS, SAS, Stata R
. . (Perl)
,
CSV . ,
UCLAR
(http://www.dummies.com/howto/content/how-to-use-the-clipboard-to-copy-and-paste-data-in.html)
.
R , RPostgreSQL, RMySQL, RMongo, RSQLite
RODBC . R CRAN
(http://cran.r-project.org/web/packages/available_packages_by_name.html)
.
read.csv() read.table()
7 IT World
.
R ? CSV
. , -
- >
. , URL
URL .
. .
.
. CSV
. R .
mydata <- read.csv("http://bit.ly/10ER84j")
. ,
(http://www.pewresearch.org) CSV
. pew_data .
pew_data <- read.csv("http://bit.ly/11I3iuU")
R .
, header row ,
.
.
R
.
R R
.
, rPfid
Quantmod
.
Quantmod ,
barChart
4 .
R
.
IT World 8
install.packages("quantmod")
library("quantmod")
getSymbols("AAPL")
barChart(AAPL)
2 ? .
barChart(AAPL, subset="last 14 days")
chartSeries(AAPL, subset="last 14 days")
, .
barChart(AAPL["2013-04-01::2013-04-12"])
,
. . ,
.
? , R
( ) ,
. ,
.
9 IT World
. . R
, .
save.image()
.RData .
.
R , R .
, R
. .RData.
, .RData
. R
.
save(variablename, file="filename.rda")
.
load("filename.rda")
IT
www.twitter.com/ITWorldKR
www.facebook.com/ITworld.Korea
IT World 10
part
Sharon Machlis | ComputerWorld
1, 2 R R
. ?
.
R 2 -
- , .
1 (Vector) . .
head(mydata)
.
head(mydata, 10)
.
tail(mydata, 10)
tail() ,
11 IT World
. ,
. R ,
str() .
str(mydata)
. ,
(
1 | PlantGrowth str()
R (Observations)) (R )
.
str() (, , )
.
8, [1:8] .
. mydata
, .
colnames(mydata)
, -
- .
rownames(mydata)
R
. ,
2 | diamonds summary
summary() .
summary(mydata)
. , , , 1
, 3 , .
, , , , (Factor) ,
.
summary() 1
.
, psych .
.
IT World 12
install.packages("psych")
.
.
library(psych)
psych , R library
. .
describe(mydata)
, , , (
) ( )
.
R mean(), median(), min(), max(), sd() ( ), var() (),
1 range()
. ( - , mean() median() - 2
).
, mode() (Mode)
. mfv() () modeest
.
R
. , , , , (
) . , cor() .
cor(mydata)
3 |
.
. ,
R NA . ,
summary()
, mean() 1 NA .
, NA .
na.rm=TRUE
13 IT World
NA .
.
mean(myvector, na.rm=TRUE)
,
.
?median
na.rm
. (Outlier)
mean() (Trim) .
R . , 15
4 ?
15! / (4! x 11!) R choose() .
choose(15,4)
, 5
. mypeople .
mypeople <- c("Bob", "Joanne", "Sally", "Tim", "Neal")
, c() (combine) .
4 |
combn() .
.
combn(mypeople, 2)
R ,
. , 2
.
combn(c("Bob", "Joanne", "Sally", "Tim", "Neal"),2)
15
IT World 14
. 3
. R
.
,
( , 1, 2, 3, ) . , mtcars
mpg, cyl, disp, hp, drat, wt, qsec, vs, am, gear, carb.
?
str(mtcars) head(mtcars) .
names(mtcars)
mtcars.colnames ( )
.
mtcars.colnames <- names(mtcars)
. mtcars mpg , R
.
mtcars$mpg
,
.
dataframename$columnname
1 .
[1] 21.0 21.0 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 17.8
[12] 16.4 17.3 15.2 10.4 10.4 14.7 32.4 30.4 33.9 21.5 15.5
[23] 15.2 13.3 19.2 27.3 26.0 30.4 15.8 19.7 15.0 21.4
, .
. , [1] .
, 11 , [12]
.
15 IT World
.
, mtcars$mpg
.
.
--
. ,
[, ] . ,
. mtcars 2 ~ 4 ,
.
mtcars[,2:4]
2:4 ?
,
.
,
. 3 2 4
, .
mtcars[,c(2,4)]
.
- R 0 1 . [0] [1].
- R . mtcars$mpg mtcars$MPG .
- mtcars[,-1]
. , R .
mtcars[, -1] .
- , c() . c mtcars[,
(2,4)] . c .
mtcars[,c(2,4)]
mpg > 20
?
.
mtcars$mpg>20
mpg 20 . ,
IT World 16
.
[1] TRUE TRUE TRUE TRUE FALSE FALSE FALSE TRUE TRUE
[10] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE
[19] TRUE TRUE TRUE FALSE FALSE FALSE FALSE TRUE TRUE
[28] TRUE FALSE FALSE FALSE TRUE
, --
. .
mtcars[mtcars$mpg>20,]
.
mtcars[mtcars$mpg>20,c("mpg","hp")]
mpg
mtcars$mpg
? R .
, .
17 IT World
.
.
mtcars[mpg20, cols]
?
,
. attach()
.
attach(mtcars)
, .
mpg20 <- mtcars$mpg > 20
.
mpg20 <- mpg > 20
attach() detach() .
detach()
R , . ,
subset() .
.
subset( , , )
IT World 18
mpg ?
subset(mtcars, mpg==max(mpg))
mpg mpg .
subset(mtcars, mpg==max(mpg), mpg)
subset ,
.
subset(mtcars, , c("mpg", "hp"))
, select= .
subset(mtcars, select=c("mpg", "hp"))
, table . diamonds
, .
5 | R table()
table(diamonds$cut)
- , , ,
.
?
table(diamonds$cut, diamonds$color)
R
, .
R (http://health.
adelaide.edu.au/psychology/ccs/docs/lsr/lsr-0.3.pdf) . 500 PDF
. , R Cookbook R in a Nutshell
.
19 IT World
part
Sharon Machlis | ComputerWorld
. , 2
, . R mtcars
. disp x mpg y
.
plot(mtcars$disp, mtcars$mpg)
. , .
x y , xlab ylab .
, main .
plot(mtcars$disp, mtcars$mpg, xlab="Engine displacement", ylab="mpg",
main="MPG compared with engine displacement")
y 90 ( ) , las=1
.
plot(mtcars$disp, mtcars$mpg, xlab="Engine displacement", ylab="mpg",
main="MPG vs engine displacement", las=1)
1 | R
R ,
R
.
IT World 20
2 | R
. .
? ,
,
. R
.
ggplot2
ggplot2
. ggplot2
.
,
.
3 | Qplot() ggplot2
ggplot2 , .
install.packages("ggplot2")
. ggplot2
, library() R ggplot2
- R .
library(ggplot2)
ggplot2 . ggplot2 R
plot() qplot()
.
.
qplot(disp, mpg, data=mtcars)
qplot R y . ,
y 0 (
y 0 ).
y , ylim .
qplot(disp, mpg, ylim=c(0,35), data=mtcars)
1
. ,
21 IT World
ggplot2
.
qplot(cty, hwy, data=mpg)
geom jitter
.
qplot(cty, hwy, data=mpg, geom="jitter")
, ggplot2 ,
. ggplot() . ,
ggplot2 ggplot2() ggplot(). ggplot()
plot() qplot() ; .
ggplot(mtcars, aes(x=disp, y=mpg)) + geom_point()
ggplot() mtcars .
. ,aes()
geom_point()
?aes
(aesthetics)
- , , .geom
, ,
.
,
4 |
. ( )
? R
geom_point() geom_line() .
ggplot(pressure, aes(x=temperature, y=pressure)) +
geom_line()
. pressure
,
,y=pressure
pressure .
, x y .
, .
Y ggplot ylim
. mydata, xcol x
ycol y
IT World 22
. ylim .
ggplot(mydata, aes(x=xcol, y=ycol), ylim=0) + geom_line()
.
ggplot(pressure, aes(x=temperature, y=pressure)) + geom_line() +
geom_point()
.
, , .
. (ggplot2)
The Graphics Cookbook
.
R BOD R barplot()
. BOD , .
barplot(BOD$demand)
main="Graph of demand"
.
barplot(BOD$demand, main="Graph of demand")
x , names.arg
.
5 | ggplot
barplot(BOD$demand, main="Graph of demand", names.arg = BOD$Time)
,
. R table()
pressure
.
The R Graphics Cookbook mtcars 4-, 6-, 8-
. cyl
, R mtcars$cyl .
table()
. cylcount .
temperature
23 IT World
6 | R barplot()
cylcount .
4 6 8
11 7 14
.
barplot(cylcount)
7 |
ggplot2 qplot() .
qplot(mtcars$cyl)
, 4, 6, 8 4 8
, 5 7
. , , 4 ~ 8
4 6 8
8 |
.
qplot(factor(mtcars$cyl))
ggplot() ,
.
ggplot(mtcars, aes(factor(cyl))) + geom_bar()
(Bucket) (Bin)
, . R
.
hist(mydata$columnName, breaks = n)
columnName mydata
, n . ggplot2 .
IT World 24
ggplot() .
ggplot(mydata, aes(x=columnName)) + geom_histogram(binwidth=n)
ggplot() . ggplot()
, ,
.
R . , 1, 3
(Boxplot) boxplot()
. mtcars mpg boxplot
.
boxplot(mtcars$mpg)
ggplot2 x, y, z
.
boxplot(diamonds$x, diamonds$y, diamonds$z)
R (Heat Map) ,
. FlowingData . Corrplot (http://cran.r-project.org/web/packages/corrplot/corrplot.pdf)
(http://blog.revolutionanalytics.com/2013/03/rs-garden-of-probability-distributions.html) .
6 .
. , R
.
R RGB , HSV(Hue, Saturation, Value. ,
) .
,
,
,
,
. R 657
. colors() colours() - R
- .
, ,
25 IT World
PDF . , .
rainbow
heat n
R .
rainbow(n)
heat.colors(n)
terrain.colors(n)
topo.colors(n)
cm.colors(n)
rainbow 5 , .
rainbow(5)
.
?rainbow
, ?
. ggplot() 3 rainbow
3 . .
mycolors <- rainbow(3)
heat.colors .
9 | R 3
geom_bar ,
geom_bar() fill=mycolors .
ggplot(mtcars, aes(x=factor(cyl))) + geom_bar(fill=mycolors)
.
.
ggplot(mtcars, aes(x=factor(cyl))) + geom_bar(fill=rainbow(3))
IT World 26
. R
.
barplot(BOD$demand, col=rainbow(6))
( ) , ,
barplot(BOD$demand, col="royalblue3")
,
. R ,
, .
testscores <- c(96, 71, 85, 92, 82, 78, 72, 81, 68, 61, 78, 86, 90)
barplot .
barplot(testscores)
.
barplot(testscores, col="blue")
80 ?
,
. , 96,
. 71,
; . ,
! .
testcolors <- ifelse(testscores >= 80, "blue", "red")
, testscores (Loop)
. testscores 80
, testcolors blue
. testcolors red
. , testcolors
.
barplot(testscores, col=testcolors)
27 IT World
,
. .
barplot(testscores, col=testcolors, main="Test scores")
y 0 100 .
barplot(testscores, col=testcolors, main="Test scores", ylim=c(0,100))
90 las=1 .
barplot(testscores, col=testcolors, main="Test scores", ylim=c(0,100), las=1)
, .
10 |
, ,
testscores .
testscores <- sort(c(96, 71, 85, 92, 82, 78, 72, 81, 68, 61,
78, 86, 90), decreasing = TRUE)
sort() . decreasing =
TRUE .
,
, .
testscores <- c(96, 71, 85, 92, 82, 78, 72, 81, 68, 61, 78, 86, 90)
testscores_sorted <- sort(testscores, decreasing = TRUE)
results students,
testscores , ggplot2 ggplot()
.
ggplot(results, aes(x=students, y=testscores)) +
geom_bar(fill=testcolors, stat = "identity")
identity
? y
. ggplot2 qplot()
.
mtcars 4-, 6-, 8 .
IT World 28
, , R
. R
(www.harding.edu/fmccown/r/) .
The R Graphics Cookbook(www.amazon.com/R-Graphics-Cookbook-Winston-Chang/dp/1449316956) . ggplot2
(http://docs.ggplot2.org/current/geom_bar.html)
.
11 |
R R . R
.
R ,
.
R , jpeg(), png(),
svg() pdf() (Container)
.
.
jpeg("myplot.jpg", width=350, height=420)
.
12 | R
barplot(BOD$demand, col=rainbow(6))
.
dev.off()
. ggplot2 ,
ggsave() , ggplot2
. ggsave()
- myplot.jpg JPEG, myplot.png
PNG .
R ,
.
, .
29 IT World
part
R
Sharon Machlis | ComputerWorld
R . ,
. R
.
(=)
. , x = 3 x 3 . R
. . R <- .
x <- 3
.
x = 3
, R -
. , ?
<-
. R
(http://google-styleguide.googlecode.com/
svn/trunk/google-r-style.html#assignment)
. , R 5
, R
.
R . x X
. R . , subset() Subset()
. ( , /) c ,
.
myArray = array(1, 1, 2, 3, 5, 8);
IT World 30
, .
myArray = [1, 1, 2, 3, 5, 8]
, R .
c() .
my_vector <- c(1, 1, 2, 3, 5, 8)
c . R c()
. ( ) c() ,
( ) .
(:) .
my_vector <- (1:10)
R c
. ,
c .
my_vector <- c(1:10)
c()
.
. .
my_vector <- c(1, 4, "hello", TRUE)
2 ,
. , c()
,
. , my_vector "1", "4", "hello", "TRUE"
. , c()
.
, (list)
. c() list() .
31 IT World
1, 4, hello TRUE
.
for
while
. R . R for, while repeat
, apply() plyr()
. ,
. .
my_vector <- c(7,9,23,5)
, 0.01 ,
? for, foreach while . ,
my_pct_vector .
my_pct_vector <- my_vector * 0.01
. ,
. .
apply() plyr .
apply
6 . R
R
.
apply() 2
. 2 ,
. 1 2
.
apply(my_matrix, 1, median)
my_matrix
apply(my_matrix, 2, median)
IT World 32
Bill
, Bob
, Sue
. ,
.
33 IT World
1 | 1
[1]
1
. ,
,
.
TRUE/FALSE
, . ( , (3, 8,
small) 3
(
,8,
small
)
). , R 3 3.0 . 3
3L as.integer() . ,
class() .
class(3)
class(3.0)
class(3L)
class(as.integer(3))
.
,
.
( ) , R
.
.
, summary() psych describe()
.
.
, . , class() str()
data.frame , mode() .
. (factor).
IT World 34
. ,
, (
) .
.
,
,
.
R
R , . , R
,
.
? R
( ). list()
, . , R
. ? pwd .
getwd() .
rm(my_variable) . R grep()
. R grep regular-expressions.info R
(www.regular-expressions.info/rlanguage.html)
.
R (Expression)
R (
, ). , R
(, ) .
? R (Interpreter)
.
, R
. ,
+ - . R
.
: R SQL
SQL R - , R
- SQL
SELECT .
. sqldf R SQL
.(R ).
sqldf , .
35 IT World
sqldf("select * from mtcars where mpg > 20 order by mpg desc")
mtcars mpg 20
mpg mpg . R R
. SQL R .
, . R
.
2 | edit() R
GUI
,
, R
. edit() R
.
edit(mtcars)
. ,
-
-
, /
. R
( ) .
save.image() R
,
. ,
, ,
.
write.table(myData, "testfile.txt", sep="\t")
myData R testfile.txt . sep="\t" sep="," .
IT World 36
part
Sharon Machlis | ComputerWorld
. R
, , , /,
/ .
Introduction to Data Science( , http://jsresearch.net/wiki/projects/
teachdatascience) : , .
? R
,
R .
, (R OAuth
) , (Map Mashup) R
.
,
. iOS
PDF - iOS .
.
R Cookbook : (O'Reilly) Cookbook , R
,
.
. R ,
.
(quantitative developer) .
R Graphics Cookbook : R ,
ggplot2
. R ,
, . R
.
R in a Nutshell : R -
, , - .
, R
37 IT World
R .
.
Visualize This : R , R
.
. Flowing Data(
)
.
R For Dummies : , .
(Dummies) , .
Dummies.com R , R
R apply 100 .
.
Exploring Everyday Things with R and Ruby :
, .
. ,
ggplot2 R
. ,
R .
Cookbook for R(www.cookbook-r.com) : R Cookbook
, (R Graphics Cookbook )
,
. Cookbook . ,
? .
Quick-R(www.statmethods.net) :
. ,(Stats)
(Frequencies and Crosstab)
table()
. (R )
( ) . R in Action( R)
I. .
R Reference Card(http://cran.r-project.org/doc/contrib/Short-refcard.pdf) :
, (2004
) 4
IT World 38
R . ,
, .
(Juice Labs Chart Chooser) .
Twotorials(www.twotorials.com) : 2 (Twotorial)
. 2
,
. R
R
2 2 R
.R 10, 15, 20,000
. ,
.
Up and Running with R(www.lynda.com/R-tutorials/Up-Running-R/120612-2.html) : Lynda.com R , ,
. ,
. Lynda.com 25 ,
7
.
Coursera: Computing for Data Analysis(www.coursera.org/course/compdata)
: .
. 2013 9. ,
;
. , ,
39 IT World
.
Coursera: Data Analysis : R R
. R
, .
,
.
Coursera: Statistics One(www.coursera.org/course/stats1) : 9
R 12
,
. R R
. .
Try R(http://tryr.codeschool.com) :
. ,
, R
.
An Introduction to R(http://cran.r-project.org/doc/manuals/R-intro.pdf) : R
Project . R
. Dl , R Core Team
, .
Learning statistics with R(http://health.adelaide.edu.au/psychology/ccs/
docs/lsr/lsr-0.3.pdf) :
(PDF).
,
.
.
IT World 40
r4stats.com(http://r4stats.com) : R
R .
, ggplot2 R
. R SAS SPSS
.
Getting started with charts in R(http://flowingdata.com/2012/12/17/getting-started-with-charts-in-r) : FlowingData
R
( FlowingData
, 2013 5 .)
Using R for your basic statistical Needs LISA Short Course(www.lisa.stat.vt.
edu/sites/default/files/Using_R_for_Your_Basic_Statistical_Needs.r) :
, R R
( ) .
, ,
.
. -2log(Likelihood) +
kp? AIC #step() .
k=2 # p = ( ).
(Laboratory for Interdisciplinary
Statistical Analysis) .
Producing Simple Graphs with R(www.harding.edu/fmccown/r) : 6
,
. .
Short courses(http://courses.had.co.nz) : R ggplot2
R
.
Quick introduction to ggplot2(http://blog.echen.me/2012/01/17/quick-introduction-to-ggplot2/) : , - - R
ggplot2 .
.
ggplot2_tutorial.R(http://rpubs.com/frank_davenport/6255) : RPubs.com
R , R
41 IT World
ggplot2 .
More and Fancier Graphics(http://ww2.coastal.edu/kingw/statistics/R-tutorials/graphs.html) : 1 R
R , locator() identify()
. B. .
Using R(www.sr.bham.ac.uk/~ajrs/R/index.html) : ,
. R ,
lattice ggplot2
.
. ASR(Astrophysics & Space
Research) .
The Undergraduate Guide to R(https://sites.google.com/site/undergraduat
eguidetor/manual-files) : , ,
, , R
. PDF ,
. NIH .
Introducing R(www.ats.ucla.edu/stat/r/seminars/intro.htm) : UCLA
.
Introducing R(http://data.princeton.edu/R/gettingStarted.html) :
.
R: A self-learn tutorial(www.nceas.ucsb.edu/files/scicomp/Dloads/RProgra
mming/BestFirstRTutorial.pdf) : UC PDF
. , .
Statistics with R Computing and Graphics R(www.stats.ox.ac.uk/~konis/
OUCS/ComputingGraphics.pdf) : ). PDF
, PDF (15) , R
( )
.
.
Little Book of R for Time Series (http://a-little-book-of-r-for-time-series.
IT World 42
readthedocs.org/en/latest) : R
.
.
Introduction to ggplot2(http://heather.cs.ucdavis.edu/~matloff/GGPlot2/
GGPlot2Intro.pdf) : UC N. 11 PDF ggplot
.
Twitter #rstats hashtag(https://twitter.com/search?q=%23rstats) :
, R .
, (Reddit) R .
Stackoverflow(http://stackoverflow.com/questions/tagged/r) :
R .
, .
R . , Meetup.com
. R .
R
, R
. .
RSeek(www.rseek.org) R
.
R site search(http://finzi.psych.upenn.edu/search.
html) R , (
)
(
) .
Google's R Style Guide(http://google-styleguide.
googlecode.com/svn/trunk/google-r-style.html) :
43 IT World
; R
. ,
R .
RStudio documentation(www.rstudio.com/ide/docs/) : R
, .
History of R Financial Time Series Plotting(http://timelyportfolio.github.io/
rCharts_time_series/history.html) : ,
,
. . .
. R , FAQ
R .
.
RStudio(www.rstudio.com) : R
, R (
). 2 R .
Tibco(www.tibco.com) : Dl R
Tibco Enterprise
Runtime for R Engines Tibco Enterprise Runtime for R Developers
Edition .
IT World 44