You are on page 1 of 62

library(verification)

library(Hmisc)
library(Design)
library(rpart)
library(survival)
library(Epi)
require(MASS)
library(car)
library(graphics)
library(wle)
library(epiR)
library(stats)
library(epicalc)
library(hexbin)
library(ISwR)
library(sma)
library(geneplotter)
library(beeswarm)
library(gstat)
library ("fBasics")
library ("Rcmdr")
library(tcltk)
library(scatterplot3d)
library(amap)
library(pwr)
library(Biobase)
library(mclust)
library(ClassDiscovery)
library(scatterplot3d)
data(name of data base)
attach(name of data base)
detach(name of data base)

# Install the packages used throughout these notes in a single call.
# `dependencies = TRUE` is spelled out in full: the notes relied on the
# partially matched `dep = T`, and `T` is an ordinary variable that can be
# reassigned. The original list also requested a package named "stat";
# `stats` ships with R itself and is not installed separately, so that entry
# is dropped.
# NOTE(review): Biobase is normally obtained through Bioconductor, and some
# of the other packages may no longer be on CRAN -- confirm availability.
install.packages(
  c(
    "verification", "Hmisc", "Design", "rpart", "survival", "amap", "Epi",
    "MASS", "car", "wle", "epiR", "epicalc", "hexbin", "ISwR", "sma",
    "lattice", "beeswarm", "gstat", "Rcmdr", "pwr", "mclust",
    "ClassDiscovery", "Biobase", "scatterplot3d"
  ),
  dependencies = TRUE
)
source("http://bioinformatics.mdanderson.org/OOMPA/oompaLite.R")
oompaLite()
oompainstall(groupName="all")
source("http://www.stat.washington.edu/mclust/license.txt")


A:Principal

Components Analysis
PC <- princomp(~SET_pos_106_USE+X205225_at, cor=TRUE, data=IHC_465)
PC
Call:
princomp(formula = ~SET_pos_106_USE + X205225_at, data = IHC_465,
cor = TRUE)
Standard deviations:
Comp.1

Comp.2

1.3788400 0.3143252
2

variables and

465 observations.

unclass(loadings(PC)) #
Comp.1

Comp.2

SET_pos_106_USE -0.7071068

0.7071068

X205225_at

-0.7071068 -0.7071068

PC$sd^2 #
Comp.1

Comp.2

1.90119966 0.09880034

t( PC$sd * t( PC$loadings ) )[, drop = FALSE] #


Comp.1

Comp.2

SET_pos_106_USE -0.974987
X205225_at

0.2222615

-0.974987 -0.2222615

unclass(loadings(PC))[,c(1, 2)] #

Comp.1

Comp.2

SET_pos_106_USE -0.7071068
X205225_at

0.7071068

-0.7071068 -0.7071068

0.7
0.5

0.6

lumAScore

0.8

0.9

Scatter plot

10

20

30

40

50

ONCOTYPE_DX_RS_score

Variance

10

scatterplot(lumAScore~ONCOTYPE_DX_RS_score)

10
Average

15

# Smoothed density scatter plot of Variance against Average.
smoothScatter(Average, Variance, ylim = c(0, 10), xlim = c(0, 15))
# Same plot with cex.axis, cex.lab and cex.sub all set to 2.
# The argument name `cex.axis` had been split over two lines as
# `ce` / `x.axis`, which does not parse.
smoothScatter(Average, Variance, ylim = c(0, 10), xlim = c(0, 15),
              cex.axis = 2, cex.lab = 2, cex.sub = 2)

10

0.63

0.93

0.82

FOXM1_202580_x_at

0.59

0.91

0.85

MKI67_212021_s_at

0.77

0.71

TOP2A_FOXM1_NKI672

0.91

7 8 9

0.74

9.0

9.5

10

7.5

8.5

9.5

TOP2A_201292_at

11

13

8.5

GGIaverage

10

11

12

13

7.5

8.0

8.5

9.0

9.5 10.0

8.5

9.0

9.5

# Panel function for pairs(): writes the correlation of x and y in the
# centre of the panel, with the text size scaled by |r|.
#
# x, y     numeric vectors for the panel
# digits   number of significant digits shown
# prefix   text pasted in front of the coefficient
# cex.cor  base character expansion; when missing it is derived from the
#          width of the label so that the text fills most of the panel
# ...      further arguments forwarded by the caller; accepted and ignored
#
# Fixes relative to the earlier version:
#   * a supplied `cex.cor` is now honoured (previously `cex` was only
#     defined when `cex.cor` was missing);
#   * the user coordinates are restored with par(usr = usr) on exit.
panel.cor <- function(x, y, digits = 2, prefix = "", cex.cor, ...) {
  usr <- par("usr")
  on.exit(par(usr = usr), add = TRUE)
  # Work in unit coordinates so (0.5, 0.5) is the panel centre.
  par(usr = c(0, 1, 0, 1))
  r <- cor(x, y)
  # Formatting together with a long decimal fixes the number of digits shown.
  txt <- format(c(r, 0.123456789), digits = digits)[1]
  txt <- paste0(prefix, txt)
  if (missing(cex.cor)) {
    cex.cor <- 0.8 / strwidth(txt)
  }
  text(0.5, 0.5, txt, cex = cex.cor * abs(r))
}
pairs(~TOP2A_201292_at+FOXM1_202580_x_at+MKI67_212021_s_at+TOP2A_FOXM
1_NKI67+GGIaverage,
lower.panel=panel.smooth,

upper.panel=panel.cor,

data=

X203440_at

X202274_at

X201426_s_at

X201131_s_at

X201015_s_at

Mainz_ERpos_grade2)

X201015_s_at

X201131_s_at

X201426_s_at

X202274_at

X203440_at

data(all)

# name of data set is all

attach(all)
plot.cor(x= cor(all, method = "spearman" ),new = FALSE,labels =
names( all ),zlim
M106

M107

= c( -1.0, 1.0 ) )
M108

M111

M113

M116

M117

M106 1.0000000 0.7818627 0.5588235 0.6348039 0.6004902 0.7426471


0.5882353
M107 0.7818627 1.0000000 0.8161765 0.7720588 0.6666667 0.6936275
0.8382353
M108 0.5588235 0.8161765 1.0000000 0.7058824 0.7769608 0.7083333
0.9117647
M111 0.6348039 0.7720588 0.7058824 1.0000000 0.8627451 0.4607843
0.7867647
M113 0.6004902 0.6666667 0.7769608 0.8627451 1.0000000 0.5490196
0.7573529

M116 0.7426471 0.6936275 0.7083333 0.4607843 0.5490196 1.0000000


0.6838235
M117 0.5882353 0.8382353 0.9117647 0.7867647 0.7573529 0.6838235
1.0000000
M120 0.4460784 0.5563725 0.6936275 0.5784314 0.7401961 0.6078431
0.6274510
M121 0.5906863 0.5441176 0.7034314 0.8186275 0.8333333 0.5318627
0.6323529
M120

M121

M106 0.4460784 0.5906863


M107 0.5563725 0.5441176
M108 0.6936275 0.7034314
M111 0.5784314 0.8186275
M113 0.7401961 0.8333333
M116 0.6078431 0.5318627
cor( all, method = "spearman" )

Courier New
Non-parametric

Parametric

Wilcoxon

T-test

Kruskal wallis

ANOVA

Fisher

X2

Spearman

Pearson

Wilcoxon

t-test

Age- continuous value


group-categorical (0,1,2,,,)
data(aa)
attach(aa)
t.test(Age~group,var.equal=T,data=aa)
p-value = 0.4842
Wilcoxon (Boxplot: Kruskal-Wallis: Wilcoxon)
Age- continuous value
group-categorical (0,1,2,,,)
# Wilcoxon rank-sum test of Age between the levels of group.
# `var.equal` is a t.test() argument and has no meaning for wilcox.test(),
# so it has been removed from this call.
wilcox.test(Age ~ group, data = aa)
Wilcoxon rank sum test with continuity correction
p-value = 0.6755
Welch
Age- continuous value
group-categorical (0,1,2,,,)
data(aa)
t.test(Age~group,var.equal=F,data=aa)
Welch Two Sample t-test
p-value = 0.2998

Fisher Exact test


3*2
70 30
45

x<-matrix(c(70,30,

45,5,

6,3),nrow=3,ncol=2,byrow=T)

#nrow :

Number of ROW ncol:


of column (=
Number

)

2*3
42, 7,

36, 3,

x<-matrix(c(42,7,1,36,3,0),nrow=2,ncol=3,byrow=T)
fisher.test(x)

X2
70 30
45 2 X2 Fisher
Fisher

x<-matrix(c(70,30,45,5),2,byrow=T)
fisher.test(x)
Fisher's Exact Test for Count Data
p-value = 0.007264
Pearson's Chi-squared test
x<-matrix(c(70,30,45,5),2,byrow=T)
chisq.test(x)
Pearson's Chi-squared test with Yates' continuity correction
X-squared = 6.3773, df = 1, p-value = 0.01156
x<-matrix(c(7,6,10,14,9,18),2,byrow=T)
chisq.test(x)
Pearson's Chi-squared test
data:

X-squared = 0.17, df = 2, p-value = 0.9185

3X3
# Contingency table with three rows (marital status) and four columns
# ("0", "1-150", "151-300", ">300"), filled row by row.
# `<-` is required: `x<matrix(...)` compares x with the matrix instead of
# assigning it.
x <- matrix(
  c(652, 1537, 598, 242,
    36, 46, 38, 21,
    218, 327, 106, 67),
  nrow = 3, byrow = TRUE
)
colnames(x) <- c("0", "1-150", "151-300", ">300")
rownames(x) <- c("Married", "Prev.married", "Single")
x
0 1-150 151-300 >300
Married

1537

598

242

36

46

38

21

218

327

106

67

Prev.married

652

Single
chisq.test(x)

Pearson's Chi-squared test


data:

X-squared = 51.6556, df = 6, p-value = 2.187e-09

Cochran-Mantel-Haenszel Chi-Squared Test for Count Data v2.11.0


library(epicalc)
cc(cancer, snp)

Odds ratio from prospective/X-sectional study

Odds of outcome

OR = 2
1/2

95% CI = 0.72 , 5.81

1/4

non-exposed

exposed
Exposure category

cancer
snp

can Non Total

neg

24

25

49

pos

10

21

31

Total

34

46

80

OR =

95% CI = 0.72 5.81


Chi-squared = 2.17 ,

1 d.f. , P value = 0.14

Fisher's exact test (2-sided) P value = 0.168


mhor(cancer, snp,rec)
Stratified analysis by

rec

OR lower lim. upper lim. P value


rec non

2.68

0.749

11.22

0.105

rec rec

1.56

0.166

16.43

1.000

M-H combined 2.37

0.873

6.42

0.091

M-H Chi2(1) = 2.86 , P value = 0.091


Homogeneity test, chi-squared 1 d.f. = 0.23 , P value = 0.633

Stratified prospective/X-sectional analysis

Odds of outcome

recnon: OR = 2.68 (0.75, 11.22)


recrec: OR = 1.56 (0.17, 16.43)

1/2

MH-OR = 2.37 (0.87, 6.42)


homogeneity test P value = 0.633

1/4

Non-exposed

Exposed
Outcome= cancer , Exposure= snp

x<-matrix(c(70,30,45,5,10,13),2,byrow=T) #2 : Number of ROW (=yoko


retsu)

mantelhaen.test(x, y = NULL, z = NULL,


alternative = c("two.sided", "less", "greater"),
correct = TRUE, exact = FALSE, conf.level = 0.95)

x either a 3-dimensional contingency table in array form where each dimension

is at least 2 and the last dimension corresponds to the strata, or a factor object
with at least 2 levels. y a factor object with at least 2 levels; ignored if x is an
array. z a factor object with at least 2 levels identifying to which stratum the
corresponding elements in x and y belong; ignored if x is an array. alternative
indicates the alternative hypothesis and must be one of "two.sided", "greater"
or "less". You can specify just the initial letter. Only used in the 2 by 2 by K
case. correct a logical indicating whether to apply continuity correction when

computing the test statistic. Only used in the 2 by 2 by K case. exact a logical
indicating whether the Mantel-Haenszel test or the exact conditional test (given
the strata margins) should be computed. Only used in the 2 by 2 by K case.
conf.level confidence level for the returned confidence interval. Only used in

the 2 by 2 by K case.

Anova
Age- continuous value
group-categorical (0,1,2,,,)
data(hako)
anova(lm(Age~group,data=hako) )
group

308.2

154.1

1.1583 0.3303 (P )

Kruskal-Wallis (Boxplot: Kruskal-Wallis: Wilcoxon)


Age- continuous value
group-categorical (0,1,2,,,)
data(hako)
kruskal.test(Age~group,data=hako)
Kruskal-Wallis rank sum test
data:

Age by group

Kruskal-Wallis chi-squared = 2.3641, df = 2, p-value = 0.3066

3e-05
2e-05

Density

4e-05

5e-05

Centroid TN according to subgroup definition


by gene expression - MDA dataset

0e+00

1e-05

ERnegativeHer2negative (by gene)


Others (by gene)

-1e+05

-5e+04

0e+00

5e+04

Centroid TN

Density
Data frame: mda
bb, ERnegHer2neg: categorical value
dd, continuous value
Centroid_TN.TN<-mda$dd[mda$bb=="ERnegHer2neg"]

bb=="ERnegHer2neg"bb ERnegHer2neg categorical


mda: data dd: continuous value
Centroid_TN.notTN<-mda$dd[mda$bb!="ERnegHer2neg"]
# Kernel density of Centroid_TN.TN, with the density of Centroid_TN.notTN
# overlaid on the same axes. `lwd` sets the line width.
# The argument name `xlab` had been split over two lines (`x` / `lab`), and
# the first legend label had been broken across several lines.
plot(density(Centroid_TN.TN), col = 4, lwd = 3,
     main = "Kinome,,,,dataset", xlab = "Kinome score TN",
     xlim = c(-100000, 60000))
lines(density(Centroid_TN.notTN), lwd = 3, col = 2)
# locator(1) waits for one mouse click, which positions the legend.
legend(locator(1),
       legend = c("ERnegativeHer2negative (by gene) ", "Others (by gene) "),
       lty = c(1, 1), lwd = c(3, 3), col = c("blue", "red"))


x<-mda233$AR_211110_s_at[mda233$ERbyGENE=="1ERpos"]
y<-mda233$AR_211110_s_at[mda233$ERbyGENE=="2ERneg"]
# Density of y drawn with lty = 2, then the density of x added with
# lty = 1; `lwd` sets the line width.
# The axis label had been broken over several lines, and the tail of the
# trailing comment ("of line") had wrapped onto its own line as code.
plot(density(y), lwd = 3, xlab = "Log2 converted gene expression",
     xlim = c(0, 16), lty = 2, main = "", ylim = c(0, 0.6))
lines(density(x), lwd = 3, lty = 1)

0.3
0.0

0.1

0.2

Density

0.4

0.5

0.6

density.default(x = AKT1)

10
N = 286 Bandwidth = 0.1855

plot(density(AKT1))
abline(v=10) #Tate sen
abline(h=1.3) #Yoko sen

11

12

Histogram of Ozone

10

20

Frequency

20

10

Frequency

30

30

Histogram of Wind

10

15

20

Wind

50

100 150
Ozone

:
Data frame*: airquality
Wind: continuous value
Ozone: continuous value
attach(airquality)
shapiro.test(Wind)
W = 0.9858, p-value = 0.1178
shapiro.test(Ozone)
W = 0.8787, p-value = 2.790e-08
Wind Ozone
Histogram
layout(t(1:2))

hist(Wind)

main="Kinome,,,,dataset",xlab="Kinome

100, 600)
hist(Ozone)

10

15

20

Wind

3
# Three histograms side by side: layout() receives a 1 x 3 matrix whose
# cells number the plot regions.
# `1:2:3` is not a valid way to write the sequence 1, 2, 3; `1:3` is.
layout(t(1:3))
hist(Wind)
hist(Wind)
hist(Wind)

20
10

0
0

10

Frequency

20

Frequency

30
20
0

10

Frequency

Histogram of Ozone

30

Histogram of Ozone

30

Histogram of Wind

50

100
Ozone

150

50

100
Ozone

150

TN",xlim=c(-

10
8
2

Log2 converted data

12

14

Stripchart

ERposHER2negIBC_nonrec

HER2posIBC_nonrec

HER2posIBC_rec

TNIBC_nonrec

TNIBC_rec

data(IBC)
attach(IBC)
# Mean and standard deviation of TIG1 within each level of
# IBC_rec_vs_nonrec.
mHT <- tapply(TIG1, IBC_rec_vs_nonrec, mean)
sHT <- tapply(TIG1, IBC_rec_vs_nonrec, sd)
# x positions 1..5 shifted 0.15 to the right, so the summary marks sit
# beside the jittered points rather than on top of them.
IS <- 1:5 + 0.15
# `vertical = TRUE` is written out in full (was the partial match `vert=T`);
# the axis label no longer contains a line break.
stripchart(TIG1 ~ IBC_rec_vs_nonrec, method = "jitter", vertical = TRUE,
           ylab = "Log2 converted data")
points(IS, mHT, pch = 1)  # group means
# Mean +/- SD bars; code = 3 draws a head at both ends and angle = 90 makes
# the heads flat caps. The wrapped comment tail ("bar") that followed this
# call was being evaluated as code and has been folded into this comment.
arrows(IS, mHT - sHT, IS, mHT + sHT, code = 3, angle = 90, length = 0.1)

10000
9500
9000

PUM1_201166_s_atROW

8500
8000

1Control_SUM149

2HDAC_SUM149

3Control_SUM190

4HDAC_SUM190

stripchart(PUM1_201166_s_atROW~group,method="jitter",vert=T,pch=
15, cex = 1.5)

Order rank
Data frame attach weht attach(weht)
library(survival)  # R is case-sensitive: `Library()` is not a function
# tk is the data set; sort its rows by increasing IKS.
weht <- tk[order(tk$IKS), ]

# Plot IKS in rank order. Each point is coloured by the integer code of
# Molecular plus one and drawn with pch = 16. with() evaluates the call
# inside weht, so no attach()/detach() pair is needed.
# The x-axis label no longer contains a line break, and the wrapped comment
# tail that followed the call (a syntax error) has been removed.
with(weht, plot(IKS, col = unclass(Molecular) + 1, pch = 16,
                xlab = "Rank ordered by gene expression",
                ylab = "Immune Kinome Score"))
title("TRANSBIG data sets")
# NOTE(review): the legend colours are hard-coded; confirm that they agree
# with the colours produced by unclass(Molecular) + 1.
legend(locator(1),
       legend = c("ERnegHER2neg", "ERposHER2neg", "HER2pos"),
       lwd = c(3, 3, 3), col = c("blue", "red", "green"))

Logistic Regression Model


glm, lrm glm
Logistic Regression Model
Step2 95%
p
To obtain the confidence intervals and odds ratios regardless of the model,
run the following commands:
# Full logistic model. `<-` is required: `x<glm(...)` is a comparison, so
# no model was ever stored in x.
x <- glm(relapse ~ HER2byGENE + ERStatus + IKS3Label,
         family = binomial, data = wk)
summary(x)
exp(coef(x))     # odds ratios
exp(confint(x))  # 95% confidence intervals

# Same model, followed by stepwise selection with step().
x <- glm(relapse ~ HER2byGENE + ERStatus + IKS3Label,
         family = binomial, data = wk)
summary(x)
summary(x2 <- step(x))  # p values after stepwise selection
exp(coef(x2))           # odds ratios
exp(confint(x2))        # 95% confidence intervals

BR3<-cut(PTK6, c(0, 6,15))

#divide to 2 group

x<-glm(pCR~BR3,family=binomial,data=md233,
subset=(Molecular=="ERposHER2neg"))

#divide to sub group

summary(x2<-step(x))

Logistic Regression Model 2


relapse: event (1,0),

factor : categorical value continuous

value P-value
x<-lrm(relapse~ERstatusBYgene+MKS12,x=TRUE, y=TRUE,data=wk)
x
p < 0.05

fastbw(x) #

Logistic Regression Model 3


require(MASS)
data(birthwt)
attach(birthwt)
low<-factor(low)
race<-factor(race,labels=c("white","black","other"))
print(table(low,race))

race
low white black other
0

73

15

42

23

11

25

smoke<-(smoke>0);print(table(low,smoke))
smoke
low FALSE TRUE
0

86

44

29

30

bw<-data.frame(low,age,lwt,race,smoke,ptl,ht,ui,ftv)

#selection of

event+factor
detach(birthwt)
print(summary(res<-glm(low~.,family=binomial,data=bw)))
#low~age+lwt+,,
Call:
glm(formula = low ~ ., family = binomial, data = bw)
Deviance Residuals:
Min

1Q

Median

3Q

Max

-1.8946

-0.8212

-0.5316

0.9818

2.2125

Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept)

0.480623

1.196888

0.402

0.68801

age

-0.029549

0.037031

-0.798

0.42489

lwt

-0.015424

0.006919

-2.229

0.02580 *

1.272260

0.527357

2.413

0.01584 *

raceblack

raceother

0.880496

0.440778

1.998

0.04576 *

smokeTRUE

0.938846

0.402147

2.335

0.01957 *

ptl

0.543337

0.345403

1.573

0.11571

ht

1.863303

0.697533

2.671

0.00756 **

ui

0.767648

0.459318

1.671

0.09467 .

ftv

0.065302

0.172394

0.379

0.70484

--Signif. codes:

0 *** 0.001 ** 0.01 * 0.05 . 0.1 1

(Dispersion parameter for binomial family taken to be 1)


Null deviance: 234.67

on 188

degrees of freedom

Residual deviance: 201.28

on 179

degrees of freedom

AIC: 221.28
Number of Fisher Scoring iterations: 4
# Nagelkerke's R-squared for a fitted glm.
#
# rr  fitted model (or list) with components `deviance` and `null.deviance`
# n   number of observations
#
# Returns (1 - exp((D - D0) / n)) / (1 - exp(-D0 / n)), where D is the
# residual deviance and D0 the null deviance.
# NOTE(review): this treats the deviance as -2 * log-likelihood, which is
# the case for ungrouped binary responses -- confirm for other families.
#
# The earlier one-liner had misplaced parentheses and lacked the minus sign
# in the denominator, which is why it printed values such as -1.06e+87
# instead of a number between 0 and 1. Component names are now written out
# in full rather than relying on partial matching of `$dev` and `$null`.
NagelkerkeR2 <- function(rr, n) {
  dev_resid <- rr$deviance
  dev_null <- rr$null.deviance
  (1 - exp((dev_resid - dev_null) / n)) / (1 - exp(-dev_null / n))
}
print(NagelkerkeR2(res,nrow(bw)))
[1] -1.060980e+87
print(exp(coef(res)))
(Intercept)

age

smokeTRUE

ptl

1.6170819
2.5570281

lwt
ht

0.9708833

1.7217428

print(exp(confint(res)))
Waiting for profiling to be done...
2.5 %

ui

0.9846941

6.4449886

97.5 %

(Intercept) 0.1586248 17.7689406


age

0.9014649

1.0429731

lwt

0.9706547

0.9975382

raceblack

1.2733620 10.2378101

raceother

1.0269690

5.8422688

smokeTRUE

1.1753715

5.7425658

ptl

0.8838560

3.4765158

ht

1.7030020 27.6935195

raceblack
ftv

3.5689085

2.1546928

raceother

1.0674812

2.4120956

ui

0.8662663

5.3169672

ftv

0.7534567

1.4900589

print(summary(res2<-step(res))) #impotant!
Start:

AIC=221.28

low ~ age + lwt + race + smoke + ptl + ht + ui + ftv


Df Deviance

AIC

- ftv

201.43 219.43

- age

201.93 219.93

<none>

201.28 221.28

- ptl

203.83 221.83

- ui

204.03 222.03

- race

208.75 224.75

- lwt

206.80 224.80

- smoke

206.91 224.91

- ht

208.81 226.81

Step:

AIC=219.43

low ~ age + lwt + race + smoke + ptl + ht + ui


Df Deviance
- age

<none>

AIC

201.99 217.99
201.43 219.43

- ptl

203.95 219.95

- ui

204.11 220.11

- race

208.77 222.77

- lwt

206.81 222.81

- smoke

206.92 222.92

- ht

208.81 224.81

Step:

AIC=217.99

low ~ lwt + race + smoke + ptl + ht + ui


Df Deviance
<none>
- ptl

AIC

201.99 217.99
1

204.22 218.22

- ui

204.90 218.90

- smoke

207.73 221.73

- lwt

208.11 222.11

- race

210.31 222.31

- ht

209.46 223.46

Call:
glm(formula = low ~ lwt + race + smoke + ptl + ht + ui, family =
binomial,
data = bw)
Deviance Residuals:
Min

1Q

Median

3Q

Max

-1.9049

-0.8124

-0.5241

0.9483

2.1812

Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -0.086550

0.951760

-0.091

0.92754

lwt

-0.015905

0.006855

-2.320

0.02033 *

raceblack

1.325719

0.522243

2.539

0.01113 *

raceother

0.897078

0.433881

2.068

0.03868 *

smokeTRUE

0.938727

0.398717

2.354

0.01855 *

ptl

0.503215

0.341231

1.475

0.14029

ht

1.855042

0.695118

2.669

0.00762 **

ui

0.785698

0.456441

1.721

0.08519 .

--Signif. codes:

0 *** 0.001 ** 0.01 * 0.05 . 0.1 1

(Dispersion parameter for binomial family taken to be 1)


Null deviance: 234.67

on 188

degrees of freedom

Residual deviance: 201.99

on 181

degrees of freedom

AIC: 217.99
Number of Fisher Scoring iterations: 4
print(NagelkerkeR2(res2,nrow(bw))) #

[1] -2.138245e+87
print(exp(coef(res2)))

#impotant! To obtain odds ratio

(Intercept)

lwt

ptl

ht
0.9170901

1.6540303

raceblack

raceother

smokeTRUE

2.4524265

2.5567241

ui
0.9842205

6.3919640

3.7648926

2.1939368

print(exp(confint(res2))) #95% coeficient


Waiting for profiling to be done...
2.5 %

97.5 %

(Intercept) 0.1483796

6.2982590

lwt

0.9702639

0.9968395

raceblack

1.3555550 10.6805086

raceother

1.0617504

5.8773934

smokeTRUE

1.1846704

5.7092626

ptl

0.8552098

3.3045306

ht

1.6936142 27.2645876

ui

0.8879894

5.3881165

>
95 P

3.765

1.355

10.68

0.011

> library(survival)
Loading required package: splines
> library(MASS)
> data(wk)
> attach(wk)
>

x<-

glm(relapse~HER2byGENE+ERStatus+IKS3Label,family=binomial,data=wk)
> summary(x)

Call:
glm(formula = relapse ~ HER2byGENE + ERStatus + IKS3Label, family =
binomial,
data = wk)
Deviance Residuals:
Min

1Q

Median

3Q

Max

-1.0339

-0.9881

-0.8733

1.3427

1.5344

Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept)

-0.347302

0.352044

-0.987

0.324

HER2byGENEHER2pos

-0.041258

0.327125

-0.126

0.900

ERStatusER+

-0.009563

0.298193

-0.032

0.974

IKS3Label2Intermediate -0.106124

0.302806

-0.350

0.726

IKS3Label3High

0.323853

-1.268

0.205

-0.410629

(Dispersion parameter for binomial family taken to be 1)


Null deviance: 377.11

on 285

degrees of freedom

Residual deviance: 375.11

on 281

degrees of freedom

AIC: 385.11
Number of Fisher Scoring iterations: 4
> summary(x2<-step(x))
Start:

AIC=385.11

relapse ~ HER2byGENE + ERStatus + IKS3Label


Df Deviance

AIC

- IKS3Label

376.87 382.87

- ERStatus

375.12 383.12

- HER2byGENE

375.13 383.13

<none>
Step:

375.11 385.11
AIC=382.87

relapse ~ HER2byGENE + ERStatus


Df Deviance

AIC

- HER2byGENE

376.93 380.93

- ERStatus

377.03 381.03

<none>
Step:

376.87 382.87
AIC=380.93

relapse ~ ERStatus
Df Deviance
- ERStatus

AIC

377.11 379.11

<none>
Step:

376.93 380.93
AIC=379.11

relapse ~ 1

Call:
glm(formula = relapse ~ 1, family = binomial, data = wk)
Deviance Residuals:
Min

1Q

Median

3Q

Max

-0.9623

-0.9623

-0.9623

1.4089

1.4089

Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept)

-0.5295

0.1224

-4.325 1.53e-05 *** If we get the

significance, we get the data


--Signif. codes:

0 *** 0.001 ** 0.01 * 0.05 . 0.1 1

(Dispersion parameter for binomial family taken to be 1)


Null deviance: 377.11

on 285

degrees of freedom

Residual deviance: 377.11

on 285

degrees of freedom

AIC: 379.11
Number of Fisher Scoring iterations: 4
> exp(coef(x2))
(Intercept)
0.5888889
> exp(confint(x2))
Waiting for profiling to be done...
2.5 %

97.5 %

0.4619652 0.7469545
>

Last modified: Nov 17, 2004

R glm
R

glm(family=binomial)

y ~ x1+x2+x4
yx1x2x3

lr.data lr.data

x1, x2

data <- read.table("lr.data", header=TRUE)


result <- glm(y ~ x1+x2, data, family=binomial)
result
summary(result)
coefficients(result)
residuals(result)

result0 <- glm(y ~ 1, data, family=binomial)


anova(result0, result, test="Chisq")

> data <- read.table("lr.data", header=TRUE)


> result <- glm(y ~ x1+x2, data, family=binomial)
> result
Call:

glm(formula = y ~ x1 + x2, family = binomial, data = data)

Coefficients:
(Intercept)

x1

x2

-5.645581

0.008297

0.011386

Degrees of Freedom: 97 Total (i.e. Null);


Null Deviance:

95 Residual

76.71

Residual Deviance: 72.18

AIC: 78.18

> summary(result)
Call:
glm(formula = y ~ x1 + x2, family = binomial, data = data)

Deviance Residuals:
Min

1Q

Median

3Q

Max

-1.4350

-0.5413

-0.4625

-0.3801

2.2197

Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -5.645581

3.048239

-1.852

0.0640 .

x1

0.008297

0.021208

0.391

0.6956

x2

0.011386

0.005740

1.984

0.0473 *

--Signif. codes:

0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1

(Dispersion parameter for binomial family taken to be 1)


Null deviance: 76.714

on 97

degrees of freedom

Residual deviance: 72.184

on 95

degrees of freedom

AIC: 78.184
Number of Fisher Scoring iterations: 5
> coefficients(result)
(Intercept)

x1

x2

-5.645580693

0.008297108

0.011386484

> residuals(result)
1

2.0972899

1.5625411

2.1967232

2.1767753

1.8665989

2.0548909

10

11

12

13

1.6885385 -0.3601806 -0.3990558

1.9670456

1.9349012

2.2197067

18

19

20

7
-0.4170057
8
14
1.7065171
15
21

16

17

1.9215695

1.4563358 -0.5168576 -0.5069061 -0.3865767 -0.7133382

-0.3640321

> result0 <- glm(y ~ 1, data, family=binomial)


> anova(result0, result, test="Chisq")
Analysis of Deviance Table
Model 1: y ~ 1
Model 2: y ~ x1 + x2
Resid. Df Resid. Dev Df Deviance P(>|Chi|)
1

97

76.714

95

72.184

4.530

0.104

0.8

1.0

ROC-LDLT

0.6
0.4

Sens: 67.3%
Spec: 70.7%
PV+: 37.9%
PV-: 89.0%

Variable
est. (s.e.)
(Intercept) -2.992 (0.403)
test 0.000 (0.000)

0.2

Sensitivity

x = 6778.926

0.0

Model: y ~ x
Area under the curve: 0.733

0.0

0.2

0.4

0.6

0.8

1.0

1-Specificity
ROC ROC

Event 10
Data frame: mdk2
MKS: continuous value
data(mdk2)
x<-(MKS)
y<-c(rep(1,49),rep(0,184))

#event 1:N=49, event 0:N-184

ROC(x,y,plot="ROC",main="ROC")

gs5: data sets gns : event 0 or 1, te : continuous value


AUC
roc.area(gs5$gns, gs5$te)
CI 95% AUC
rc<-rcorr.cens(gs5$te,gs5$gns)
rc
aucbas=0.5 + (.5 * (rc[2]-(1.96*(rc[3]/2))))
auchaut=0.5 + (.5 * (rc[2]+(1.96*(rc[3]/2))))
aucbas
auchaut
#Ex: CI95 0.6-0.9

English version
roc.area(JBI_259$relapse, JBI_259$GGI_128)
rc<-rcorr.cens(JBI_259$GGI_128,JBI_259$relapse)
rc
aucbas=0.5 + (.5 * (rc[2]-(1.96*(rc[3]/2))))
auchaut=0.5 + (.5 * (rc[2]+(1.96*(rc[3]/2))))
aucbas
auchaut
#Ex: CI95 0.6-0.9

15000
10000
0

5000

ROW data

20000

25000

Kinoma score ERpos/Her2neg-A

Cell19

Cell23

Cell51

JBI

Transbig

Wang

BOXPLOT (Boxplot: Kruskal-Wallis: Wilcoxon)


# Box plot of bb by aa from data frame erc, one colour per group.
# Titles and labels that had been broken over several lines are rejoined.
boxplot(bb ~ aa, data = erc, col = c("blue", "red", "green"),
        main = "Kinoma score in Cell line 19", ylab = "ROW data",
        ylim = c(0, 1310000))
# Box plot of dd by bb with explicit group labels. The closing quote of
# "HER2pos" was missing, which left the string unterminated.
boxplot(dd ~ bb, names = c("HER2neg", "HER2pos"), col = c("red", "blue"))
# "\n" puts the data-set name on a second title line.
title("Centroid HER2pos and molecular subgroup\n JBI dataset",
      ylab = "Centroid HER2pos")
boxplot(bb ~ aa, data = ea, main = "Kinoma score ERpos/Her2neg-A",
        ylab = "ROW data", ylim = c(0, 25000),
        names = c("Cell19", "Cell23", "Cell51", "JBI", "Transbig", "Wang"),
        col = rainbow(10))
# locator(1) waits for one mouse click, which positions the annotation.
legend(locator(1), legend = c("Kruskal-Wallis p=0.0579"))

14
12
10
8
6
4

10

11

12

# One box per vector. Two of the variable names had been split across line
# breaks ("SUM149." / "1uM.2" and "SUM190.1u" / "M.1"), which does not parse.
boxplot(SUM149.0uM.1, SUM149.0uM.2, SUM149.0uM.3,
        SUM149.1uM.1, SUM149.1uM.2, SUM149.1uM.3,
        SUM190.0uM.1, SUM190.0uM.2, SUM190.0uM.3,
        SUM190.1uM.1, SUM190.1uM.2, SUM190.1uM.3)

COX (Cox proportional hazards model)


data frame: wk, RFSmonths: ,

relapse: event (1,0),subset:

Univariate analysis
library(survival)  # R is case-sensitive: `Library()` is not a function
# Univariate Cox model with Breslow handling of ties, fitted on the rows of
# wk where Molecular == "TN". `<-` is required: `x<coxph(...)` is a
# comparison, not an assignment.
x <- coxph(Surv(X120_RFSmonths, relapse) ~ IKS3Label, method = "breslow",
           data = wk, subset = (Molecular == "TN"))
summary(x)

Multivariate analysis
library(survival)  # R is case-sensitive: `Library()` is not a function
# Cox model with two covariates and Breslow handling of ties, fitted on the
# rows of tk where ERbyGENE == "ERpos". `<-` is required (`x<coxph(...)` is
# a comparison), and the argument name `data` had been split across lines.
x <- coxph(Surv(X120_RFSmonths, relapse) ~ MKS100 + IKS50,
           method = "breslow", data = tk, subset = (ERbyGENE == "ERpos"))
summary(x)
Call:
coxph(formula = Surv(time, event) ~ Pt, data = KM3)
n= 136
coef exp(coef) se(coef)
Pt 1.08

0.409 2.64 0.0084 P

2.94

exp(coef) exp(-coef) lower .95 upper .95


Pt

2.94

Rsquare= 0.048

0.34

1.32

6.56

(max possible= 0.802 )

Likelihood ratio test= 6.64

on 1 df,

p=0.00996 large number

means good result


Wald test

= 6.95

on 1 df,

p=0.00838

Score (logrank) test = 7.64

on 1 df,

p=0.0057

BR3<-cut(LYN, c(0, 9.5,15))

x<-coxph(Surv(X120_RFSmonths,
relapse)~BR3,method="breslow",data=WGG,
subset=(Molecular=="ERposHER2neg"))
summary(x)
Continuous value categorical value
x<-c(MKS/100) #100 Colum

Survival analysis: Multivariate analysis (stepwise)


COX model
# Full multivariate Cox model followed by stepwise selection with step().
# `<-` is required (`x<coxph(...)` is a comparison), and the argument name
# `method` had been split across lines.
x <- coxph(Surv(OSt5y, Ose5y) ~ NKI70 + R76 + DX + MKSLabel2 + IKS2Label +
             TOP2Lavel2 + KI67Label2 + ER + prpos + her + TLabel2 +
             NLabel2 + gradeLabel2 + ageLabel2,
           method = "breslow", data = marker)
summary(x)
summary(x2 <- step(x))  # p values after stepwise selection
exp(coef(x2))           # hazard ratios (exp(coef) of a Cox model)
exp(confint(x2))        # 95% confidence intervals

Time: RFSmonths, event: relapse, group: IKS4Label, subset

library(survival)
# Kaplan-Meier curves of relapse-free time by IKS4Label within the
# "ERposHER2neg" subset.
x <- survfit(Surv(X120_RFSmonths, relapse) ~ IKS4Label, conf.int = .95,
             subset = (Molecular == "ERposHER2neg"))
# The plot() arguments had been interleaved (ylab text inside legend.text);
# they are reassembled here. `lty` gives one line type per curve.
# NOTE(review): `legend.text` may not be accepted by current releases of
# the survival package -- confirm against the installed version.
plot(x, xlab = "Months", ylab = "Distant Event Free Survival",
     legend.text = c("Low", "Low-Intermediate", "High-Intermediate", "High"),
     lty = c(1, 2, 3, 4))
title("DEFS according to IKS sub group ERpos/Her2neg Wang dataset")
# locator(1) waits for one mouse click, which positions the annotation.
legend(locator(1), legend = c("Logrank test p=0.0579"))

0.6
0.4

Logrank test p=0.0579

0.2

Distant Event Free Survival

0.8

1.0

DEFS according to IKS sub group ERpos/Her2neg Wang dataset

Low
Low-Intermediate

0.0

High-Intermediate
High
0

50

100

150

Months

KM
# Kaplan-Meier curves of relapse-free time by IKS4Label within the
# "ERposHER2neg" subset, one line type and colour per curve.
x <- survfit(Surv(RFSmonths, relapse) ~ IKS4Label, conf.int = .95,
             subset = (Molecular == "ERposHER2neg"))
# The plot() arguments had been interleaved and the label
# "High-Intermediate" fused with `lty=`; they are reassembled here.
# NOTE(review): `legend.text` may not be accepted by current releases of
# the survival package -- confirm against the installed version.
plot(x, xlab = "Months", ylab = "Distant Event Free Survival",
     legend.text = c("Low", "Low-Intermediate", "High-Intermediate", "High"),
     lty = c(2, 4, 6, 8),
     col = c("red", "green", "blue", "orange"))
title("DEFS according to IKS sub group ERpos/Her2neg Wang dataset")
# locator(1) waits for one mouse click, which positions the annotation.
legend(locator(1), legend = c("Logrank test p=0.0579"))

0.6
0.4
0.0

0.2

Distant Event Free Survival

0.8

1.0

No.

28

23

18

14

13

11

11

10

1Low

28

26

24

22

22

21

21

20

16

10

2High

12

24

36

48

60

72

84

96

108

120

Months

No. of Patients at risk

x<-survfit(Surv(X120_RFSmonths,

relapse)~

TIG1TNLabel2,

conf.int=.95, subset = (Molecular == "TN"), se.fit=FALSE)


survplot(x,type="kaplan-meier",pr=FALSE,conf.int=FALSE,time.inc=12,
label.curves=FALSE,
lty=c(1,5),

n.risk=TRUE,

col=c("red",

conf.type=c("none"),

"blue"),

lwd=c(2,2),

ylab="Relapse Free Survival", conf.int=TRUE)

se.fit=FALSE,
xlab="Months",

1.0
0.8
0.6
0.4

Logrank test p=0.0579


takayuki

0.0

0.2

Survival Probability

360

720

1080

1440

1800

2160

2520

Days

library(verification)
library(Hmisc)
library(Design)
library(survival)
data(KM3)
attach(KM3)
d <- datadist(KM3)
options(datadist="d")
srv<-Surv(time,event)
fit<-survfit(srv~Pt, data=KM3)
fit
Call: survfit(formula = srv ~ Pt, data = KM3)
n events median 0.95LCL 0.95UCL
0 97

12

Inf

2879

Inf

1 39

12

Inf

1983

Inf

2880

3240

3600

survplot(fit,type="kaplanmeier",pr=TRUE,conf.int=TRUE,xlim=c(0,3600),time.inc=360)
legend(locator(1),

legend=c("Logrank

test

p=0.0579","takayuki"),lty=c(3,1)) #lty line

summary(fit)
Call: survfit(formula = srv ~ GGIaverageLabel, data = JBI_grade2)
4 observations deleted due to missingness
1Low
time n.risk n.event survival std.err lower 95% CI upper 95% CI
13.0

64

0.969

0.0217

0.881

0.992

18.0

62

0.953

0.0264

0.862

0.985

27.5

60

0.937

0.0304

0.841

0.976

30.5

57

0.921

0.0340

0.820

0.966

40.7

55

0.904

0.0373

0.799

0.956

45.3

53

0.887

0.0403

0.777

0.945

45.4

52

0.870

0.0430

0.756

0.933

53.0

49

0.852

0.0456

0.735

0.920

84.0

36

0.829

0.0501

0.702

0.905

98.8

27

0.798

0.0569

0.657

0.885

2High
time n.risk n.event survival std.err lower 95% CI upper 95% CI
11.0

61

0.984

0.0163

0.889

0.998

14.1

60

0.967

0.0228

0.875

0.992

14.6

59

0.951

0.0277

0.855

0.984

16.2

58

0.934

0.0317

0.835

0.975

23.6

56

0.918

0.0352

0.814

0.965

25.2

54

0.901

0.0385

0.792

0.954

29.6

53

0.884

0.0413

0.771

0.943

37.2

50

0.866

0.0441

0.750

0.931

37.3

49

0.848

0.0466

0.729

0.918

41.0

48

0.831

0.0489

0.708

0.905

42.5

47

0.813

0.0509

0.688

0.892

43.6

46

0.795

0.0528

0.667

0.878

44.6

45

0.778

0.0545

0.648

0.865

53.0

40

0.758

0.0565

0.626

0.849

78.0

24

0.695

0.0672

0.542

0.806

87.0

20

0.660

0.0723

0.499

0.781

Scatter plot
pairs(~mpg+disp+drat+wt,data=mtcars,
main="Simple Scatterplot Matrix")

Metformin data set

Counts
1602
1502
1402
1302
1202
1102
1002
902
802
701
601
501
401
301
201
101
1

Variance

15

10

0
2

10

12

14

Average

library(hexbin)
bin<-hexbin(Average, Variance, xbins=50)
plot(bin, main="Metformin data set")

PRLR, AVERAGE: categorical value

library(car)
library(survival)
# Scatter plot of PRLR against AVERAGE, coloured by the integer code of
# Molecular plus one.
# (Alternative colouring noted in the original: col = unclass(ERbyGENE) + 1.
# That note had wrapped onto its own line, where it ran as an assignment to
# a variable called `col`.)
plot(PRLR ~ AVERAGE, col = unclass(Molecular) + 1, pch = 16, cex = 1.5)
title("Scatter plot ERstatus 80% concentrated ellipse")
# locator(1) waits for one mouse click per legend to position it.
# NOTE(review): the legend colours are hard-coded; confirm that they agree
# with the colours used for the points.
legend(locator(1), legend = c("ERpos", "ERneg"),
       lty = c(1, 1), lwd = c(2, 2), col = c("green", "red"))
legend(locator(1), legend = c("pvalue = 0.00035", "rho=-0.232"))
# Ellipse centred on the sample means, shaped by the sample covariance, with
# radius sqrt(qchisq(.8, 2)) -- the 80% quantile of a chi-squared
# distribution with 2 degrees of freedom.
ellipse(c(mean(AVERAGE), mean(PRLR)), cov(cbind(AVERAGE, PRLR)),
        sqrt(qchisq(.8, 2)), lty = 2, lwd = 1, col = "blue")
Correlation test ( )
Parametric: Pearson test
Non-parametric : spearman test
cor.test(PRLR,AVERAGE, method="spearman")
Spearman's rank correlation rho
data:

PRLR and AVERAGE

S = 2598560, p-value = 0.0003548


alternative hypothesis: true rho is not equal to 0
sample estimates:
rho

-0.2326059

cor.test(dose,len)

Pearson's product-moment correlation


data:

dose and len

t = 10.2501, df = 58, p-value = 1.243e-14


alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
0.6892521 0.8777169 95[0.68,0.88]
sample estimates:
cor
0.8026913

11.5
11.0
10.5
10.0
9.5
8.0

8.5

9.0

SET.pos.106.USE

10

12

ER.205225_at.USE

plot(SET.pos.106.USE~ER.205225_at.USE,
col=unclass(ER_Level4)+1,pch=unclass(ER_Level4)+1)

14

12000

Two-way Interaction Plot

10000

Pair

6000
0

2000

4000

mean of IGFBP7

8000

9
8
16
10
17
13
7
15
11
1
4
14
12
5
3
18
6
2

post

pre
pp

interaction.plot(ppERpos, Pair, IGFBP7, col=unclass(ERbyGENEUSE)+1,


lty=c(1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1))

Pair:class

#pp:

pre-post,

Sensitivity Specificity PPV NPV

Disease + Disease - Total


Test + a

a+b

Test - c

c+d

Total a + c

b+d

a+b+c+d

Disease + Disease - Total


Test + a

a+b

Test - c

c+d

Total a + c

b+d

a+b+c+d

epi.tests(a = 670, b = 202, c = 74, d = 640, conf.level = 0.95,


verbose = FALSE)
Disease +

Disease -

Total

Test +

670

202

872

Test -

74

640

714

Total

744

842

1586

Point estimates and 95 % CIs:


--------------------------------------------------------Apparent prevalence:

0.55 (0.53, 0.57)

True prevalence:

0.47 (0.44, 0.49)

Sensitivity:

0.9 (0.88, 0.92)

Specificity:

0.76 (0.73, 0.79)

Diagnostic accuracy:

0.83 (0.81, 0.84)

Diagnostic odds ratio:

28.69 (21.52, 38.24)

Youden's index:

0.66 (0.61, 0.71)

Positive predictive value:

0.77 (0.74, 0.8)

Negative predictive value:

0.9 (0.87, 0.92)

Positive likelihood ratio:

3.75 (3.32, 4.24)

Negative likelihood ratio:

0.13 (0.11, 0.16)

Number needed to diagnose:

1.51 (1.41, 1.64)

---------------------------------------------------------

Others
Color
col=rainbow(10)
,col=red ,
col=c(red,blue,grey80, grey50,black,white)
,cm.colors(20) ,topo.colors(18)
Install
Epi
install.packages("Epi",dep=T)
lty=c(1,2,3) #lty line
Divide
x<-c(MKS/100) #100 Colum

# Plot x with axis labels; lty gives one line type per risk group named in
# legend.text (1 = solid, 2 = dashed, 3 = dotted, 4 = dot-dash).
plot(x, xlab="Months", ylab="Distant Event Free Survival",
     legend.text=c("Low", "Low-Intermediate", "High-Intermediate", "High"),
     lty=c(1,2,3,4)) #lty line

Convert to log2
library(Biobase)
data(wang)
attach(wang)
dataDirectory <- system.file("data", package = "datasets")
exprsFile <- file.path(dataDirectory, "wang.txt")
wang<- as.matrix(read.table(exprsFile, header = TRUE, sep = "\t",
row.names = 1, as.is = TRUE))

exprsFile <- "c:/path/to/wang.txt"


annotation <- "hgu133a"
# Experiment-level metadata stored in a "MIAME" object.
# NOTE(review): the pairing of field names and values below was reconstructed
# from a scrambled listing -- confirm each field against the intended values.
experimentData <- new("MIAME", name = "dudule", lab = "lpusztai",
                      contact = "lpus", title = "jbi", abstract = "trial",
                      url = "www.lab.not.exist",
                      other = list(notes = "Created from text files"))
# Log2-transform the expression matrix and write it out tab-delimited.
# col.names = NA makes write.table emit an empty header cell above the
# row-name column.
wlog <- log2(wang)
write.table(wlog, file = "w_mas5log2", quote = FALSE, sep = "\t",
            col.names = NA)

X
write.table(x$fs,

file="abcd.txt",

sep="\t",

col.names=T,row.names=T, quote=FALSE) #

Combine
data(md233new)
data(md103new)
data(us)
x=cbind (md233new, md103new)
y=cbind (x, us)
write.table(y,

file="chemo", quote=F, sep="\t", col.names=NA)

subset
subset=(Molecular=="TN")
subset(mda233,Molecular=="ERposHER2neg"|Molecular=="TN")
name of data set, Molecular:name of row
100 Colum

mda233:

x<-c(MKS/100) #100 Colum


To make matrix

x<-matrix(c(1,2,3,4,5,6),nrow=2,ncol=3)

# Draw 100 standard-normal values.
x <- rnorm(100)

# Histogram on the density scale, without axis annotation.
hist(x, xlim = c(-4, 4), ylim = c(0, 0.5), prob = TRUE, ann = FALSE)

# Let the next high-level plot draw on top of the histogram ...
par(new = TRUE)

# ... and overlay the kernel density estimate in red, using the same axis
# limits so both layers line up.
plot(density(x), xlim = c(-4, 4), ylim = c(0, 0.5),
     xlab = "", ylab = "", main = "", col = "red")
Size of letters
cex.axis=2,cex.lab=2, cex.main=3, cex.sub=3

2 2
par(mfrow=c(2,2))
plot(sin)
plot(cos)
plot(asin)
plot(acos)

10

Density

0.0
2

10

10

10

density.default(x = TSPY1_217162_at)

10

Density
2

10

par(mfrow=c(3,3))
plot(density(BPY2_208331_at), xlim=c(2,10),xlab= "")
plot(density(CYorf14_207063_at), xlim=c(2,10), xlab= "")
plot(density(CSPG4LYP1_211461_at), xlim=c(2,10), xlab= "")
plot(density(USP9Y_206624_at), xlim=c(2,10), xlab= "")
plot(density(EIF1AY_204410_at), xlim=c(2,10), xlab= "")
plot(density(NLGN4Y_207703_at), xlim=c(2,10), xlab= "")
plot(density(LOC159110_216786_at), xlim=c(2,10), xlab= "")
plot(density(TSPY1_217162_at), xlim=c(2,10), xlab= "")
plot(density(SRY_207893_at), xlim=c(2,10),xlab= "")

Bar plot

10

0.00

0.2

Density

0.0
6

10

density.default(x = SRY_207893_at)

0.4

0.00 0.10 0.20 0.30

0.00
2

density.default(x = LOC159110_216786_at)

0.20
Density

0.20
0.10

Density
2

density.default(x = NLGN4Y_207703_at)

0.00

0.10
0.00

Density

density.default(x = EIF1AY_204410_at)

0.20

density.default(x = USP9Y_206624_at)

0.10

0.20

0.10

0.2

0.4

0.3
0.2
0.1

Density
2

Density

density.default(x = CSPG4LYP1_211461_at)

0.0

Density

density.default(x = CYorf14_207063_at)

0.0 0.1 0.2 0.3 0.4

density.default(x = BPY2_208331_at)

10

3.0
2.5
2.0
1.5
1.0
0.5
0.0

# Twenty values filled column-wise into a 2 x 10 matrix: each column is one
# pair, drawn as two side-by-side bars (black = row 1, white = row 2).
x <- matrix(c(0.246, 3.070, 0.272, 2.440, 0.413, 2.230, 0.776, 2.130,
              0.483, 2.110, 0.000, 2.010, 0.726, 1.980, 0.355, 1.980,
              0.586, 1.970, 0.451, 1.970), nrow = 2)
barplot(x, names.arg = c("A", "B", "C", "D", "E", "F", "G", "H", "I", "J"),
        beside = TRUE, col = c("black", "white"))
# Horizontal reference line at 1.30102999566398, which equals -log10(0.05).
abline(h = 1.30102999566398)

0.0

0.1

10

0.2

20

Ratio

0.3

30

0.4

0.5

40

barplot + line chart

Index

# Bar chart without automatic axes, so each layer can get its own axis.
barplot(X.log, ylim=c(0,40), axes=FALSE)
# Horizontal reference line at 1.30102999566398, which equals -log10(0.05).
abline(h=1.30102999566398)
axis(2)   # left axis: scale of the bars (0-40)
# Overlay the Ratio line on the same device, on its own 0-0.5 scale.
par(new=TRUE)
plot(Ratio, axes=FALSE, type="l", lty=1, ylim=c(0,0.5))
axis(4)   # right axis: scale of the Ratio line

PAM50
## Author: Yuan Qi <yqi1@mdanderson.org>
#### input: mtx: (MAS5-sc600) normalized, log2-transformed gene expression values
## Assign each sample (column) of an expression matrix to the centroid it
## correlates with best (Spearman).
##
## Arguments:
##   mtx            numeric matrix, probes in rows (row names are probe IDs),
##                  samples in columns.
##   fin.parameters path to an .RData file that is load()ed into this
##                  function's environment. The body relies on it to supply
##                  the objects reference.median, probes, ctd, genes and genes1.
##
## Value: a list with
##   subtype    named character vector, best-correlated centroid per sample
##   probes     named character vector, the probe chosen for each entry of
##              `probes`
##   genes      `genes1` as loaded from fin.parameters
##   origGenes  `genes` as loaded from fin.parameters
##   noGenes    setdiff(genes, genes1)
##   corr       samples x centroids matrix of Spearman correlations
PAM50.v2 <- function(mtx, fin.parameters="PAM50-parameters.RData" )
{
  load(fin.parameters)
  ## added on 20091113F, for array platforms other than HGU133A.
  if (nrow(mtx) != length(reference.median)) {
    # drop = FALSE keeps a matrix even when there is a single sample
    mtx <- mtx[unlist(probes), , drop = FALSE]
    reference.median <- reference.median[unlist(probes)]
  }
  # reference.median is recycled down the rows of every column
  mtx <- mtx - reference.median

  # Pick one probe per entry of `probes`: the only one, or the one with the
  # largest IQR across samples (first one on ties).
  len <- vapply(probes, length, integer(1))
  p1 <- character(length(len))
  names(p1) <- names(len)
  for (i in seq_along(len)) {
    if (len[[i]] == 1) {
      p1[[i]] <- probes[[i]]
    } else if (len[[i]] > 1) {
      iqr1 <- apply(mtx[probes[[i]], , drop = FALSE], 1, IQR)
      p1[[i]] <- probes[[i]][which.max(iqr1)]
    }
  }

  gev <- mtx[p1, , drop = FALSE]
  ctd1 <- ctd[names(p1), , drop = FALSE]
  #type <- apply( gev, 2, function(x){
  #corr <- cor(x, ctd1, method="spearman")
  #colnames(corr)[match(max(corr), corr)]
  #}
  #)

  # One row of correlations per sample, one column per centroid.
  outcorr <- matrix(NA_real_, nrow = ncol(gev), ncol = ncol(ctd1))
  type <- character(ncol(gev))
  for (i in seq_len(ncol(gev))) {
    corr <- cor(gev[, i], ctd1, method = "spearman")
    type[[i]] <- colnames(corr)[match(max(corr), corr)]
    outcorr[i, ] <- corr[1, ]
  }
  names(type) <- colnames(gev)
  colnames(outcorr) <- colnames(ctd1)
  rownames(outcorr) <- colnames(gev)
  #outcorr = cbind( subtype=type, outcorr)
  #write.table( outcorr, file=fout.corr, sep="\t", col.names=NA,
  #             quote=FALSE)

  list(subtype = type,
       probes = p1,
       genes = genes1,
       origGenes = genes,
       noGenes = setdiff(genes, genes1),
       corr = outcorr)
}

2 (sample size)
library(pwr)
0.8
100 80 ( 0.8)

cohen.ES(test="chisq",size="medium") # small: 1-2%, medium: 3-13%, large: 14-26%


Conventional effect size from Cohen (1982)
test = chisq

size = medium
effect.size = 0.3
pwr.chisq.test(w=0.3,df=1,power=0.8)

# Power is usually set to 0.8: if the effect is real, about 8 out of 10 such studies would detect it.
Chi squared power calculation
w = 0.3
N = 87.20955
df = 1
sig.level = 0.05
power = 0.8
NOTE: N is the number of observations

Total Sample size = 2* n( 88*2 )

Bimodal distribution
library(mclust)
library(ClassDiscovery)
library(Biobase)
# Read the tab-delimited expression table into a numeric matrix; the first
# column of the file supplies the row names.
exprsFile <- "c:/Program Files/R/R-2.9.0/library/datasets/data/63kinase51cell.txt"
exprs <- as.matrix(read.table(exprsFile, header = TRUE, sep = "\t",
                              row.names = 1, as.is = TRUE))


dim(exprs)
exampleSet <- new("ExpressionSet", exprs = exprs)
mat <- exprs(exampleSet)
bimodalIndex(mat)
bi<-bimodalIndex(mat)
# Save the bimodal-index table as tab-delimited text, keeping row and column
# names.
write.table(bi, file="c:\\BI_63kinase.txt", row.names=TRUE, col.names=TRUE,
            sep="\t", quote=FALSE, dec=".")

# Read the data and name its first three columns time, cases and distance.
d1 <- read.table("65-555-reg.txt", header = TRUE)
d2 <- data.frame(time = c(d1[, 1]), cases = c(d1[, 2]), distance = c(d1[, 3]))
# with() evaluates the plotting call inside d2, so the columns are found
# without attaching the data frame to the search path.
with(d2, scatterplot3d(cases, distance, time, angle = 20, col.axis = "blue",
                       col.grid = "lightblue",
                       main = "Three-dimensional scatterplot",
                       pch = 21, box = FALSE, cex.symbols = 2))