You are on page 1 of 11

2013 NSF Graduate Research Fellowship statistics

Elson Liu March 29, 2013

Contents

1 Preliminaries

1

2 Undergraduate institution

2

3 Graduate institution

4

4 Field of Study

7

5 Subject area

8

1 Preliminaries

The awardee list was downloaded from https://www.fastlane.nsf.gov/grfp/AwardeeList.do?method=sort&method%3DloadAwardeeList&exportType=2. Some preprocessing was done in Microsoft Excel: baccalaureate institutions were normalized by converting to lower case, and a “Subject” column was generated by splitting the “Field of Study” column on hyphens. The data were then exported in CSV format as NSFAwardeeList2013.csv.

Load libraries

library(xtable)

library(ggplot2)

library(gdata)

Import data

df

<-

read.csv("NSFAwardeeList2013.csv",

head

=

TRUE)

1

2

Undergraduate institution

Tabulate undergraduate institution frequencies

ugrads

<-

table(df$Lower.Case.Baccalaureate,

dnn

=

c("Number

of

awardees"))

Convert the table back to a data frame

 

udf

<-

as.data.frame(ugrads)

 

names(udf)

=

c("Undergrad",

"Awardees")

 

head(udf)

 

##

Undergrad

Awardees

 

##

1

albion

college

 

1

##

2

amherst

college

9

##

3

appalachian

state

university

1

##

4

arizona

state

university

17

##

5

asbury

college

1

##

6

auburn

university

4

Sort by number of awardees

 

o

<-

order(-udf$Awardees)

 

ugrads.sorted

<-

udf[o,

]

head(ugrads.sorted,

n

=

35L)

##

Undergrad

Awardees

##

168

massachusetts

institute

of

technology

55

##

322

university

of

california-berkeley

38

##

428

university

of

washington

38

##

416

university

of

texas

at

austin

35

##

271

stanford

university

30

##

77

cornell

university

29

##

130

harvard

university

27

##

228

princeton

university

27

##

30

california

institute

of

technology

25

##

119

georgia

institute

of

technology

24

##

321

university

of

california

berkeley

24

##

429

university

of

wisconsin-madison

23

##

366

university

of

michigan

22

##

466

yale

university

22

##

28

brown

university

21

##

74

columbia

university

21

##

333

university

of

chicago

20

##

387

university

of

pennsylvania

20

 

2

 

##

316

university

of

arizona

19

##

325

university

of

california-los

angeles

19

##

323

university

of

california-davis

18

##

352

university

of

illinois

at

urbana-champaign

18

##

447

washington

university

18

##

4

arizona

state

university

17

##

199

north

carolina

state

university

17

##

205

northwestern

university

17

##

230

purdue

university

17

##

328

university

of

california-santa

barbara

17

##

427

university

of

virginia

main

campus

17

##

95

duke

university

16

##

331

university

of

california,

san

diego

16

##

394

university

of

puerto

rico

mayaguez

16

##

342

university

of

florida

15

##

379

university

of

north

carolina

at

chapel

hill

15

##

388

university

of

pittsburgh

15

Select undergrad institutions with more than 20 awardees and draw a dotplot

ugrads.top <- drop.levels(udf[udf$Awardees > 20, ]) p <- qplot(x = Awardees, y = Undergrad, data
ugrads.top
<-
drop.levels(udf[udf$Awardees
>
20,
])
p
<-
qplot(x
=
Awardees,
y
=
Undergrad,
data
=
ugrads.top)
print(p)

3

Undergrad

yale university

university of wisconsin−madison

university of washington

university of texas at austin

university of michigan

university of california−berkeley

university of california berkeley

stanford university

princeton university

massachusetts institute of technology

harvard university

georgia institute of technology

cornell university

columbia university

california institute of technology

brown university

Generate a LaTeX-formatted table

● ● ● ● ● ● ● ●
● ● ● ● ● ● ● ●
● ● ● ● ● ● ● ●
● ● ● ● ● ● ● ●
● ● ● ● ● ● ● ●
● ● ● ● ● ● ● ●

● ● ● ●

● ● ● ● ● ● ● ●
● ● ● ● ● ● ● ●
● ● ● ● ● ● ● ●

20

30

40

50

Awardees

utable <- xtable(ugrads) print(utable, type = "latex", file = "undergrads.tex",
utable
<-
xtable(ugrads)
print(utable,
type
=
"latex",
file
=
"undergrads.tex",
tabular.environment
=

"longtable")

3 Graduate institution

Tabulate graduate institution frequencies

4

grads

<-

table(df$Proposed.Graduate.Institution,

dnn

=

c("Number

of

awardees"))

Convert the table back to a data frame

gdf

<-

as.data.frame(grads)

 

names(gdf)

=

c("Grad",

"Awardees")

 

head(gdf)

 

##

Grad

Awardees

##

1

American

Museum

Natural

History

 

1

##

2

Arizona

State

University

10

##

3

Boston

College

4

##

4

Boston

University

8

##

5

Boston

University

Charles

River

Campus

1

##

6

Brandeis

University

1

Sort by number of awardees

 

o

<-

order(-gdf$Awardees)

 

grads.sorted

<-

gdf[o,

]

head(grads.sorted,

n

=

19L)

 

##

Grad

Awardees

##

75

Stanford

University

154

##

93

University

of

California-Berkeley

132

##

45

Massachusetts

Institute

of

Technology

116

##

37

Harvard

University

69

##

161

University

of

Washington

65

##

127

University

of

Michigan

Ann

Arbor

60

##

19

Cornell

University

54

##

98

University

of

California-San

Diego

49

##

162

University

of

Wisconsin-Madison

47

##

24

Duke

University

44

##

117

University

of

Illinois

at

Urbana-Champaign

43

##

59

Northwestern

University

40

##

99

University

of

California-San

Francisco

39

##

94

University

of

California-Davis

37

##

95

University

of

California-Irvine

36

##

10

California

Institute

of

Technology

35

##

151

University

of

Texas

at

Austin

35

##

64

Princeton

University

34

##

96

University

of

California-Los

Angeles

31

Select grad institutions with more than 30 awardees and draw a dotplot

5

Grad

grads.top <- drop.levels(gdf[gdf$Awardees > 30, ]) p <- qplot(x = Awardees, y = Grad, data
grads.top
<-
drop.levels(gdf[gdf$Awardees
>
30,
])
p
<-
qplot(x
=
Awardees,
y
=
Grad,
data
=
grads.top)
print(p)

Yale University University of Wisconsin−Madison University of Washington University of Texas at Austin University of Michigan Ann Arbor University of Illinois at Urbana−Champaign University of California−Santa Barbara University of California−San Francisco University of California−San Diego University of California−Los Angeles University of California−Irvine University of California−Davis University of California−Berkeley Stanford University Princeton University Northwestern University Massachusetts Institute of Technology Harvard University Duke University Cornell University California Institute of Technology

Generate a LaTeX-formatted table

● ● ● ● ● ● ● ● ●
● ● ● ● ● ● ● ● ●
● ● ● ● ● ● ● ● ●
● ● ● ● ● ● ● ● ●
● ● ● ● ● ● ● ● ●

● ● ● ●

● ● ● ●
● ● ● ●
● ● ● ● ● ● ● ● ●
● ● ● ● ● ● ● ● ●
● ● ● ● ● ● ● ● ●
● ● ● ● ● ● ● ● ●

40

80

120

160

Awardees

gtable <- xtable(grads) print(gtable, type = "latex", file = "grads.tex",
gtable
<-
xtable(grads)
print(gtable,
type
=
"latex",
file
=
"grads.tex",
tabular.environment
=

"longtable")

6

4

Field of Study

Tabulate field of study frequencies

fields

<-

table(df$Field.of.Study,

dnn

=

c("Number

of

awardees"))

Convert the table back to a data frame

fdf

<-

as.data.frame(fields)

names(fdf)

=

c("Field",

"Awardees")

 

head(fdf)

 

##

Field

Awardees

##

1

Chemistry

-

Chemical

Catalysis

21

##

2

Chemistry

-

Chemical

Measurement

and

Imaging

13

##

3

Chemistry

-

Chemical

Structure,

Dynamics,

and

Mechanism

9

##

4

Chemistry

-

Chemical

Synthesis

46

##

5

Chemistry

-

Chemical

Theory,

Models

and

Computational

Methods

14

##

6

Chemistry

-

Chemistry

of

Life

Processes

17

Sort by number of awardees

o <-

order(-fdf$Awardees)

fields.sorted

<-

fdf[o,

]

head(fields.sorted,

n

=

10L)

##

Field

Awardees

##

89

Life

Sciences

-

Ecology

124

##

33

Engineering

-

Biomedical

82

##

42

Engineering

-

Mechanical

82

##

96

Life

Sciences

 

-

Neurosciences

77

##

34

Engineering

-

Chemical

 

Engineering

72

##

32

Engineering

-

Bioengineering

53

##

91

Life

Sciences

-

Evolutionary

Biology

52

##

95

Life

Sciences

-

Molecular

Biology

48

##

4

Chemistry

-

Chemical

Synthesis

46

##

37

Engineering

-

Electrical

and

Electronic

46

Select fields of study with more than 40 awardees and draw a dotplot

fields.top

<-

drop.levels(fdf[fdf$Awardees

>

40,

])

p

<-

qplot(x

=

Awardees,

y

=

Field,

data

=

fields.top)

print(p)

 
 

7

Field

Life Sciences − Neurosciences

Life Sciences − Molecular Biology

Life Sciences − Evolutionary Biology

Life Sciences − Ecology

Engineering − Mechanical

Engineering − Electrical and Electronic

Engineering − Chemical Engineering

Engineering − Biomedical

Engineering − Bioengineering

Chemistry − Chemical Synthesis

Generate a LaTeX-formatted table

●
● ● ● ● ● ● ● ● ● ●

● ● ● ●

● ● ● ● ● ●
● ● ● ● ● ●
●

●
●
●

60

80

100

120

Awardees

ftable <- xtable(fields) print(ftable, type = "latex", file = "fields.tex",
ftable
<-
xtable(fields)
print(ftable,
type
=
"latex",
file
=
"fields.tex",
tabular.environment
=

"longtable")

5 Subject area

Tabulate subject area frequencies

8

subjects

<-

table(df$Subject,

dnn

=

c("Number

of

awardees"))

Convert the table back to a data frame

sdf

<-

as.data.frame(subjects)

names(sdf)

=

c("Subject",

"Awardees")

head(sdf,

n

=

10L)

##

Subject

Awardees

##

1

Chemistry

164

##

2

Comp/IS/Eng

89

##

3

Engineering

507

##

4

Geosciences

114

##

5

Life

Sciences

580

##

6

Materials

Research

42

##

7

Mathematical

Sciences

67

##

8

Physics

and

Astronomy

106

##

9

Psychology

157

##

10

Social

Sciences

163

Sort by number of awardees

o <-

subjects.sorted

head(subjects.sorted,

order(-sdf$Awardees)

<-

sdf[o,

n

=

]

10L)

##

Subject

Awardees

##

5

Life

Sciences

580

##

3

Engineering

507

##

1

Chemistry

164

##

10

Social

Sciences

163

##

9

Psychology

157

##

4

Geosciences

114

##

8

Physics

and

Astronomy

106

##

2

Comp/IS/Eng

89

##

7

Mathematical

Sciences

67

##

6

Materials

Research

42

Draw a dotplot of number of awardees for each subject area

p

<-

qplot(x

=

Awardees,

y

=

Subject,

data

=

subjects.sorted)

print(p)

 
 

9

Subject

STEM Education and Learning Research

Social Sciences

Psychology

Physics and Astronomy

Mathematical Sciences

Materials Research

Life Sciences

Geosciences

Engineering

Comp/IS/Eng

Chemistry

Generate a LaTeX-formatted table

● ● ● ● ● ● ● ● ● ● ●
● ● ● ● ● ● ● ● ● ● ●
● ● ● ● ● ● ● ● ● ● ●

● ● ● ● ● ● ●
● ●

● ● ● ● ● ● ●
● ● ● ● ● ● ●
● ● ● ● ● ● ● ● ● ● ●
● ● ● ● ● ● ● ● ● ● ●
● ● ● ● ● ● ● ● ● ● ●

0

200

400

600

Awardees

stable <- xtable(subjects) print(stable, type = "latex", file = "subjects.tex",
stable
<-
xtable(subjects)
print(stable,
type
=
"latex",
file
=
"subjects.tex",
tabular.environment
=

"longtable")

Number of awardees

Chemistry

164

Comp/IS/Eng

89

Engineering

507

Geosciences

114

Life Sciences

580

Materials Research

42

10

Mathematical Sciences

67

Physics and Astronomy

106

Psychology

157

Social Sciences

163

STEM Education and Learning Research

11

11