NSF Graduate Research Fellowship statistics

Elson Liu
March ,
Contents
1 Preliminaries 1
2 Undergraduate institution 2
3 Graduate institution 4
4 Field of Study 7
5 Subject area 8
1 Preliminaries
The awardee list was downloaded from https://www.fastlane.nsf.gov/grfp/AwardeeList.
do?method=sort&method%3DloadAwardeeList&exportType=2. Some preprocessing was done
in Microsoft Excel: baccalaureate institutions were normalized by converting to lower case, and
a “Subject” column was generated by splitting the “Field of Study” column on hyphens. The data
were then exported in CSV format as NSFAwardeeList2013.csv.
Load libraries
library(xtable)
library(ggplot2)
library(gdata)
Import data
# Read the preprocessed awardee export; the first row holds the column names.
# `header` is written out in full instead of relying on partial matching.
df <- read.csv("NSFAwardeeList2013.csv", header = TRUE)

2 Undergraduate institution
Tabulate undergraduate institution frequencies
# Count how many awardees list each (lower-cased) baccalaureate institution
ugrads <- table(
  df$Lower.Case.Baccalaureate,
  dnn = "Number of awardees"
)
Convert the table back to a data frame
# Two-column data frame: institution name and its awardee count
udf <- as.data.frame(ugrads)
colnames(udf) <- c("Undergrad", "Awardees")
# Preview the first six rows
head(udf, n = 6L)
## Undergrad Awardees
## 1 albion college 1
## 2 amherst college 9
## 3 appalachian state university 1
## 4 arizona state university 17
## 5 asbury college 1
## 6 auburn university 4
Sort by number of awardees
# Row permutation that puts the largest awardee counts first
o <- order(-udf[["Awardees"]])
ugrads.sorted <- udf[o, , drop = FALSE]
# Show the 35 institutions with the most awardees
head(ugrads.sorted, n = 35L)
## Undergrad Awardees
## 168 massachusetts institute of technology 55
## 322 university of california-berkeley 38
## 428 university of washington 38
## 416 university of texas at austin 35
## 271 stanford university 30
## 77 cornell university 29
## 130 harvard university 27
## 228 princeton university 27
## 30 california institute of technology 25
## 119 georgia institute of technology 24
## 321 university of california berkeley 24
## 429 university of wisconsin-madison 23
## 366 university of michigan 22
## 466 yale university 22
## 28 brown university 21
## 74 columbia university 21
## 333 university of chicago 20
## 387 university of pennsylvania 20

## 316 university of arizona 19
## 325 university of california-los angeles 19
## 323 university of california-davis 18
## 352 university of illinois at urbana-champaign 18
## 447 washington university 18
## 4 arizona state university 17
## 199 north carolina state university 17
## 205 northwestern university 17
## 230 purdue university 17
## 328 university of california-santa barbara 17
## 427 university of virginia main campus 17
## 95 duke university 16
## 331 university of california, san diego 16
## 394 university of puerto rico mayaguez 16
## 342 university of florida 15
## 379 university of north carolina at chapel hill 15
## 388 university of pittsburgh 15
Select undergrad institutions with more than 20 awardees and draw a dotplot
# Keep only institutions with more than 20 awardees (the smallest counts in
# the resulting dotplot are 21); drop.levels() from gdata then discards the
# factor levels that no longer occur in the subset.
ugrads.top <- drop.levels(udf[udf$Awardees > 20, ])
# Dotplot: awardee count on x, one row per institution on y
p <- qplot(x = Awardees, y = Undergrad, data = ugrads.top)
print(p)

q
q
q
q
q
q
q
q
q
q
q
q
q
q
q
q
brown university
california institute of technology
columbia university
cornell university
georgia institute of technology
harvard university
massachusetts institute of technology
princeton university
stanford university
university of california berkeley
university of california−berkeley
university of michigan
university of texas at austin
university of washington
university of wisconsin−madison
yale university
20 30 40 50
Awardees
U
n
d
e
r
g
r
a
d
Generate a LaTeX-formatted table
# Write the complete undergraduate frequency table to undergrads.tex,
# using a longtable environment
utable <- xtable(ugrads)
print(
  utable,
  file = "undergrads.tex",
  type = "latex",
  tabular.environment = "longtable"
)
3 Graduate institution
Tabulate graduate institution frequencies

# Count how many awardees list each proposed graduate institution
grads <- table(
  df$Proposed.Graduate.Institution,
  dnn = "Number of awardees"
)
Convert the table back to a data frame
# Two-column data frame: graduate institution and its awardee count
gdf <- as.data.frame(grads)
colnames(gdf) <- c("Grad", "Awardees")
# Preview the first six rows
head(gdf, n = 6L)
## Grad Awardees
## 1 American Museum Natural History 1
## 2 Arizona State University 1
## 3 Boston College 4
## 4 Boston University 8
## 5 Boston University Charles River Campus 1
## 6 Brandeis University 1
Sort by number of awardees
# Row permutation that puts the largest awardee counts first
o <- order(-gdf[["Awardees"]])
grads.sorted <- gdf[o, , drop = FALSE]
# Show the 19 graduate institutions with the most awardees
head(grads.sorted, n = 19L)
## Grad Awardees
## 75 Stanford University 154
## 93 University of California-Berkeley 132
## 45 Massachusetts Institute of Technology 116
## 37 Harvard University 69
## 161 University of Washington 65
## 127 University of Michigan Ann Arbor 60
## 19 Cornell University 54
## 98 University of California-San Diego 49
## 162 University of Wisconsin-Madison 47
## 24 Duke University 44
## 117 University of Illinois at Urbana-Champaign 43
## 59 Northwestern University 40
## 99 University of California-San Francisco 39
## 94 University of California-Davis 37
## 95 University of California-Irvine 36
## 10 California Institute of Technology 35
## 151 University of Texas at Austin 35
## 64 Princeton University 34
## 96 University of California-Los Angeles 31
Select grad institutions with more than 30 awardees and draw a dotplot

# Keep only graduate institutions with more than 30 awardees (every
# institution in the resulting dotplot has at least 31); drop.levels() from
# gdata then discards the factor levels that no longer occur in the subset.
grads.top <- drop.levels(gdf[gdf$Awardees > 30, ])
# Dotplot: awardee count on x, one row per institution on y
p <- qplot(x = Awardees, y = Grad, data = grads.top)
print(p)
q
q
q
q
q
q
q
q
q
q
q
q
q
q
q
q
q
q
q
q
q
California Institute of Technology
Cornell University
Duke University
Harvard University
Massachusetts Institute of Technology
Northwestern University
Princeton University
Stanford University
University of California−Berkeley
University of California−Davis
University of California−Irvine
University of California−Los Angeles
University of California−San Diego
University of California−San Francisco
University of California−Santa Barbara
University of Illinois at Urbana−Champaign
University of Michigan Ann Arbor
University of Texas at Austin
University of Washington
University of Wisconsin−Madison
Yale University
40 80 120 160
Awardees
G
r
a
d
Generate a LaTeX-formatted table
# Write the complete graduate-institution frequency table to grads.tex,
# using a longtable environment
gtable <- xtable(grads)
print(
  gtable,
  file = "grads.tex",
  type = "latex",
  tabular.environment = "longtable"
)

4 Field of Study
Tabulate field of study frequencies
# Count how many awardees fall under each field of study
fields <- table(
  df$Field.of.Study,
  dnn = "Number of awardees"
)
Convert the table back to a data frame
# Two-column data frame: field of study and its awardee count
fdf <- as.data.frame(fields)
colnames(fdf) <- c("Field", "Awardees")
# Preview the first six rows
head(fdf, n = 6L)
## Field Awardees
## 1 Chemistry - Chemical Catalysis 21
## 2 Chemistry - Chemical Measurement and Imaging 13
## 3 Chemistry - Chemical Structure, Dynamics, and Mechanism 9
## 4 Chemistry - Chemical Synthesis 46
## 5 Chemistry - Chemical Theory, Models and Computational Methods 14
## 6 Chemistry - Chemistry of Life Processes 17
Sort by number of awardees
# Order fields from most to fewest awardees
o <- order(-fdf$Awardees)
fields.sorted <- fdf[o, ]
# Show the ten largest fields (the printed listing has ten rows)
head(fields.sorted, n = 10L)
## Field Awardees
## 89 Life Sciences - Ecology 124
## 33 Engineering - Biomedical 82
## 42 Engineering - Mechanical 82
## 96 Life Sciences - Neurosciences 77
## 34 Engineering - Chemical Engineering 72
## 32 Engineering - Bioengineering 53
## 91 Life Sciences - Evolutionary Biology 52
## 95 Life Sciences - Molecular Biology 48
## 4 Chemistry - Chemical Synthesis 46
## 37 Engineering - Electrical and Electronic 46
Select fields of study with more than 40 awardees and draw a dotplot
# Keep only fields with more than 40 awardees (the smallest counts in the
# resulting dotplot are 46); drop.levels() from gdata then discards the
# factor levels that no longer occur in the subset.
fields.top <- drop.levels(fdf[fdf$Awardees > 40, ])
# Dotplot: awardee count on x, one row per field on y
p <- qplot(x = Awardees, y = Field, data = fields.top)
print(p)

q
q
q
q
q
q
q
q
q
q
Chemistry − Chemical Synthesis
Engineering − Bioengineering
Engineering − Biomedical
Engineering − Chemical Engineering
Engineering − Electrical and Electronic
Engineering − Mechanical
Life Sciences − Ecology
Life Sciences − Evolutionary Biology
Life Sciences − Molecular Biology
Life Sciences − Neurosciences
60 80 100 120
Awardees
F
i
e
l
d
Generate a LaTeX-formatted table
# Write the complete field-of-study frequency table to fields.tex,
# using a longtable environment
ftable <- xtable(fields)
print(
  ftable,
  file = "fields.tex",
  type = "latex",
  tabular.environment = "longtable"
)
5 Subject area
Tabulate subject area frequencies

# Count how many awardees fall under each subject area
subjects <- table(
  df$Subject,
  dnn = "Number of awardees"
)
Convert the table back to a data frame
# Two-column data frame: subject area and its awardee count
sdf <- as.data.frame(subjects)
names(sdf) <- c("Subject", "Awardees")
# Show the first ten subject areas (the printed listing has ten rows)
head(sdf, n = 10L)
## Subject Awardees
## 1 Chemistry 164
## 2 Comp/IS/Eng 89
## 3 Engineering 507
## 4 Geosciences 114
## 5 Life Sciences 580
## 6 Materials Research 42
## 7 Mathematical Sciences 67
## 8 Physics and Astronomy 106
## 9 Psychology 157
## 10 Social Sciences 163
Sort by number of awardees
# Order subject areas from most to fewest awardees
o <- order(-sdf$Awardees)
subjects.sorted <- sdf[o, ]
# Show the ten largest subject areas (the printed listing has ten rows)
head(subjects.sorted, n = 10L)
## Subject Awardees
## 5 Life Sciences 580
## 3 Engineering 507
## 1 Chemistry 164
## 10 Social Sciences 163
## 9 Psychology 157
## 4 Geosciences 114
## 8 Physics and Astronomy 106
## 2 Comp/IS/Eng 89
## 7 Mathematical Sciences 67
## 6 Materials Research 42
Draw a dotplot of number of awardees for each subject area
# Dotplot: awardee count on x, one row per subject area on y
p <- qplot(Awardees, Subject, data = subjects.sorted)
print(p)

q
q
q
q
q
q
q
q
q
q
q
Chemistry
Comp/IS/Eng
Engineering
Geosciences
Life Sciences
Materials Research
Mathematical Sciences
Physics and Astronomy
Psychology
Social Sciences
STEM Education and Learning Research
0 200 400 600
Awardees
S
u
b
j
e
c
t
Generate a LaTeX-formatted table
# Write the complete subject-area frequency table to subjects.tex,
# using a longtable environment
stable <- xtable(subjects)
print(
  stable,
  file = "subjects.tex",
  type = "latex",
  tabular.environment = "longtable"
)
Number of awardees
Chemistry
Comp/IS/Eng
Engineering
Geosciences
Life Sciences
Materials Research

Mathematical Sciences
Physics and Astronomy
Psychology
Social Sciences
STEM Education and Learning Research