-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path1a_NSF_API_pull.R
More file actions
executable file
·148 lines (109 loc) · 5.09 KB
/
1a_NSF_API_pull.R
File metadata and controls
executable file
·148 lines (109 loc) · 5.09 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
############################################
## RADS Pull of NSF & NIH Awardees
## API pull attempt
##
## 2022-03-26
##
############################################
#clear workspace
# Removes every (non-hidden) object from the current environment so the run starts
# from a clean slate; packages that are already attached stay attached.
rm(list=ls())
#required libraries
# pacman::p_load attaches each package listed here.
# NOTE(review): rjson and jsonlite both export fromJSON(); the unqualified fromJSON()
# calls further down presumably resolve to jsonlite's version because it is listed
# after rjson — confirm before reordering this list.
pacman::p_load("rjson", "dplyr", "jsonlite", "readxl", "textclean", "foreach", "doParallel")
#No auth appears to be needed
#source token information (defines key, token and OAuth)
#source('~/UnsyncedDocuments/API_reference/elsevier_token.R', chdir = TRUE)
#setwd
#setwd("~/Documents/NSF_RADS/")
#initially explored parallel calls, but did not handle errors well
# detectCores()
# registerDoParallel(35)
############################
#using NSF API
# The NSF Award Search web API provides an interface to Research.gov's Research Spending and Results data, which covers NSF research award information from 2007 onward.
#Reference this: https://www.research.gov/common/webapi/awardapisearch-v1.htm#sample-requests
#Reference by Awardee name (University of Minnesota) or by DUNS number
#UMN TC: 555917996
#UofMich AA: 073133571
#GET http://api.nsf.gov/services/v1/awards.{format}?parameters
# awardeename <- c("university+of+minnesota-twin+cities")
# awardeename <- c("university+of+michigan+ann+arbor")
# awardeename <- c("cornell")
# awardeename <- c("duke")
#awardeename <- c("washington+university")
#awardeeCity <- "louis"
# Paged pull of award records from the NSF Award Search API, filtered by awardee
# name and city. The combined records end up in `allresults`.
#
# Awardee name(s) to query, with "+" standing in for spaces; one paged pull is made
# per name.
awardeename <- c("virginia+tech")
# City filter sent with every request (the same city is applied to every name above).
awardeeCity <- "blacksburg"
#duns <- 073133571
metadata_url <- "http://api.nsf.gov/services/v1/awards.json"
#set up pagination
# Offsets 1, 26, 51, ...; 10000 is only an upper bound because paging stops at the
# first page that comes back without awards.
pagination <- seq(1, 10000, by = 25)
# Pages are collected in a list and bound once after the loop, so the accumulated
# data frame is not copied on every request.
award_pages <- list()
for (i in awardeename) {
  for (j in pagination) {
    cat("on item ", j, "\n")
    metadata_params <- list()
    #metadata_params$key <- paste0('apiKey=',key)
    metadata_params$awardeeName <- paste0("awardeeName=", i)
    #metadata_params$dunsNumber <- paste0('dunsNumber=',i)
    metadata_params$awardeeCity <- paste0("awardeeCity=", awardeeCity)
    metadata_params$offset <- paste0("offset=", j)
    #compiles the parameters into a URL for API pull
    metadatareq <- paste0(
      metadata_url, "/", "?",
      paste(unlist(metadata_params), collapse = "&")
    )
    # jsonlite:: is spelled out because rjson also exports fromJSON().
    # A failed request is reported with its offset and skipped, so one bad page does
    # not throw away the pages already retrieved.
    metadata <- tryCatch(
      jsonlite::fromJSON(metadatareq),
      error = function(e) {
        warning(
          "request failed for ", i, " at offset ", j, ": ", conditionMessage(e),
          call. = FALSE, immediate. = TRUE
        )
        NULL
      }
    )
    if (is.null(metadata)) {
      next
    }
    #metadata$response$award
    page_awards <- metadata$response$award
    # A response without any award rows means this awardee has no further results;
    # stop paging instead of requesting the remaining offsets.
    if (NROW(page_awards) == 0) {
      break
    }
    award_pages[[length(award_pages) + 1]] <- page_awards
  }
}
# Seeding with an empty data.frame keeps the result a plain data.frame even when
# nothing was retrieved.
allresults <- bind_rows(data.frame(), award_pages)
#write out list of awards
# write.csv(allresults, file="data/NSF/raw_data/UMN_NSF_results_20220328.csv", row.names = FALSE)
# write.csv(allresults, file="data/NSF/raw_data/UMICH_NSF_results_20220328.csv", row.names = FALSE)
# write.csv(allresults, file="data/NSF/raw_data/Duke_NSF_results_20220328.csv", row.names = FALSE)
# write.csv(allresults, file="data/NSF/raw_data/Cornell_NSF_results_20220328.csv", row.names = FALSE)
#write.csv(allresults, file="data/NSF/raw_data/WashU_NSF_results_20220328.csv", row.names = FALSE)
# Save the accumulated award records as CSV, leaving out the row-name column.
nsf_results_path <- "data/NSF/raw_data/VirginaTech_NSF_results_20220328.csv"
utils::write.csv(x = allresults, file = nsf_results_path, row.names = FALSE)
### Pulls for each award ######
## Read in the combined data file for all grants in the database subject to open access mandates, then pull individual information for them.
# Per-award detail pull: reads the combined award list, requests the fields in
# `printFields` for each award id from the NSF API, and writes the combined
# details to CSV.
#clear workspace
rm(list=ls())
# NOTE(review): read.csv converts a purely numeric id column to numbers, which
# would drop any leading zeros — confirm the ids still resolve against the API.
alldat <- read.csv("data/NSF/raw_data/CombinedNSF_data_pull.csv")
#https://api.nsf.gov/services/v1/awards/1052893.json?printFields=primaryProgram,id,fundProgramName,piEmail,awardAgencyCode,fundAgencyCode
metadata_url <- 'http://api.nsf.gov/services/v1/awards'
awardids <- alldat$id
printFields <- 'primaryProgram,id,fundProgramName,piEmail,awardAgencyCode,fundAgencyCode,projectOutComesReport,abstractText,startDate,expDate'
# Position in `awardids` at which the pull starts. It is kept at the value the
# script was last run with; set it to 1 to pull details for every award.
resume_from <- 9774
# Built by filtering positions rather than `resume_from:length(awardids)`, which
# would count backwards (yielding NA ids) when there are fewer ids than resume_from.
pending <- seq_along(awardids)
pending <- pending[pending >= resume_from]
# The query string is the same for every award, so it is built once.
metadata_params <- list()
metadata_params$printFields <- paste0("printFields=", printFields)
query_string <- paste(unlist(metadata_params), collapse = "&")
# Responses are collected in a list and bound once after the loop.
detail_pages <- list()
for (pos in pending) {
  i <- awardids[pos]
  # Progress uses the loop position directly; looking the id up with which() would
  # report several positions whenever an id occurs more than once.
  cat("on item", pos, "of", length(awardids), "\n")
  #compiles the parameters into a URL for API pull
  metadatareq <- paste0(metadata_url, "/", i, ".json", "?", query_string)
  # jsonlite:: is spelled out because rjson also exports fromJSON().
  # A failed request is reported with its id and skipped so the run can continue.
  metadata <- tryCatch(
    jsonlite::fromJSON(metadatareq),
    error = function(e) {
      warning(
        "request failed for award ", i, " (item ", pos, "): ", conditionMessage(e),
        call. = FALSE, immediate. = TRUE
      )
      NULL
    }
  )
  if (is.null(metadata)) {
    next
  }
  #metadata$response$award
  award_detail <- metadata$response$award
  # Responses without award rows contribute nothing to the result.
  if (NROW(award_detail) > 0) {
    detail_pages[[length(detail_pages) + 1]] <- award_detail
  }
}
# Seeding with an empty data.frame keeps the result a plain data.frame even when
# nothing was retrieved.
allresults <- bind_rows(data.frame(), detail_pages)
# Show the first few input rows whose id has no detail record.
head(alldat[!(alldat$id %in% allresults$id), ])
#the ones that are not in the details are NASA studies.
write.csv(allresults, "data/NSF/raw_data/CombinedNSF_data_details_20220719.csv", row.names = FALSE)
# #using foreach
# testresults <- foreach(j=seq(1, 70000, by=25), .combine = 'bind_rows', .packages = c('rjson', 'jsonlite', 'dplyr'), .inorder=TRUE) %dopar% {
# metadata_params <- list()
# #metadata_params$key <- paste0('apiKey=',key)
# metadata_params$awardeeName <- paste0('awardeeName=',awardeename)
# #metadata_params$dunsNumber <- paste0('dunsNumber=',duns)
# metadata_params$offset <- paste0('offset=',j)
#
# #compiles the parameters into a URL for API pull
# metadatareq <- paste0(metadata_url,"/", "?",paste(unlist(metadata_params),collapse='&'))
#
#
#
# tryCatch(
# {metadata <- fromJSON(metadatareq)
# #metadata$response$award
#
# metadata$response$award},
# error = function(e) {paste("error with page", j)})
# }
#
# write.csv(testresults, file="UMICHPull_20220328.csv")