From 1e41f47b9168282b3e4212af9c2597a9fca2f86e Mon Sep 17 00:00:00 2001 From: thodson-usgs Date: Mon, 13 Apr 2026 08:44:16 -0500 Subject: [PATCH 1/2] Fix bugs, remove dead code, and deduplicate shared patterns Fix format_api_dates bug where loop variable was unused and full datetime vector was passed instead of each element. Fix readNWISrating referencing undefined intColumns variable. Remove dead functions (only_legacy, post_url, .capitalALL) and unreachable measurements branch. Extract add_api_token, coerce_num_cols, and log_rate_limit helpers to replace duplicated code. Simplify deprecated read_USGS_samples wrapper to use ... forwarding. Cache user-agent string. Replace rbind in loop with pre-allocated lists. Co-Authored-By: Claude Opus 4.6 (1M context) --- R/AAA.R | 14 ------- R/constructNWISURL.R | 20 --------- R/construct_api_requests.R | 16 ++++---- R/getWebServiceData.R | 5 +++ R/importWQP.R | 20 --------- R/readNGWMNdata.R | 19 +++++---- R/readNWISunit.R | 9 ---- R/read_waterdata_samples.R | 84 ++++---------------------------------- R/read_waterdata_stats.R | 7 +--- R/walk_pages.R | 65 ++++++++++++++--------------- 10 files changed, 64 insertions(+), 195 deletions(-) diff --git a/R/AAA.R b/R/AAA.R index 73c38fbbb..092445051 100644 --- a/R/AAA.R +++ b/R/AAA.R @@ -90,20 +90,6 @@ wqp_message_beta <- function() { message("WQX3 services are in-development, use with caution.") } -only_legacy <- function(service) { - legacy <- service %in% - c( - "Organization", - "ActivityMetric", - "SiteSummary", - "Project", - "ProjectMonitoringLocationWeighting", - "ResultDetectionQuantitationLimit", - "BiologicalMetric" - ) - return(legacy) -} - is_legacy <- function(service) { legacy <- service %in% c( diff --git a/R/constructNWISURL.R b/R/constructNWISURL.R index 34d724eba..fd06ed227 100644 --- a/R/constructNWISURL.R +++ b/R/constructNWISURL.R @@ -144,26 +144,6 @@ constructNWISURL <- function( url <- get_or_post(url, POST = POST, end_date = endDate) } }, - measurements = { - url <- get_or_post( - baseURL, - POST = POST, - site_no = siteNumbers, - .multi = "comma" - ) - url <- get_or_post(url, POST = POST, range_selection = "date_range") - if (nzchar(startDate)) { - url <- get_or_post(url, POST = POST, begin_date = startDate) - } - if (nzchar(endDate)) { - url <- get_or_post(url, POST = POST, end_date = endDate) - } - if (expanded) { - url <- get_or_post(url, POST = POST, format = "rdb_expanded") - } else { - url <- get_or_post(url, POST = POST, format = "rdb") - } - }, stat = { # for statistics service diff --git a/R/construct_api_requests.R b/R/construct_api_requests.R index c1cf483b5..662b95662 100644 --- a/R/construct_api_requests.R +++ b/R/construct_api_requests.R @@ -385,11 +385,9 @@ format_api_dates <- function(datetime, date = FALSE) { } datetime <- paste0(datetime, collapse = "/") } else { - for (i in seq_along(datetime)) { - datetime1 <- get_dateTime(datetime) - } + datetime1 <- lapply(datetime, get_dateTime) datetime <- paste0( - lubridate::format_ISO8601(datetime1, usetz = "Z"), + vapply(datetime1, lubridate::format_ISO8601, character(1), usetz = "Z"), collapse = "/" ) } @@ -599,12 +597,16 @@ basic_request <- function(url_base, format = "json") { httr2::req_error(body = error_body) |> httr2::req_timeout(seconds = 180) - token <- Sys.getenv("API_USGS_PAT") + req <- add_api_token(req) + + return(req) +} +add_api_token <- function(req) { + token <- Sys.getenv("API_USGS_PAT") if (token != "") { req <- req |> httr2::req_headers_redacted(`X-Api-Key` = token) } - - return(req) + req } diff --git a/R/getWebServiceData.R b/R/getWebServiceData.R index 836a4ee3b..c8185a2d9 100644 --- a/R/getWebServiceData.R +++ b/R/getWebServiceData.R @@ -98,6 +98,10 @@ check_non_200s <- function(returnedList) { #' #' @keywords internal default_ua <- function() { + if (!is.null(pkg.env$ua)) { + return(pkg.env$ua) + } + versions <- c( libcurl = curl::curl_version()$version, httr2 = as.character(utils::packageVersion("httr2")), @@ -110,6 +114,7 @@ default_ua <- function() { ua <- paste0(ua, "/", Sys.getenv("CUSTOM_DR_UA")) } + pkg.env$ua <- ua return(ua) } diff --git a/R/importWQP.R b/R/importWQP.R index 50f0739b8..a1716d80c 100644 --- a/R/importWQP.R +++ b/R/importWQP.R @@ -276,23 +276,3 @@ create_dateTime <- function(df, date_col, time_col, tz_col, tz) { return(df) } -post_url <- function(obs_url, csv = FALSE) { - split <- strsplit(obs_url, "?", fixed = TRUE) - - url <- split[[1]][1] - if (csv) { - url <- paste0(url, "?mimeType=csv") - } else { - url <- paste0(url, "?mimeType=tsv") - } - - if (grepl("sorted", split[[1]][2])) { - url <- paste0( - url, - "&sorted=", - strsplit(split[[1]][2], "sorted=", fixed = TRUE)[[1]][2] - ) - } - - return(url) -} diff --git a/R/readNGWMNdata.R b/R/readNGWMNdata.R index dc4a86aed..5c01cecdf 100644 --- a/R/readNGWMNdata.R +++ b/R/readNGWMNdata.R @@ -50,9 +50,6 @@ readNGWMNdata <- function(service, ..., asDateTime = TRUE, tz = "UTC") { match.arg(service, c("observation", "featureOfInterest")) if (service == "observation") { - allObs <- data.frame() - allAttrs <- data.frame() - # these attributes are pulled out and saved when doing binds to be reattached attrs <- c( "url", @@ -63,13 +60,17 @@ readNGWMNdata <- function(service, ..., asDateTime = TRUE, tz = "UTC") { ) featureID <- stats::na.omit(gsub(":", ".", dots[["siteNumbers"]])) - for (f in featureID) { - obsFID <- retrieveObservation(featureID = f, asDateTime, attrs, tz = tz) - obsFIDattr <- saveAttrs(attrs, obsFID) - obsFID <- removeAttrs(attrs, obsFID) - allObs <- r_bind_dr(allObs, obsFID) - allAttrs <- r_bind_dr(allAttrs, obsFIDattr) + obs_list <- vector("list", length(featureID)) + attr_list <- vector("list", length(featureID)) + for (idx in seq_along(featureID)) { + obsFID <- retrieveObservation( + featureID = featureID[idx], asDateTime, attrs, tz = tz + ) + attr_list[[idx]] <- saveAttrs(attrs, obsFID) + obs_list[[idx]] <- removeAttrs(attrs, obsFID) } + allObs <- Reduce(r_bind_dr, obs_list, init = data.frame()) + allAttrs <- Reduce(r_bind_dr, attr_list, init = data.frame()) allSites <- tryCatch( { diff --git a/R/readNWISunit.R b/R/readNWISunit.R index c5b4fbc62..fadc33886 100644 --- a/R/readNWISunit.R +++ b/R/readNWISunit.R @@ -287,9 +287,6 @@ readNWISrating <- function(siteNumber, type = "base", convertType = TRUE) { data <- importRDB1(url, asDateTime = FALSE, convertType = convertType) if ("current_rating_nu" %in% names(data)) { - intColumns <- intColumns[ - !("current_rating_nu" %in% names(data)[intColumns]) - ] data$current_rating_nu <- gsub(" ", "", data$current_rating_nu) } @@ -480,9 +477,3 @@ readNWISuse <- function( return(NULL) } -.capitalALL <- function(input) { - if (any(grepl("(?i)all", input))) { - input <- toupper(input) - } - return(input) -} diff --git a/R/read_waterdata_samples.R b/R/read_waterdata_samples.R index f18738b99..f3fbec2cd 100644 --- a/R/read_waterdata_samples.R +++ b/R/read_waterdata_samples.R @@ -129,14 +129,8 @@ construct_waterdata_sample_request <- function( baseURL <- httr2::request("https://api.waterdata.usgs.gov") |> httr2::req_url_path_append("samples-data") |> - httr2::req_url_query(mimeType = "text/csv") - - token <- Sys.getenv("API_USGS_PAT") - - if (token != "") { - baseURL <- baseURL |> - httr2::req_headers_redacted(`X-Api-Key` = token) - } + httr2::req_url_query(mimeType = "text/csv") |> + add_api_token() switch( dataType, @@ -387,14 +381,8 @@ check_waterdata_sample_params <- function( match.arg(service, choices = service_options, several.ok = FALSE) check_group_req <- httr2::request("https://api.waterdata.usgs.gov") |> - httr2::req_url_path_append("samples-data") - - token <- Sys.getenv("API_USGS_PAT") - - if (token != "") { - check_group_req <- check_group_req |> - httr2::req_headers_redacted(`X-Api-Key` = token) - } + httr2::req_url_path_append("samples-data") |> + add_api_token() if (service != "reference-list") { check_group_req <- check_group_req |> @@ -544,14 +532,8 @@ summarize_waterdata_samples <- function(monitoringLocationIdentifier) { baseURL <- httr2::request("https://api.waterdata.usgs.gov") |> httr2::req_url_path_append("samples-data") |> httr2::req_url_path_append("summary", monitoringLocationIdentifier) |> - httr2::req_url_query(mimeType = "text/csv") - - token <- Sys.getenv("API_USGS_PAT") - - if (token != "") { - baseURL <- baseURL |> - httr2::req_headers_redacted(`X-Api-Key` = token) - } + httr2::req_url_query(mimeType = "text/csv") |> + add_api_token() df <- importWQP(baseURL) @@ -569,64 +551,14 @@ summarize_waterdata_samples <- function(monitoringLocationIdentifier) { #' @rdname read_waterdata_samples #' @export -read_USGS_samples <- function( - monitoringLocationIdentifier = NA, - siteTypeCode = NA, - boundingBox = NA, - hydrologicUnit = NA, - activityMediaName = NA, - characteristicGroup = NA, - characteristic = NA, - characteristicUserSupplied = NA, - activityStartDateLower = NA, - activityStartDateUpper = NA, - countryFips = NA, - stateFips = NA, - countyFips = NA, - projectIdentifier = NA, - recordIdentifierUserSupplied = NA, - siteTypeName = NA, - usgsPCode = NA, - pointLocationLatitude = NA, - pointLocationLongitude = NA, - pointLocationWithinMiles = NA, - dataType = "results", - dataProfile = NA, - tz = "UTC", - convertType = TRUE -) { +read_USGS_samples <- function(...) { .Deprecated( new = "read_waterdata_samples", package = "dataRetrieval", msg = "Function has been renamed. Please begin to migrate to read_waterdata_samples" ) - read_waterdata_samples( - monitoringLocationIdentifier = monitoringLocationIdentifier, - siteTypeCode = siteTypeCode, - boundingBox = boundingBox, - hydrologicUnit = hydrologicUnit, - activityMediaName = activityMediaName, - characteristicGroup = characteristicGroup, - characteristic = characteristic, - characteristicUserSupplied = characteristicUserSupplied, - activityStartDateLower = activityStartDateLower, - activityStartDateUpper = activityStartDateUpper, - countryFips = countryFips, - stateFips = stateFips, - countyFips = countyFips, - projectIdentifier = projectIdentifier, - recordIdentifierUserSupplied = recordIdentifierUserSupplied, - siteTypeName = siteTypeName, - usgsPCode = usgsPCode, - pointLocationLatitude = pointLocationLatitude, - pointLocationLongitude = pointLocationLongitude, - pointLocationWithinMiles = pointLocationWithinMiles, - dataType = dataType, - dataProfile = dataProfile, - tz = tz, - convertType = convertType - ) + read_waterdata_samples(...) } diff --git a/R/read_waterdata_stats.R b/R/read_waterdata_stats.R index 3fb0911fc..897002921 100644 --- a/R/read_waterdata_stats.R +++ b/R/read_waterdata_stats.R @@ -163,12 +163,7 @@ construct_statistics_request <- function(service = "Normals") { httr2::req_url_path_append(getOption("dataRetrieval.api_version_stat")) |> httr2::req_url_path_append(paste0("observation", service)) - token <- Sys.getenv("API_USGS_PAT") - - if (token != "") { - base_request <- base_request |> - httr2::req_headers_redacted(`X-Api-Key` = token) - } + base_request <- add_api_token(base_request) return(base_request) } diff --git a/R/walk_pages.R b/R/walk_pages.R index 7b00adb65..aa72eea07 100644 --- a/R/walk_pages.R +++ b/R/walk_pages.R @@ -39,19 +39,7 @@ get_resp_data <- function(resp) { return_df <- sf::read_sf(httr2::resp_body_string(resp)) - included_num_cols <- names(return_df)[names(return_df) %in% num_cols] - - if ( - !all(sapply( - sf::st_drop_geometry(return_df[, included_num_cols]), - is.numeric - )) - ) { - return_df[, included_num_cols] <- lapply( - sf::st_drop_geometry(return_df[, included_num_cols]), - as.numeric - ) - } + return_df <- coerce_num_cols(return_df, is_sf = TRUE) if ("qualifier" %in% names(return_df)) { return_df$qualifier <- as.character(vapply( @@ -97,14 +85,7 @@ next_req_url <- function(resp, req) { return(NULL) } - header_info <- httr2::resp_headers(resp) - if (Sys.getenv("API_USGS_PAT") != "") { - message( - "Remaining requests this hour:", - header_info$`x-ratelimit-remaining`, - " " - ) - } + log_rate_limit(resp) if ("links" %in% names(body)) { links <- body$links if (any(sapply(links, function(x) x$rel) == "next")) { @@ -126,24 +107,13 @@ get_csv <- function(req, limit) { skip_geo <- grepl("skipGeometry=true", req$url, ignore.case = TRUE) resp <- httr2::req_perform(req) - header_info <- httr2::resp_headers(resp) - if (Sys.getenv("API_USGS_PAT") != "") { - message( - "Remaining requests this hour:", - header_info$`x-ratelimit-remaining`, - " " - ) - } + log_rate_limit(resp) if (httr2::resp_has_body(resp)) { return_list <- httr2::resp_body_string(resp) df <- data.table::fread(input = return_list, data.table = FALSE) - included_num_cols <- names(df)[names(df) %in% num_cols] - - if (!all(sapply(df[, included_num_cols], is.numeric))) { - df[, included_num_cols] <- lapply(df[, included_num_cols], as.numeric) - } + df <- coerce_num_cols(df) if (skip_geo) { df <- df[, names(df)[!names(df) %in% c("x", "y")]] @@ -164,3 +134,30 @@ ensure all requested data is returned." return(df) } + +coerce_num_cols <- function(df, is_sf = FALSE) { + included_num_cols <- names(df)[names(df) %in% num_cols] + if (length(included_num_cols) == 0) return(df) + + check_df <- if (is_sf) { + sf::st_drop_geometry(df[, included_num_cols, drop = FALSE]) + } else { + df[, included_num_cols, drop = FALSE] + } + + if (!all(vapply(check_df, is.numeric, logical(1)))) { + df[, included_num_cols] <- lapply(check_df, as.numeric) + } + df +} + +log_rate_limit <- function(resp) { + if (Sys.getenv("API_USGS_PAT") != "") { + header_info <- httr2::resp_headers(resp) + message( + "Remaining requests this hour:", + header_info$`x-ratelimit-remaining`, + " " + ) + } +} From a54e0e74d4d1aff15c154c814f6bc4c845c5e4f8 Mon Sep 17 00:00:00 2001 From: thodson-usgs Date: Tue, 14 Apr 2026 14:28:41 -0500 Subject: [PATCH 2/2] Fix codoc mismatch for deprecated read_USGS_samples Give read_USGS_samples its own roxygen docs instead of sharing @rdname with read_waterdata_samples, since the ... signature doesn't match the documented parameter list. Co-Authored-By: Claude Opus 4.6 (1M context) --- R/read_waterdata_samples.R | 6 ++++- man/read_USGS_samples.Rd | 18 +++++++++++++++ man/read_waterdata_monitoring_location.Rd | 2 +- man/read_waterdata_samples.Rd | 28 ----------------------- 4 files changed, 24 insertions(+), 30 deletions(-) create mode 100644 man/read_USGS_samples.Rd diff --git a/R/read_waterdata_samples.R b/R/read_waterdata_samples.R index f3fbec2cd..f6d45b0f3 100644 --- a/R/read_waterdata_samples.R +++ b/R/read_waterdata_samples.R @@ -549,8 +549,12 @@ summarize_waterdata_samples <- function(monitoringLocationIdentifier) { } -#' @rdname read_waterdata_samples +#' @title Deprecated: Use \code{read_waterdata_samples} instead +#' @description This function has been renamed to \code{\link{read_waterdata_samples}}. +#' @param ... Arguments passed to \code{\link{read_waterdata_samples}}. +#' @return data frame returned from web service call. #' @export +#' @keywords internal read_USGS_samples <- function(...) { .Deprecated( new = "read_waterdata_samples", diff --git a/man/read_USGS_samples.Rd b/man/read_USGS_samples.Rd new file mode 100644 index 000000000..9729aac1e --- /dev/null +++ b/man/read_USGS_samples.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/read_waterdata_samples.R +\name{read_USGS_samples} +\alias{read_USGS_samples} +\title{Deprecated: Use \code{read_waterdata_samples} instead} +\usage{ +read_USGS_samples(...) +} +\arguments{ +\item{...}{Arguments passed to \code{\link{read_waterdata_samples}}.} +} +\value{ +data frame returned from web service call. +} +\description{ +This function has been renamed to \code{\link{read_waterdata_samples}}. +} +\keyword{internal} diff --git a/man/read_waterdata_monitoring_location.Rd b/man/read_waterdata_monitoring_location.Rd index 871ce1d2e..b8e87b7d0 100644 --- a/man/read_waterdata_monitoring_location.Rd +++ b/man/read_waterdata_monitoring_location.Rd @@ -136,7 +136,7 @@ Multiple monitoring_location_ids can be requested as a character vector.} \item{properties}{A vector of requested columns to be returned from the query. Available options are: -geometry, monitoring_location_id, agency_code, agency_name, monitoring_location_number, monitoring_location_name, district_code, country_code, country_name, state_code, state_name, county_code, county_name, minor_civil_division_code, site_type_code, site_type, hydrologic_unit_code, basin_code, altitude, altitude_accuracy, altitude_method_code, altitude_method_name, vertical_datum, vertical_datum_name, horizontal_positional_accuracy_code, horizontal_positional_accuracy, horizontal_position_method_code, horizontal_position_method_name, original_horizontal_datum, original_horizontal_datum_name, drainage_area, contributing_drainage_area, time_zone_abbreviation, uses_daylight_savings, construction_date, aquifer_code, national_aquifer_code, aquifer_type_code, well_constructed_depth, hole_constructed_depth, depth_source_code. +geometry, monitoring_location_id, agency_code, agency_name, monitoring_location_number, monitoring_location_name, district_code, country_code, country_name, state_code, state_name, county_code, county_name, minor_civil_division_code, site_type_code, site_type, hydrologic_unit_code, basin_code, altitude, altitude_accuracy, altitude_method_code, altitude_method_name, vertical_datum, vertical_datum_name, horizontal_positional_accuracy_code, horizontal_positional_accuracy, horizontal_position_method_code, horizontal_position_method_name, original_horizontal_datum, original_horizontal_datum_name, drainage_area, contributing_drainage_area, time_zone_abbreviation, uses_daylight_savings, construction_date, aquifer_code, national_aquifer_code, aquifer_type_code, well_constructed_depth, hole_constructed_depth, depth_source_code, revision_note, revision_created, revision_modified. The default (\code{NA}) will return all columns of the data.} \item{bbox}{Only features that have a geometry that intersects the bounding diff --git a/man/read_waterdata_samples.Rd b/man/read_waterdata_samples.Rd index 9a6f502b6..6ca606468 100644 --- a/man/read_waterdata_samples.Rd +++ b/man/read_waterdata_samples.Rd @@ -2,7 +2,6 @@ % Please edit documentation in R/read_waterdata_samples.R \name{read_waterdata_samples} \alias{read_waterdata_samples} -\alias{read_USGS_samples} \title{USGS Samples Data} \usage{ read_waterdata_samples( @@ -31,33 +30,6 @@ read_waterdata_samples( tz = "UTC", convertType = TRUE ) - -read_USGS_samples( - monitoringLocationIdentifier = NA, - siteTypeCode = NA, - boundingBox = NA, - hydrologicUnit = NA, - activityMediaName = NA, - characteristicGroup = NA, - characteristic = NA, - characteristicUserSupplied = NA, - activityStartDateLower = NA, - activityStartDateUpper = NA, - countryFips = NA, - stateFips = NA, - countyFips = NA, - projectIdentifier = NA, - recordIdentifierUserSupplied = NA, - siteTypeName = NA, - usgsPCode = NA, - pointLocationLatitude = NA, - pointLocationLongitude = NA, - pointLocationWithinMiles = NA, - dataType = "results", - dataProfile = NA, - tz = "UTC", - convertType = TRUE -) } \arguments{ \item{monitoringLocationIdentifier}{A monitoring location identifier has two parts: the agency code