diff --git a/.Rbuildignore b/.Rbuildignore index 7ad0cd654..819a77239 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -67,5 +67,7 @@ vignettes/changes_slides.Rmd vignettes/daily_data_statistics.Rmd vignettes/continuous_pr.Rmd vignettes/quick_slides.Rmd +vignettes/Reference_Lists.Rmd ^[.]?air[.]toml$ ^\.vscode$ +environment.yml diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml index 4491fc348..b5a0a8012 100644 --- a/.github/workflows/pkgdown.yaml +++ b/.github/workflows/pkgdown.yaml @@ -46,11 +46,9 @@ jobs: any::pkgdown any::rcmdcheck any::DT - any::data.table any::dplyr any::tidyr any::ggplot2 - any::zoo any::sf any::patchwork any::maps @@ -59,6 +57,12 @@ jobs: any::gridExtra local::. needs: website + - name: Setup Micromamba + uses: mamba-org/setup-micromamba@v1 + with: + environment-file: environment.yml + cache-environment: false + cache-downloads: false - name: Create public directory run: | mkdir public diff --git a/.gitignore b/.gitignore index 6debd82a7..02d0a38fd 100644 --- a/.gitignore +++ b/.gitignore @@ -12,10 +12,8 @@ docs /doc/ /Meta/ /Temp/ +/public/ vignettes/*.html vignettes/*.R - - /.quarto/ - **/*.quarto_ipynb diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 4322313c2..6c88a8dd2 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -42,14 +42,13 @@ build-image: alias: docker rules: - changes: + - .gitlab-ci.yml - docker/Dockerfile + - environment.yml script: - echo ${CI_REGISTRY_PASSWORD} | docker login -u ${CI_REGISTRY_USER} --password-stdin $CI_REGISTRY - docker pull ${CI_REGISTRY_IMAGE}:latest || true - - cd docker - - docker build - -t ${CI_REGISTRY_IMAGE}:latest - . + - docker build --cache-from ${CI_REGISTRY_IMAGE}:latest -t ${CI_REGISTRY_IMAGE}:latest -t ${CI_REGISTRY_IMAGE}:BUILD_${CI_COMMIT_SHORT_SHA} -f docker/Dockerfile . 
- docker push --all-tags ${CI_REGISTRY_IMAGE} buildcheck: @@ -58,8 +57,15 @@ buildcheck: dependencies: - build-image script: - - Rscript -e 'devtools::install(quick = TRUE, upgrade = "never")' - - Rscript -e 'devtools::check(document = FALSE, args = "--no-tests", check_dir = Sys.getenv("BUILD_LOGS_DIR"), vignettes = FALSE)' + - | + Rscript -e " + rcmdcheck::rcmdcheck( + args = c('--no-manual', '--no-tests'), + build_args = c('--no-manual', '--no-resave-data'), + check_dir = '.', + error_on = 'warning' + ) + " unittests: stage: test @@ -68,8 +74,16 @@ unittests: - build-image - buildcheck script: - - R -e 'library(testthat); options(testthat.output_file = file.path(Sys.getenv("CI_PROJECT_DIR"), "test-out.xml")); devtools::test(reporter = "junit")' - - R -e 'x <- covr::package_coverage(); covr::to_cobertura(x); x; ' + - | + Rscript -e ' + library(testthat) + options(testthat.output_file = file.path(Sys.getenv("CI_PROJECT_DIR"), "test-out.xml")) + test_local(reporter = "junit")' + - | + Rscript -e ' + x <- covr::package_coverage() + covr::to_cobertura(x) + x' artifacts: when: always expire_in: 1 week @@ -100,11 +114,9 @@ pages: - build-image - buildcheck script: - - Rscript -e 'devtools::install(quick = TRUE, upgrade = "never")' - Rscript -e 'pkgdown::build_site(override = list(destination = "public"))' - Rscript -e 'file.copy(from = "./public/articles/logo.png", to = "./public/reference/logo.png")' - quarto render - artifacts: paths: - $PAGES_OUTDIR diff --git a/DESCRIPTION b/DESCRIPTION index cc8fef6e0..6890b9308 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: dataRetrieval Type: Package Title: Retrieval Functions for USGS and EPA Hydrology and Water Quality Data -Version: 2.7.23.9001 +Version: 2.7.24.9001 Authors@R: c( person("Laura", "DeCicco", role = c("aut","cre"), email = "ldecicco@usgs.gov", diff --git a/NAMESPACE b/NAMESPACE index 67434a709..4817ef167 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -55,6 +55,7 @@ 
export(read_waterdata_latest_daily) export(read_waterdata_metadata) export(read_waterdata_monitoring_location) export(read_waterdata_parameter_codes) +export(read_waterdata_ratings) export(read_waterdata_samples) export(read_waterdata_stats_daterange) export(read_waterdata_stats_por) diff --git a/NEWS b/NEWS index aa42bd4a1..2a00027a4 100644 --- a/NEWS +++ b/NEWS @@ -1,6 +1,24 @@ +dataRetrieval 2.7.25 +=================== +* Added read_waterdata_ratings to access USGS rating curves with +new modern endpoint. +* Increase flexibility of chunking by monitoring_location_id by +including it as an argument in each relevant function. +* Clean up deprecated code. +* Updated retry strategy to include retry_on_failure = TRUE. +* Added countries, methods, method-categories, method-citations, and +citations to possible values in read_waterdata_metadata. +* Added field_measurements_series_id argument to read_waterdata_field_measurements +* Removed NWIS tests + + + dataRetrieval 2.7.24 =================== * Let dataRetrieval take care of chunking up requests by monitoring_location_id. +* Fixed bug causing some character columns in importWQP to be numeric +* Fixed bug causing open ended date requests in waterdata functions to +return full period of record. 
dataRetrieval 2.7.23 =================== diff --git a/R/AAA.R b/R/AAA.R index 092445051..e02bd1987 100644 --- a/R/AAA.R +++ b/R/AAA.R @@ -7,6 +7,11 @@ pkg.env <- new.env() options("dataRetrieval.api_version" = "v0") options("dataRetrieval.api_version_stat" = "v0") options("dataRetrieval.attach_request" = TRUE) + options("dataRetrieval.convertType" = TRUE) + options("dataRetrieval.no_paging" = FALSE) + options("dataRetrieval.site_chunk_size_meta" = 250) + options("dataRetrieval.site_chunk_size_data" = 10) + options("dataRetrieval.limit" = 50000) services <- c( "server", @@ -32,7 +37,12 @@ pkg.env <- new.env() "coordinate-method-codes", "medium-codes", "counties", + "countries", "hydrologic-unit-codes", + "methods", + "method-categories", + "method-citations", + "citations", "states", "national-aquifer-codes", "reliability-codes", @@ -52,7 +62,12 @@ pkg.env <- new.env() "coordinate_method_code", "medium_code", "county", + "country", "hydrologic_unit_code", + "methods", + "method_categories", + "method_citations", + "citations", "state", "national_aquifer_code", "reliability_code", diff --git a/R/construct_api_requests.R b/R/construct_api_requests.R index 662b95662..4fe15147c 100644 --- a/R/construct_api_requests.R +++ b/R/construct_api_requests.R @@ -5,26 +5,27 @@ #' #' @export #' @param service Which service available on . +#' @param output_id Name of id column to return #' @param ... Extra parameters from the specific services. #' @param bbox Only features that have a geometry that intersects the bounding #' box are selected.The bounding box is provided as four or six numbers, depending #' on whether the coordinate reference system includes a vertical axis (height or #' depth). -#' @param properties The properties that should be included for each feature. The -#' parameter value is a comma-separated list of property names which depend on the -#' service being called. -#' @param skipGeometry This option can be used to skip response geometries for -#' each feature. 
The returning object will be a data frame with no spatial -#' information. #' @keywords internal +#' +#' @inheritParams check_arguments_api +#' @inheritParams check_arguments_non_api +#' #' @examples #' site <- "USGS-02238500" #' pcode <- "00060" #' req_dv <- construct_api_requests("daily", +#' output_id = "daily_id", #' monitoring_location_id = site, #' parameter_code = "00060") #' #' req_dv <- construct_api_requests("daily", +#' output_id = "daily_id", #' monitoring_location_id = site, #' parameter_code = c("00060", "00065")) #' @@ -32,21 +33,39 @@ #' start_date <- "2018-01-01" #' end_date <- "2022-01-01" #' req_dv <- construct_api_requests("daily", -#' monitoring_location_id = sites, -#' parameter_code = c("00060", "00065"), -#' datetime = c(start_date, end_date)) +#' output_id = "daily_id", +#' monitoring_location_id = sites, +#' parameter_code = c("00060", "00065"), +#' datetime = c(start_date, end_date)) #' construct_api_requests <- function( service, - properties = NA_character_, + output_id, + ..., bbox = NA, - skipGeometry = FALSE, - no_paging = FALSE, - ... + convertType = getOption("dataRetrieval.convertType"), + no_paging = getOption("dataRetrieval.no_paging"), + chunk_size = getOption("dataRetrieval.site_chunk_size_data"), + limit = getOption("dataRetrieval.limit"), + attach_request = getOption("dataRetrieval.attach_request") ) { POST <- FALSE full_list <- list(...) 
+ full_list[["limit"]] <- limit + + check_arguments_non_api( + convertType = convertType, + no_paging = no_paging, + limit = full_list[["limit"]], + attach_request = attach_request, + chunk_size = chunk_size + ) + + check_arguments_api( + bbox = full_list[["bbox"]], + skipGeometry = full_list[["skipGeometry"]] + ) time_periods <- c( "last_modified", @@ -58,6 +77,20 @@ construct_api_requests <- function( "end_utc" ) + full_list <- switch_arg_id( + full_list, + id_name = output_id, + service = service + ) + + # Clean out non-API arguments: + properties <- switch_properties_id( + properties = full_list[["properties"]], + id = output_id + ) + + full_list[["properties"]] <- NULL + if (any(time_periods %in% names(full_list))) { for (i in time_periods[time_periods %in% names(full_list)]) { dates <- FALSE @@ -74,9 +107,10 @@ construct_api_requests <- function( "begin", "end", "time", - "limit", "begin_utc", - "end_utc" + "end_utc", + "limit", + "skipGeometry" ) comma_params <- c( @@ -85,7 +119,8 @@ construct_api_requests <- function( "statistic_id", "time_series_id", "computation_period_identifier", - "computation_identifier" + "computation_identifier", + "data_type" ) if ( @@ -110,6 +145,7 @@ construct_api_requests <- function( Negate(anyNA), lapply(full_list[comma_params], function(x) x[!is.na(x)]) ) + comma_params_filtered <- comma_params_filtered[ !sapply(comma_params_filtered, is.null) ] @@ -129,8 +165,6 @@ construct_api_requests <- function( get_list <- c(single_params_filtered, comma_params_filtered) } - get_list[["skipGeometry"]] <- skipGeometry - get_list <- get_list[!is.na(get_list)] format_type <- ifelse(isTRUE(no_paging), "csv", "json") @@ -207,15 +241,6 @@ construct_api_requests <- function( return(baseURL) } -check_limits <- function(args) { - current_api_limit <- 50000 - - if (is.na(args[["limit"]])) { - args[["limit"]] <- current_api_limit - } - - return(args) -} #' Setup the request for the OGC API requests #' @@ -595,6 +620,7 @@ basic_request <- 
function(url_base, format = "json") { httr2::req_headers(`Accept-Encoding` = c("compress", "gzip")) |> httr2::req_url_query(f = format, lang = "en-US") |> httr2::req_error(body = error_body) |> + httr2::req_retry(max_tries = 3, retry_on_failure = TRUE) |> httr2::req_timeout(seconds = 180) req <- add_api_token(req) diff --git a/R/dataRetrieval-package.R b/R/dataRetrieval-package.R index a899f4831..9eda9c1f0 100644 --- a/R/dataRetrieval-package.R +++ b/R/dataRetrieval-package.R @@ -211,7 +211,7 @@ NULL # # property_list <- list() # for(service in services){ -# property_list[[service]] <- get_properties_for_docs(service) +# property_list[[service]] <- dataRetrieval:::get_properties_for_docs(service) # } # # num_cols <- c("value", "contributing_drainage_area", "drainage_area", diff --git a/R/getWebServiceData.R b/R/getWebServiceData.R index c8185a2d9..98759426d 100644 --- a/R/getWebServiceData.R +++ b/R/getWebServiceData.R @@ -30,7 +30,11 @@ getWebServiceData <- function(obs_url, ...) { obs_url <- httr2::req_user_agent(obs_url, default_ua()) obs_url <- httr2::req_throttle(obs_url, rate = 30 / 60) - obs_url <- httr2::req_retry(obs_url, max_tries = 3, max_seconds = 180) + obs_url <- httr2::req_retry( + obs_url, + max_tries = 3, + retry_on_failure = TRUE + ) obs_url <- httr2::req_headers( obs_url, `Accept-Encoding` = c("compress", "gzip") diff --git a/R/get_ogc_data.R b/R/get_ogc_data.R index e0a915ac1..89ebb6306 100644 --- a/R/get_ogc_data.R +++ b/R/get_ogc_data.R @@ -3,15 +3,16 @@ #' @param args arguments from individual functions #' @param output_id Name of id column to return #' @param service Endpoint name. -#' @param \dots Used to force users to fully name the details argument. -#' @param chunk_size Number of monitoring_location_ids to chunk requests into. 
#' #' @noRd #' @return data.frame with attributes -get_ogc_data <- function(args, output_id, service, ..., chunk_size = 250) { - rlang::check_dots_empty() +get_ogc_data <- function(args, output_id, service) { + chunk_size <- args[["chunk_size"]] + args[["..."]] <- NULL - if (length(args[["monitoring_location_id"]]) > chunk_size) { + if ( + !is.na(chunk_size) & length(args[["monitoring_location_id"]]) > chunk_size + ) { ml_splits <- split( args[["monitoring_location_id"]], ceiling(seq_along(args[["monitoring_location_id"]]) / chunk_size) @@ -32,16 +33,7 @@ get_ogc_data <- function(args, output_id, service, ..., chunk_size = 250) { ignore.attr = TRUE )) } else { - args[["chunk_sites_by"]] <- NULL - - args <- switch_arg_id(args, id_name = output_id, service = service) - - args <- check_limits(args) - - properties <- args[["properties"]] - args[["properties"]] <- switch_properties_id(properties, id = output_id) - convertType <- args[["convertType"]] - args[["convertType"]] <- NULL + args[["output_id"]] <- output_id args[["service"]] <- service req <- do.call(construct_api_requests, args) @@ -56,24 +48,18 @@ get_ogc_data <- function(args, output_id, service, ..., chunk_size = 250) { return_list <- walk_pages(req) } - if (is.na(args[["skipGeometry"]])) { - skipGeometry <- FALSE - } else { - skipGeometry <- args[["skipGeometry"]] - } - return_list <- deal_with_empty( return_list, - properties, + args[["properties"]], service, - skipGeometry, - convertType, + isTRUE(args[["skipGeometry"]]), + args[["convertType"]], no_paging ) - return_list <- rejigger_cols(return_list, properties, output_id) + return_list <- rejigger_cols(return_list, args[["properties"]], output_id) - if (convertType) { + if (args[["convertType"]]) { return_list <- cleanup_cols(return_list, service) return_list <- order_results(return_list) @@ -96,11 +82,25 @@ get_ogc_data <- function(args, output_id, service, ..., chunk_size = 250) { return_list <- move_id_col(return_list, output_id) } - if 
(getOption("dataRetrieval.attach_request")) { + if (args[["attach_request"]]) { attr(return_list, "request") <- req } } + if ( + !isTRUE(args[["skipGeometry"]]) & + "geometry" %in% names(return_list) + ) { + if ( + all(sf::st_is_empty(return_list[["geometry"]])) & + !"geometry" %in% args[["properties"]] + ) { + return_list <- sf::st_drop_geometry(return_list) + } else { + return_list <- sf::st_as_sf(return_list) + } + } + attr(return_list, "queryTime") <- Sys.time() return(return_list) } @@ -179,3 +179,103 @@ switch_properties_id <- function(properties, id) { return(properties) } + +#' Check non-API arguments +#' +#' Function to check types and create parameter descriptions. +#' +#' @param convertType logical, defaults to `r getOption("dataRetrieval.convertType")`. +#' If `TRUE`, the function will convert the data to dates, any qualifiers to string +#' vector and reorder the returned data frame. +#' @param no_paging logical, defaults to `r getOption("dataRetrieval.no_paging")`. +#' If `TRUE`, the data will +#' be requested from a native csv format. This can be dangerous because the +#' data will cut off at 50,000 rows without indication that more data +#' is available. Use `TRUE` with caution. +#' @param limit numeric, The optional limit parameter is used to control the subset of the +#' selected features that should be returned in each page. The maximum allowable +#' limit is 50,000. It may be beneficial to set this number lower if your internet +#' connection is spotty. The default (`NA`) will set the limit to the maximum +#' allowable limit for the service. +#' @param attach_request logical, defaults to `r getOption("dataRetrieval.attach_request")`. +#' If set to `TRUE`, the full request sent to the Water Data API is attached +#' as an attribute to the data set. +#' @param chunk_size Number of monitoring_location_ids to chunk requests into. 
+#' The default for functions that don't generally return long-term data records +#' is `r getOption("dataRetrieval.site_chunk_size_meta")`, while +#' the default for time series functions is +#' `r getOption("dataRetrieval.site_chunk_size_data")`. +#' Setting to `NA` will eliminate site chunking, giving users full control. +#' @param \dots Not used. Included to help differentiate official Water Data API arguments +#' from more seldom used, optional dataRetrieval-specific arguments. +#' @keywords internal +check_arguments_non_api <- function( + convertType, + no_paging, + limit, + attach_request, + chunk_size, + ... +) { + if (!is.null(convertType)) { + if (!is.na(convertType) & !is.logical(convertType)) { + stop("convertType should be a logical TRUE/FALSE") + } + } + + if (!is.null(no_paging)) { + if (!is.na(no_paging) & !is.logical(no_paging)) { + stop("no_paging should be a logical TRUE/FALSE") + } + } + + if (!is.null(attach_request)) { + if (!is.na(attach_request) & !is.logical(attach_request)) { + stop("attach_request should be a logical TRUE/FALSE") + } + } + + if (!is.null(limit)) { + if (!is.na(limit) & !is.numeric(limit)) { + stop("limit should be an integer") + } + } + + if (!is.null(chunk_size)) { + if (!is.na(chunk_size) & !is.numeric(chunk_size)) { + stop("chunk_size should be an integer") + } + } +} + +#' Check other arguments +#' +#' Additional functions to check types and create parameter descriptions. +#' +#' @param bbox Only features that have a geometry that intersects the bounding +#' box are selected. The bounding box is provided as four or six numbers, depending +#' on whether the coordinate reference system includes a vertical axis (height or +#' depth). Coordinates are assumed to be in crs 4326. The expected format is a numeric +#' vector structured: c(xmin,ymin,xmax,ymax). +#' Another way to think of it is c(Western-most longitude, +#' Southern-most latitude, Eastern-most longitude, Northern-most latitude). 
+#' @param skipGeometry This parameter can be used to skip response geometries for +#' each feature. The returning object will be a data frame with no spatial +#' information. The default `NA` will not specify the argument in the request. +#' +#' @keywords internal +check_arguments_api <- function(bbox, skipGeometry) { + if (!is.null(skipGeometry)) { + if (!is.na(skipGeometry) & !is.logical(skipGeometry)) { + stop("skipGeometry should be a logical TRUE/FALSE") + } + } + + if (!is.null(bbox)) { + if (!all(is.na(bbox))) { + if (!length(bbox) %in% c(1, 4)) { + stop("bbox is not set up correctly") + } + } + } +} diff --git a/R/importWQP.R b/R/importWQP.R index a1716d80c..30e78c887 100644 --- a/R/importWQP.R +++ b/R/importWQP.R @@ -67,6 +67,7 @@ importWQP <- function(obs_url, tz = "UTC", csv = TRUE, convertType = TRUE) { data.table = FALSE, sep = ifelse(csv, ",", "\t"), fill = TRUE, + colClasses = "character", quote = ifelse(csv, '\"', "") ) diff --git a/R/readNWISunit.R b/R/readNWISunit.R index fadc33886..89399818f 100644 --- a/R/readNWISunit.R +++ b/R/readNWISunit.R @@ -280,7 +280,12 @@ readNWISpeak <- function( #' attr(data, "RATING") #' } readNWISrating <- function(siteNumber, type = "base", convertType = TRUE) { - message(new_nwis_message()) + .Deprecated( + new = "read_waterdata_ratings", + package = "dataRetrieval", + msg = "NWIS servers are slated for decommission. Please begin to migrate to read_waterdata_ratings." + ) + # No rating xml service url <- constructNWISURL(siteNumber, service = "rating", ratingType = type) @@ -476,4 +481,3 @@ readNWISuse <- function( ) return(NULL) } - diff --git a/R/read_waterdata.R b/R/read_waterdata.R index 2a5df774b..815f5510c 100644 --- a/R/read_waterdata.R +++ b/R/read_waterdata.R @@ -10,6 +10,8 @@ #' @param convertType logical, defaults to `TRUE`. If `TRUE`, the function #' will convert the data to dates and qualifier to string vector. #' @param \dots Additional arguments to send to the request. 
+#' @inheritParams check_arguments_non_api +#' #' @examplesIf is_dataRetrieval_user() #' #' \donttest{ @@ -51,12 +53,26 @@ #' CQL = cql_huc_wildcard) #' #' } -read_waterdata <- function(service, CQL, ..., convertType = TRUE) { +read_waterdata <- function( + service, + CQL, + ..., + convertType = getOption("dataRetrieval.convertType"), + limit = getOption("dataRetrieval.limit"), + attach_request = getOption("dataRetrieval.attach_request") +) { match.arg(service, pkg.env$api_endpoints) args <- list(...) - output_id <- switch( + args[["convertType"]] <- convertType + args[["limit"]] <- limit + args[["attach_request"]] <- attach_request + args[["bbox"]] <- NA + args[["no_paging"]] <- FALSE # drops id if TRUE + args[["chunk_size"]] <- NA # Chunking doesn't make sense. + + args[["output_id"]] <- switch( service, "daily" = "daily_id", "latest-daily" = "latest_daily_id", @@ -72,12 +88,7 @@ read_waterdata <- function(service, CQL, ..., convertType = TRUE) { args[["properties"]] <- NA_character_ } - if (!"limit" %in% names(args)) { - args[["limit"]] <- NA_character_ - } - args[["service"]] <- service - args <- check_limits(args) data_req <- suppressWarnings(do.call(construct_api_requests, args)) @@ -87,28 +98,24 @@ read_waterdata <- function(service, CQL, ..., convertType = TRUE) { return_list <- walk_pages(data_req) - if (is.null(args[["skipGeometry"]])) { - skipGeometry <- FALSE - } else if (is.na(args[["skipGeometry"]])) { - skipGeometry <- FALSE - } else { - skipGeometry <- args[["skipGeometry"]] - } - return_list <- deal_with_empty( return_list, args[["properties"]], service, - skipGeometry, + isTRUE(args[["skipGeometry"]]), convertType ) - return_list <- rejigger_cols(return_list, args[["properties"]], output_id) + return_list <- rejigger_cols( + return_list, + args[["properties"]], + args[["output_id"]] + ) if (convertType) { return_list <- cleanup_cols(return_list, service) return_list <- order_results(return_list) - return_list <- move_id_col(return_list, output_id) 
+ return_list <- move_id_col(return_list, args[["output_id"]]) } return(return_list) diff --git a/R/read_waterdata_channel.R b/R/read_waterdata_channel.R index 62c0376c4..74e92840c 100644 --- a/R/read_waterdata_channel.R +++ b/R/read_waterdata_channel.R @@ -34,26 +34,8 @@ #' Available options are: #' `r dataRetrieval:::get_properties_for_docs("channel-measurements", "channel_measurements_id")`. #' The default (`NA`) will return all columns of the data. -#' @param bbox Only features that have a geometry that intersects the bounding -#' box are selected.The bounding box is provided as four or six numbers, depending -#' on whether the coordinate reference system includes a vertical axis (height or -#' depth). Coordinates are assumed to be in crs 4326. The expected format is a numeric -#' vector structured: c(xmin,ymin,xmax,ymax). Another way to think of it is c(Western-most longitude, -#' Southern-most latitude, Eastern-most longitude, Northern-most longitude). -#' @param limit The optional limit parameter is used to control the subset of the -#' selected features that should be returned in each page. The maximum allowable -#' limit is 50000. It may be beneficial to set this number lower if your internet -#' connection is spotty. The default (`NA`) will set the limit to the maximum -#' allowable limit for the service. -#' @param skipGeometry This option can be used to skip response geometries for -#' each feature. The returning object will be a data frame with no spatial -#' information. -#' @param convertType logical, defaults to `TRUE`. If `TRUE`, the function -#' will convert the data to dates and qualifier to string vector. -#' @param no_paging logical, defaults to `FALSE`. If `TRUE`, the data will -#' be requested from a native csv format. This can be dangerous because the -#' data will cut off at 50,000 rows without indication that more data -#' is available. Use `TRUE` with caution. 
+#' @inheritParams check_arguments_api +#' @inheritParams check_arguments_non_api #' #' @inherit read_waterdata_continuous details #' @@ -94,13 +76,17 @@ read_waterdata_channel <- function( properties = NA_character_, skipGeometry = NA, bbox = NA, - limit = NA, - convertType = TRUE, - no_paging = FALSE + ..., + convertType = getOption("dataRetrieval.convertType"), + no_paging = getOption("dataRetrieval.no_paging"), + limit = getOption("dataRetrieval.limit"), + chunk_size = getOption("dataRetrieval.site_chunk_size_data"), + attach_request = getOption("dataRetrieval.attach_request") ) { service <- "channel-measurements" output_id <- "channel_measurements_id" + rlang::check_dots_empty() args <- mget(names(formals())) return_list <- get_ogc_data(args, output_id, service) diff --git a/R/read_waterdata_combined_meta.R b/R/read_waterdata_combined_meta.R index d72d4471a..beb3f1189 100644 --- a/R/read_waterdata_combined_meta.R +++ b/R/read_waterdata_combined_meta.R @@ -82,27 +82,8 @@ #' Available options are: #' `r dataRetrieval:::get_properties_for_docs("combined-metadata", "field_measurement_id")`. #' The default (`NA`) will return all columns of the data. -#' @param bbox Only features that have a geometry that intersects the bounding -#' box are selected.The bounding box is provided as four or six numbers, depending -#' on whether the coordinate reference system includes a vertical axis (height or -#' depth). Coordinates are assumed to be in crs 4326. The expected format is a numeric -#' vector structured: c(xmin,ymin,xmax,ymax). Another way to think of it is c(Western-most longitude, -#' Southern-most latitude, Eastern-most longitude, Northern-most longitude). -#' @param limit The optional limit parameter is used to control the subset of the -#' selected features that should be returned in each page. The maximum allowable -#' limit is 50000. It may be beneficial to set this number lower if your internet -#' connection is spotty. 
The default (`NA`) will set the limit to the maximum -#' allowable limit for the service. -#' @param skipGeometry This option can be used to skip response geometries for -#' each feature. The returning object will be a data frame with no spatial -#' information. -#' @param convertType logical, defaults to `TRUE`. If `TRUE`, the function -#' will convert the data to dates and qualifier to string vector. -#' @param no_paging logical, defaults to `FALSE`. If `TRUE`, the data will -#' be requested from a native csv format. This can be dangerous because the -#' data will cut off at 50,000 rows without indication that more data -#' is available. Use `TRUE` with caution. -#' +#' @inheritParams check_arguments_api +#' @inheritParams check_arguments_non_api #' @inherit read_waterdata_continuous details #' #' @@ -202,12 +183,16 @@ read_waterdata_combined_meta <- function( properties = NA_character_, skipGeometry = NA, bbox = NA, - limit = NA, - convertType = TRUE, - no_paging = FALSE + ..., + convertType = getOption("dataRetrieval.convertType"), + limit = getOption("dataRetrieval.limit"), + no_paging = getOption("dataRetrieval.no_paging"), + chunk_size = getOption("dataRetrieval.site_chunk_size_meta"), + attach_request = getOption("dataRetrieval.attach_request") ) { service <- "combined-metadata" output_id <- "combined_meta_id" + rlang::check_dots_empty() args <- mget(names(formals())) return_list <- get_ogc_data(args, output_id, service) diff --git a/R/read_waterdata_continuous.R b/R/read_waterdata_continuous.R index c63e6d87f..d9291e560 100644 --- a/R/read_waterdata_continuous.R +++ b/R/read_waterdata_continuous.R @@ -32,18 +32,9 @@ #' Available options are: #' `r dataRetrieval:::get_properties_for_docs("continuous", "continuous_id")`. #' The default (`NA`) will return all columns of the data. -#' @param limit The optional limit parameter is used to control the subset of the -#' selected features that should be returned in each page. 
The maximum allowable -#' limit is 50000. It may be beneficial to set this number lower if your internet -#' connection is spotty. The default (`NA`) will set the limit to the maximum -#' allowable limit for the service. -#' @param convertType logical, defaults to `TRUE`. If `TRUE`, the function -#' will convert the data to dates and qualifier to string vector, and sepcifically -#' order the returning data frame by time and monitoring_location_id. -#' @param no_paging logical, defaults to `FALSE`. If `TRUE`, the data will -#' be requested from a native csv format. This can be dangerous because the -#' data will cut off at 50,000 rows without indication that more data -#' is available. Use `TRUE` with caution. +#' @param \dots Not used. Included to help differentiate official Water Data API arguments +#' from more seldom used, optional dataRetrieval-specific arguments. +#' @inheritParams check_arguments_non_api #' #' @details #' You can also use a vector of length 2 for any time queries (such as time @@ -129,12 +120,16 @@ read_waterdata_continuous <- function( value = NA, last_modified = NA_character_, time = NA_character_, - limit = NA, - convertType = TRUE, - no_paging = FALSE + ..., + convertType = getOption("dataRetrieval.convertType"), + limit = getOption("dataRetrieval.limit"), + no_paging = getOption("dataRetrieval.no_paging"), + chunk_size = getOption("dataRetrieval.site_chunk_size_data"), + attach_request = getOption("dataRetrieval.attach_request") ) { service <- "continuous" output_id <- "continuous_id" + rlang::check_dots_empty() args <- mget(names(formals())) args[["skipGeometry"]] <- TRUE diff --git a/R/read_waterdata_daily.R b/R/read_waterdata_daily.R index a05186746..d13072c8f 100644 --- a/R/read_waterdata_daily.R +++ b/R/read_waterdata_daily.R @@ -25,26 +25,9 @@ #' Available options are: #' `r dataRetrieval:::get_properties_for_docs("daily", "daily_id")`. #' The default (`NA`) will return all columns of the data. 
-#' @param bbox Only features that have a geometry that intersects the bounding -#' box are selected.The bounding box is provided as four or six numbers, depending -#' on whether the coordinate reference system includes a vertical axis (height or -#' depth). Coordinates are assumed to be in crs 4326. The expected format is a numeric -#' vector structured: c(xmin,ymin,xmax,ymax). Another way to think of it is c(Western-most longitude, -#' Southern-most latitude, Eastern-most longitude, Northern-most longitude). -#' @param limit The optional limit parameter is used to control the subset of the -#' selected features that should be returned in each page. The maximum allowable -#' limit is 50000. It may be beneficial to set this number lower if your internet -#' connection is spotty. The default (`NA`) will set the limit to the maximum -#' allowable limit for the service. -#' @param skipGeometry This option can be used to skip response geometries for -#' each feature. The returning object will be a data frame with no spatial -#' information. -#' @param convertType logical, defaults to `TRUE`. If `TRUE`, the function -#' will convert the data to dates and qualifier to string vector. -#' @param no_paging logical, defaults to `FALSE`. If `TRUE`, the data will -#' be requested from a native csv format. This can be dangerous because the -#' data will cut off at 50,000 rows without indication that more data -#' is available. Use `TRUE` with caution. 
+#' +#' @inheritParams check_arguments_api +#' @inheritParams check_arguments_non_api #' #' @inherit read_waterdata_continuous details #' @@ -105,12 +88,16 @@ read_waterdata_daily <- function( skipGeometry = NA, time = NA_character_, bbox = NA, - limit = NA, - convertType = TRUE, - no_paging = FALSE + ..., + convertType = getOption("dataRetrieval.convertType"), + no_paging = getOption("dataRetrieval.no_paging"), + chunk_size = getOption("dataRetrieval.site_chunk_size_data"), + limit = getOption("dataRetrieval.limit"), + attach_request = getOption("dataRetrieval.attach_request") ) { service <- "daily" output_id <- "daily_id" + rlang::check_dots_empty() args <- mget(names(formals())) return_list <- get_ogc_data(args, output_id, service) diff --git a/R/read_waterdata_field_measurements.R b/R/read_waterdata_field_measurements.R index b4cb90f4a..d0718abe6 100644 --- a/R/read_waterdata_field_measurements.R +++ b/R/read_waterdata_field_measurements.R @@ -20,6 +20,7 @@ #' See also Details below for more information. #' @param qualifier `r get_ogc_params("field-measurements")$qualifier` #' @param field_visit_id `r get_ogc_params("field-measurements")$field_visit_id` +#' @param field_measurements_series_id `r get_ogc_params("field-measurements")$field_measurements_series_id` #' @param vertical_datum `r get_ogc_params("field-measurements")$vertical_datum` #' @param measuring_agency `r get_ogc_params("field-measurements")$measuring_agency` #' @param control_condition `r get_ogc_params("field-measurements")$control_condition` @@ -31,26 +32,8 @@ #' Available options are: #' `r dataRetrieval:::get_properties_for_docs("field-measurements", "field_measurement_id")`. #' The default (`NA`) will return all columns of the data. -#' @param bbox Only features that have a geometry that intersects the bounding -#' box are selected.The bounding box is provided as four or six numbers, depending -#' on whether the coordinate reference system includes a vertical axis (height or -#' depth). 
Coordinates are assumed to be in crs 4326. The expected format is a numeric -#' vector structured: c(xmin,ymin,xmax,ymax). Another way to think of it is c(Western-most longitude, -#' Southern-most latitude, Eastern-most longitude, Northern-most longitude). -#' @param limit The optional limit parameter is used to control the subset of the -#' selected features that should be returned in each page. The maximum allowable -#' limit is 50000. It may be beneficial to set this number lower if your internet -#' connection is spotty. The default (`NA`) will set the limit to the maximum -#' allowable limit for the service. -#' @param skipGeometry This option can be used to skip response geometries for -#' each feature. The returning object will be a data frame with no spatial -#' information. -#' @param convertType logical, defaults to `TRUE`. If `TRUE`, the function -#' will convert the data to dates and qualifier to string vector. -#' @param no_paging logical, defaults to `FALSE`. If `TRUE`, the data will -#' be requested from a native csv format. This can be dangerous because the -#' data will cut off at 50,000 rows without indication that more data -#' is available. Use `TRUE` with caution. 
+#' @inheritParams check_arguments_api +#' @inheritParams check_arguments_non_api #' #' @inherit read_waterdata_continuous details #' @@ -83,6 +66,9 @@ #' old_df <- read_waterdata_field_measurements(monitoring_location_id = "USGS-425957088141001", #' time = c("1980-01-01", NA)) #' +#' new_df <- read_waterdata_field_measurements(monitoring_location_id = "USGS-425957088141001", +#' time = c(NA, "2020-01-01")) +#' #' surface_water <- read_waterdata_field_measurements( #' monitoring_location_id = c("USGS-07069000", #' "USGS-07064000", @@ -98,6 +84,7 @@ read_waterdata_field_measurements <- function( observing_procedure_code = NA_character_, properties = NA_character_, field_visit_id = NA_character_, + field_measurements_series_id = NA_character_, approval_status = NA_character_, unit_of_measure = NA_character_, qualifier = NA_character_, @@ -111,12 +98,16 @@ read_waterdata_field_measurements <- function( skipGeometry = NA, time = NA_character_, bbox = NA, - limit = NA, - convertType = TRUE, - no_paging = FALSE + ..., + convertType = getOption("dataRetrieval.convertType"), + no_paging = getOption("dataRetrieval.no_paging"), + limit = getOption("dataRetrieval.limit"), + chunk_size = getOption("dataRetrieval.site_chunk_size_data"), + attach_request = getOption("dataRetrieval.attach_request") ) { service <- "field-measurements" output_id <- "field_measurement_id" + rlang::check_dots_empty() args <- mget(names(formals())) return_list <- get_ogc_data(args, output_id, service) diff --git a/R/read_waterdata_field_meta.R b/R/read_waterdata_field_meta.R index b4df45b6a..4df6e2ac5 100644 --- a/R/read_waterdata_field_meta.R +++ b/R/read_waterdata_field_meta.R @@ -24,26 +24,9 @@ #' Available options are: #' `r dataRetrieval:::get_properties_for_docs("field-measurements-metadata", "field_measurement_id")`. #' The default (`NA`) will return all columns of the data. 
-#' @param bbox Only features that have a geometry that intersects the bounding -#' box are selected.The bounding box is provided as four or six numbers, depending -#' on whether the coordinate reference system includes a vertical axis (height or -#' depth). Coordinates are assumed to be in crs 4326. The expected format is a numeric -#' vector structured: c(xmin,ymin,xmax,ymax). Another way to think of it is c(Western-most longitude, -#' Southern-most latitude, Eastern-most longitude, Northern-most longitude). -#' @param limit The optional limit parameter is used to control the subset of the -#' selected features that should be returned in each page. The maximum allowable -#' limit is 50000. It may be beneficial to set this number lower if your internet -#' connection is spotty. The default (`NA`) will set the limit to the maximum -#' allowable limit for the service. -#' @param skipGeometry This option can be used to skip response geometries for -#' each feature. The returning object will be a data frame with no spatial -#' information. -#' @param convertType logical, defaults to `TRUE`. If `TRUE`, the function -#' will convert the data to dates and qualifier to string vector. -#' @param no_paging logical, defaults to `FALSE`. If `TRUE`, the data will -#' be requested from a native csv format. This can be dangerous because the -#' data will cut off at 50,000 rows without indication that more data -#' is available. Use `TRUE` with caution. 
+#' +#' @inheritParams check_arguments_api +#' @inheritParams check_arguments_non_api #' #' @inherit read_waterdata_continuous details #' @@ -95,11 +78,15 @@ read_waterdata_field_meta <- function( skipGeometry = NA, bbox = NA, limit = NA, - convertType = TRUE, - no_paging = FALSE + ..., + convertType = getOption("dataRetrieval.convertType"), + no_paging = getOption("dataRetrieval.no_paging"), + chunk_size = getOption("dataRetrieval.site_chunk_size_meta"), + attach_request = getOption("dataRetrieval.attach_request") ) { service <- "field-measurements-metadata" output_id <- "field_series_id" + rlang::check_dots_empty() args <- mget(names(formals())) return_list <- get_ogc_data(args, output_id, service) diff --git a/R/read_waterdata_latest_continuous.R b/R/read_waterdata_latest_continuous.R index b015c9192..e450561ae 100644 --- a/R/read_waterdata_latest_continuous.R +++ b/R/read_waterdata_latest_continuous.R @@ -23,26 +23,8 @@ #' Available options are: #' `r dataRetrieval:::get_properties_for_docs("latest-continuous", "latest_continuous_id")`. #' The default (`NA`) will return all columns of the data. -#' @param bbox Only features that have a geometry that intersects the bounding -#' box are selected.The bounding box is provided as four or six numbers, depending -#' on whether the coordinate reference system includes a vertical axis (height or -#' depth). Coordinates are assumed to be in crs 4326. The expected format is a numeric -#' vector structured: c(xmin,ymin,xmax,ymax). Another way to think of it is c(Western-most longitude, -#' Southern-most latitude, Eastern-most longitude, Northern-most longitude). -#' @param limit The optional limit parameter is used to control the subset of the -#' selected features that should be returned in each page. The maximum allowable -#' limit is 50000. It may be beneficial to set this number lower if your internet -#' connection is spotty. The default (`NA`) will set the limit to the maximum -#' allowable limit for the service. 
-#' @param skipGeometry This option can be used to skip response geometries for -#' each feature. The returning object will be a data frame with no spatial -#' information. -#' @param convertType logical, defaults to `TRUE`. If `TRUE`, the function -#' will convert the data to dates and qualifier to string vector. -#' @param no_paging logical, defaults to `FALSE`. If `TRUE`, the data will -#' be requested from a native csv format. This can be dangerous because the -#' data will cut off at 50,000 rows without indication that more data -#' is available. Use `TRUE` with caution. +#' @inheritParams check_arguments_api +#' @inheritParams check_arguments_non_api #' #' @inherit read_waterdata_continuous details #' @examplesIf is_dataRetrieval_user() @@ -92,12 +74,16 @@ read_waterdata_latest_continuous <- function( skipGeometry = NA, time = NA_character_, bbox = NA, - limit = NA, - convertType = TRUE, - no_paging = FALSE + ..., + convertType = getOption("dataRetrieval.convertType"), + no_paging = getOption("dataRetrieval.no_paging"), + chunk_size = getOption("dataRetrieval.site_chunk_size_meta"), + limit = getOption("dataRetrieval.limit"), + attach_request = getOption("dataRetrieval.attach_request") ) { service <- "latest-continuous" output_id <- "latest_continuous_id" + rlang::check_dots_empty() args <- mget(names(formals())) return_list <- get_ogc_data(args, output_id, service) diff --git a/R/read_waterdata_latest_daily.R b/R/read_waterdata_latest_daily.R index dfb5deb38..85bc6c11f 100644 --- a/R/read_waterdata_latest_daily.R +++ b/R/read_waterdata_latest_daily.R @@ -25,27 +25,9 @@ #' Available options are: #' `r dataRetrieval:::get_properties_for_docs("latest-daily", "latest_daily_id")`. #' The default (`NA`) will return all columns of the data. 
-#' @param bbox Only features that have a geometry that intersects the bounding -#' box are selected.The bounding box is provided as four or six numbers, depending -#' on whether the coordinate reference system includes a vertical axis (height or -#' depth). Coordinates are assumed to be in crs 4326. The expected format is a numeric -#' vector structured: c(xmin,ymin,xmax,ymax). Another way to think of it is c(Western-most longitude, -#' Southern-most latitude, Eastern-most longitude, Northern-most longitude). -#' @param limit The optional limit parameter is used to control the subset of the -#' selected features that should be returned in each page. The maximum allowable -#' limit is 50000. It may be beneficial to set this number lower if your internet -#' connection is spotty. The default (`NA`) will set the limit to the maximum -#' allowable limit for the service. -#' @param skipGeometry This option can be used to skip response geometries for -#' each feature. The returning object will be a data frame with no spatial -#' information. -#' @param convertType logical, defaults to `TRUE`. If `TRUE`, the function -#' will convert the data to dates and qualifier to string vector. -#' @param no_paging logical, defaults to `FALSE`. If `TRUE`, the data will -#' be requested from a native csv format. This can be dangerous because the -#' data will cut off at 50,000 rows without indication that more data -#' is available. Use `TRUE` with caution. 
#' +#' @inheritParams check_arguments_api +#' @inheritParams check_arguments_non_api #' @inherit read_waterdata_continuous details #' #' @examplesIf is_dataRetrieval_user() @@ -71,7 +53,7 @@ #' skipGeometry = TRUE) #' #' multi_site <- read_waterdata_latest_daily(monitoring_location_id = c("USGS-01491000", -#' "USGS-01645000"), +#' "USGS-01645000"), #' parameter_code = c("00060", "00010")) #' #' } @@ -89,12 +71,16 @@ read_waterdata_latest_daily <- function( skipGeometry = NA, time = NA_character_, bbox = NA, - limit = NA, - convertType = TRUE, - no_paging = FALSE + ..., + convertType = getOption("dataRetrieval.convertType"), + no_paging = getOption("dataRetrieval.no_paging"), + limit = getOption("dataRetrieval.limit"), + chunk_size = getOption("dataRetrieval.site_chunk_size_meta"), + attach_request = getOption("dataRetrieval.attach_request") ) { service <- "latest-daily" output_id <- "latest_daily_id" + rlang::check_dots_empty() args <- mget(names(formals())) return_list <- get_ogc_data(args, output_id, service) diff --git a/R/read_waterdata_metadata.R b/R/read_waterdata_metadata.R index f2c673701..5021817d8 100644 --- a/R/read_waterdata_metadata.R +++ b/R/read_waterdata_metadata.R @@ -10,13 +10,16 @@ #' "coordinate-method-codes", "hydrologic-unit-codes", "medium-codes", #' "national-aquifer-codes", "reliability-codes", "site-types", "statistic-codes", #' "topographic-codes", "time-zone-codes". +#' @param \dots Optional arguments to pass to the query. Available parameters +#' can be found with the \code{get_ogc_params} function. #' @param limit The optional limit parameter is used to control the subset of the #' selected features that should be returned in each page. The maximum allowable #' limit is 50000. It may be beneficial to set this number lower if your internet #' connection is spotty. The default (`NA`) will set the limit to the maximum #' allowable limit for the service. -#' @param \dots Optional arguments to pass to the query. 
Available parameters -#' can be found with the \code{get_ogc_params} function. +#' @param attach_request logical, defaults to `r getOption("dataRetrieval.attach_request")`. +#' If set to `TRUE`, the full request sent to the Water Data API is attached +#' as an attribute to the data set. #' @examplesIf is_dataRetrieval_user() #' #' \donttest{ @@ -25,11 +28,16 @@ #' aquifer_codes <- read_waterdata_metadata("aquifer-codes") #' aquifer_types <- read_waterdata_metadata("aquifer-types") #' counties <- read_waterdata_metadata("counties") +#' countries <- read_waterdata_metadata("countries") #' us_counties <- read_waterdata_metadata("counties", country_code = "US") #' coordinate_accuracy_codes <- read_waterdata_metadata("coordinate-accuracy-codes") #' coordinate_datum_codes <- read_waterdata_metadata("coordinate-datum-codes") #' coordinate_method_codes <- read_waterdata_metadata("coordinate-method-codes") #' huc_codes <- read_waterdata_metadata("hydrologic-unit-codes") +#' methods <- read_waterdata_metadata("methods") +#' method_categories <- read_waterdata_metadata("method-categories") +#' method_citations <- read_waterdata_metadata("method-citations") +#' citations <- read_waterdata_metadata("citations") #' national_aquifer_codes <- read_waterdata_metadata("national-aquifer-codes") #' parameter_codes <- read_waterdata_metadata("parameter-codes") #' reliability_codes <- read_waterdata_metadata("reliability-codes") @@ -42,7 +50,12 @@ #' time_zone_limited <- read_waterdata_metadata("time-zone-codes", #' time_zone_description = c("Alaska", "Hawaii", "Pacific North America")) #' } -read_waterdata_metadata <- function(collection, limit = NA, ...) 
{ +read_waterdata_metadata <- function( + collection, + ..., + limit = getOption("dataRetrieval.limit"), + attach_request = getOption("dataRetrieval.attach_request") +) { match.arg(collection, pkg.env$metadata) output_id <- names(pkg.env$metadata)[pkg.env$metadata == collection] @@ -57,12 +70,13 @@ read_waterdata_metadata <- function(collection, limit = NA, ...) { stop(paste0("Unknown argument: ", wrong_args)) } } - + args[["attach_request"]] <- attach_request args[["limit"]] <- limit args[["convertType"]] <- FALSE - args[["skipGeometry"]] <- TRUE + args[["skipGeometry"]] <- NA args[["bbox"]] <- NA args[["no_paging"]] <- FALSE # drops id if TRUE + args[["chunk_size"]] <- NA # Chunking doesn't make sense. return_list <- get_ogc_data( args = args, diff --git a/R/read_waterdata_monitoring_location.R b/R/read_waterdata_monitoring_location.R index 7ec6d4b25..8561f98a8 100644 --- a/R/read_waterdata_monitoring_location.R +++ b/R/read_waterdata_monitoring_location.R @@ -48,20 +48,8 @@ #' Available options are: #' `r dataRetrieval:::get_properties_for_docs("monitoring-locations", "monitoring_location_id")`. #' The default (`NA`) will return all columns of the data. -#' @param bbox Only features that have a geometry that intersects the bounding -#' box are selected.The bounding box is provided as four or six numbers, depending -#' on whether the coordinate reference system includes a vertical axis (height or -#' depth). Coordinates are assumed to be in crs 4326. The expected format is a numeric -#' vector structured: c(xmin,ymin,xmax,ymax). Another way to think of it is c(Western-most longitude, -#' Southern-most latitude, Eastern-most longitude, Northern-most longitude). -#' @param limit The optional limit parameter is used to control the subset of the -#' selected features that should be returned in each page. The maximum allowable -#' limit is 50000. It may be beneficial to set this number lower if your internet -#' connection is spotty. 
The default (`NA`) will set the limit to the maximum -#' allowable limit for the service. -#' @param skipGeometry This option can be used to skip response geometries for -#' each feature. The returning object will be a data frame with no spatial -#' information. +#' @inheritParams check_arguments_api +#' @inheritParams check_arguments_non_api #' @examplesIf is_dataRetrieval_user() #' #' \donttest{ @@ -91,6 +79,8 @@ #' #' bbox_vals = c(-94.00, 35.0, -93.5, 35.5) #' multi_site <- read_waterdata_monitoring_location(bbox = bbox_vals) +#' +#' #' } read_waterdata_monitoring_location <- function( monitoring_location_id = NA_character_, @@ -135,11 +125,16 @@ read_waterdata_monitoring_location <- function( depth_source_code = NA_character_, properties = NA_character_, bbox = NA, - limit = NA, - skipGeometry = NA + skipGeometry = NA, + ..., + limit = getOption("dataRetrieval.limit"), + no_paging = getOption("dataRetrieval.no_paging"), + chunk_size = getOption("dataRetrieval.site_chunk_size_meta"), + attach_request = getOption("dataRetrieval.attach_request") ) { service <- "monitoring-locations" output_id <- "monitoring_location_id" + rlang::check_dots_empty() args <- mget(names(formals())) args[["convertType"]] <- FALSE diff --git a/R/read_waterdata_parameter_codes.R b/R/read_waterdata_parameter_codes.R index 6876c3197..c098e3999 100644 --- a/R/read_waterdata_parameter_codes.R +++ b/R/read_waterdata_parameter_codes.R @@ -18,11 +18,16 @@ #' Available options are: #' `r dataRetrieval:::get_properties_for_docs("parameter-codes", "parameter_code_id")`. #' The default (`NA`) will return all columns of the data. +#' @param \dots Not used. Included to help differentiate official Water Data API arguments +#' from more seldom used, optional dataRetrieval-specific arguments. #' @param limit The optional limit parameter is used to control the subset of the #' selected features that should be returned in each page. The maximum allowable #' limit is 50000. 
It may be beneficial to set this number lower if your internet #' connection is spotty. The default (`NA`) will set the limit to the maximum #' allowable limit for the service. +#' @param attach_request logical, defaults to `r getOption("dataRetrieval.attach_request")`. +#' If set to `TRUE`, the full request sent to the Water Data API is attached +#' as an attribute to the data set. #' @examplesIf is_dataRetrieval_user() #' #' \donttest{ @@ -56,16 +61,20 @@ read_waterdata_parameter_codes <- function( temperature_basis = NA_character_, epa_equivalence = NA_character_, properties = NA_character_, - limit = NA + ..., + limit = getOption("dataRetrieval.limit"), + attach_request = getOption("dataRetrieval.attach_request") ) { service <- "parameter-codes" output_id <- "parameter_code" + rlang::check_dots_empty() args <- mget(names(formals())) args[["convertType"]] <- FALSE - args[["skipGeometry"]] <- TRUE + args[["skipGeometry"]] <- NA args[["bbox"]] <- NA args[["no_paging"]] <- FALSE # drops id if TRUE + args[["chunk_size"]] <- NA return_list <- get_ogc_data( args = args, diff --git a/R/read_waterdata_ratings.R b/R/read_waterdata_ratings.R new file mode 100644 index 000000000..3663592af --- /dev/null +++ b/R/read_waterdata_ratings.R @@ -0,0 +1,188 @@ +#' Get USGS Rating Curve Data +#' +#' Reads current rating table for an active USGS streamgages. More information +#' can be found at https://api.waterdata.usgs.gov/docs/stac/. +#' +#' @param monitoring_location_id A unique identifier representing a single +#' monitoring location. Monitoring location IDs are created by combining the +#' agency code of the agency responsible for the monitoring location (e.g. USGS) +#' with the ID number of the monitoring location (e.g. 02238500), separated by +#' a hyphen (e.g. USGS-02238500). +#' @param file_type Rating file time. Could be any of "exsa", "corr", or "base". 
+#' If `file_type` is "base" then the columns are
+#' INDEP, typically the gage height, in feet; DEP, typically the streamflow,
+#' in cubic feet per second; and STOR, where "*" indicates that the pair are
+#' a fixed point of the rating curve. If `file_type` is "exsa" then an
+#' additional column, SHIFT, is included that indicates the current shift in
+#' the rating for that value of INDEP. If `file_type` is "corr" then the
+#' columns are INDEP, typically the gage height, in feet; CORR, the correction
+#' for that value; and CORRINDEP, the corrected value for CORR.
+#' @param file_path Path to save the rating curve rdb files. The
+#' default is `tempdir()`, which will wipe out the files.
+#' @param datetime Only return items that have a temporal property that
+#' intersects this value. Either a date-time or an interval, open or closed.
+#' See Details below.
+#' @param bbox Only features that have a geometry that intersects the bounding
+#' box are selected. The bounding box is provided as four or six numbers, depending
+#' on whether the coordinate reference system includes a vertical axis (height or
+#' depth). Coordinates are assumed to be in crs 4326. The expected format is a numeric
+#' vector structured: c(xmin,ymin,xmax,ymax).
+#' Another way to think of it is c(Western-most longitude,
+#' Southern-most latitude, Eastern-most longitude, Northern-most latitude).
+#' @param \dots Not used.
+#' @param limit Limits the number of results that are included in each page of
+#' the response (capped at the default 10,000).
+#' @param download_and_parse Logical to define whether or not to download, parse,
+#' and return a list of data frames with rating curve data (`TRUE`), or to return
+#' just a list of available rating curve files (`FALSE`). Default is `TRUE`.
+#' @export
+#' @inherit read_waterdata_continuous details
+#'
+#' @return List of data frames which contain the requested rating curves.
+#' +#' @examplesIf is_dataRetrieval_user() +#' +#' \donttest{ +#' +#' monitoring_location_id <- c("USGS-01104475", "USGS-01104460") +#' ratings_exsa <- read_waterdata_ratings( +#' monitoring_location_id = monitoring_location_id, +#' file_type = "exsa") +#' +#' head(ratings_exsa[["USGS-01104475.exsa.rdb"]]) +#' comment(ratings_exsa[["USGS-01104475.exsa.rdb"]])[1:15] +#' +#' ratings_corr <- read_waterdata_ratings( +#' monitoring_location_id = monitoring_location_id, +#' file_type = "corr") +#' +#' head(ratings_corr[["USGS-01104460.corr.rdb"]]) +#' comment(ratings_corr[["USGS-01104460.corr.rdb"]])[1:15] +#' +#' rating_2 <- read_waterdata_ratings( +#' monitoring_location_id = monitoring_location_id, +#' file_type = c("corr", "exsa")) +#' names(rating_2) +#' +#' bbox <- c(-95.00, 40.0, -92.0, 42) +#' +#' bbox_query <- read_waterdata_ratings(bbox = bbox, +#' download_and_parse = FALSE) +#' length(bbox_query) +#' recent_query <- read_waterdata_ratings(bbox = bbox, +#' datetime = c(Sys.Date()-7, NA), +#' download_and_parse = FALSE) +#' length(recent_query) +#'} +read_waterdata_ratings <- function( + monitoring_location_id = NA_character_, + file_type = c("exsa", "base", "corr"), + file_path = tempdir(), + bbox = NA, + datetime = NA_character_, + ..., + limit = 10000, + download_and_parse = TRUE +) { + match.arg( + arg = file_type, + choices = c("exsa", "base", "corr"), + several.ok = TRUE + ) + rlang::check_dots_empty() + + request <- httr2::request("https://api.waterdata.usgs.gov/stac/v0/") |> + httr2::req_url_path_append("search") + + filter <- NA_character_ + + if (!all(is.na(monitoring_location_id))) { + if (length(monitoring_location_id) > 1) { + monitoring_location_id <- paste0( + monitoring_location_id, + collapse = "', '" + ) + } + + filter <- sprintf( + "monitoring_location_id IN ('%s')", + monitoring_location_id + ) + } + + if (length(file_type) == 1) { + filter <- sprintf("%s AND file_type = '%s'", filter, file_type) + } + + if (!is.na(filter)) { + if 
(substr(filter, 1, 3) == "AND") { + filter <- substr(filter, 4, nchar(filter)) + } + + request <- request |> + httr2::req_url_query(filter = filter) + } + + if (!all(is.na(datetime))) { + if (any(grepl("P", datetime))) { + stop( + "Periods are not supported in datetime argument in the rating curve service." + ) + } + datetime <- format_api_dates(datetime, date = FALSE) + + request <- request |> + httr2::req_url_query(datetime = datetime) + } + + if (all(!is.na(bbox))) { + request <- httr2::req_url_query( + request, + bbox = as.numeric(bbox), + .multi = "comma" + ) + } + + request <- request |> + httr2::req_url_query(limit = limit) |> + basic_request() + + resp <- httr2::req_perform(request) + log_rate_limit(resp) + + features <- httr2::resp_body_json(resp)[["features"]] + + if (download_and_parse) { + return_list <- list() + for (feature in features) { + id <- feature$id + df <- download_convert(feature, file_path, file_type) + if (!is.null(df)) { + return_list[[id]] <- df + } + } + + return(return_list) + } else { + return(features) + } +} + +download_convert <- function(feature, file_path, file_type) { + links <- feature$links + id <- feature$id + url <- feature$assets$data$href + + req <- httr2::request(url) |> + basic_request() + + if (any(sapply(file_type, function(x) grepl(x, url)))) { + full_file_path <- file.path(file_path, id) + message("Requesting: \n", url) + resp <- httr2::req_perform(req, path = full_file_path) + rating <- importRDB1(full_file_path) + return(rating) + } + + return(NULL) +} diff --git a/R/read_waterdata_ts_meta.R b/R/read_waterdata_ts_meta.R index dd9777841..b3f869dec 100644 --- a/R/read_waterdata_ts_meta.R +++ b/R/read_waterdata_ts_meta.R @@ -45,28 +45,8 @@ #' `r dataRetrieval:::get_properties_for_docs("time-series-metadata", "time_series_id")`. #' The default (`NA`) will return all columns of the data. 
#' @param time_series_id `r get_ogc_params("time-series-metadata")$id` -#' @param bbox Only features that have a geometry that intersects the bounding -#' box are selected.The bounding box is provided as four or six numbers, depending -#' on whether the coordinate reference system includes a vertical axis (height or -#' depth). Coordinates are assumed to be in crs 4326. The expected format is a numeric -#' vector structured: c(xmin,ymin,xmax,ymax). Another way to think of it is c(Western-most longitude, -#' Southern-most latitude, Eastern-most longitude, Northern-most longitude). -#' @param limit The optional limit parameter is used to control the subset of the -#' selected features that should be returned in each page. The maximum allowable -#' limit is 50000. It may be beneficial to set this number lower if your internet -#' connection is spotty. The default (`NA`) will set the limit to the maximum -#' allowable limit for the service. -#' @param max_results The optional maximum number of rows to return. This value -#' must be less than the requested limit. -#' @param convertType logical, defaults to `TRUE`. If `TRUE`, the function -#' will convert the data to dates and qualifier to string vector. -#' @param skipGeometry This option can be used to skip response geometries for -#' each feature. The returning object will be a data frame with no spatial -#' information. -#' @param no_paging logical, defaults to `FALSE`. If `TRUE`, the data will -#' be requested from a native csv format. This can be dangerous because the -#' data will cut off at 50,000 rows without indication that more data -#' is available. Use `TRUE` with caution. 
+#' @inheritParams check_arguments_api +#' @inheritParams check_arguments_non_api #' #' @inherit read_waterdata_continuous details #' @@ -77,7 +57,7 @@ #' meta_1 <- read_waterdata_ts_meta(monitoring_location_id = site) #' #' meta_multi <- read_waterdata_ts_meta(monitoring_location_id = c("USGS-01491000", -#' "USGS-01645000"), +#' "USGS-01645000"), #' parameter_code = c("00060", "00010"), #' properties = c("monitoring_location_id", #' "parameter_code", @@ -109,16 +89,19 @@ read_waterdata_ts_meta <- function( time_series_id = NA_character_, web_description = NA_character_, skipGeometry = NA, - limit = NA, - max_results = NA, bbox = NA, begin = NA_character_, end = NA_character_, - convertType = TRUE, - no_paging = FALSE + ..., + limit = getOption("dataRetrieval.limit"), + convertType = getOption("dataRetrieval.convertType"), + no_paging = getOption("dataRetrieval.no_paging"), + chunk_size = getOption("dataRetrieval.site_chunk_size_meta"), + attach_request = getOption("dataRetrieval.attach_request") ) { - service = "time-series-metadata" + service <- "time-series-metadata" output_id <- "time_series_id" + rlang::check_dots_empty() args <- mget(names(formals())) return_list <- get_ogc_data(args, output_id, service) diff --git a/R/sysdata.rda b/R/sysdata.rda index d984e226a..76776c7db 100644 Binary files a/R/sysdata.rda and b/R/sysdata.rda differ diff --git a/R/walk_pages.R b/R/walk_pages.R index aa72eea07..c703bd3af 100644 --- a/R/walk_pages.R +++ b/R/walk_pages.R @@ -12,6 +12,8 @@ walk_pages <- function(req) { on_error = "stop" ) + failures <- resps |> httr2::resps_failures() + return_list <- resps |> httr2::resps_successes() |> httr2::resps_data(\(resp) get_resp_data(resp)) @@ -111,15 +113,21 @@ get_csv <- function(req, limit) { if (httr2::resp_has_body(resp)) { return_list <- httr2::resp_body_string(resp) - df <- data.table::fread(input = return_list, data.table = FALSE) + df <- data.table::fread( + input = return_list, + data.table = FALSE, + colClasses = 
"character" + ) df <- coerce_num_cols(df) if (skip_geo) { df <- df[, names(df)[!names(df) %in% c("x", "y")]] } else { - df <- sf::st_as_sf(df, coords = c("x", "y")) - sf::st_crs(df) <- 4269 + if (all(c("x", "y") %in% names(df))) { + df <- sf::st_as_sf(df, coords = c("x", "y")) + sf::st_crs(df) <- 4269 + } } if (nrow(df) == limit) { @@ -137,7 +145,9 @@ ensure all requested data is returned." coerce_num_cols <- function(df, is_sf = FALSE) { included_num_cols <- names(df)[names(df) %in% num_cols] - if (length(included_num_cols) == 0) return(df) + if (length(included_num_cols) == 0) { + return(df) + } check_df <- if (is_sf) { sf::st_drop_geometry(df[, included_num_cols, drop = FALSE]) diff --git a/README.md b/README.md index c1b857d01..97827bbc1 100644 --- a/README.md +++ b/README.md @@ -206,13 +206,13 @@ NWIScitation #> U.S. Geological Survey (2026). _USGS Water Data for the Nation: U.S. #> Geological Survey National Water Information System database_. #> doi:10.5066/F7P55KJN , Accessed Mar -#> 09, 2026. +#> 30, 2026. print(NWIScitation, style = "Bibtex") #> @Manual{, #> title = {USGS Water Data for the Nation: U.S. Geological Survey National Water Information System database}, #> author = {{U.S. Geological Survey}}, #> doi = {10.5066/F7P55KJN}, -#> note = {Accessed Mar 09, 2026}, +#> note = {Accessed Mar 30, 2026}, #> year = {2026}, #> } ``` @@ -235,14 +235,14 @@ WQPcitation <- create_WQP_bib(SC) WQPcitation #> National Water Quality Monitoring Council (2026). _Water Quality #> Portal_. doi:10.5066/P9QRKUVJ , -#> Accessed Mar 09, 2026, +#> Accessed Mar 30, 2026, #> . 
print(WQPcitation, style = "Bibtex") #> @Manual{, #> title = {Water Quality Portal}, #> author = {{National Water Quality Monitoring Council}}, #> doi = {10.5066/P9QRKUVJ}, -#> note = {Accessed Mar 09, 2026}, +#> note = {Accessed Mar 30, 2026}, #> year = {2026}, #> url = {https://www.waterqualitydata.us/data/Result/search?siteid=USGS-05288705&count=no&pCode=00300&mimeType=csv}, #> } diff --git a/_pkgdown.yml b/_pkgdown.yml index ed2f068e0..df8a331bf 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -46,6 +46,8 @@ navbar: href: articles/daily_data_statistics.html - text: Continuous Data href: articles/continuous_pr.html + - text: USGS Reference Lists + href: articles/Reference_Lists.html - text: Changes to QW href: articles/qwdata_changes.html - text: Background @@ -58,12 +60,8 @@ navbar: href: articles/wqp_large_pull_script.html - text: Large Request Pipeline Approach href: articles/wqp_large_pull_targets.html - - text: Stat Service - href: articles/statsServiceMap.html - text: NLDI Interface href: articles/nldi.html - - text: Moving Averages - href: articles/movingAverages.html - text: How to Contribute href: articles/Contributing.html right: @@ -91,6 +89,7 @@ reference: - read_waterdata_channel - read_waterdata_field_meta - read_waterdata_combined_meta + - read_waterdata_ratings - title: National Water Information System (NWIS) desc: Functions to retrieve (USGS) NWIS data. These will be slowly phased out and replaced with the read_waterdata family of functions. contents: diff --git a/docker/Dockerfile b/docker/Dockerfile index 404040c33..5a49a37db 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,31 +1,48 @@ -FROM code.chs.usgs.gov:5001/ctek/docker/r-lang/r-base:4.4 +FROM code.chs.usgs.gov:5001/ctek/docker/r-lang/r-base:4.5 + +# Change the name of this environment to something which pleases you, if you +# so please. But the name will not be relevant for most cases, as reticulate +# will be pointed to the environment no matter what it is named. 
+ARG CONDA_ENVIRONMENT_NAME=dataretrieval + +ENV CONDA_DIR="/root/conda" +ENV PATH=$CONDA_DIR/bin:$PATH +COPY environment.yml / # Necessary R libraries RUN apt-get update -qq && apt-get -y --no-install-recommends install \ - r-cran-oce \ - r-cran-devtools \ - r-cran-here \ + wget \ + r-cran-rcmdcheck \ + r-cran-testthat \ + r-cran-pkgdown \ r-cran-rmarkdown \ r-cran-knitr \ r-cran-dt \ r-cran-data.table \ - r-cran-gridextra \ - r-cran-tidyverse \ + r-cran-dplyr \ + r-cran-purrr \ + r-cran-lubridate \ + r-cran-tidyr \ r-cran-jsonlite \ r-cran-readr \ r-cran-xml2 \ r-cran-httr2 \ - r-cran-rsconnect \ - r-cran-connectapi \ r-cran-covr \ r-cran-sf \ - r-cran-zoo \ r-cran-patchwork \ - r-cran-maps \ r-cran-leaflet \ r-cran-readxl \ r-cran-whisker \ r-cran-ggplot2 \ - && rm -rf /var/lib/apt/lists/* - + r-cran-reticulate \ + && rm -rf /var/lib/apt/lists/* + +RUN wget -O Miniforge3.sh "https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-$(uname)-$(uname -m).sh" && \ + bash Miniforge3.sh -b -p "${HOME}/conda" && \ + rm Miniforge3.sh +ENV CONDA_DIR="/root/conda" +ENV PATH=$CONDA_DIR/bin:$PATH +COPY environment.yml / +RUN conda env create -n ${CONDA_ENVIRONMENT_NAME} -f /environment.yml && conda clean -a -y +ENV RETICULATE_PYTHON=/root/conda/envs/${CONDA_ENVIRONMENT_NAME}/bin/python diff --git a/environment.yml b/environment.yml new file mode 100644 index 000000000..3c2e4b310 --- /dev/null +++ b/environment.yml @@ -0,0 +1,8 @@ +name: dataretrieval +channels: + - conda-forge +dependencies: + # required + - python=3.12 + - dataretrieval +prefix: /home/user/miniforge3/envs/dataretrieval diff --git a/inst/CITATION b/inst/CITATION index c39fd524a..0a45adf0b 100644 --- a/inst/CITATION +++ b/inst/CITATION @@ -37,9 +37,9 @@ bibentry(bibtype = "Manual", title = "dataRetrieval: R packages for discovering and retrieving water data available from U.S. federal hydrologic web services", publisher = "U.S. 
Geological Survey", address="Reston, VA", - version = "2.7.23", + version = "2.7.24", institution = "U.S. Geological Survey", year = 2026, doi = "10.5066/P9X4L3GE", - textVersion = "De Cicco, L.A., Hirsch, R.M., Lorenz, D., Watkins, W.D., Johnson, M., Blodgett, D.L., Hinman, E.D., Zemmels, J., 2026, dataRetrieval: R packages for discovering and retrieving water data available from Federal hydrologic web services, v.2.7.23, doi:10.5066/P9X4L3GE" + textVersion = "De Cicco, L.A., Hirsch, R.M., Lorenz, D., Watkins, W.D., Johnson, M., Blodgett, D.L., Hinman, E.D., Zemmels, J., 2026, dataRetrieval: R packages for discovering and retrieving water data available from Federal hydrologic web services, v.2.7.24, doi:10.5066/P9X4L3GE" ) diff --git a/man/check_arguments_api.Rd b/man/check_arguments_api.Rd new file mode 100644 index 000000000..d53e12ce6 --- /dev/null +++ b/man/check_arguments_api.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/get_ogc_data.R +\name{check_arguments_api} +\alias{check_arguments_api} +\title{Check other arguments} +\usage{ +check_arguments_api(bbox, skipGeometry) +} +\arguments{ +\item{bbox}{Only features that have a geometry that intersects the bounding +box are selected. The bounding box is provided as four or six numbers, depending +on whether the coordinate reference system includes a vertical axis (height or +depth). Coordinates are assumed to be in crs 4326. The expected format is a numeric +vector structured: c(xmin,ymin,xmax,ymax). +Another way to think of it is c(Western-most longitude, +Southern-most latitude, Eastern-most longitude, Northern-most latitude).} + +\item{skipGeometry}{This parameter can be used to skip response geometries for +each feature. The returning object will be a data frame with no spatial +information. The default \code{NA} will not specify the argument in the request.} +} +\description{ +Additional functions to check types and create parameter descriptions.
+} +\keyword{internal} diff --git a/man/check_arguments_non_api.Rd b/man/check_arguments_non_api.Rd new file mode 100644 index 000000000..193ab821a --- /dev/null +++ b/man/check_arguments_non_api.Rd @@ -0,0 +1,50 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/get_ogc_data.R +\name{check_arguments_non_api} +\alias{check_arguments_non_api} +\title{Check non-API arguments} +\usage{ +check_arguments_non_api( + convertType, + no_paging, + limit, + attach_request, + chunk_size, + ... +) +} +\arguments{ +\item{convertType}{logical, defaults to TRUE. +If \code{TRUE}, the function will convert the data to dates, any qualifiers to string +vector and reorder the returned data frame.} + +\item{no_paging}{logical, defaults to FALSE. +If \code{TRUE}, the data will +be requested from a native csv format. This can be dangerous because the +data will cut off at 50,000 rows without indication that more data +is available. Use \code{TRUE} with caution.} + +\item{limit}{numeric, The optional limit parameter is used to control the subset of the +selected features that should be returned in each page. The maximum allowable +limit is 50,000. It may be beneficial to set this number lower if your internet +connection is spotty. The default (\code{NA}) will set the limit to the maximum +allowable limit for the service.} + +\item{attach_request}{logical, defaults to TRUE. +If set to \code{TRUE}, the full request sent to the Water Data API is attached +as an attribute to the data set.} + +\item{chunk_size}{Number of monitoring_location_ids to chunk requests into. +The default for functions that don't generally return long-term data records +is 250, while +the default for time series functions is +10. +Setting to \code{NA} will eliminate site chunking, giving users full control.} + +\item{\dots}{Not used. 
Included to help differentiate official Water Data API arguments +from more seldom used, optional dataRetrieval-specific arguments.} +} +\description{ +Function to check types and create parameter descriptions. +} +\keyword{internal} diff --git a/man/construct_api_requests.Rd b/man/construct_api_requests.Rd index 3e691d751..cbad05506 100644 --- a/man/construct_api_requests.Rd +++ b/man/construct_api_requests.Rd @@ -6,30 +6,54 @@ \usage{ construct_api_requests( service, - properties = NA_character_, + output_id, + ..., bbox = NA, - skipGeometry = FALSE, - no_paging = FALSE, - ... + convertType = getOption("dataRetrieval.convertType"), + no_paging = getOption("dataRetrieval.no_paging"), + chunk_size = getOption("dataRetrieval.site_chunk_size_data"), + limit = getOption("dataRetrieval.limit"), + attach_request = getOption("dataRetrieval.attach_request") ) } \arguments{ \item{service}{Which service available on \url{https://api.waterdata.usgs.gov/ogcapi/v0/}.} -\item{properties}{The properties that should be included for each feature. The -parameter value is a comma-separated list of property names which depend on the -service being called.} +\item{output_id}{Name of id column to return} + +\item{...}{Extra parameters from the specific services.} \item{bbox}{Only features that have a geometry that intersects the bounding box are selected.The bounding box is provided as four or six numbers, depending on whether the coordinate reference system includes a vertical axis (height or depth).} -\item{skipGeometry}{This option can be used to skip response geometries for -each feature. The returning object will be a data frame with no spatial -information.} +\item{convertType}{logical, defaults to TRUE. +If \code{TRUE}, the function will convert the data to dates, any qualifiers to string +vector and reorder the returned data frame.} -\item{...}{Extra parameters from the specific services.} +\item{no_paging}{logical, defaults to FALSE. 
+If \code{TRUE}, the data will +be requested from a native csv format. This can be dangerous because the +data will cut off at 50,000 rows without indication that more data +is available. Use \code{TRUE} with caution.} + +\item{chunk_size}{Number of monitoring_location_ids to chunk requests into. +The default for functions that don't generally return long-term data records +is 250, while +the default for time series functions is +10. +Setting to \code{NA} will eliminate site chunking, giving users full control.} + +\item{limit}{numeric, The optional limit parameter is used to control the subset of the +selected features that should be returned in each page. The maximum allowable +limit is 50,000. It may be beneficial to set this number lower if your internet +connection is spotty. The default (\code{NA}) will set the limit to the maximum +allowable limit for the service.} + +\item{attach_request}{logical, defaults to TRUE. +If set to \code{TRUE}, the full request sent to the Water Data API is attached +as an attribute to the data set.} } \description{ Main documentation: \url{https://api.waterdata.usgs.gov/ogcapi/v0/}, @@ -39,10 +63,12 @@ Swagger docs: \url{https://api.waterdata.usgs.gov/ogcapi/v0/openapi?f=html}. 
site <- "USGS-02238500" pcode <- "00060" req_dv <- construct_api_requests("daily", + output_id = "daily_id", monitoring_location_id = site, parameter_code = "00060") req_dv <- construct_api_requests("daily", + output_id = "daily_id", monitoring_location_id = site, parameter_code = c("00060", "00065")) @@ -50,9 +76,10 @@ sites <- c("USGS-01491000", "USGS-01645000") start_date <- "2018-01-01" end_date <- "2022-01-01" req_dv <- construct_api_requests("daily", - monitoring_location_id = sites, - parameter_code = c("00060", "00065"), - datetime = c(start_date, end_date)) + output_id = "daily_id", + monitoring_location_id = sites, + parameter_code = c("00060", "00065"), + datetime = c(start_date, end_date)) } \keyword{internal} diff --git a/man/read_waterdata.Rd b/man/read_waterdata.Rd index 6b78e0bbf..942c5dcfd 100644 --- a/man/read_waterdata.Rd +++ b/man/read_waterdata.Rd @@ -4,7 +4,14 @@ \alias{read_waterdata} \title{Generalized USGS Water Data API retrieval function} \usage{ -read_waterdata(service, CQL, ..., convertType = TRUE) +read_waterdata( + service, + CQL, + ..., + convertType = getOption("dataRetrieval.convertType"), + limit = getOption("dataRetrieval.limit"), + attach_request = getOption("dataRetrieval.attach_request") +) } \arguments{ \item{service}{character, can be any existing collection.} @@ -15,6 +22,16 @@ read_waterdata(service, CQL, ..., convertType = TRUE) \item{convertType}{logical, defaults to \code{TRUE}. If \code{TRUE}, the function will convert the data to dates and qualifier to string vector.} + +\item{limit}{numeric, The optional limit parameter is used to control the subset of the +selected features that should be returned in each page. The maximum allowable +limit is 50,000. It may be beneficial to set this number lower if your internet +connection is spotty. The default (\code{NA}) will set the limit to the maximum +allowable limit for the service.} + +\item{attach_request}{logical, defaults to TRUE. 
+If set to \code{TRUE}, the full request sent to the Water Data API is attached +as an attribute to the data set.} } \description{ Function that allows complex CQL queries. diff --git a/man/read_waterdata_channel.Rd b/man/read_waterdata_channel.Rd index 537a7fdf6..024cd908d 100644 --- a/man/read_waterdata_channel.Rd +++ b/man/read_waterdata_channel.Rd @@ -33,9 +33,12 @@ read_waterdata_channel( properties = NA_character_, skipGeometry = NA, bbox = NA, - limit = NA, - convertType = TRUE, - no_paging = FALSE + ..., + convertType = getOption("dataRetrieval.convertType"), + no_paging = getOption("dataRetrieval.no_paging"), + limit = getOption("dataRetrieval.limit"), + chunk_size = getOption("dataRetrieval.site_chunk_size_data"), + attach_request = getOption("dataRetrieval.attach_request") ) } \arguments{ @@ -118,30 +121,47 @@ Available options are: geometry, channel_measurements_id, monitoring_location_id, field_visit_id, measurement_number, time, channel_name, channel_flow, channel_flow_unit, channel_width, channel_width_unit, channel_area, channel_area_unit, channel_velocity, channel_velocity_unit, channel_location_distance, channel_location_distance_unit, channel_stability, channel_material, channel_evenness, horizontal_velocity_description, vertical_velocity_description, longitudinal_velocity_description, measurement_type, last_modified, channel_measurement_type, channel_location_direction. The default (\code{NA}) will return all columns of the data.} -\item{skipGeometry}{This option can be used to skip response geometries for +\item{skipGeometry}{This parameter can be used to skip response geometries for each feature. The returning object will be a data frame with no spatial -information.} +information. 
The default \code{NA} will not specify the argument in the request.} \item{bbox}{Only features that have a geometry that intersects the bounding box are selected.The bounding box is provided as four or six numbers, depending on whether the coordinate reference system includes a vertical axis (height or depth). Coordinates are assumed to be in crs 4326. The expected format is a numeric -vector structured: c(xmin,ymin,xmax,ymax). Another way to think of it is c(Western-most longitude, +vector structured: c(xmin,ymin,xmax,ymax). +Another way to think of it is c(Western-most longitude, Southern-most latitude, Eastern-most longitude, Northern-most longitude).} -\item{limit}{The optional limit parameter is used to control the subset of the -selected features that should be returned in each page. The maximum allowable -limit is 50000. It may be beneficial to set this number lower if your internet -connection is spotty. The default (\code{NA}) will set the limit to the maximum -allowable limit for the service.} +\item{...}{Not used. Included to help differentiate official Water Data API arguments +from more seldom used, optional dataRetrieval-specific arguments.} -\item{convertType}{logical, defaults to \code{TRUE}. If \code{TRUE}, the function -will convert the data to dates and qualifier to string vector.} +\item{convertType}{logical, defaults to TRUE. +If \code{TRUE}, the function will convert the data to dates, any qualifiers to string +vector and reorder the returned data frame.} -\item{no_paging}{logical, defaults to \code{FALSE}. If \code{TRUE}, the data will +\item{no_paging}{logical, defaults to FALSE. +If \code{TRUE}, the data will be requested from a native csv format. This can be dangerous because the data will cut off at 50,000 rows without indication that more data is available. Use \code{TRUE} with caution.} + +\item{limit}{numeric, The optional limit parameter is used to control the subset of the +selected features that should be returned in each page. 
The maximum allowable +limit is 50,000. It may be beneficial to set this number lower if your internet +connection is spotty. The default (\code{NA}) will set the limit to the maximum +allowable limit for the service.} + +\item{chunk_size}{Number of monitoring_location_ids to chunk requests into. +The default for functions that don't generally return long-term data records +is 250, while +the default for time series functions is +10. +Setting to \code{NA} will eliminate site chunking, giving users full control.} + +\item{attach_request}{logical, defaults to TRUE. +If set to \code{TRUE}, the full request sent to the Water Data API is attached +as an attribute to the data set.} } \description{ Channel measurements taken as part of streamflow field measurements. diff --git a/man/read_waterdata_combined_meta.Rd b/man/read_waterdata_combined_meta.Rd index f5f2e4de1..f7d22c0d1 100644 --- a/man/read_waterdata_combined_meta.Rd +++ b/man/read_waterdata_combined_meta.Rd @@ -64,9 +64,12 @@ read_waterdata_combined_meta( properties = NA_character_, skipGeometry = NA, bbox = NA, - limit = NA, - convertType = TRUE, - no_paging = FALSE + ..., + convertType = getOption("dataRetrieval.convertType"), + limit = getOption("dataRetrieval.limit"), + no_paging = getOption("dataRetrieval.no_paging"), + chunk_size = getOption("dataRetrieval.site_chunk_size_meta"), + attach_request = getOption("dataRetrieval.attach_request") ) } \arguments{ @@ -91,7 +94,7 @@ Multiple statistic_ids can be requested as a character vector.} \item{data_type}{The computational period type of data collected at the monitoring location.} -\item{computation_identifier}{Indicates whether the data from this time series represent a specific statistical computation. +\item{computation_identifier}{Indicates the computation performed to calculate this time series. Values of "Instantaneous" reflect point measurements. 
Multiple computation_identifiers can be requested as a character vector.} \item{computation_period_identifier}{Multiple computation_period_identifiers can be requested as a character vector.} @@ -108,11 +111,11 @@ for timely best science and to assist with daily operations which need real-time information. Non-primary time series data are only retained by this system for 120 days.} -\item{web_description}{A description of what this time series represents, as used by WDFN and other USGS data dissemination products.} +\item{web_description}{An optional description of the time series. WDFN and other USGS data dissemination products use this field, in combination with sublocation_identifier, to distinguish the differences between multiple time series for the same parameter code, statistic code, and monitoring location.} \item{parent_time_series_id}{The unique identifier representing the parent or "upchain" time series that a daily values time series is generated from. Daily values time series have one and only one parent time series.} -\item{begin}{The datetime of the earliest observation in the time series. Together with \code{end}, this field represents the period of record of a time series. Note that some time series may have large gaps in their collection record. +\item{begin}{The datetime of the earliest observation in the time series. Together with \code{end_utc}, this field represents the period of record of a time series. Note that some time series may have large gaps in their collection record. You can query this field using date-times or intervals, adhering to RFC 3339, or using ISO 8601 duration objects. Intervals may be bounded or half-bounded (double-dots at start or end). Examples: \itemize{ @@ -122,11 +125,11 @@ Examples: \item Duration objects: "P1M" for data from the past month or "PT36H" for the last 36 hours } -Only features that have a \code{begin} that intersects the value of datetime are selected. 
+Only features that have a \code{begin_utc} that intersects the value of datetime are selected. See also Details below for more information.} -\item{end}{The datetime of the most recent observation in the time series. Data returned by this endpoint updates at most once per day, and potentially less frequently than that, and as such there may be more recent observations within a time series than the time series \code{end} value reflects. Together with \code{begin}, this field represents the period of record of a time series. It is additionally used to determine whether a time series is "active". +\item{end}{The datetime of the most recent observation in the time series. Data returned by this endpoint updates at most once per day, and potentially less frequently than that, and as such there may be more recent observations within a time series than the time series \code{end_utc} value reflects. Together with \code{begin_utc}, this field represents the period of record of a time series. It is additionally used to determine whether a time series is "active". You can query this field using date-times or intervals, adhering to RFC 3339, or using ISO 8601 duration objects. Intervals may be bounded or half-bounded (double-dots at start or end). Examples: \itemize{ @@ -136,7 +139,7 @@ Examples: \item Duration objects: "P1M" for data from the past month or "PT36H" for the last 36 hours } -Only features that have a \code{end} that intersects the value of datetime are selected. +Only features that have a \code{end_utc} that intersects the value of datetime are selected. 
See also Details below for more information.} @@ -237,30 +240,47 @@ Available options are: geometry, monitoring_location_id, agency_code, agency_name, monitoring_location_number, monitoring_location_name, district_code, country_code, country_name, state_code, state_name, county_code, county_name, minor_civil_division_code, site_type_code, site_type, hydrologic_unit_code, basin_code, altitude, altitude_accuracy, altitude_method_code, altitude_method_name, vertical_datum, vertical_datum_name, horizontal_positional_accuracy_code, horizontal_positional_accuracy, horizontal_position_method_code, horizontal_position_method_name, original_horizontal_datum, original_horizontal_datum_name, drainage_area, contributing_drainage_area, time_zone_abbreviation, uses_daylight_savings, construction_date, aquifer_code, national_aquifer_code, aquifer_type_code, well_constructed_depth, hole_constructed_depth, depth_source_code, field_measurement_id, unit_of_measure, parameter_name, parameter_code, statistic_id, last_modified, begin, end, data_type, computation_identifier, thresholds, sublocatio The default (\code{NA}) will return all columns of the data.} -\item{skipGeometry}{This option can be used to skip response geometries for +\item{skipGeometry}{This parameter can be used to skip response geometries for each feature. The returning object will be a data frame with no spatial -information.} +information. The default \code{NA} will not specify the argument in the request.} \item{bbox}{Only features that have a geometry that intersects the bounding box are selected.The bounding box is provided as four or six numbers, depending on whether the coordinate reference system includes a vertical axis (height or depth). Coordinates are assumed to be in crs 4326. The expected format is a numeric -vector structured: c(xmin,ymin,xmax,ymax). Another way to think of it is c(Western-most longitude, +vector structured: c(xmin,ymin,xmax,ymax). 
+Another way to think of it is c(Western-most longitude, Southern-most latitude, Eastern-most longitude, Northern-most longitude).} -\item{limit}{The optional limit parameter is used to control the subset of the +\item{...}{Not used. Included to help differentiate official Water Data API arguments +from more seldom used, optional dataRetrieval-specific arguments.} + +\item{convertType}{logical, defaults to TRUE. +If \code{TRUE}, the function will convert the data to dates, any qualifiers to string +vector and reorder the returned data frame.} + +\item{limit}{numeric, The optional limit parameter is used to control the subset of the selected features that should be returned in each page. The maximum allowable -limit is 50000. It may be beneficial to set this number lower if your internet +limit is 50,000. It may be beneficial to set this number lower if your internet connection is spotty. The default (\code{NA}) will set the limit to the maximum allowable limit for the service.} -\item{convertType}{logical, defaults to \code{TRUE}. If \code{TRUE}, the function -will convert the data to dates and qualifier to string vector.} - -\item{no_paging}{logical, defaults to \code{FALSE}. If \code{TRUE}, the data will +\item{no_paging}{logical, defaults to FALSE. +If \code{TRUE}, the data will be requested from a native csv format. This can be dangerous because the data will cut off at 50,000 rows without indication that more data is available. Use \code{TRUE} with caution.} + +\item{chunk_size}{Number of monitoring_location_ids to chunk requests into. +The default for functions that don't generally return long-term data records +is 250, while +the default for time series functions is +10. +Setting to \code{NA} will eliminate site chunking, giving users full control.} + +\item{attach_request}{logical, defaults to TRUE. 
+If set to \code{TRUE}, the full request sent to the Water Data API is attached +as an attribute to the data set.} } \description{ This endpoint combines metadata from timeseries and field measurements collections by site. diff --git a/man/read_waterdata_continuous.Rd b/man/read_waterdata_continuous.Rd index 8b66cbb29..b35db8666 100644 --- a/man/read_waterdata_continuous.Rd +++ b/man/read_waterdata_continuous.Rd @@ -15,9 +15,12 @@ read_waterdata_continuous( value = NA, last_modified = NA_character_, time = NA_character_, - limit = NA, - convertType = TRUE, - no_paging = FALSE + ..., + convertType = getOption("dataRetrieval.convertType"), + limit = getOption("dataRetrieval.limit"), + no_paging = getOption("dataRetrieval.no_paging"), + chunk_size = getOption("dataRetrieval.site_chunk_size_data"), + attach_request = getOption("dataRetrieval.attach_request") ) } \arguments{ @@ -73,20 +76,35 @@ Only features that have a \code{time} that intersects the value of datetime are See also Details below for more information.} -\item{limit}{The optional limit parameter is used to control the subset of the +\item{\dots}{Not used. Included to help differentiate official Water Data API arguments +from more seldom used, optional dataRetrieval-specific arguments.} + +\item{convertType}{logical, defaults to TRUE. +If \code{TRUE}, the function will convert the data to dates, any qualifiers to string +vector and reorder the returned data frame.} + +\item{limit}{numeric, The optional limit parameter is used to control the subset of the selected features that should be returned in each page. The maximum allowable -limit is 50000. It may be beneficial to set this number lower if your internet +limit is 50,000. It may be beneficial to set this number lower if your internet connection is spotty. The default (\code{NA}) will set the limit to the maximum allowable limit for the service.} -\item{convertType}{logical, defaults to \code{TRUE}. 
If \code{TRUE}, the function -will convert the data to dates and qualifier to string vector, and sepcifically -order the returning data frame by time and monitoring_location_id.} - -\item{no_paging}{logical, defaults to \code{FALSE}. If \code{TRUE}, the data will +\item{no_paging}{logical, defaults to FALSE. +If \code{TRUE}, the data will be requested from a native csv format. This can be dangerous because the data will cut off at 50,000 rows without indication that more data is available. Use \code{TRUE} with caution.} + +\item{chunk_size}{Number of monitoring_location_ids to chunk requests into. +The default for functions that don't generally return long-term data records +is 250, while +the default for time series functions is +10. +Setting to \code{NA} will eliminate site chunking, giving users full control.} + +\item{attach_request}{logical, defaults to TRUE. +If set to \code{TRUE}, the full request sent to the Water Data API is attached +as an attribute to the data set.} } \description{ Continuous data are collected via automated sensors installed at a monitoring location. They are collected at a high frequency and often at a fixed 15-minute interval. Depending on the specific monitoring location, the data may be transmitted automatically via telemetry and be available on WDFN within minutes of collection, while other times the delivery of data may be delayed if the monitoring location does not have the capacity to automatically transmit data. Continuous data are described by parameter name and parameter code (pcode). These data might also be referred to as "instantaneous values" or "IV". 
diff --git a/man/read_waterdata_daily.Rd b/man/read_waterdata_daily.Rd index 9f3fa8a33..ef3d7eede 100644 --- a/man/read_waterdata_daily.Rd +++ b/man/read_waterdata_daily.Rd @@ -18,9 +18,12 @@ read_waterdata_daily( skipGeometry = NA, time = NA_character_, bbox = NA, - limit = NA, - convertType = TRUE, - no_paging = FALSE + ..., + convertType = getOption("dataRetrieval.convertType"), + no_paging = getOption("dataRetrieval.no_paging"), + chunk_size = getOption("dataRetrieval.site_chunk_size_data"), + limit = getOption("dataRetrieval.limit"), + attach_request = getOption("dataRetrieval.attach_request") ) } \arguments{ @@ -67,9 +70,9 @@ Only features that have a \code{last_modified} that intersects the value of date See also Details below for more information.} -\item{skipGeometry}{This option can be used to skip response geometries for +\item{skipGeometry}{This parameter can be used to skip response geometries for each feature. The returning object will be a data frame with no spatial -information.} +information. The default \code{NA} will not specify the argument in the request.} \item{time}{The date an observation represents. You can query this field using date-times or intervals, adhering to RFC 3339, or using ISO 8601 duration objects. Intervals may be bounded or half-bounded (double-dots at start or end). Examples: @@ -88,22 +91,39 @@ See also Details below for more information.} box are selected.The bounding box is provided as four or six numbers, depending on whether the coordinate reference system includes a vertical axis (height or depth). Coordinates are assumed to be in crs 4326. The expected format is a numeric -vector structured: c(xmin,ymin,xmax,ymax). Another way to think of it is c(Western-most longitude, +vector structured: c(xmin,ymin,xmax,ymax). 
+Another way to think of it is c(Western-most longitude, Southern-most latitude, Eastern-most longitude, Northern-most longitude).} -\item{limit}{The optional limit parameter is used to control the subset of the -selected features that should be returned in each page. The maximum allowable -limit is 50000. It may be beneficial to set this number lower if your internet -connection is spotty. The default (\code{NA}) will set the limit to the maximum -allowable limit for the service.} +\item{...}{Not used. Included to help differentiate official Water Data API arguments +from more seldom used, optional dataRetrieval-specific arguments.} -\item{convertType}{logical, defaults to \code{TRUE}. If \code{TRUE}, the function -will convert the data to dates and qualifier to string vector.} +\item{convertType}{logical, defaults to TRUE. +If \code{TRUE}, the function will convert the data to dates, any qualifiers to string +vector and reorder the returned data frame.} -\item{no_paging}{logical, defaults to \code{FALSE}. If \code{TRUE}, the data will +\item{no_paging}{logical, defaults to FALSE. +If \code{TRUE}, the data will be requested from a native csv format. This can be dangerous because the data will cut off at 50,000 rows without indication that more data is available. Use \code{TRUE} with caution.} + +\item{chunk_size}{Number of monitoring_location_ids to chunk requests into. +The default for functions that don't generally return long-term data records +is 250, while +the default for time series functions is +10. +Setting to \code{NA} will eliminate site chunking, giving users full control.} + +\item{limit}{numeric, The optional limit parameter is used to control the subset of the +selected features that should be returned in each page. The maximum allowable +limit is 50,000. It may be beneficial to set this number lower if your internet +connection is spotty. 
The default (\code{NA}) will set the limit to the maximum +allowable limit for the service.} + +\item{attach_request}{logical, defaults to TRUE. +If set to \code{TRUE}, the full request sent to the Water Data API is attached +as an attribute to the data set.} } \description{ Daily data provide one data value to represent water conditions for the day. Throughout much of the history of the USGS, the primary water data available was daily data collected manually at the monitoring location once each day. With improved availability of computer storage and automated transmission of data, the daily data published today are generally a statistical summary or metric of the continuous data collected each day, such as the daily mean, minimum, or maximum value. Daily data are automatically calculated from the continuous data of the same parameter code and are described by parameter code and a statistic code. These data have also been referred to as “daily values” or “DV”. diff --git a/man/read_waterdata_field_measurements.Rd b/man/read_waterdata_field_measurements.Rd index 508d2c9cd..b4de841a8 100644 --- a/man/read_waterdata_field_measurements.Rd +++ b/man/read_waterdata_field_measurements.Rd @@ -10,6 +10,7 @@ read_waterdata_field_measurements( observing_procedure_code = NA_character_, properties = NA_character_, field_visit_id = NA_character_, + field_measurements_series_id = NA_character_, approval_status = NA_character_, unit_of_measure = NA_character_, qualifier = NA_character_, @@ -23,9 +24,12 @@ read_waterdata_field_measurements( skipGeometry = NA, time = NA_character_, bbox = NA, - limit = NA, - convertType = TRUE, - no_paging = FALSE + ..., + convertType = getOption("dataRetrieval.convertType"), + no_paging = getOption("dataRetrieval.no_paging"), + limit = getOption("dataRetrieval.limit"), + chunk_size = getOption("dataRetrieval.site_chunk_size_data"), + attach_request = getOption("dataRetrieval.attach_request") ) } \arguments{ @@ -41,11 +45,13 @@ Multiple 
parameter_codes can be requested as a character vector.} \item{properties}{A vector of requested columns to be returned from the query. Available options are: -geometry, field_measurement_id, field_visit_id, parameter_code, monitoring_location_id, observing_procedure_code, observing_procedure, value, unit_of_measure, time, qualifier, vertical_datum, approval_status, measuring_agency, last_modified, control_condition, measurement_rated. +geometry, field_measurement_id, field_measurements_series_id, field_visit_id, parameter_code, monitoring_location_id, observing_procedure_code, observing_procedure, value, unit_of_measure, time, qualifier, vertical_datum, approval_status, measuring_agency, last_modified, control_condition, measurement_rated. The default (\code{NA}) will return all columns of the data.} \item{field_visit_id}{A universally unique identifier (UUID) for the field visit. Multiple measurements may be made during a single field visit.} +\item{field_measurements_series_id}{A unique identifier representing a single collection series. This corresponds to the \code{id} field in the \code{field-measurements-metadata} endpoint. Collection series are defined as the set of field measurements at a given monitoring location for a single parameter code using a single reading type.} + \item{approval_status}{Some of the data that you have obtained from this U.S. Geological Survey database may not have received Director's approval. Any such data values are qualified as provisional and are subject to revision. Provisional data are released on the condition that neither the USGS nor the United States Government may be held liable for any damages resulting from its use. This field reflects the approval status of each record, and is either "Approved", meaining processing review has been completed and the data is approved for publication, or "Provisional" and subject to revision. 
For more information about provisional data, go to \url{https://waterdata.usgs.gov/provisional-data-statement/}.} \item{unit_of_measure}{A human-readable description of the units of measurement associated with an observation.} @@ -74,14 +80,18 @@ See also Details below for more information.} \item{measuring_agency}{The agency performing the measurement.} -\item{control_condition}{What and where the control of flow is for the gage pool.} +\item{control_condition}{The state of the control feature at the time of observation. + +What and where the control of flow is for the gage pool.} -\item{measurement_rated}{Rated measurement based on the hydrologic/hydraulic conditions in which the measurement was made +\item{measurement_rated}{A qualitative estimate of the quality of a measurement. + +Rated measurement based on the hydrologic/hydraulic conditions in which the measurement was made (excellent (2 percent), good (5 percent), fair (8 percent), or poor (more than 8 percent). percent)} -\item{skipGeometry}{This option can be used to skip response geometries for +\item{skipGeometry}{This parameter can be used to skip response geometries for each feature. The returning object will be a data frame with no spatial -information.} +information. The default \code{NA} will not specify the argument in the request.} \item{time}{The date an observation represents. You can query this field using date-times or intervals, adhering to RFC 3339, or using ISO 8601 duration objects. Intervals may be bounded or half-bounded (double-dots at start or end). Examples: @@ -100,22 +110,39 @@ See also Details below for more information.} box are selected.The bounding box is provided as four or six numbers, depending on whether the coordinate reference system includes a vertical axis (height or depth). Coordinates are assumed to be in crs 4326. The expected format is a numeric -vector structured: c(xmin,ymin,xmax,ymax). 
Another way to think of it is c(Western-most longitude, +vector structured: c(xmin,ymin,xmax,ymax). +Another way to think of it is c(Western-most longitude, Southern-most latitude, Eastern-most longitude, Northern-most longitude).} -\item{limit}{The optional limit parameter is used to control the subset of the -selected features that should be returned in each page. The maximum allowable -limit is 50000. It may be beneficial to set this number lower if your internet -connection is spotty. The default (\code{NA}) will set the limit to the maximum -allowable limit for the service.} +\item{...}{Not used. Included to help differentiate official Water Data API arguments +from more seldom used, optional dataRetrieval-specific arguments.} -\item{convertType}{logical, defaults to \code{TRUE}. If \code{TRUE}, the function -will convert the data to dates and qualifier to string vector.} +\item{convertType}{logical, defaults to TRUE. +If \code{TRUE}, the function will convert the data to dates, any qualifiers to string +vector and reorder the returned data frame.} -\item{no_paging}{logical, defaults to \code{FALSE}. If \code{TRUE}, the data will +\item{no_paging}{logical, defaults to FALSE. +If \code{TRUE}, the data will be requested from a native csv format. This can be dangerous because the data will cut off at 50,000 rows without indication that more data is available. Use \code{TRUE} with caution.} + +\item{limit}{numeric, The optional limit parameter is used to control the subset of the +selected features that should be returned in each page. The maximum allowable +limit is 50,000. It may be beneficial to set this number lower if your internet +connection is spotty. The default (\code{NA}) will set the limit to the maximum +allowable limit for the service.} + +\item{chunk_size}{Number of monitoring_location_ids to chunk requests into. 
+The default for functions that don't generally return long-term data records +is 250, while +the default for time series functions is +10. +Setting to \code{NA} will eliminate site chunking, giving users full control.} + +\item{attach_request}{logical, defaults to TRUE. +If set to \code{TRUE}, the full request sent to the Water Data API is attached +as an attribute to the data set.} } \description{ Field measurements are physically measured values collected during a visit to the monitoring location. Field measurements consist of measurements of gage height and discharge, and readings of groundwater levels, and are primarily used as calibration readings for the automated sensors collecting continuous data. They are collected at a low frequency, and delivery of the data in WDFN may be delayed due to data processing time. @@ -165,6 +192,9 @@ multi_site <- read_waterdata_field_measurements( old_df <- read_waterdata_field_measurements(monitoring_location_id = "USGS-425957088141001", time = c("1980-01-01", NA)) +new_df <- read_waterdata_field_measurements(monitoring_location_id = "USGS-425957088141001", + time = c(NA, "2020-01-01")) + surface_water <- read_waterdata_field_measurements( monitoring_location_id = c("USGS-07069000", "USGS-07064000", diff --git a/man/read_waterdata_field_meta.Rd b/man/read_waterdata_field_meta.Rd index 56b9af00b..1ecfe588f 100644 --- a/man/read_waterdata_field_meta.Rd +++ b/man/read_waterdata_field_meta.Rd @@ -16,8 +16,11 @@ read_waterdata_field_meta( skipGeometry = NA, bbox = NA, limit = NA, - convertType = TRUE, - no_paging = FALSE + ..., + convertType = getOption("dataRetrieval.convertType"), + no_paging = getOption("dataRetrieval.no_paging"), + chunk_size = getOption("dataRetrieval.site_chunk_size_meta"), + attach_request = getOption("dataRetrieval.attach_request") ) } \arguments{ @@ -36,7 +39,7 @@ Multiple parameter_names can be requested as a character vector.} Multiple parameter_descriptions can be requested as a character vector.} 
-\item{begin}{The datetime of the earliest observation in the time series. Together with \code{end}, this field represents the period of record of a time series. Note that some time series may have large gaps in their collection record. +\item{begin}{The datetime of the earliest observation in the time series. Together with \code{end_utc}, this field represents the period of record of a time series. Note that some time series may have large gaps in their collection record. You can query this field using date-times or intervals, adhering to RFC 3339, or using ISO 8601 duration objects. Intervals may be bounded or half-bounded (double-dots at start or end). Examples: \itemize{ @@ -46,11 +49,11 @@ Examples: \item Duration objects: "P1M" for data from the past month or "PT36H" for the last 36 hours } -Only features that have a \code{begin} that intersects the value of datetime are selected. +Only features that have a \code{begin_utc} that intersects the value of datetime are selected. See also Details below for more information.} -\item{end}{The datetime of the most recent observation in the time series. Data returned by this endpoint updates at most once per day, and potentially less frequently than that, and as such there may be more recent observations within a time series than the time series \code{end} value reflects. Together with \code{begin}, this field represents the period of record of a time series. It is additionally used to determine whether a time series is "active". +\item{end}{The datetime of the most recent observation in the time series. Data returned by this endpoint updates at most once per day, and potentially less frequently than that, and as such there may be more recent observations within a time series than the time series \code{end_utc} value reflects. Together with \code{begin_utc}, this field represents the period of record of a time series. It is additionally used to determine whether a time series is "active". 
You can query this field using date-times or intervals, adhering to RFC 3339, or using ISO 8601 duration objects. Intervals may be bounded or half-bounded (double-dots at start or end). Examples: \itemize{ @@ -60,7 +63,7 @@ Examples: \item Duration objects: "P1M" for data from the past month or "PT36H" for the last 36 hours } -Only features that have a \code{end} that intersects the value of datetime are selected. +Only features that have a \code{end_utc} that intersects the value of datetime are selected. See also Details below for more information.} @@ -83,30 +86,47 @@ Available options are: geometry, field_measurement_id, monitoring_location_id, parameter_code, parameter_name, parameter_description, begin, end, last_modified. The default (\code{NA}) will return all columns of the data.} -\item{skipGeometry}{This option can be used to skip response geometries for +\item{skipGeometry}{This parameter can be used to skip response geometries for each feature. The returning object will be a data frame with no spatial -information.} +information. The default \code{NA} will not specify the argument in the request.} \item{bbox}{Only features that have a geometry that intersects the bounding box are selected.The bounding box is provided as four or six numbers, depending on whether the coordinate reference system includes a vertical axis (height or depth). Coordinates are assumed to be in crs 4326. The expected format is a numeric -vector structured: c(xmin,ymin,xmax,ymax). Another way to think of it is c(Western-most longitude, +vector structured: c(xmin,ymin,xmax,ymax). +Another way to think of it is c(Western-most longitude, Southern-most latitude, Eastern-most longitude, Northern-most longitude).} -\item{limit}{The optional limit parameter is used to control the subset of the +\item{limit}{numeric, The optional limit parameter is used to control the subset of the selected features that should be returned in each page. The maximum allowable -limit is 50000. 
It may be beneficial to set this number lower if your internet +limit is 50,000. It may be beneficial to set this number lower if your internet connection is spotty. The default (\code{NA}) will set the limit to the maximum allowable limit for the service.} -\item{convertType}{logical, defaults to \code{TRUE}. If \code{TRUE}, the function -will convert the data to dates and qualifier to string vector.} +\item{...}{Not used. Included to help differentiate official Water Data API arguments +from more seldom used, optional dataRetrieval-specific arguments.} -\item{no_paging}{logical, defaults to \code{FALSE}. If \code{TRUE}, the data will +\item{convertType}{logical, defaults to TRUE. +If \code{TRUE}, the function will convert the data to dates, any qualifiers to string +vector and reorder the returned data frame.} + +\item{no_paging}{logical, defaults to FALSE. +If \code{TRUE}, the data will be requested from a native csv format. This can be dangerous because the data will cut off at 50,000 rows without indication that more data is available. Use \code{TRUE} with caution.} + +\item{chunk_size}{Number of monitoring_location_ids to chunk requests into. +The default for functions that don't generally return long-term data records +is 250, while +the default for time series functions is +10. +Setting to \code{NA} will eliminate site chunking, giving users full control.} + +\item{attach_request}{logical, defaults to TRUE. +If set to \code{TRUE}, the full request sent to the Water Data API is attached +as an attribute to the data set.} } \description{ This endpoint provides metadata about field measurement collections, including when the earliest and most recent observations for a parameter occurred at a monitoring location and its units. 
diff --git a/man/read_waterdata_latest_continuous.Rd b/man/read_waterdata_latest_continuous.Rd index 14b211de5..e980a60e6 100644 --- a/man/read_waterdata_latest_continuous.Rd +++ b/man/read_waterdata_latest_continuous.Rd @@ -17,9 +17,12 @@ read_waterdata_latest_continuous( skipGeometry = NA, time = NA_character_, bbox = NA, - limit = NA, - convertType = TRUE, - no_paging = FALSE + ..., + convertType = getOption("dataRetrieval.convertType"), + no_paging = getOption("dataRetrieval.no_paging"), + chunk_size = getOption("dataRetrieval.site_chunk_size_meta"), + limit = getOption("dataRetrieval.limit"), + attach_request = getOption("dataRetrieval.attach_request") ) } \arguments{ @@ -62,9 +65,9 @@ Only features that have a \code{last_modified} that intersects the value of date See also Details below for more information.} -\item{skipGeometry}{This option can be used to skip response geometries for +\item{skipGeometry}{This parameter can be used to skip response geometries for each feature. The returning object will be a data frame with no spatial -information.} +information. The default \code{NA} will not specify the argument in the request.} \item{time}{The date an observation represents. You can query this field using date-times or intervals, adhering to RFC 3339, or using ISO 8601 duration objects. Intervals may be bounded or half-bounded (double-dots at start or end). Examples: @@ -83,22 +86,39 @@ See also Details below for more information.} box are selected.The bounding box is provided as four or six numbers, depending on whether the coordinate reference system includes a vertical axis (height or depth). Coordinates are assumed to be in crs 4326. The expected format is a numeric -vector structured: c(xmin,ymin,xmax,ymax). Another way to think of it is c(Western-most longitude, +vector structured: c(xmin,ymin,xmax,ymax). 
+Another way to think of it is c(Western-most longitude, Southern-most latitude, Eastern-most longitude, Northern-most longitude).} -\item{limit}{The optional limit parameter is used to control the subset of the -selected features that should be returned in each page. The maximum allowable -limit is 50000. It may be beneficial to set this number lower if your internet -connection is spotty. The default (\code{NA}) will set the limit to the maximum -allowable limit for the service.} +\item{...}{Not used. Included to help differentiate official Water Data API arguments +from more seldom used, optional dataRetrieval-specific arguments.} -\item{convertType}{logical, defaults to \code{TRUE}. If \code{TRUE}, the function -will convert the data to dates and qualifier to string vector.} +\item{convertType}{logical, defaults to TRUE. +If \code{TRUE}, the function will convert the data to dates, any qualifiers to string +vector and reorder the returned data frame.} -\item{no_paging}{logical, defaults to \code{FALSE}. If \code{TRUE}, the data will +\item{no_paging}{logical, defaults to FALSE. +If \code{TRUE}, the data will be requested from a native csv format. This can be dangerous because the data will cut off at 50,000 rows without indication that more data is available. Use \code{TRUE} with caution.} + +\item{chunk_size}{Number of monitoring_location_ids to chunk requests into. +The default for functions that don't generally return long-term data records +is 250, while +the default for time series functions is +10. +Setting to \code{NA} will eliminate site chunking, giving users full control.} + +\item{limit}{numeric, The optional limit parameter is used to control the subset of the +selected features that should be returned in each page. The maximum allowable +limit is 50,000. It may be beneficial to set this number lower if your internet +connection is spotty. 
The default (\code{NA}) will set the limit to the maximum +allowable limit for the service.} + +\item{attach_request}{logical, defaults to TRUE. +If set to \code{TRUE}, the full request sent to the Water Data API is attached +as an attribute to the data set.} } \description{ This endpoint provides the most recent observation for each time series of continuous data. Continuous data are collected via automated sensors installed at a monitoring location. They are collected at a high frequency and often at a fixed 15-minute interval. Depending on the specific monitoring location, the data may be transmitted automatically via telemetry and be available on WDFN within minutes of collection, while other times the delivery of data may be delayed if the monitoring location does not have the capacity to automatically transmit data. Continuous data are described by parameter name and parameter code. These data might also be referred to as "instantaneous values" or "IV" diff --git a/man/read_waterdata_latest_daily.Rd b/man/read_waterdata_latest_daily.Rd index a50f727e9..6eb5a386b 100644 --- a/man/read_waterdata_latest_daily.Rd +++ b/man/read_waterdata_latest_daily.Rd @@ -18,9 +18,12 @@ read_waterdata_latest_daily( skipGeometry = NA, time = NA_character_, bbox = NA, - limit = NA, - convertType = TRUE, - no_paging = FALSE + ..., + convertType = getOption("dataRetrieval.convertType"), + no_paging = getOption("dataRetrieval.no_paging"), + limit = getOption("dataRetrieval.limit"), + chunk_size = getOption("dataRetrieval.site_chunk_size_meta"), + attach_request = getOption("dataRetrieval.attach_request") ) } \arguments{ @@ -67,9 +70,9 @@ Only features that have a \code{last_modified} that intersects the value of date See also Details below for more information.} -\item{skipGeometry}{This option can be used to skip response geometries for +\item{skipGeometry}{This parameter can be used to skip response geometries for each feature. 
The returning object will be a data frame with no spatial -information.} +information. The default \code{NA} will not specify the argument in the request.} \item{time}{The date an observation represents. You can query this field using date-times or intervals, adhering to RFC 3339, or using ISO 8601 duration objects. Intervals may be bounded or half-bounded (double-dots at start or end). Examples: @@ -88,22 +91,39 @@ See also Details below for more information.} box are selected.The bounding box is provided as four or six numbers, depending on whether the coordinate reference system includes a vertical axis (height or depth). Coordinates are assumed to be in crs 4326. The expected format is a numeric -vector structured: c(xmin,ymin,xmax,ymax). Another way to think of it is c(Western-most longitude, +vector structured: c(xmin,ymin,xmax,ymax). +Another way to think of it is c(Western-most longitude, Southern-most latitude, Eastern-most longitude, Northern-most longitude).} -\item{limit}{The optional limit parameter is used to control the subset of the -selected features that should be returned in each page. The maximum allowable -limit is 50000. It may be beneficial to set this number lower if your internet -connection is spotty. The default (\code{NA}) will set the limit to the maximum -allowable limit for the service.} +\item{...}{Not used. Included to help differentiate official Water Data API arguments +from more seldom used, optional dataRetrieval-specific arguments.} -\item{convertType}{logical, defaults to \code{TRUE}. If \code{TRUE}, the function -will convert the data to dates and qualifier to string vector.} +\item{convertType}{logical, defaults to TRUE. +If \code{TRUE}, the function will convert the data to dates, any qualifiers to string +vector and reorder the returned data frame.} -\item{no_paging}{logical, defaults to \code{FALSE}. If \code{TRUE}, the data will +\item{no_paging}{logical, defaults to FALSE. 
+If \code{TRUE}, the data will be requested from a native csv format. This can be dangerous because the data will cut off at 50,000 rows without indication that more data is available. Use \code{TRUE} with caution.} + +\item{limit}{numeric, The optional limit parameter is used to control the subset of the +selected features that should be returned in each page. The maximum allowable +limit is 50,000. It may be beneficial to set this number lower if your internet +connection is spotty. The default (\code{NA}) will set the limit to the maximum +allowable limit for the service.} + +\item{chunk_size}{Number of monitoring_location_ids to chunk requests into. +The default for functions that don't generally return long-term data records +is 250, while +the default for time series functions is +10. +Setting to \code{NA} will eliminate site chunking, giving users full control.} + +\item{attach_request}{logical, defaults to TRUE. +If set to \code{TRUE}, the full request sent to the Water Data API is attached +as an attribute to the data set.} } \description{ Daily data provide one data value to represent water conditions for the day. Throughout much of the history of the USGS, the primary water data available was daily data collected manually at the monitoring location once each day. With improved availability of computer storage and automated transmission of data, the daily data published today are generally a statistical summary or metric of the continuous data collected each day, such as the daily mean, minimum, or maximum value. Daily data are automatically calculated from the continuous data of the same parameter code and are described by parameter code and a statistic code. These data have also been referred to as “daily values” or “DV”. 
@@ -148,7 +168,7 @@ dv_data <- read_waterdata_latest_daily(monitoring_location_id = site, skipGeometry = TRUE) multi_site <- read_waterdata_latest_daily(monitoring_location_id = c("USGS-01491000", - "USGS-01645000"), + "USGS-01645000"), parameter_code = c("00060", "00010")) } diff --git a/man/read_waterdata_metadata.Rd b/man/read_waterdata_metadata.Rd index 868df0652..54f50ce01 100644 --- a/man/read_waterdata_metadata.Rd +++ b/man/read_waterdata_metadata.Rd @@ -4,7 +4,12 @@ \alias{read_waterdata_metadata} \title{Generalized USGS Water Meta Data API retrieval function} \usage{ -read_waterdata_metadata(collection, limit = NA, ...) +read_waterdata_metadata( + collection, + ..., + limit = getOption("dataRetrieval.limit"), + attach_request = getOption("dataRetrieval.attach_request") +) } \arguments{ \item{collection}{character, can be any existing collection such @@ -14,14 +19,18 @@ as "parameter-codes", "agency-codes", "altitude-datums", "aquifer-codes", "national-aquifer-codes", "reliability-codes", "site-types", "statistic-codes", "topographic-codes", "time-zone-codes".} +\item{\dots}{Optional arguments to pass to the query. Available parameters +can be found with the \code{get_ogc_params} function.} + \item{limit}{The optional limit parameter is used to control the subset of the selected features that should be returned in each page. The maximum allowable limit is 50000. It may be beneficial to set this number lower if your internet connection is spotty. The default (\code{NA}) will set the limit to the maximum allowable limit for the service.} -\item{\dots}{Optional arguments to pass to the query. Available parameters -can be found with the \code{get_ogc_params} function.} +\item{attach_request}{logical, defaults to TRUE. +If set to \code{TRUE}, the full request sent to the Water Data API is attached +as an attribute to the data set.} } \description{ Function to get metadata from Water Data API. 
These are useful to get the @@ -36,11 +45,16 @@ altitude_datums <- read_waterdata_metadata("altitude-datums") aquifer_codes <- read_waterdata_metadata("aquifer-codes") aquifer_types <- read_waterdata_metadata("aquifer-types") counties <- read_waterdata_metadata("counties") +countries <- read_waterdata_metadata("countries") us_counties <- read_waterdata_metadata("counties", country_code = "US") coordinate_accuracy_codes <- read_waterdata_metadata("coordinate-accuracy-codes") coordinate_datum_codes <- read_waterdata_metadata("coordinate-datum-codes") coordinate_method_codes <- read_waterdata_metadata("coordinate-method-codes") huc_codes <- read_waterdata_metadata("hydrologic-unit-codes") +methods <- read_waterdata_metadata("methods") +method_categories <- read_waterdata_metadata("method-categories") +method_citations <- read_waterdata_metadata("method-citations") +citations <- read_waterdata_metadata("citations") national_aquifer_codes <- read_waterdata_metadata("national-aquifer-codes") parameter_codes <- read_waterdata_metadata("parameter-codes") reliability_codes <- read_waterdata_metadata("reliability-codes") diff --git a/man/read_waterdata_monitoring_location.Rd b/man/read_waterdata_monitoring_location.Rd index b8e87b7d0..5072a644f 100644 --- a/man/read_waterdata_monitoring_location.Rd +++ b/man/read_waterdata_monitoring_location.Rd @@ -47,12 +47,16 @@ read_waterdata_monitoring_location( depth_source_code = NA_character_, properties = NA_character_, bbox = NA, - limit = NA, - skipGeometry = NA + skipGeometry = NA, + ..., + limit = getOption("dataRetrieval.limit"), + no_paging = getOption("dataRetrieval.no_paging"), + chunk_size = getOption("dataRetrieval.site_chunk_size_meta"), + attach_request = getOption("dataRetrieval.attach_request") ) } \arguments{ -\item{monitoring_location_id}{A unique identifier representing a single monitoring location. This corresponds to the \code{id} field in the \code{monitoring-locations} endpoint. 
Monitoring location IDs are created by combining the agency code of the agency responsible for the monitoring location (e.g. USGS) with the ID number of the monitoring location (e.g. 02238500), separated by a hyphen (e.g. USGS-02238500). +\item{monitoring_location_id}{A unique identifier representing a single monitoring location. This corresponds to the \code{monitoring_location_id} field in other endpoints. Monitoring location IDs are created by combining the agency code of the agency responsible for the monitoring location (e.g. USGS) with the ID number of the monitoring location (e.g. 02238500), separated by a hyphen (e.g. USGS-02238500). Multiple monitoring_location_ids can be requested as a character vector.} @@ -143,18 +147,39 @@ The default (\code{NA}) will return all columns of the data.} box are selected.The bounding box is provided as four or six numbers, depending on whether the coordinate reference system includes a vertical axis (height or depth). Coordinates are assumed to be in crs 4326. The expected format is a numeric -vector structured: c(xmin,ymin,xmax,ymax). Another way to think of it is c(Western-most longitude, +vector structured: c(xmin,ymin,xmax,ymax). +Another way to think of it is c(Western-most longitude, Southern-most latitude, Eastern-most longitude, Northern-most longitude).} -\item{limit}{The optional limit parameter is used to control the subset of the +\item{skipGeometry}{This parameter can be used to skip response geometries for +each feature. The returning object will be a data frame with no spatial +information. The default \code{NA} will not specify the argument in the request.} + +\item{...}{Not used. Included to help differentiate official Water Data API arguments +from more seldom used, optional dataRetrieval-specific arguments.} + +\item{limit}{numeric, The optional limit parameter is used to control the subset of the selected features that should be returned in each page. The maximum allowable -limit is 50000. 
It may be beneficial to set this number lower if your internet +limit is 50,000. It may be beneficial to set this number lower if your internet connection is spotty. The default (\code{NA}) will set the limit to the maximum allowable limit for the service.} -\item{skipGeometry}{This option can be used to skip response geometries for -each feature. The returning object will be a data frame with no spatial -information.} +\item{no_paging}{logical, defaults to FALSE. +If \code{TRUE}, the data will +be requested from a native csv format. This can be dangerous because the +data will cut off at 50,000 rows without indication that more data +is available. Use \code{TRUE} with caution.} + +\item{chunk_size}{Number of monitoring_location_ids to chunk requests into. +The default for functions that don't generally return long-term data records +is 250, while +the default for time series functions is +10. +Setting to \code{NA} will eliminate site chunking, giving users full control.} + +\item{attach_request}{logical, defaults to TRUE. +If set to \code{TRUE}, the full request sent to the Water Data API is attached +as an attribute to the data set.} } \description{ Location information is basic information about the monitoring location including the name, identifier, agency responsible for data collection, and the date the location was established. It also includes information about the type of location, such as stream, lake, or groundwater, and geographic information about the location, such as state, county, latitude and longitude, and hydrologic unit code (HUC). 
@@ -189,6 +214,8 @@ site_info_no_sf <- read_waterdata_monitoring_location( bbox_vals = c(-94.00, 35.0, -93.5, 35.5) multi_site <- read_waterdata_monitoring_location(bbox = bbox_vals) + + } \dontshow{\}) # examplesIf} } diff --git a/man/read_waterdata_parameter_codes.Rd b/man/read_waterdata_parameter_codes.Rd index 95f9d0901..69cca65f2 100644 --- a/man/read_waterdata_parameter_codes.Rd +++ b/man/read_waterdata_parameter_codes.Rd @@ -17,7 +17,9 @@ read_waterdata_parameter_codes( temperature_basis = NA_character_, epa_equivalence = NA_character_, properties = NA_character_, - limit = NA + ..., + limit = getOption("dataRetrieval.limit"), + attach_request = getOption("dataRetrieval.attach_request") ) } \arguments{ @@ -48,11 +50,18 @@ Available options are: geometry, parameter_code_id, parameter_name, unit_of_measure, parameter_group_code, parameter_description, medium, statistical_basis, time_basis, weight_basis, particle_size_basis, sample_fraction, temperature_basis, epa_equivalence. The default (\code{NA}) will return all columns of the data.} +\item{\dots}{Not used. Included to help differentiate official Water Data API arguments +from more seldom used, optional dataRetrieval-specific arguments.} + \item{limit}{The optional limit parameter is used to control the subset of the selected features that should be returned in each page. The maximum allowable limit is 50000. It may be beneficial to set this number lower if your internet connection is spotty. The default (\code{NA}) will set the limit to the maximum allowable limit for the service.} + +\item{attach_request}{logical, defaults to TRUE. +If set to \code{TRUE}, the full request sent to the Water Data API is attached +as an attribute to the data set.} } \description{ Parameter codes are 5-digit codes and associated descriptions used to identify the constituent measured and the units of measure. 
Some parameter code definitions include information about the sampling matrix, fraction, and methods used to measure the constituent. Some parameters are fixed-value (fxd) numeric codes having textual meaning (for example: parameter code 00041 is a weather code parameter, code of 60 means rain), but more commonly represent a numeric value for chemical, physical, or biological data. diff --git a/man/read_waterdata_ratings.Rd b/man/read_waterdata_ratings.Rd new file mode 100644 index 000000000..4b516e345 --- /dev/null +++ b/man/read_waterdata_ratings.Rd @@ -0,0 +1,118 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/read_waterdata_ratings.R +\name{read_waterdata_ratings} +\alias{read_waterdata_ratings} +\title{Get USGS Rating Curve Data} +\usage{ +read_waterdata_ratings( + monitoring_location_id = NA_character_, + file_type = c("exsa", "base", "corr"), + file_path = tempdir(), + bbox = NA, + datetime = NA_character_, + ..., + limit = 10000, + download_and_parse = TRUE +) +} +\arguments{ +\item{monitoring_location_id}{A unique identifier representing a single +monitoring location. Monitoring location IDs are created by combining the +agency code of the agency responsible for the monitoring location (e.g. USGS) +with the ID number of the monitoring location (e.g. 02238500), separated by +a hyphen (e.g. USGS-02238500).} + +\item{file_type}{Rating file type. Could be any of "exsa", "corr", or "base". +If \code{file_type} is "base" then the columns are +INDEP, typically the gage height, in feet; DEP, typically the streamflow, +in cubic feet per second; and STOR, where "*" indicates that the pair are +a fixed point of the rating curve. If \code{file_type} is "exsa" then an +additional column, SHIFT, is included that indicates the current shift in +the rating for that value of INDEP. 
If \code{file_type} is "corr" then the
+columns are INDEP, typically the gage height, in feet; CORR, the correction
+for that value; and CORRINDEP, the corrected value for CORR.}
+
+\item{file_path}{Path to save the rating curve rdb files. The
+default is \code{tempdir()}, which will wipe out the files.}
+
+\item{bbox}{Only features that have a geometry that intersects the bounding
+box are selected. The bounding box is provided as four or six numbers, depending
+on whether the coordinate reference system includes a vertical axis (height or
+depth). Coordinates are assumed to be in crs 4326. The expected format is a numeric
+vector structured: c(xmin,ymin,xmax,ymax).
+Another way to think of it is c(Western-most longitude,
+Southern-most latitude, Eastern-most longitude, Northern-most latitude).}
+
+\item{datetime}{Only return items that have a temporal property that
+intersects this value. Either a date-time or an interval, open or closed.
+See Details below.}
+
+\item{\dots}{Not used.}
+
+\item{limit}{Limits the number of results that are included in each page of
+the response (capped at the default 10,000).}
+
+\item{download_and_parse}{Logical to define whether or not to download, parse,
+and return a list of data frames with rating curve data (\code{TRUE}), or to return
+just a list of available rating curve files (\code{FALSE}). Default is \code{TRUE}.}
+}
+\value{
+List of data frames which contain the requested rating curves.
+}
+\description{
+Reads current rating table for an active USGS streamgage. More information
+can be found at https://api.waterdata.usgs.gov/docs/stac/.
+}
+\details{
+You can also use a vector of length 2 for any time queries (such as time
+or last_modified). The first value is the starting date (or datetime),
+the second value is the ending date (or datetime).
+NA's within the vector indicate a half-bound date.
+For example, \code{time = c("2024-01-01", NA)} will return all data starting
+at 2024-01-01. 
+\code{time = c(NA, "2024-01-01")} will return all data from the beginning of
+the timeseries until 2024-01-01.
+By default, time is assumed UTC, although time zone attributes
+will be accommodated. As an example, setting \code{time = as.POSIXct(c("2021-01-01 12:00:00",
+"2021-01-01 14:00"), tz = "America/New_York")} will request data between
+noon and 2pm eastern time on 2021-01-01.
+All time values RETURNED from the service are UTC with the exception of
+daily data, which returns time values in local dates.
+}
+\examples{
+\dontshow{if (is_dataRetrieval_user()) withAutoprint(\{ # examplesIf}
+
+\donttest{
+
+monitoring_location_id <- c("USGS-01104475", "USGS-01104460")
+ratings_exsa <- read_waterdata_ratings(
+  monitoring_location_id = monitoring_location_id,
+  file_type = "exsa")
+
+head(ratings_exsa[["USGS-01104475.exsa.rdb"]])
+comment(ratings_exsa[["USGS-01104475.exsa.rdb"]])[1:15]
+
+ratings_corr <- read_waterdata_ratings(
+  monitoring_location_id = monitoring_location_id,
+  file_type = "corr")
+
+head(ratings_corr[["USGS-01104460.corr.rdb"]])
+comment(ratings_corr[["USGS-01104460.corr.rdb"]])[1:15]
+
+rating_2 <- read_waterdata_ratings(
+  monitoring_location_id = monitoring_location_id,
+  file_type = c("corr", "exsa"))
+names(rating_2)
+
+bbox <- c(-95.00, 40.0, -92.0, 42)
+
+bbox_query <- read_waterdata_ratings(bbox = bbox,
+                                     download_and_parse = FALSE)
+length(bbox_query)
+recent_query <- read_waterdata_ratings(bbox = bbox,
+                                       datetime = c(Sys.Date()-7, NA),
+                                       download_and_parse = FALSE)
+length(recent_query)
+}
+\dontshow{\}) # examplesIf}
+}
diff --git a/man/read_waterdata_ts_meta.Rd b/man/read_waterdata_ts_meta.Rd
index 181b237f0..3188d4b8b 100644
--- a/man/read_waterdata_ts_meta.Rd
+++ b/man/read_waterdata_ts_meta.Rd
@@ -25,13 +25,15 @@ read_waterdata_ts_meta(
   time_series_id = NA_character_,
   web_description = NA_character_,
   skipGeometry = NA,
-  limit = NA,
-  max_results = NA,
   bbox = NA,
   begin = NA_character_,
   end = NA_character_,
-  convertType = 
TRUE, - no_paging = FALSE + ..., + limit = getOption("dataRetrieval.limit"), + convertType = getOption("dataRetrieval.convertType"), + no_paging = getOption("dataRetrieval.no_paging"), + chunk_size = getOption("dataRetrieval.site_chunk_size_meta"), + attach_request = getOption("dataRetrieval.attach_request") ) } \arguments{ @@ -68,7 +70,7 @@ Only features that have a \code{last_modified} that intersects the value of date See also Details below for more information.} -\item{begin_utc}{The datetime of the earliest observation in the time series. Together with \code{end}, this field represents the period of record of a time series. Note that some time series may have large gaps in their collection record. +\item{begin_utc}{The datetime of the earliest observation in the time series. Together with \code{end_utc}, this field represents the period of record of a time series. Note that some time series may have large gaps in their collection record. You can query this field using date-times or intervals, adhering to RFC 3339, or using ISO 8601 duration objects. Intervals may be bounded or half-bounded (double-dots at start or end). Examples: \itemize{ @@ -78,12 +80,12 @@ Examples: \item Duration objects: "P1M" for data from the past month or "PT36H" for the last 36 hours } -Only features that have a \code{begin} that intersects the value of datetime are selected. +Only features that have a \code{begin_utc} that intersects the value of datetime are selected. #' See also Details below for more information.} -\item{end_utc}{The datetime of the most recent observation in the time series. Data returned by this endpoint updates at most once per day, and potentially less frequently than that, and as such there may be more recent observations within a time series than the time series \code{end} value reflects. Together with \code{begin}, this field represents the period of record of a time series. It is additionally used to determine whether a time series is "active". 
+\item{end_utc}{The datetime of the most recent observation in the time series. Data returned by this endpoint updates at most once per day, and potentially less frequently than that, and as such there may be more recent observations within a time series than the time series \code{end_utc} value reflects. Together with \code{begin_utc}, this field represents the period of record of a time series. It is additionally used to determine whether a time series is "active". You can query this field using date-times or intervals, adhering to RFC 3339, or using ISO 8601 duration objects. Intervals may be bounded or half-bounded (double-dots at start or end). Examples: \itemize{ @@ -93,7 +95,7 @@ Examples: \item Duration objects: "P1M" for data from the past month or "PT36H" for the last 36 hours } -Only features that have a \code{end} that intersects the value of datetime are selected. +Only features that have a \code{end_utc} that intersects the value of datetime are selected. See also Details below for more information.} @@ -106,7 +108,7 @@ See also Details below for more information.} \item{computation_period_identifier}{Indicates the period of data used for any statistical computations. Multiple computation_period_identifiers can be requested as a character vector.} -\item{computation_identifier}{Indicates whether the data from this time series represent a specific statistical computation. +\item{computation_identifier}{Indicates the computation performed to calculate this time series. Values of "Instantaneous" reflect point measurements. Multiple computation_identifiers can be requested as a character vector.} \item{thresholds}{Thresholds represent known numeric limits for a time series, for example the historic maximum value for a parameter or a level below which a sensor is non-operative. 
These thresholds are sometimes used to automatically determine if an observation is erroneous due to sensor error, and therefore shouldn't be included in the time series.} @@ -123,41 +125,55 @@ this system for 120 days.} \item{parent_time_series_id}{The unique identifier representing the parent or "upchain" time series that a daily values time series is generated from. Daily values time series have one and only one parent time series.} -\item{time_series_id}{A unique identifier representing a single time series. This corresponds to the \code{id} field in the \code{time-series-metadata} endpoint.} +\item{time_series_id}{A unique identifier representing a single time series. This corresponds to the "time_series_id" field in other endpoints.} -\item{web_description}{A description of what this time series represents, as used by WDFN and other USGS data dissemination products.} +\item{web_description}{An optional description of the time series. WDFN and other USGS data dissemination products use this field, in combination with sublocation_identifier, to distinguish the differences between multiple time series for the same parameter code, statistic code, and monitoring location.} -\item{skipGeometry}{This option can be used to skip response geometries for +\item{skipGeometry}{This parameter can be used to skip response geometries for each feature. The returning object will be a data frame with no spatial -information.} - -\item{limit}{The optional limit parameter is used to control the subset of the -selected features that should be returned in each page. The maximum allowable -limit is 50000. It may be beneficial to set this number lower if your internet -connection is spotty. The default (\code{NA}) will set the limit to the maximum -allowable limit for the service.} - -\item{max_results}{The optional maximum number of rows to return. This value -must be less than the requested limit.} +information. 
The default \code{NA} will not specify the argument in the request.} \item{bbox}{Only features that have a geometry that intersects the bounding box are selected.The bounding box is provided as four or six numbers, depending on whether the coordinate reference system includes a vertical axis (height or depth). Coordinates are assumed to be in crs 4326. The expected format is a numeric -vector structured: c(xmin,ymin,xmax,ymax). Another way to think of it is c(Western-most longitude, +vector structured: c(xmin,ymin,xmax,ymax). +Another way to think of it is c(Western-most longitude, Southern-most latitude, Eastern-most longitude, Northern-most longitude).} \item{begin}{This field contains the same information as "begin_utc", but in the local time of the monitoring location. It is retained for backwards compatibility, but will be removed in V1 of these APIs.} \item{end}{This field contains the same information as "end_utc", but in the local time of the monitoring location. It is retained for backwards compatibility, but will be removed in V1 of these APIs.} -\item{convertType}{logical, defaults to \code{TRUE}. If \code{TRUE}, the function -will convert the data to dates and qualifier to string vector.} +\item{...}{Not used. Included to help differentiate official Water Data API arguments +from more seldom used, optional dataRetrieval-specific arguments.} + +\item{limit}{numeric, The optional limit parameter is used to control the subset of the +selected features that should be returned in each page. The maximum allowable +limit is 50,000. It may be beneficial to set this number lower if your internet +connection is spotty. The default (\code{NA}) will set the limit to the maximum +allowable limit for the service.} -\item{no_paging}{logical, defaults to \code{FALSE}. If \code{TRUE}, the data will +\item{convertType}{logical, defaults to TRUE. 
+If \code{TRUE}, the function will convert the data to dates, any qualifiers to string +vector and reorder the returned data frame.} + +\item{no_paging}{logical, defaults to FALSE. +If \code{TRUE}, the data will be requested from a native csv format. This can be dangerous because the data will cut off at 50,000 rows without indication that more data is available. Use \code{TRUE} with caution.} + +\item{chunk_size}{Number of monitoring_location_ids to chunk requests into. +The default for functions that don't generally return long-term data records +is 250, while +the default for time series functions is +10. +Setting to \code{NA} will eliminate site chunking, giving users full control.} + +\item{attach_request}{logical, defaults to TRUE. +If set to \code{TRUE}, the full request sent to the Water Data API is attached +as an attribute to the data set.} } \description{ Daily data and continuous measurements are grouped into time series, which represent a collection of observations of a single parameter, potentially aggregated using a standard statistic, at a single monitoring location. This endpoint provides metadata about those time series, including their operational thresholds, units of measurement, and when the earliest and most recent observations in a time series occurred. 
@@ -186,7 +202,7 @@ site <- "USGS-02238500" meta_1 <- read_waterdata_ts_meta(monitoring_location_id = site) meta_multi <- read_waterdata_ts_meta(monitoring_location_id = c("USGS-01491000", - "USGS-01645000"), + "USGS-01645000"), parameter_code = c("00060", "00010"), properties = c("monitoring_location_id", "parameter_code", diff --git a/tests/testthat/tests_general.R b/tests/testthat/tests_general.R index f5a13b1da..8945174fe 100644 --- a/tests/testthat/tests_general.R +++ b/tests/testthat/tests_general.R @@ -97,41 +97,6 @@ test_that("General NWIS retrievals working", { expect_is(timeseriesInfo$begin, "POSIXct") # nolint start: line_length_linter - url <- httr2::request( - "https://waterservices.usgs.gov/nwis/dv/?site=09037500&format=rdb&ParameterCd=00060&StatCd=00003&startDT=1985-10-02&endDT=2012-09-06" - ) - dv <- importRDB1(url, asDateTime = FALSE) - # nolint end - dailyStat <- readNWISdata( - site = c("03112500", "03111520", "02319394"), - service = "stat", - statReportType = "daily", - statType = c("p25", "p50", "p75", "min", "max"), - parameterCd = "00065", - convertType = FALSE - ) - expect_true(length(dailyStat$min_va) > 1) - expect_is(dailyStat$p25_va, "character") - - waterYearStat <- readNWISdata( - site = c("01646500"), - service = "stat", - statReportType = "annual", - statYearType = "water", - missingData = "on" - ) - expect_is(waterYearStat$mean_va, "numeric") - expect_is(waterYearStat$parameter_cd, "character") - - # Empty data - # note....not empty anymore! 
- # nolint start: line_length_linter - urlTest <- httr2::request( - "https://nwis.waterservices.usgs.gov/nwis/iv/?site=11447650&format=waterml,1.1&ParameterCd=63680&startDT=2016-12-13&endDT=2016-12-13" - ) - x <- importWaterML1(urlTest) - expect_true(all(c("agency_cd", "site_no", "dateTime", "tz_cd") %in% names(x))) - # nolint end # Test list: args <- list( @@ -190,37 +155,20 @@ test_that("General NWIS retrievals working", { AS <- read_waterdata_monitoring_location(state_name = "American Samoa") expect_gt(nrow(AS), 0) - site_id <- "01594440" - rating_curve <- readNWISdata( - service = "rating", - site_no = site_id, + site_id <- "USGS-01594440" + rating_curve <- read_waterdata_ratings( + monitoring_location_id = site_id, file_type = "base" ) - rating_curve2 <- readNWISrating( - siteNumber = site_id, - type = "base" - ) - expect_equal( - attr(rating_curve, "url"), - "https://waterdata.usgs.gov/nwisweb/get_ratings/?site_no=01594440&file_type=base" - ) - expect_equal(rating_curve$INDEP, rating_curve2$INDEP) - state_rating_list <- readNWISdata( - service = "rating", - file_type = "base", - period = 24 + expect_equal(names(rating_curve), "USGS-01594440.base.rdb") + + state_rating_list <- read_waterdata_ratings( + datetime = c(Sys.Date() - 1, NA), + download_and_parse = FALSE ) - expect_true(all( - names(state_rating_list) %in% - c( - "agency_cd", - "site_no", - "type", - "update_time", - "url" - ) - )) + + expect_true(length(state_rating_list) > 0) multi_hucs <- c("07130007", "07130011") multi_huc_sites <- read_waterdata_monitoring_location( @@ -302,6 +250,9 @@ test_that("General WQP retrievals working", { service = "ResultWQX3" ) expect_is(pHData$Activity_StartDateTime, "POSIXct") + expect_type(pHData$USGSpcode, "character") + expect_type(pHData$Result_Measure, "double") + expect_type(pHData$SampleCollectionMethod_Identifier, "character") # # # testing lists: startDate <- as.Date("2022-01-01") diff --git a/tests/testthat/tests_imports.R b/tests/testthat/tests_imports.R 
index 509eede51..a8916640d 100644 --- a/tests/testthat/tests_imports.R +++ b/tests/testthat/tests_imports.R @@ -1,55 +1,3 @@ -context("importRDB_noCRAN") - -test_that("External importRDB1 tests", { - testthat::skip_on_cran() - - siteNumber <- "02177000" - startDate <- "2012-09-01" - endDate <- "2012-10-01" - offering <- "00003" - property <- "00060" - - obs_url <- constructNWISURL( - siteNumber, - property, - startDate, - endDate, - "dv", - format = "tsv" - ) - data <- importRDB1(obs_url) - expect_is(data$datetime, "Date") - - urlMultiPcodes <- constructNWISURL( - "04085427", - c("00060", "00010"), - startDate, - endDate, - "dv", - statCd = c("00003", "00001"), - "tsv" - ) - multiData <- importRDB1(urlMultiPcodes) - pCodeCols <- grep("X", colnames(multiData)) - expect_true(length(pCodeCols) / 2 > 2) - - unitDataURL <- constructNWISURL( - siteNumber, - property, - "2013-11-03", - "2013-11-03", - "uv", - format = "tsv" - ) # includes timezone switch - unitData <- importRDB1(unitDataURL, asDateTime = TRUE) - - # Need to think of a way to automatically check timezone conversion: - # expect_that(as.numeric(unitData[which(unitData$tz_cd == "EST")[1],"datetime"]), - # equals(as.numeric(as.POSIXct("2013-11-03 01:00:00", tz="UTC")+60*60*5))) - - site <- "05427850" -}) - context("importRDB") test_that("CRAN-friendly importRDB test", { filePath <- system.file("extdata", package = "dataRetrieval") @@ -71,146 +19,6 @@ test_that("CRAN-friendly importWaterML1 test", { expect_is(importUserWML1$dateTime, "POSIXct") }) -test_that("External importWaterML1 test", { - testthat::skip_on_cran() - - siteNumber <- "02177000" - startDate <- "2012-09-01" - endDate <- "2012-10-01" - offering <- "00003" - property <- "00060" - obs_url <- constructNWISURL(siteNumber, property, startDate, endDate, "dv") - - data <- importWaterML1(obs_url, TRUE) - expect_is(data$dateTime, "POSIXct") - - unitDataURL <- constructNWISURL( - siteNumber, - property, - "2020-10-30", - "2020-11-01", - "uv" - ) - 
unitData <- importWaterML1(unitDataURL, TRUE) - expect_is(unitData$dateTime, "POSIXct") - - # Two sites, two pcodes, one site has two data descriptors - siteNumber <- c("01480015", "04085427") # one site seems to have lost it"s 2nd dd - obs_url <- constructNWISURL( - siteNumber, - c("00060", "00010"), - startDate, - endDate, - "dv" - ) - data <- importWaterML1(obs_url) - - expect_true(length(unique(data$site_no)) == 2) - expect_true(ncol(data) == 8) # 3 data, 3 remark codes, and 4 (agency, site, dateTime, tz) - - inactiveSite <- "05212700" - inactiveSite <- constructNWISURL( - inactiveSite, - "00060", - "2014-01-01", - "2014-01-10", - "dv" - ) - inactiveSite <- importWaterML1(inactiveSite) - expect_true(nrow(inactiveSite) == 0) - - inactiveAndActive <- c("07334200", "05212700") - inactiveAndActive <- constructNWISURL( - inactiveAndActive, - "00060", - "2014-01-01", - "2014-12-31", - "dv" - ) - inactiveAndActive <- importWaterML1(inactiveAndActive) - # - # The inactive site became active, need a new test. 
- - # raw XML - url <- constructNWISURL( - service = "dv", - siteNumber = "02319300", - parameterCd = "00060", - startDate = "2014-01-01", - endDate = "2014-01-01" - ) - raw <- httr2::req_perform(url) - raw <- httr2::resp_body_xml(raw) - rawParsed <- importWaterML1(raw) - expect_true(nrow(rawParsed) > 0) - expect_true(data.class(rawParsed$X_00060_00003) == "numeric") - - # no data - url <- constructNWISURL( - "05212700", - "00060", - "2014-01-01", - "2014-01-10", - "dv", - statCd = "00001" - ) - noData <- importWaterML1(url) - expect_true(class(attr(noData, "url")) == "character") - expect_true(all(dim(noData) == c(0, 4))) - - url <- constructNWISURL( - service = "iv", - site = c("02319300", "02171500"), - startDate = "2015-04-04", - endDate = "2015-04-05" - ) - data <- importWaterML1(url, tz = "America/New_York", asDateTime = TRUE) - expect_true(data.class(data$dateTime) == "POSIXct") - expect_true(nrow(data) > 0) - - # expect_error(readNWISdata( - # sites = "05114000", - # service = "iv", - # parameterCd = "00060", - # startDate = "2014-05-01T00:00", - # endDate = "2014-05-01T12:00", - # tz = "blah" - # )) - # - # arg.list <- list( - # sites = "05114000", - # parameterCd = "00060", - # startDate = "2014-05-01T00:00", - # endDate = "2014-05-01T12:00" - # ) - # - # chi_iv <- readNWISdata(arg.list, - # service = "iv", - # tz = "America/Chicago" - # ) - # - # expect_true(all(chi_iv$tz_cd == "America/Chicago")) - # expect_equal(chi_iv$dateTime[1], as.POSIXct("2014-05-01T00:00", - # format = "%Y-%m-%dT%H:%M", - # tz = "America/Chicago" - # )) - # expect_equal(chi_iv$dateTime[nrow(chi_iv)], as.POSIXct("2014-05-01T12:00", - # format = "%Y-%m-%dT%H:%M", - # tz = "America/Chicago" - # )) - - # Time over daylight saving switch: - tzURL <- constructNWISURL( - "04027000", - c("00300", "63680"), - "2011-11-05", - "2011-11-07", - "uv" - ) - tzIssue <- importWaterML1(tzURL, asDateTime = TRUE, tz = "America/Chicago") - expect_false(any(duplicated(tzIssue$dateTime))) -}) - 
context("importWaterML2") test_that("importWaterML2 internal test", { @@ -228,27 +36,7 @@ context("importWQP_noCRAN") test_that("External WQP tests", { testthat::skip_on_cran() - rawSampleURL <- constructWQPURL( - "USGS-01594440", - "01075", - "", - "", - legacy = FALSE - ) - # rawSample <- importWQP(rawSampleURL) - # expect_is(rawSample$Activity_StartDateTime, "POSIXct") - url2 <- constructWQPURL("USGS-01594440", "01075", "", "", legacy = TRUE) rawSample2 <- suppressWarnings(importWQP(url2)) expect_is(rawSample2$ActivityStartDateTime, "POSIXct") - - STORETex <- constructWQPURL( - "WIDNR_WQX-10032762", - "Specific conductance", - "", - "", - legacy = FALSE - ) - # STORETdata <- importWQP(STORETex) - # expect_is(STORETdata$Activity_StartDateTime, "POSIXct") }) diff --git a/tests/testthat/tests_samples.R b/tests/testthat/tests_samples.R index b06830733..02e05fc5a 100644 --- a/tests/testthat/tests_samples.R +++ b/tests/testthat/tests_samples.R @@ -9,6 +9,9 @@ test_that("General samples-data retrievals work using WQP tests", { characteristic = nameToUse ) expect_is(pHData$Activity_StartDateTime, "POSIXct") + expect_type(pHData$USGSpcode, "character") + expect_type(pHData$Result_Measure, "double") + expect_type(pHData$SampleCollectionMethod_Identifier, "character") # testing lists: startDate <- as.Date("2022-01-01") diff --git a/tests/testthat/tests_userFriendly_fxns.R b/tests/testthat/tests_userFriendly_fxns.R index 6ec117802..c5c7f6d86 100644 --- a/tests/testthat/tests_userFriendly_fxns.R +++ b/tests/testthat/tests_userFriendly_fxns.R @@ -44,10 +44,38 @@ test_that("Unit value data returns correct types", { ) # nolint start: line_length_linter - expect_equal( - attr(rawData, "request")[["url"]], - "https://api.waterdata.usgs.gov/ogcapi/v0/collections/continuous/items?f=json&lang=en-US&skipGeometry=TRUE&monitoring_location_id=USGS-05114000¶meter_code=00060&time=2014-10-10T00%3A00%3A00Z%2F2014-10-10T00%3A00%3A00Z&limit=50000" + expect_true( + grepl( + x = attr(rawData, 
"request")[["url"]], + pattern = "monitoring_location_id=USGS-05114000" + ) + ) + + expect_true( + grepl( + x = attr(rawData, "request")[["url"]], + pattern = "time=2014-10-10T00%3A00%3A00Z%2F2014-10-10T00%3A00%3A00Z" + ) + ) + + expect_true( + grepl( + x = attr(rawData, "request")[["url"]], + pattern = "parameter_code=00060" + ) + ) + + expect_true( + grepl( + x = attr(rawData, "request")[["url"]], + pattern = paste0( + "https://api.waterdata.usgs.gov/ogcapi/", + getOption("dataRetrieval.api_version"), + "/collections/continuous/items" + ) + ) ) + # nolint end timeZoneChange <- read_waterdata_continuous( monitoring_location_id = c("04024430", "04024000"), @@ -58,9 +86,11 @@ test_that("Unit value data returns correct types", { expect_is(rawData$time, "POSIXct") expect_is(rawData$value, "numeric") # nolint start: line_length_linter - expect_equal( - attr(rawData, "request")[["url"]], - "https://api.waterdata.usgs.gov/ogcapi/v0/collections/continuous/items?f=json&lang=en-US&skipGeometry=TRUE&monitoring_location_id=USGS-05114000¶meter_code=00060&time=2014-10-10T00%3A00%3A00Z%2F2014-10-10T00%3A00%3A00Z&limit=50000" + expect_true( + grepl( + x = attr(rawData, "request")[["url"]], + pattern = "time=2014-10-10T00%3A00%3A00Z%2F2014-10-10T00%3A00%3A00Z" + ) ) # nolint end site <- "USGS-04087170" @@ -88,9 +118,12 @@ test_that("peak, rating curves, surface-water measurements", { expect_is(data$agency_cd, "character") # Rating curvs: - siteNumber <- "01594440" - data <- readNWISrating(siteNumber, "base") - expect_that(length(attr(data, "RATING")), equals(7)) + siteNumber <- "USGS-01594440" + data <- read_waterdata_ratings( + monitoring_location_id = siteNumber, + file_type = "base" + ) + expect_gt(length(comment(data[[1]])), 1) # Surface meas: siteNumbers <- c("USGS-01594440", "USGS-040851325") @@ -120,6 +153,8 @@ test_that("peak, rating curves, surface-water measurements", { )), 0 ) + # This does come back empty because 50268 isn't at this site + expect_equal( 
ncol(read_waterdata_ts_meta( monitoring_location_id = "USGS-10312000", @@ -129,11 +164,6 @@ test_that("peak, rating curves, surface-water measurements", { 4 ) - url <- httr2::request( - "https://waterservices.usgs.gov/nwis/site/?format=rdb&seriesCatalogOutput=true&sites=05114000" - ) - x <- importRDB1(url) - siteID <- "USGS-263819081585801" gwl_1 <- read_waterdata_field_measurements(monitoring_location_id = siteID) expect_equal(unique(gwl_1$monitoring_location_id), siteID) @@ -164,6 +194,31 @@ test_that("read_waterdata_daily", { ) expect_is(raw_waterdata_daily$time, "Date") + raw_waterdata_daily_no_start <- read_waterdata_daily( + monitoring_location_id = siteNumber, + parameter_code = pCode, + time = c(NA, endDate) + ) + expect_equal( + max(raw_waterdata_daily_no_start$time), + max(raw_waterdata_daily$time) + ) + expect_lt( + min(raw_waterdata_daily_no_start$time), + min(raw_waterdata_daily$time) + ) + + raw_waterdata_daily_no_end <- read_waterdata_daily( + monitoring_location_id = siteNumber, + parameter_code = pCode, + time = c(startDate, NA) + ) + expect_gt(max(raw_waterdata_daily_no_end$time), max(raw_waterdata_daily$time)) + expect_equal( + min(raw_waterdata_daily_no_end$time), + min(raw_waterdata_daily$time) + ) + raw_waterdata_TempMeanMax <- read_waterdata_daily( monitoring_location_id = siteNumber, parameter_code = c("00010", "00060"), @@ -381,6 +436,7 @@ test_that("Construct USGS urls", { url_daily <- construct_api_requests( service = "daily", + output_id = "daily_id", monitoring_location_id = siteNumber, parameter_code = pCode, time = c(startDate, endDate), @@ -389,9 +445,8 @@ test_that("Construct USGS urls", { ) # nolint start: line_length_linter - expect_equal( - url_daily$url, - "https://api.waterdata.usgs.gov/ogcapi/v0/collections/daily/items?f=json&lang=en-US&skipGeometry=FALSE&monitoring_location_id=USGS-01594440¶meter_code=00060,00010&time=2024-01-01%2F..&statistic_id=00003,00001&limit=10000" + expect_true( + grepl(x = url_daily$url, pattern = 
"parameter_code=00060,00010") ) url_works <- dataRetrieval:::walk_pages(url_daily) @@ -399,14 +454,17 @@ test_that("Construct USGS urls", { url_ts_meta <- construct_api_requests( monitoring_location_id = siteNumber, + output_id = "time_series_id", parameter_code = pCode, service = "time-series-metadata", limit = 10000 ) - expect_equal( - url_ts_meta$url, - "https://api.waterdata.usgs.gov/ogcapi/v0/collections/time-series-metadata/items?f=json&lang=en-US&skipGeometry=FALSE&monitoring_location_id=USGS-01594440¶meter_code=00060,00010&limit=10000" + expect_true( + grepl( + x = url_ts_meta$url, + pattern = "collections/time-series-metadata/items" + ) ) url_works_ts <- dataRetrieval:::walk_pages(url_ts_meta) @@ -414,13 +472,13 @@ test_that("Construct USGS urls", { url_ml <- construct_api_requests( id = siteNumber, + output_id = "monitoring_location_id", service = "monitoring-locations", limit = 50000 ) - expect_equal( - url_ml$url, - "https://api.waterdata.usgs.gov/ogcapi/v0/collections/monitoring-locations/items?f=json&lang=en-US&skipGeometry=FALSE&id=USGS-01594440&limit=50000" + expect_true( + grepl(x = url_ml$url, pattern = "id=USGS-01594440") ) url_works_ml <- dataRetrieval:::walk_pages(url_ml) @@ -560,6 +618,18 @@ test_that("bad_properties", { time = c("2021-01-01", "2022-01-01"), properties = c("value", "time", "blah") )) + + # No paging + dv_data_quick <- read_waterdata_daily( + monitoring_location_id = "USGS-02238500", + parameter_code = "00060", + no_paging = TRUE + ) + + expect_type(dv_data_quick$parameter_code, "character") + expect_is(dv_data_quick$time, "Date") + expect_equal(dv_data_quick$parameter_code[1], "00060") + # Empty result: expect_message(read_waterdata_daily( monitoring_location_id = "USGS-02238500", diff --git a/tutorials/images/help_file_2.png b/tutorials/images/help_file_2.png index 232e10289..52af3b94c 100644 Binary files a/tutorials/images/help_file_2.png and b/tutorials/images/help_file_2.png differ diff --git 
a/tutorials/quick_intro_deck.qmd b/tutorials/quick_intro_deck.qmd index 7046eef8a..b332a4427 100644 --- a/tutorials/quick_intro_deck.qmd +++ b/tutorials/quick_intro_deck.qmd @@ -15,44 +15,61 @@ title-slide-attributes: data-background-size: 15% data-background-position: 2% 2% editor: source +engine: knitr editor_options: chunk_output_type: console execute: echo: true warning: false message: false +params: + run_python: true --- ```{r} #| echo: false #| include: false -# library(dataRetrieval) +#| # library(dataRetrieval) library(ggplot2) library(dplyr) +library(reticulate) +py_require("dataretrieval") options(dplyr.summarise.inform = FALSE) -dt_me <- function(x, - page_length = 8, - paging = TRUE, - font = "0.7em", - escape = TRUE){ - DT::datatable(x, - rownames = FALSE, - options = list(pageLength = page_length, - info = FALSE, - searching = FALSE, - paging = paging, - lengthChange = FALSE, - initComplete = htmlwidgets::JS( - "function(settings, json) {", - paste0("$(this.api().table().container()).css({'font-size': '", - font, "'});"), - "}")), escape = escape) +evaluate_python <- params$run_python + +dt_me <- function( + x, + page_length = 8, + paging = TRUE, + font = "0.7em", + escape = TRUE +) { + DT::datatable( + x, + rownames = FALSE, + options = list( + pageLength = page_length, + info = FALSE, + searching = FALSE, + paging = paging, + lengthChange = FALSE, + initComplete = htmlwidgets::JS( + "function(settings, json) {", + paste0( + "$(this.api().table().container()).css({'font-size': '", + font, + "'});" + ), + "}" + ) + ), + escape = escape + ) } theme_set(theme_grey(base_size = 24)) -update_geom_defaults("point", list(size = 3)) - +update_geom_defaults("point", list(size = 3)) ``` @@ -111,11 +128,14 @@ In this ~45 minute introduction, the goal is: `dataRetrieval` is available on the Comprehensive R Archive Network (CRAN) repository. 
To install `dataRetrieval` on your computer, open RStudio and run this line of code in the Console: +::: {.panel-tabset} + +### R + ```{r} #| echo: true #| eval: false install.packages("dataRetrieval") - ``` Then each time you open R, you'll need to load the library: @@ -125,6 +145,24 @@ Then each time you open R, you'll need to load the library: library(dataRetrieval) ``` +### Python + +```{bash} +#| echo: true +#| eval: false +pip install dataretrieval + +``` + +Then each time you open Python, you'll need to load the library: + +```{python} +#| eval: !expr evaluate_python +from dataretrieval import waterdata +``` + +::: + ::: footer ::: @@ -157,7 +195,7 @@ Within R, you can call help files for any `dataRetrieval` function: ```{r} #| echo: true #| eval: false -?readWQPdata +?read_waterdata_daily ``` :::: {.columns} @@ -179,14 +217,12 @@ Examples ```{r} #| eval: false -# Legacy: -nameToUse <- "pH" -pHData <- readWQPdata(siteid = "USGS-04024315", - characteristicName = nameToUse) -ncol(pHData) -attr(pHData, "siteInfo") -attr(pHData, "queryTime") -attr(pHData, "url") +site <- "USGS-02238500" +dv_data_sf <- read_waterdata_daily( + monitoring_location_id = site, + parameter_code = "00060", + time = c("2021-01-01", "2022-01-01") +) ``` ::: @@ -370,10 +406,14 @@ We're going walk through 3 retrievals: ::: -## Workflow 1: Daily data for known site +## Workflow 1: Daily data for known site {.smaller} Let's pull daily mean discharge data for site "USGS-0940550", getting all the data from October 10, 2024 onward. 
+::: {.panel-tabset} + +### R + ```{r} #| message: true library(dataRetrieval) @@ -382,13 +422,39 @@ pcode <- "00060" # Discharge stat_cd <- "00003" # Mean range <- c("2024-10-01", NA) -df <- read_waterdata_daily(monitoring_location_id = site, - parameter_code = pcode, - statistic_id = stat_cd, - time = range) +df <- read_waterdata_daily( + monitoring_location_id = site, + parameter_code = pcode, + statistic_id = stat_cd, + time = range +) +nrow(df) ``` +### Python + +```{python} +#| eval: !expr evaluate_python +from dataretrieval import waterdata + +site = "USGS-09405500" +pcode = "00060" # Discharge +stat_cd = "00003" # Mean + +df = waterdata.get_daily( + monitoring_location_id=site, + parameter_code=pcode, + statistic_id=stat_cd, + time="2024-10-01/..", +) + +df[0].shape[0] +``` + + +::: + ::: footer ::: @@ -399,12 +465,11 @@ In RStudio, click on the data frame in the upper right Environment tab to open a ```{r} #| echo: false - -dt_me(df |> - sf::st_drop_geometry(), - page_length = 3) - - +dt_me( + df |> + sf::st_drop_geometry(), + page_length = 3 +) ``` ::: footer @@ -421,10 +486,7 @@ Let's use `ggplot2` to visualize the data. library(ggplot2) ggplot(data = df) + - geom_point(aes(x = time, - y = value, - color = approval_status)) - + geom_point(aes(x = time, y = value, color = approval_status)) ``` ## Water Data API Notes: Argument input @@ -444,9 +506,10 @@ Use your "tab" key! 
```{r} #| eval: false #| echo: true -discharge <- read_waterdata_daily(parameter_code = "00060", - statistic_id = "00003") - +discharge <- read_waterdata_daily( + parameter_code = "00060", + statistic_id = "00003" +) ``` ::: {.fragment} @@ -492,9 +555,11 @@ Here are a bunch of valid inputs: time = "2025-01-01" time = as.Date("2025-01-01") time = "2025-01-01T23:20:50Z" -time = as.POSIXct("2025-01-01T23:20:50Z", - format = "%Y-%m-%dT%H:%M:%S", - tz = "UTC") +time = as.POSIXct( + "2025-01-01T23:20:50Z", + format = "%Y-%m-%dT%H:%M:%S", + tz = "UTC" +) # Ask for specific range time = c("2024-01-01", "2025-01-01") # or Dates or POSIXs # Asking beginning of record to specific end: @@ -517,22 +582,46 @@ Use your "tab" key! ![](images/autocomplete_samples.png) -## Workflow 2: Discrete data for known site +## Workflow 2: Discrete data for known site {.smaller} Let's get orthophosphate ("00660") data from the Shenandoah River at Front Royal, VA ("USGS-01631000"). +::: {.panel-tabset} + +### R + ```{r} #| message: true site <- "USGS-01631000" pcode <- "00660" -qw_data <- read_waterdata_samples(monitoringLocationIdentifier = site, - usgsPCode = pcode, - dataType = "results", - dataProfile = "basicphyschem") +qw_data <- read_waterdata_samples( + monitoringLocationIdentifier = site, + usgsPCode = pcode, + dataType = "results", + dataProfile = "basicphyschem" +) ncol(qw_data) ``` +### Python +```{python} +#| eval: !expr evaluate_python +site = "USGS-01631000" +pcode = "00660" + +qw_data = waterdata.get_samples( + monitoringLocationIdentifier=site, + usgsPCode=pcode, + service="results", + profile="basicphyschem", +) + +qw_data[0].shape[1] +``` + +::: + That's a LOT of columns returned. We won't look at them here, but you can use `View` in RStudio to explore on your own. ::: footer @@ -549,21 +638,31 @@ That's a LOT of columns returned. 
We won't look at them here, but you can use `V ```{r} #| echo: false - -df <- tibble(dataType = c("results", "locations", "activities", "projects", "organizations"), - Description = c("Results data and metadata for measures and observations matching your query", - "Find monitoring locations that have data matching your query", - "Information about the monitoring activities conducted that produced data", - "Information on the projects that have results matching your data query", - "Information about the organizations that have provided data that matches your query"), - dataProfile = c('fullphyschem
basicphyschem
fullbio
basicbio
narrow
resultdetectionquantitationlimit
labsampleprep
count', - 'site
count', - 'sampact
actmetric
actgroup
ncount', - 'project
projectmonitoringlocationweight', - 'organization
count')) +df <- tibble( + dataType = c( + "results", + "locations", + "activities", + "projects", + "organizations" + ), + Description = c( + "Results data and metadata for measures and observations matching your query", + "Find monitoring locations that have data matching your query", + "Information about the monitoring activities conducted that produced data", + "Information on the projects that have results matching your data query", + "Information about the organizations that have provided data that matches your query" + ), + dataProfile = c( + 'fullphyschem
basicphyschem
fullbio
basicbio
narrow
resultdetectionquantitationlimit
labsampleprep
count', + 'site
count', + 'sampact
actmetric
actgroup
ncount', + 'project
projectmonitoringlocationweight', + 'organization
count' + ) +) dt_me(df, escape = FALSE, paging = FALSE) - ``` ::: footer @@ -590,10 +689,11 @@ p_code_rt <- "99133" start_date <- "2024-01-01" end_date <- "2024-06-01" -continuous_data <- read_waterdata_continuous(monitoring_location_id = site_id, - parameter_code = p_code_rt, - time = c(start_date, end_date)) - +continuous_data <- read_waterdata_continuous( + monitoring_location_id = site_id, + parameter_code = p_code_rt, + time = c(start_date, end_date) +) ``` ::: @@ -626,8 +726,7 @@ https://api.waterdata.usgs.gov/ogcapi/v0/collections/continuous/items?f=json&lan ```{r} #| output-location: column ggplot(data = continuous_data) + - geom_point(aes(x = time, - y = value)) + geom_point(aes(x = time, y = value)) ``` @@ -647,18 +746,19 @@ The next slides will demo how to use those. ## Data Discovery: Time Series {.smaller} ```{r} -ts_available <- read_waterdata_combined_meta(monitoring_location_id = "USGS-04183500") +ts_available <- read_waterdata_combined_meta( + monitoring_location_id = "USGS-04183500" +) ``` ```{r} #| echo: false - -dt_me(ts_available |> - sf::st_drop_geometry() |> - select(data_type, - parameter_name, - parameter_code, statistic_id, begin, end), page_length = 6) - +dt_me( + ts_available |> + sf::st_drop_geometry() |> + select(data_type, parameter_name, parameter_code, statistic_id, begin, end), + page_length = 6 +) ``` ::: footer @@ -668,19 +768,24 @@ dt_me(ts_available |> ## Data Discovery: Discrete {.smaller} ```{r} -discrete_available <- summarize_waterdata_samples(monitoringLocationIdentifier = "USGS-04183500") - +discrete_available <- summarize_waterdata_samples( + monitoringLocationIdentifier = "USGS-04183500" +) ``` ```{r} #| echo: false - -dt_me(discrete_available |> - select(characteristicUserSupplied, - resultCount, activityCount, - firstActivity, mostRecentActivity), - page_length = 6) - +dt_me( + discrete_available |> + select( + characteristicUserSupplied, + resultCount, + activityCount, + firstActivity, + mostRecentActivity + ), + 
page_length = 6 +) ``` ::: footer @@ -692,8 +797,10 @@ dt_me(discrete_available |> * characteristicUserSupplied can be an input to `read_waterdata_sample` ```{r} -discrete1 <- read_waterdata_samples(characteristicUserSupplied = "Phosphorus as phosphorus, water, unfiltered", - monitoringLocationIdentifier = "USGS-04183500") +discrete1 <- read_waterdata_samples( + characteristicUserSupplied = "Phosphorus as phosphorus, water, unfiltered", + monitoringLocationIdentifier = "USGS-04183500" +) nrow(discrete1) ``` diff --git a/vignettes/Reference_Lists.Rmd b/vignettes/Reference_Lists.Rmd new file mode 100644 index 000000000..4614bb105 --- /dev/null +++ b/vignettes/Reference_Lists.Rmd @@ -0,0 +1,243 @@ +--- +title: "USGS Reference Lists" +editor_options: + chunk_output_type: console +output: + rmarkdown::html_vignette: + toc: true + number_sections: false +vignette: > + %\VignetteIndexEntry{USGS Reference Lists} + \usepackage[utf8]{inputenc} + %\VignetteEngine{knitr::rmarkdown} +--- + + +```{r setup, include=FALSE, message=FALSE} +library(knitr) +library(dataRetrieval) + +options(continue = " ", + width = 50) + +knitr::opts_chunk$set( + echo = TRUE, + message = FALSE, + warning = FALSE, + fig.height = 4, + fig.width = 7 +) +``` + +## USGS Reference Lists + +### Agency Codes + +`r dataRetrieval:::get_description("agency-codes")` + +```{r} +agency_codes <- read_waterdata_metadata("agency-codes") +head(agency_codes) +``` + +### Altitude Datums + +`r dataRetrieval:::get_description("altitude-datums")` + +```{r} +altitude_datums <- read_waterdata_metadata("altitude-datums") +head(altitude_datums) +``` + + +### Aquifer Codes + +`r dataRetrieval:::get_description("aquifer-codes")` + +```{r} +aquifer_codes <- read_waterdata_metadata("aquifer-codes") +head(aquifer_codes) +``` + +### Aquifer Types + +`r dataRetrieval:::get_description("aquifer-types")` + +```{r} +aquifer_types <- read_waterdata_metadata("aquifer-types") +head(aquifer_types) +``` + +### Coordinate Accuracy Codes + 
+`r dataRetrieval:::get_description("coordinate-accuracy-codes")` + +```{r} +coordinate_accuracy_codes <- read_waterdata_metadata("coordinate-accuracy-codes") +head(coordinate_accuracy_codes) +``` + +### Coordinate Datum Codes + +`r dataRetrieval:::get_description("coordinate-datum-codes")` + +```{r} +coordinate_datum_codes <- read_waterdata_metadata("coordinate-datum-codes") +head(coordinate_datum_codes) +``` + +### Coordinate Method Codes + +`r dataRetrieval:::get_description("coordinate-method-codes")` + +```{r} +coordinate_method_codes <- read_waterdata_metadata("coordinate-method-codes") +head(coordinate_method_codes) +``` + +### Country Identifiers + +`r dataRetrieval:::get_description("countries")` + +```{r} +countries <- read_waterdata_metadata("countries") +head(countries) +``` + +### County Identifiers + +`r dataRetrieval:::get_description("counties")` + +```{r} +counties <- read_waterdata_metadata("counties") +head(counties) +``` + +### Hydrologic Unit Codes + +`r dataRetrieval:::get_description("hydrologic-unit-codes")` + +```{r} +huc_codes <- read_waterdata_metadata("hydrologic-unit-codes") +head(huc_codes) +``` + + +### Medium Codes + +`r dataRetrieval:::get_description("medium-codes")` + +```{r} +medium_codes <- read_waterdata_metadata("medium-codes") +head(medium_codes) +``` + +### Methods + +`r dataRetrieval:::get_description("methods")` + +```{r} +methods <- read_waterdata_metadata("methods") +head(methods) +``` + +### Method Categories + +`r dataRetrieval:::get_description("method-categories")` + +```{r} +method_categories <- read_waterdata_metadata("method-categories") +head(method_categories) +``` + +### Method Citations + +`r dataRetrieval:::get_description("method-citations")` + +```{r} +method_citations <- read_waterdata_metadata("method-citations") +head(method_citations) +``` + +### Citations + +`r dataRetrieval:::get_description("citations")` + +```{r} +citations <- read_waterdata_metadata("citations") +head(citations) +``` + + +### 
National Aquifer Codes + +`r dataRetrieval:::get_description("national-aquifer-codes")` + +```{r} +national_aquifer_codes <- read_waterdata_metadata("national-aquifer-codes") +head(national_aquifer_codes) +``` + +### Parameter Codes + +`r dataRetrieval:::get_description("parameter-codes")` + +```{r} +parameter_codes <- read_waterdata_metadata("parameter-codes") +head(parameter_codes) +``` + +### Reliability Codes + +`r dataRetrieval:::get_description("reliability-codes")` + +```{r} +reliability_codes <- read_waterdata_metadata("reliability-codes") +head(reliability_codes) +``` + +### Site Types + +`r dataRetrieval:::get_description("site-types")` + +```{r} +site_types <- read_waterdata_metadata("site-types") +head(site_types) +``` + +### State Identifiers + +`r dataRetrieval:::get_description("states")` + +```{r} +states <- read_waterdata_metadata("states") +head(states) +``` + +### Statistic Codes + +`r dataRetrieval:::get_description("statistic-codes")` + +```{r} +statistic_codes <- read_waterdata_metadata("statistic-codes") +head(statistic_codes) +``` + + +### Topographic Codes + +`r dataRetrieval:::get_description("topographic-codes")` + +```{r} +topographic_codes <- read_waterdata_metadata("topographic-codes") +head(topographic_codes) +``` + +### Time Zone Codes + +`r dataRetrieval:::get_description("time-zone-codes")` + +```{r} +time_zone_codes <- read_waterdata_metadata("time-zone-codes") +head(time_zone_codes) +``` diff --git a/vignettes/movingAverages.Rmd b/vignettes/movingAverages.Rmd deleted file mode 100644 index 96b024b4e..000000000 --- a/vignettes/movingAverages.Rmd +++ /dev/null @@ -1,278 +0,0 @@ ---- -title: "Calculating Moving Averages and Historical Flow Quantiles" -author: "Laura DeCicco" -date: "2016-10-25" -output: - rmarkdown::html_vignette: - toc: true - fig_caption: yes - fig_height: 7 - fig_width: 7 -vignette: > - %\VignetteIndexEntry{Calculating Moving Averages and Historical Flow Quantiles} - \usepackage[utf8]{inputenc} - %\VignetteEngine{knitr::rmarkdown} 
-editor_options: - chunk_output_type: console ---- - -**WARNING** - -This post is very old! A better way to do all these plots and calculations can be found here: - -**WARNING** - -This post will show simple way to calculate moving averages, calculate historical-flow quantiles, and plot that information. The goal is to reproduce the graph at this link: -[PA Graph](http://pa.water.usgs.gov/drought/indicators/sw/images/f30_01538000.html). The motivation for this post was inspired by a USGS colleague that that is considering creating these type of plots in R. We thought this plot provided an especially fun challenge - maybe you will, too! - -First we get the data using the [dataRetrieval](https://CRAN.R-project.org/package=dataRetrieval) package. The siteNumber and parameterCd could be adjusted for other sites or measured parameters. In this example, we are getting discharge (parameter code 00060) at a site in PA. - -It may be important to note that this script is a bit lazy in handling leap days. 
- -## Get data using dataRetrieval - -```{r message=FALSE} -library(dataRetrieval) - -# Retrieve daily Q -siteNumber <- c("01538000") -parameterCd <- "00060" # Discharge -dailyQ <- readNWISdv(siteNumber, parameterCd) -dailyQ <- renameNWISColumns(dailyQ) -stationInfo <- readNWISsite(siteNumber) -nrow(dailyQ) -``` - -## Calculate moving average - -Next, we calculate a 30-day moving average on all of the flow data: - -```{r message=FALSE} -library(dplyr) -library(zoo) - -# Check for missing days, if so, add NA rows: -if (as.numeric(diff(range(dailyQ$Date))) != (nrow(dailyQ) + 1)) { - fullDates <- seq( - from = min(dailyQ$Date), - to = max(dailyQ$Date), by = "1 day" - ) - fullDates <- data.frame( - Date = fullDates, - agency_cd = unique(dailyQ$agency_cd), - site_no = unique(dailyQ$site_no) - ) - dailyQ <- fullDates %>% - left_join(dailyQ, - by = c("Date", "agency_cd", "site_no") - ) %>% - arrange(Date) -} - -dailyQ <- dailyQ %>% - mutate( - rollMean = rollmean(Flow, 30, fill = NA, align = "center"), - day.of.year = as.numeric(strftime(Date, - format = "%j" - )) - ) -``` - -## Calculate historical percentiles - -We can use the `quantile` function to calculate historical percentile flows. Then use the `loess` function for smoothing. The argument `smooth.span` defines how much smoothing should be applied. To get a smooth transistion at the start of the graph, we can add include an earlier year which is not plotted at the end. 
- -```{r message=FALSE} -summaryQ <- dailyQ %>% - group_by(day.of.year) %>% - summarize( - p75 = quantile(rollMean, probs = .75, na.rm = TRUE), - p25 = quantile(rollMean, probs = .25, na.rm = TRUE), - p10 = quantile(rollMean, probs = 0.1, na.rm = TRUE), - p05 = quantile(rollMean, probs = 0.05, na.rm = TRUE), - p00 = quantile(rollMean, probs = 0, na.rm = TRUE) - ) - -current.year <- as.numeric(strftime(Sys.Date(), format = "%Y")) - -summary.0 <- summaryQ %>% - mutate( - Date = as.Date(day.of.year - 1, - origin = paste0(current.year - 2, "-01-01") - ), - day.of.year = day.of.year - 365 - ) -summary.1 <- summaryQ %>% - mutate(Date = as.Date(day.of.year - 1, - origin = paste0(current.year - 1, "-01-01") - )) -summary.2 <- summaryQ %>% - mutate( - Date = as.Date(day.of.year - 1, - origin = paste0(current.year, "-01-01") - ), - day.of.year = day.of.year + 365 - ) - -summaryQ <- bind_rows(summary.0, summary.1, summary.2) - - -smooth.span <- 0.3 - -summaryQ$sm.75 <- predict(loess(p75 ~ day.of.year, data = summaryQ, span = smooth.span)) -summaryQ$sm.25 <- predict(loess(p25 ~ day.of.year, data = summaryQ, span = smooth.span)) -summaryQ$sm.10 <- predict(loess(p10 ~ day.of.year, data = summaryQ, span = smooth.span)) -summaryQ$sm.05 <- predict(loess(p05 ~ day.of.year, data = summaryQ, span = smooth.span)) -summaryQ$sm.00 <- predict(loess(p00 ~ day.of.year, data = summaryQ, span = smooth.span)) - -latest.years <- dailyQ %>% - filter(Date >= as.Date(paste0(current.year - 1, "-01-01"))) %>% - mutate(day.of.year = seq_len(nrow(.))) - -# Let's just take the middle chunk: -summaryQ <- summaryQ %>% - filter(day.of.year %in% 1:365) - -summaryQ <- summaryQ %>% - bind_rows( - summaryQ, - summaryQ - ) %>% - mutate(day.of.year = seq_len(nrow(.)) - 365) -``` - -## Plot using base R - -Many of the graphical requirements defined by the USGS are difficult to achieve in `ggplot2`. 
Base R plotting can be used to obtain these types of graphs: - -```{r fig.cap="Simple 30-day moving average daily flow plot using base R"} - -title.text <- paste0( - stationInfo$station_nm, "\n", - "Provisional Data - Subject to change\n", - "Record Start = ", min(dailyQ$Date), - " Number of years = ", - as.integer(as.numeric(difftime( - time1 = max(dailyQ$Date), - time2 = min(dailyQ$Date), - units = "weeks" - )) / 52.25), - "\nDate of plot = ", Sys.Date(), - " Drainage Area = ", stationInfo$drain_area_va, "mi^2" -) - -mid.month.days <- c(15, 45, 74, 105, 135, 166, 196, 227, 258, 288, 319, 349) -month.letters <- c("J", "F", "M", "A", "M", "J", "J", "A", "S", "O", "N", "D") -start.month.days <- c(1, 32, 61, 92, 121, 152, 182, 214, 245, 274, 305, 335) -label.text <- c("Normal", "Drought Watch", "Drought Warning", "Drought Emergency") - -plot(latest.years$day.of.year, latest.years$rollMean, - ylim = c(1, 1000), xlim = c(1, 733), - log = "y", axes = FALSE, type = "n", xaxs = "i", yaxs = "i", - ylab = "30-day moving ave", - xlab = "" -) -title(title.text, cex.main = 0.75) -polygon(c(summaryQ$day.of.year, rev(summaryQ$day.of.year)), - c(summaryQ$sm.75, rev(summaryQ$sm.25)), - col = "darkgreen", border = FALSE -) -polygon(c(summaryQ$day.of.year, rev(summaryQ$day.of.year)), - c(summaryQ$sm.25, rev(summaryQ$sm.10)), - col = "yellow", border = FALSE -) -polygon(c(summaryQ$day.of.year, rev(summaryQ$day.of.year)), - c(summaryQ$sm.10, rev(summaryQ$sm.05)), - col = "orange", border = FALSE -) -polygon(c(summaryQ$day.of.year, rev(summaryQ$day.of.year)), - c(summaryQ$sm.05, rev(summaryQ$sm.00)), - col = "red", border = FALSE -) -lines(latest.years$day.of.year, latest.years$rollMean, - lwd = 2, col = "black" -) -abline(v = 366) -axis(2, las = 1, at = c(1, 100, 1000), tck = -0.02) -axis(2, at = c(seq(1, 90, by = 10)), labels = NA, tck = -0.01) -axis(2, at = c(seq(100, 1000, by = 100)), labels = NA, tck = -0.01) -axis(1, - at = c(mid.month.days, 365 + mid.month.days), - labels = 
rep(month.letters, 2), - tick = FALSE, line = -0.5, cex.axis = 0.75 -) -axis(1, - at = c(start.month.days, 365 + start.month.days), - labels = NA, tck = -0.02 -) -axis(1, - at = c(182, 547), labels = c(current.year - 1, current.year), - line = .5, tick = FALSE -) -legend("bottom", label.text, - horiz = TRUE, - fill = c("darkgreen", "yellow", "orange", "red"), - inset = c(0, 0), xpd = TRUE, bty = "n", cex = 0.75 -) -box() -``` - - -## Plot using ggplot2 - -Finally, we can also try to create the graph using the `ggplot2` package. The following script shows a simple way to re-create the graph in `ggplot2` with no effort on imitating desired style: - - -```{r fig.cap="Simple 30-day moving average daily flow plot using ggplot2", alt.text = "30-day moving average daily flow plot, no effort on style", message=FALSE, warning=FALSE, fig.height=5} -library(ggplot2) - -simple.plot <- ggplot(data = summaryQ, aes(x = day.of.year)) + - geom_ribbon(aes(ymin = sm.25, ymax = sm.75, fill = "Normal")) + - geom_ribbon(aes(ymin = sm.10, ymax = sm.25, fill = "Drought Watch")) + - geom_ribbon(aes(ymin = sm.05, ymax = sm.10, fill = "Drought Warning")) + - geom_ribbon(aes(ymin = sm.00, ymax = sm.05, fill = "Drought Emergency")) + - scale_y_log10(limits = c(1, 1000)) + - geom_line(data = latest.years, aes(x = day.of.year, y = rollMean, color = "30-Day Mean"), size = 2) + - geom_vline(xintercept = 365) - -simple.plot -``` - -Next, we can play with various options to do a better job to imitate the style: - -```{r fig.cap="Detailed 30-day moving average daily flow plot", alt.text = "30-day moving average daily flow plot", message=FALSE, warning=FALSE} - -styled.plot <- simple.plot + - scale_x_continuous( - breaks = c(mid.month.days, 365 + mid.month.days), - labels = rep(month.letters, 2), - expand = c(0, 0), - limits = c(0, 730) - ) + - annotation_logticks(sides = "l") + - expand_limits(x = 0) + - annotate( - geom = "text", - x = c(182, 547), - y = 1, - label = c(current.year - 1, 
current.year), size = 4 - ) + - theme_bw() + - theme( - axis.ticks.x = element_blank(), - panel.grid.major = element_blank(), - panel.grid.minor = element_blank() - ) + - labs(title = title.text, - y = "30-day moving ave", x = "" - ) + - scale_fill_manual( - name = "", breaks = label.text, - values = c("red", "orange", "yellow", "darkgreen") - ) + - scale_color_manual(name = "", values = "black") + - theme(legend.position = "bottom") - -styled.plot -``` - diff --git a/vignettes/read_waterdata_functions.Rmd b/vignettes/read_waterdata_functions.Rmd index 5cb9c37de..cb2e94cfa 100644 --- a/vignettes/read_waterdata_functions.Rmd +++ b/vignettes/read_waterdata_functions.Rmd @@ -445,171 +445,7 @@ leaflet(data = what_huc_sites |> There is a new function `read_waterdata_metadata` that gives access to a wide variety of tables that have metadata information. Any returned column can also be filtered on, similar to the time series functions above. -### Agency Codes - -`r dataRetrieval:::get_description("agency-codes")` - -```{r} -#| eval: false -agency_codes <- read_waterdata_metadata("agency-codes") -``` - -### Altitude Datums - -`r dataRetrieval:::get_description("altitude-datums")` - -```{r} -#| eval: false -altitude_datums <- read_waterdata_metadata("altitude-datums") -``` - - -### Aquifer Codes - -`r dataRetrieval:::get_description("aquifer-codes")` - -```{r} -#| eval: false -aquifer_codes <- read_waterdata_metadata("aquifer-codes") -``` - -### Aquifer Types - -`r dataRetrieval:::get_description("aquifer-types")` - -```{r} -#| eval: false -aquifer_types <- read_waterdata_metadata("aquifer-types") -``` - -### Coordinate Accuracy Codes - -`r dataRetrieval:::get_description("coordinate-accuracy-codes")` - -```{r} -#| eval: false -coordinate_accuracy_codes <- read_waterdata_metadata("coordinate-accuracy-codes") -``` - -### Coordinate Datum Codes - -`r dataRetrieval:::get_description("coordinate-accuracy-codes")` - -```{r} -#| eval: false -coordinate_datum_codes <- 
read_waterdata_metadata("coordinate-datum-codes") -``` - -### Coordinate Method Codes - -`r dataRetrieval:::get_description("coordinate-method-codes")` - -```{r} -#| eval: false -coordinate_method_codes <- read_waterdata_metadata("coordinate-method-codes") -``` - -### County Identifiers - -`r dataRetrieval:::get_description("counties")` - -```{r} -#| eval: false -counties <- read_waterdata_metadata("counties") -``` - -### Hydrologic Unit Codes - -`r dataRetrieval:::get_description("hydrologic-unit-codes")` - -```{r} -#| eval: false -huc_codes <- read_waterdata_metadata("hydrologic-unit-codes") -``` - - -### Medium Codes - -`r dataRetrieval:::get_description("medium-codes")` - -```{r} -#| eval: false -medium_codes <- read_waterdata_metadata("medium-codes") -``` - -### National Aquifer Codes - -`r dataRetrieval:::get_description("national-aquifer-codes")` - -```{r} -#| eval: false -medium_codes <- read_waterdata_metadata("medium-codes") -``` - -### Parameter Codes - -`r dataRetrieval:::get_description("parameter-codes")` - -```{r} -#| eval: false -parameter_codes <- read_waterdata_metadata("parameter-codes") -``` - -### Reliability Codes - -`r dataRetrieval:::get_description("reliability-codes")` - -```{r} -#| eval: false -reliability_codes <- read_waterdata_metadata("reliability-codes") -``` - -### Site Types - -`r dataRetrieval:::get_description("site-types")` - -```{r} -#| eval: false -site_types <- read_waterdata_metadata("site-types") -``` - -### State Identifiers - -`r dataRetrieval:::get_description("states")` - -```{r} -#| eval: false -states <- read_waterdata_metadata("states") -``` - -### Statistic Codes - -`r dataRetrieval:::get_description("statistic-codes")` - -```{r} -#| eval: false -statistic_codes <- read_waterdata_metadata("statistic-codes") -``` - - -### Topographic Codes - -`r dataRetrieval:::get_description("topographic-codes")` - -```{r} -#| eval: false -topographic_codes <- read_waterdata_metadata("topographic-codes") -``` - -### Time Zone Codes 
- -`r dataRetrieval:::get_description("time-zone-codes")` - -```{r} -#| eval: false -time_zone_codes <- read_waterdata_metadata("time-zone-codes") -``` - +See [USGS Reference Lists](articles/Reference_Lists.html) for more detail. ## Discrete Samples diff --git a/vignettes/samples_data.Rmd b/vignettes/samples_data.Rmd index 8895130da..e3928f873 100644 --- a/vignettes/samples_data.Rmd +++ b/vignettes/samples_data.Rmd @@ -202,7 +202,7 @@ Let's say we don't know a USGS site number, but we do have an area of interest. North and south are latitude values; east and west are longitude values. A vector of 4 (west, south, east, north) is expected. -```{r} +```{r eval=FALSE} bbox <- c(-90.8, 44.2, -89.9, 45.0) user_char <- "Phosphorus as phosphorus, water, unfiltered" @@ -214,9 +214,6 @@ bbox_sites <- read_waterdata_samples(boundingBox = bbox, ``` -```{r echo=FALSE, message=FALSE} -map_it(bbox_sites) -``` ### Hydrologic Unit Codes (HUCs) diff --git a/vignettes/statsServiceMap.Rmd b/vignettes/statsServiceMap.Rmd deleted file mode 100644 index 08b218bb6..000000000 --- a/vignettes/statsServiceMap.Rmd +++ /dev/null @@ -1,197 +0,0 @@ ---- -title: "Using the dataRetrieval Stats Service" -author: "David Watkins" -date: "2016-10-05" -output: - rmarkdown::html_vignette: - toc: true - fig_caption: yes - fig_height: 7 - fig_width: 7 -vignette: > - %\VignetteIndexEntry{Using the dataRetrieval Stats Service} - \usepackage[utf8]{inputenc} - %\VignetteEngine{knitr::rmarkdown} ---- - -# Introduction - -This script utilizes the new `dataRetrieval` package access to the [USGS Statistics Web Service](https://waterservices.usgs.gov/docs/statistics/). We will be pulling daily mean data using the daily value service in `readNWISdata`, and using the stats service data to put it in the context of the site's history. Here we are retrieving data for July 12th in the Upper Midwest, where a major storm system had recently passed through. 
You can modify this script to look at other areas and dates simply by modifying the `states` and `storm.date` objects. - -To run this code, we recommend having either `dataRetreival` version 2.5.13 (currently the latest release on CRAN) or version 2.6.1 (currently the latest Github release). - -# Get the data - -There are two separate `dataRetrieval` calls here — one to retrieve the daily discharge data, and one to retrieve the historical discharge statistics. Both calls are inside loops to split them into smaller pieces, to accomodate web service restrictions. The daily values service allows only single states as a filter, so we loop over the list of states. The stats service does not allow requests of more than ten sites, so the loop iterates by groups of ten site codes. Retrieving the data can take a few tens of seconds. Once we have both the daily value and statistics data, the two data frames are joined by site number via [dplyr's](https://cran.rstudio.com/web/packages/dplyr/vignettes/introduction.html) `left_join` function. We use a [pipe](https://cran.r-project.org/web/packages/magrittr/vignettes/magrittr.html) to send the output of the join to `na.omit()` function. Then we add a column to the final data frame to hold the color value for each station. 
- -```{r getData, warning=FALSE, message=FALSE} -# example stats service map, comparing real-time current discharge to history for each site -# reusable for other state(s) -# David Watkins June 2016 - - -library(maps) -library(dplyr) -library(lubridate) -library(dataRetrieval) - -# pick state(s) and date -states <- c("WI", "MN", "ND", "SD", "IA") -storm.date <- "2016-07-12" - -# download each state individually -for (st in states) { - stDV <- renameNWISColumns(readNWISdata( - service = "dv", - parameterCd = "00060", - stateCd = st, - startDate = storm.date, - endDate = storm.date - )) - if (st != states[1]) { - storm.data <- full_join(storm.data, stDV) - sites <- full_join(sites, attr(stDV, "siteInfo")) - } else { - storm.data <- stDV - sites <- attr(stDV, "siteInfo") - } -} - -# retrieve stats data, dealing with 10 site limit to stat service requests -reqBks <- seq(1, nrow(sites), by = 10) -statData <- data.frame() -for (i in reqBks) { - getSites <- sites$site_no[i:(i + 9)] - currentSites <- readNWISstat( - siteNumbers = getSites, - parameterCd = "00060", - statReportType = "daily", - statType = c("p10", "p25", "p50", "p75", "p90", "mean") - ) - statData <- rbind(statData, currentSites) -} - -statData.storm <- statData[statData$month_nu == month(storm.date) & - statData$day_nu == day(storm.date), ] - -finalJoin <- left_join(storm.data, statData.storm) -finalJoin <- left_join(finalJoin, sites) - -finalJoin[, grep("_va", names(finalJoin))] <- sapply( - finalJoin[ - , - grep("_va", names(finalJoin)) - ], - function(x) as.numeric(x) -) - -# remove sites without current data -finalJoin <- finalJoin[!is.na(finalJoin$Flow), ] - - -# classify current discharge values -finalJoin$class <- NA - -finalJoin$class[finalJoin$Flow > finalJoin$p75_va] <- "navy" -finalJoin$class[finalJoin$Flow < finalJoin$p25_va] <- "red" - -finalJoin$class[finalJoin$Flow > finalJoin$p25_va & - finalJoin$Flow <= finalJoin$p50_va] <- "green" -finalJoin$class[finalJoin$Flow > finalJoin$p50_va & - 
finalJoin$Flow <= finalJoin$p75_va] <- "blue" - -finalJoin$class[is.na(finalJoin$class) & - finalJoin$Flow > finalJoin$p50_va] <- "cyan" -finalJoin$class[is.na(finalJoin$class) & - finalJoin$Flow < finalJoin$p50_va] <- "yellow" - -# take a look at the columns that we will plot later: -head(finalJoin[, c("dec_lon_va", "dec_lat_va", "class")]) -``` - -# Make the static plot - -The base map consists of two plots. The first makes the county lines with a gray background, and the second overlays the heavier state lines. After that we add the points for each stream gage, colored by the column we added to `finalJoin`. In the finishing details, `grconvertXY` is a handy function that converts your inputs from a normalized (0-1) coordinate system to the actual map coordinates, which allows the legend and scale to stay in the same relative location on different maps. - -```{r plot, fig.cap="Map discharge percentiles"} -# convert states from postal codes to full names -states <- stateCdLookup(states, outputType = "fullName") -par(pty = "s") -map("county", regions = states, fill = TRUE, col = "gray87", lwd = 0.5) -map("state", regions = states, fill = FALSE, lwd = 2, add = TRUE) -points(finalJoin$dec_lon_va, - finalJoin$dec_lat_va, - col = finalJoin$class, pch = 19 -) -title(paste("Daily discharge value percentile rank\n", storm.date), line = 1) -par(mar = c(5.1, 4.1, 4.1, 6), xpd = TRUE) - -legend.colors <- c( - "cyan", "yellow", - "red", - "green", "blue", - "navy" -) -legend.names <- c( - "Q > P50*", "Q < P50*", - "Q < P25", - "P25 < Q < P50", "P50 < Q < P75", - "Q > P75" -) - -legend("bottomleft", - inset = c(0.01, .01), - legend = legend.names, - pch = 19, cex = 0.75, pt.cex = 1.2, - col = legend.colors, - ncol = 2 -) -map.scale( - ratio = FALSE, cex = 0.75, - grconvertX(.07, "npc"), - grconvertY(.2, "npc") -) -text("*Other percentiles not available for these sites", - cex = 0.75, - x = grconvertX(0.2, "npc"), - y = grconvertY(-0.08, "npc") -) -``` - -# Make an interactive 
plot - -Static maps are great for papers and presentations. When possible, interactive maps allow the reader more flexibility to examine the data. The R leaflet package makes it easy to create useful interactive maps. - -```{r leaflet, fig.height=5} -library(leaflet) - -finalJoin$popup <- with(finalJoin, paste( - "", station_nm, - "
", - "Measured Flow:", Flow, - "ft3/s
", - "25% historical:", p25_va, - "ft3/s
", - "50% historical:", p50_va, - "ft3/s
", - "75% historical:", p75_va, - "ft3/s" -)) - -leafMapStat <- leaflet(data = finalJoin) %>% - addProviderTiles("CartoDB.Positron") %>% - addCircleMarkers(~dec_lon_va, ~dec_lat_va, - color = ~class, radius = 3, stroke = FALSE, - fillOpacity = 0.8, opacity = 0.8, - popup = ~popup - ) - -leafMapStat <- addLegend(leafMapStat, - position = "bottomleft", - colors = legend.colors, - labels = legend.names, - opacity = 0.8 -) - -leafMapStat -```