diff --git a/.Rbuildignore b/.Rbuildignore index 01ddd28..bf5b60a 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -18,3 +18,5 @@ ^codemeta\.json$ ^CODE_OF_CONDUCT\.md$ ^src/.*\.o$ +^dev\.R$ +^paper$ \ No newline at end of file diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 64f0e36..ce7e187 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -50,7 +50,15 @@ Please note that the tidyhydro project is released with a [Contributor Code of C ### GOF -[ ] KGE'' --[ ] KGE Score +-[ ] KGE Score [@knobenTechnicalNoteInherent2019] + +-[ ] Weighted KGE [@mizukamiChoiceCalibrationMetrics2019] + +-[ ] MSDR - mean squared deviation ratio [@oliverTutorialGuideGeostatistics2014] + +-[x] RMSE + +-[ ] NRMSE - normalized root mean squared error ### Regression -[ ] PPCC - maximizes the probability plot correlation coefficient [@helselStatisticalMethodsWater2002, p.253] @@ -61,4 +69,13 @@ Please note that the tidyhydro project is released with a [Contributor Code of C -[ ] MSPE - model standard percentage error log and nonlog [@rasmussenGuidelinesProceduresComputing2009, p. 13] --[ ] RMSE \ No newline at end of file +### Stats +-[ ] MAD + +-[x] Geometric Mean (GM) + +-[x] Coefficient of Variation (Cv) + +-[ ] Coefficient of Skewness (g or Cs) + +-[ ] Other measures of descriptive stats mentioned in Helsel et al. (2020) \ No newline at end of file diff --git a/.github/workflows/check-no-suggests.yaml b/.github/workflows/check-no-suggests.yaml index bc93129..3775153 100644 --- a/.github/workflows/check-no-suggests.yaml +++ b/.github/workflows/check-no-suggests.yaml @@ -1,59 +1,59 @@ -# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples -# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help -# -# NOTE: This workflow only directly installs "hard" dependencies, i.e. Depends, -# Imports, and LinkingTo dependencies. 
Notably, Suggests dependencies are never -# installed, with the exception of testthat, knitr, and rmarkdown. The cache is -# never used to avoid accidentally restoring a cache containing a suggested -# dependency. -on: - push: - branches: master - pull_request: - -name: check-no-suggests.yaml - -permissions: read-all - -jobs: - check-no-suggests: - runs-on: ${{ matrix.config.os }} - - name: ${{ matrix.config.os }} (${{ matrix.config.r }}) - - strategy: - fail-fast: false - matrix: - config: - - {os: ubuntu-latest, r: 'release'} - - env: - GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} - R_KEEP_PKG_SOURCE: yes - - steps: - - uses: actions/checkout@v4 - - - uses: r-lib/actions/setup-pandoc@v2 - - - uses: r-lib/actions/setup-r@v2 - with: - r-version: ${{ matrix.config.r }} - http-user-agent: ${{ matrix.config.http-user-agent }} - use-public-rspm: true - - - uses: r-lib/actions/setup-r-dependencies@v2 - with: - dependencies: '"hard"' - cache: false - extra-packages: | - any::rcmdcheck - any::testthat - any::knitr - any::rmarkdown - needs: check - - - uses: r-lib/actions/check-r-package@v2 - with: - upload-snapshots: true +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help +# +# NOTE: This workflow only directly installs "hard" dependencies, i.e. Depends, +# Imports, and LinkingTo dependencies. Notably, Suggests dependencies are never +# installed, with the exception of testthat, knitr, and rmarkdown. The cache is +# never used to avoid accidentally restoring a cache containing a suggested +# dependency. 
+on: + push: + branches: master + pull_request: + +name: check-no-suggests.yaml + +permissions: read-all + +jobs: + check-no-suggests: + runs-on: ${{ matrix.config.os }} + + name: ${{ matrix.config.os }} (${{ matrix.config.r }}) + + strategy: + fail-fast: false + matrix: + config: + - {os: ubuntu-latest, r: 'release'} + + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + R_KEEP_PKG_SOURCE: yes + + steps: + - uses: actions/checkout@v4 + + - uses: r-lib/actions/setup-pandoc@v2 + + - uses: r-lib/actions/setup-r@v2 + with: + r-version: ${{ matrix.config.r }} + http-user-agent: ${{ matrix.config.http-user-agent }} + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + dependencies: '"hard"' + cache: false + extra-packages: | + any::rcmdcheck + any::testthat + any::knitr + any::rmarkdown + needs: check + + - uses: r-lib/actions/check-r-package@v2 + with: + upload-snapshots: true build_args: 'c("--no-manual","--compact-vignettes=gs+qpdf")' \ No newline at end of file diff --git a/.github/workflows/check-r-pkg.yaml b/.github/workflows/check-r-pkg.yaml index 1a62901..28c6b92 100644 --- a/.github/workflows/check-r-pkg.yaml +++ b/.github/workflows/check-r-pkg.yaml @@ -1,50 +1,50 @@ -# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples -# Need help debugging build failures? 
Start at https://github.com/r-lib/actions#where-to-find-help -on: - push: - branches: master - pull_request: - branches: master - -name: R-CMD-check - -jobs: - R-CMD-check: - runs-on: ${{ matrix.config.os }} - - name: ${{ matrix.config.os }} (${{ matrix.config.r }}) - - strategy: - fail-fast: false - matrix: - config: - - {os: macos-latest, r: 'release'} - - {os: windows-latest, r: 'release'} - - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} - - {os: ubuntu-latest, r: 'release'} - - {os: ubuntu-latest, r: 'oldrel-1'} - - env: - GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} - R_KEEP_PKG_SOURCE: yes - - steps: - - uses: actions/checkout@v4 - - - uses: r-lib/actions/setup-pandoc@v2 - - - uses: r-lib/actions/setup-r@v2 - with: - r-version: ${{ matrix.config.r }} - http-user-agent: ${{ matrix.config.http-user-agent }} - use-public-rspm: true - - - uses: r-lib/actions/setup-r-dependencies@v2 - with: - extra-packages: any::rcmdcheck - needs: check - - - uses: r-lib/actions/check-r-package@v2 - with: - upload-snapshots: true - build_args: 'c("--no-manual","--compact-vignettes=gs+qpdf")' +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? 
Start at https://github.com/r-lib/actions#where-to-find-help +on: + push: + branches: master + pull_request: + branches: master + +name: R-CMD-check + +jobs: + R-CMD-check: + runs-on: ${{ matrix.config.os }} + + name: ${{ matrix.config.os }} (${{ matrix.config.r }}) + + strategy: + fail-fast: false + matrix: + config: + - {os: macos-latest, r: 'release'} + - {os: windows-latest, r: 'release'} + - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} + - {os: ubuntu-latest, r: 'release'} + - {os: ubuntu-latest, r: 'oldrel-1'} + + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + R_KEEP_PKG_SOURCE: yes + + steps: + - uses: actions/checkout@v4 + + - uses: r-lib/actions/setup-pandoc@v2 + + - uses: r-lib/actions/setup-r@v2 + with: + r-version: ${{ matrix.config.r }} + http-user-agent: ${{ matrix.config.http-user-agent }} + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::rcmdcheck + needs: check + + - uses: r-lib/actions/check-r-package@v2 + with: + upload-snapshots: true + build_args: 'c("--no-manual","--compact-vignettes=gs+qpdf")' diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml index 722375c..8bd57fa 100644 --- a/.github/workflows/pkgdown.yaml +++ b/.github/workflows/pkgdown.yaml @@ -1,49 +1,49 @@ -# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples -# Need help debugging build failures? 
Start at https://github.com/r-lib/actions#where-to-find-help -on: - push: - branches: master - pull_request: - release: - types: [published] - workflow_dispatch: - -name: pkgdown.yaml - -permissions: read-all - -jobs: - pkgdown: - runs-on: ubuntu-latest - # Only restrict concurrency for non-PR jobs - concurrency: - group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }} - env: - GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} - permissions: - contents: write - steps: - - uses: actions/checkout@v4 - - - uses: r-lib/actions/setup-pandoc@v2 - - - uses: r-lib/actions/setup-r@v2 - with: - use-public-rspm: true - - - uses: r-lib/actions/setup-r-dependencies@v2 - with: - extra-packages: any::pkgdown, local::. - needs: website - - - name: Build site - run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE) - shell: Rscript {0} - - - name: Deploy to GitHub pages 🚀 - if: github.event_name != 'pull_request' - uses: JamesIves/github-pages-deploy-action@v4.5.0 - with: - clean: false - branch: gh-pages - folder: docs +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help +on: + push: + branches: master + pull_request: + release: + types: [published] + workflow_dispatch: + +name: pkgdown.yaml + +permissions: read-all + +jobs: + pkgdown: + runs-on: ubuntu-latest + # Only restrict concurrency for non-PR jobs + concurrency: + group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }} + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + permissions: + contents: write + steps: + - uses: actions/checkout@v4 + + - uses: r-lib/actions/setup-pandoc@v2 + + - uses: r-lib/actions/setup-r@v2 + with: + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::pkgdown, local::. 
+ needs: website + + - name: Build site + run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE) + shell: Rscript {0} + + - name: Deploy to GitHub pages 🚀 + if: github.event_name != 'pull_request' + uses: JamesIves/github-pages-deploy-action@v4.5.0 + with: + clean: false + branch: gh-pages + folder: docs diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml index 3b95185..b69edb7 100644 --- a/.github/workflows/test-coverage.yaml +++ b/.github/workflows/test-coverage.yaml @@ -1,61 +1,61 @@ -# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples -# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help -on: - push: - branches: master - pull_request: - branches: master - -name: test-coverage.yaml - -permissions: read-all - -jobs: - test-coverage: - runs-on: ubuntu-latest - env: - GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} - - steps: - - uses: actions/checkout@v4 - - - uses: r-lib/actions/setup-r@v2 - with: - use-public-rspm: true - - - uses: r-lib/actions/setup-r-dependencies@v2 - with: - extra-packages: any::covr, any::xml2 - needs: coverage - - - name: Test coverage - run: | - cov <- covr::package_coverage( - quiet = FALSE, - clean = FALSE, - install_path = file.path(normalizePath(Sys.getenv("RUNNER_TEMP"), winslash = "/"), "package") - ) - covr::to_cobertura(cov) - shell: Rscript {0} - - - uses: codecov/codecov-action@v4 - with: - fail_ci_if_error: ${{ github.event_name != 'pull_request' && true || false }} - file: ./cobertura.xml - plugin: noop - disable_search: true - token: ${{ secrets.CODECOV_TOKEN }} - - - name: Show testthat output - if: always() - run: | - ## -------------------------------------------------------------------- - find '${{ runner.temp }}/package' -name 'testthat.Rout*' -exec cat '{}' \; || true - shell: bash - - - name: Upload test results - if: failure() - uses: actions/upload-artifact@v4 - with: - name: coverage-test-failures +# 
Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help +on: + push: + branches: master + pull_request: + branches: master + +name: test-coverage.yaml + +permissions: read-all + +jobs: + test-coverage: + runs-on: ubuntu-latest + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + + steps: + - uses: actions/checkout@v4 + + - uses: r-lib/actions/setup-r@v2 + with: + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::covr, any::xml2 + needs: coverage + + - name: Test coverage + run: | + cov <- covr::package_coverage( + quiet = FALSE, + clean = FALSE, + install_path = file.path(normalizePath(Sys.getenv("RUNNER_TEMP"), winslash = "/"), "package") + ) + covr::to_cobertura(cov) + shell: Rscript {0} + + - uses: codecov/codecov-action@v4 + with: + fail_ci_if_error: ${{ github.event_name != 'pull_request' && true || false }} + file: ./cobertura.xml + plugin: noop + disable_search: true + token: ${{ secrets.CODECOV_TOKEN }} + + - name: Show testthat output + if: always() + run: | + ## -------------------------------------------------------------------- + find '${{ runner.temp }}/package' -name 'testthat.Rout*' -exec cat '{}' \; || true + shell: bash + + - name: Upload test results + if: failure() + uses: actions/upload-artifact@v4 + with: + name: coverage-test-failures path: ${{ runner.temp }}/package \ No newline at end of file diff --git a/.gitignore b/.gitignore index cf973ee..8b89e3a 100644 --- a/.gitignore +++ b/.gitignore @@ -13,4 +13,6 @@ CRAN-SUBMISSION docs **/.quarto/ *.o -*.so \ No newline at end of file +*.so +src/tidyhydro.dll +tidyhydro.md diff --git a/DESCRIPTION b/DESCRIPTION index f43b0cc..b1cc2b3 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,17 +1,18 @@ Package: tidyhydro Type: Package Title: Tidy Metrics for Assessing Hydrological Models Performance -Version: 0.1.1.9000 +Version: 0.1.2 
Authors@R: person(given = "Anatoly", family = "Tsyplenkov", email = "atsyplenkov@fastmail.com", role = c("cre", "aut", "cph"), comment = c(ORCID = "0000-0003-4144-8402")) Maintainer: Anatoly Tsyplenkov -Description: Provides tidy tools for comparing simulated and observed hydrological time series. Includes compatibility with the 'yardstick' package for model performance evaluation using commonly used metrics such as the Nash–Sutcliffe Efficiency (NSE), Kling–Gupta Efficiency (KGE), percent bias (pBIAS) and etc. +Description: Provides tidy tools to measure the characteristics of hydrological time series and to assess the performance of hydrological models. Includes compatibility with the 'yardstick' package for model performance evaluation using commonly used metrics such as the Nash–Sutcliffe Efficiency (NSE), Kling–Gupta Efficiency (KGE), percent bias (pBIAS), etc. Additionally provides a set of measures to calculate the descriptive statistics of a single dataset in accordance with Helsel et al. (2020). Helsel DR, Hirsch RM, Ryberg KR, Archfield SA, Gilroy EJ. Statistical methods in water resources. Reston, VA: 2020. . 
License: MIT + file LICENSE Depends: R (>= 4.1.0) Imports: Rcpp (>= 1.0.12), rlang (>= 1.1.0), - yardstick (>= 1.3.1) + yardstick (>= 1.3.1), + checkmate (>= 2.3.1) LinkingTo: Rcpp Roxygen: list(markdown = TRUE) RoxygenNote: 7.3.2 @@ -20,10 +21,22 @@ Language: en-US Suggests: hydroGOF, testthat (>= 3.0.0), - quickcheck (>= 0.1.3), - quarto + quickcheck (>= 0.1.3) Config/testthat/edition: 3 URL: https://github.com/atsyplenkov/tidyhydro, https://atsyplenkov.github.io/tidyhydro/ BugReports: https://github.com/atsyplenkov/tidyhydro/issues LazyData: true Config/Needs/website: bench, ggplot2, quarto, lubridate, dplyr +Collate: + 'RcppExports.R' + 'aaa-new.R' + 'central-tendency.R' + 'data.R' + 'kge.R' + 'mse.R' + 'nse.R' + 'pbias.R' + 'press.R' + 'sfe.R' + 'tidyhydro-package.R' + 'variability.R' diff --git a/LICENSE b/LICENSE index 35d126c..63b8835 100644 --- a/LICENSE +++ b/LICENSE @@ -1,2 +1,2 @@ -YEAR: 2025 -COPYRIGHT HOLDER: tidyhydro authors +YEAR: 2025 +COPYRIGHT HOLDER: tidyhydro authors diff --git a/LICENSE.md b/LICENSE.md index 970dc5d..1042766 100644 --- a/LICENSE.md +++ b/LICENSE.md @@ -1,21 +1,21 @@ -# MIT License - -Copyright (c) 2025 tidyhydro authors - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. +# MIT License + +Copyright (c) 2025 tidyhydro authors + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/NAMESPACE b/NAMESPACE index 57460df..ed7190e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,25 +1,51 @@ -useDynLib(tidyhydro, .registration = TRUE) -importFrom(Rcpp, evalCpp) -# exportPattern("^[[:alpha:]]+") - -S3method(nse, data.frame) -S3method(kge, data.frame) -S3method(kge2012, data.frame) -S3method(mse, data.frame) -S3method(pbias, data.frame) -S3method(press, data.frame) -S3method(sfe, data.frame) -export(nse) -export(kge) -export(kge2012) -export(mse) -export(pbias) -export(press) -export(sfe) -export(nse_vec) -export(kge_vec) -export(kge2012_vec) -export(mse_vec) -export(pbias_vec) -export(press_vec) -export(sfe_vec) +useDynLib(tidyhydro, .registration = TRUE) +importFrom(Rcpp, evalCpp) +# exportPattern("^[[:alpha:]]+") + +S3method(print, measure) +S3method(format, measure) + +# general functions +export(nse) +export(kge) +export(kge2012) +export(kgelog) +export(kgelog_low) +export(kgelog_hi) +export(mse) +export(rmse) +export(pbias) +export(press) +export(sfe) +export(cv) +export(gm) + +# data.frame methods +S3method(nse, data.frame) +S3method(kge, data.frame) +S3method(kge2012, data.frame) +S3method(kgelog, data.frame) +S3method(kgelog_low, data.frame) +S3method(kgelog_hi, data.frame) +S3method(mse, data.frame) +S3method(rmse, data.frame) +S3method(pbias, data.frame) +S3method(press, data.frame) +S3method(sfe, data.frame) +S3method(cv, data.frame) +S3method(gm, data.frame) + +# vector functions +export(nse_vec) +export(kge_vec) +export(kge2012_vec) +export(kgelog_vec) +export(kgelog_low_vec) +export(kgelog_hi_vec) +export(mse_vec) +export(rmse_vec) +export(pbias_vec) +export(press_vec) +export(sfe_vec) +export(cv_vec) +export(gm_vec) diff --git a/NEWS.md b/NEWS.md index 550b61b..ca65707 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,13 @@ -# tidyhydro (development version) +# tidyhydro 0.1.2 + +## New features +- Added RMSE (`rmse`) and log-transformed KGE (`kgelog`, `kgelog_low` and `kgelog_hi`) +- Introduced descriptive statistics class — 
`measure` +- Added `cv`, `gm` measures + +## Miscellaneous + +- Added structure. Functions are now grouped into two categories: regression and GOF # tidyhydro 0.1.1 @@ -9,10 +18,11 @@ ## Bug fixes -- Improved documenation by switching from `\url` to `\doi` -- Removed unicode characters α, β +- Improved documentation by switching from `\url` to `\doi` +- Removed unicode characters ## Miscellaneous + - Created website with vignettes (https://atsyplenkov.github.io/tidyhydro) # tidyhydro 0.1.0 diff --git a/R/RcppExports.R b/R/RcppExports.R index ec7d1ac..5c9731d 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -5,8 +5,8 @@ kge_cpp <- function(obs, sim, na_rm = TRUE, version = "2012") { .Call(`_tidyhydro_kge_cpp`, obs, sim, na_rm, version) } -mse_cpp <- function(truth, estimate, na_rm = TRUE) { - .Call(`_tidyhydro_mse_cpp`, truth, estimate, na_rm) +mse_cpp <- function(truth, estimate, na_rm = TRUE, sqrt = TRUE) { + .Call(`_tidyhydro_mse_cpp`, truth, estimate, na_rm, sqrt) } nse_cpp <- function(truth, estimate, performance = FALSE, na_rm = TRUE) { diff --git a/R/aaa-new.R b/R/aaa-new.R new file mode 100644 index 0000000..b821a24 --- /dev/null +++ b/R/aaa-new.R @@ -0,0 +1,77 @@ +# TODO: +# - Add hyperlink to `measure_set` +# - Add tests + +# Modified after https://github.com/tidymodels/yardstick/blob/main/R/aaa-new.R + +#' Construct a new measure function +#' @keywords summary_stats +#' +#' @description +#' These functions provide convenient wrappers to create the three types of +#' measure functions in `tidyhydro`: measures of central tendency, variability +#' and symmetry. They add a measure-specific class to `fn` and +#' mimic a behaviour of [metric_set][yardstick::metric_set]. These features +#' are used by measure_set. +#' +#' See [Custom performance +#' metrics](https://www.tidymodels.org/learn/develop/metrics/) for more +#' information about creating custom metrics. +#' +#' @param fn A function. 
The measure function to attach a measure-specific class +#' +#' @name new-measure +NULL + +#' @rdname new-measure +#' @export +new_tendency_measure <- function(fn) { + new_measure(fn, class = "tendency_measure") +} + +#' @rdname new-measure +#' @export +new_var_measure <- function(fn) { + new_measure(fn, class = "var_measure") +} + +#' @rdname new-measure +#' @export +new_sym_measure <- function(fn) { + new_measure(fn, class = "sym_measure") +} + +new_measure <- function(fn, class = NULL) { + checkmate::assert_function(fn, args = "data") + + class <- c(class, "measure", "function") + + structure(fn, class = class) +} + +is_measure <- function(x) { + inherits(x, "measure") +} + +#' @noRd +#' @export +print.measure <- function(x, ...) { + cat(format(x), sep = "\n") + invisible(x) +} + +#' @noRd +#' @export +format.measure <- function(x, ...) { + first_class <- class(x)[[1]] + measure_type <- + switch( + first_class, + "tendency_measure" = "Measure of Central Tendency", + "var_measure" = "Measure of Variability", + "sym_measure" = "Measure of Distribution Symmetry", + "measure" + ) + + paste("A", measure_type) # returned, not printed: print.measure() does the cat() +} diff --git a/R/central-tendency.R b/R/central-tendency.R new file mode 100644 index 0000000..ba4752f --- /dev/null +++ b/R/central-tendency.R @@ -0,0 +1,67 @@ +# TODO: +# - Add tests +# - Add description + +#' Geometric Mean (GM) +#' @keywords summary_stats +#' +#' @family descriptive statistics +#' @templateVar fn gm +#' @template return +#' +#' @param data A `data.frame` containing the column specified by the `truth` +#' argument. +#' +#' @param truth The column identifier for the true results +#' (that is `numeric`). This should be an unquoted column name although +#' this argument is passed by expression and supports +#' [quasiquotation][rlang::quasiquotation] (you can unquote column +#' names). For `_vec()` functions, a `numeric` vector. 
+#' +#' @param na_rm A `logical` value indicating whether `NA` +#' values should be stripped before the computation proceeds. +#' +#' @param ... Not currently used. +#' +#' @template examples-description +#' +#' @export +#' + +gm <- function(data, ...) { + UseMethod("gm") +} + +gm <- new_tendency_measure(gm) # wraps the generic via new_tendency_measure() + +#' @rdname gm +#' @export +gm.data.frame <- function( + data, + truth, + na_rm = TRUE, + ... +) { + yardstick::numeric_metric_summarizer( + name = "gm", + fn = gm_vec, + data = data, + truth = !!rlang::enquo(truth), + estimate = !!rlang::enquo(truth), # same column as `truth`: gm() takes no `estimate` argument + na_rm = na_rm + ) +} + +#' @rdname gm +#' @export +gm_vec <- function( + truth, + na_rm = TRUE, + ... +) { + checkmate::assert_numeric( + truth, + lower = 1e-323 # tiny positive bound: zero and negative values are rejected before log() + ) + exp(mean(log(truth), na.rm = na_rm)) +} diff --git a/R/data.R b/R/data.R index 27fec6a..dcbc5ca 100644 --- a/R/data.R +++ b/R/data.R @@ -1,27 +1,28 @@ -#' Mean Daily Water Discharge At Avacha River (Elizovo City) -#' @keywords data -#' -#' @details These data contain the measured (`obs`) mean daily water discharge -#' values (in \eqn{m^3/s}) at the Avacha River -- Elizovo City state gauging -#' station for the 2022 calendar year. They are accompanied by the GloFAS v4.0 -#' reanalysis water discharge values for the last 24 hours (`sim`), derived -#' from -#' \url{https://ewds.climate.copernicus.eu/datasets/cems-glofas-historical}. 
-#' -#' Read more about GloFAS Water Discharge reanalysis -- -#' \url{https://confluence.ecmwf.int/display/CEMS/GloFAS+v4.0} -#' -#' @name avacha -#' @aliases avacha -#' @docType data -#' @return \item{avacha}{a data frame} -#' -#' @source -#' * \url{https://gmvo.skniivh.ru/} -#' * \url{https://ewds.climate.copernicus.eu/datasets/cems-glofas-historical} -#' -#' @keywords datasets -#' @examples -#' data(avacha) -#' str(avacha) -NULL +#' Mean daily water discharge at the Avacha River (Elizovo City) +#' @keywords data +#' +#' @details These data contain the measured (`obs`) mean daily water discharge +#' values (in \eqn{m^3/s}) at the Avacha River streamgage near Elizovo City, +#' Russia, 2022 calendar year. They are accompanied by the GloFAS v4.0 +#' reanalysis water discharge values for the last 24 hours (`sim`), derived +#' from +#' \url{https://ewds.climate.copernicus.eu/datasets/cems-glofas-historical}. +#' +#' Read more about GloFAS Water Discharge reanalysis -- +#' \url{https://confluence.ecmwf.int/display/CEMS/GloFAS+v4.0} +#' +#' @name avacha +#' @aliases avacha +#' @docType data +#' @return \item{avacha}{a data frame} +#' +#' @source +#' * observed water discharge \url{https://gmvo.skniivh.ru/} +#' * simulated water discharge +#' \url{https://ewds.climate.copernicus.eu/datasets/cems-glofas-historical} +#' +#' @keywords datasets +#' @examples +#' data(avacha) +#' str(avacha) +NULL diff --git a/R/kge.R b/R/kge.R index d126ff2..c0d0dbd 100644 --- a/R/kge.R +++ b/R/kge.R @@ -1,251 +1,566 @@ -#' Kling-Gupta Efficiency (KGE) -#' @keywords gof -#' -#' @description -#' Calculate the Kling-Gupta Efficiency (*Gupta et al., 2009*). -#' Dimensionless (from \eqn{-\infty} to 1). `kge()` assesses the accuracy of -#' simulated data by considering correlation, bias, and variability relative -#' to observed data. 
-#' -#' @details -#' The Kling-Gupta Efficiency is a composite metric that decomposes model -#' performance into three components: correlation (\eqn{r}), -#' variability ratio (\eqn{\alpha}), and bias ratio (\eqn{\beta}). -#' It improves upon the Nash-Sutcliffe Efficiency (see [nse]) -#' by explicitly accounting for each source of error (*Gupta et al., 2009*). -#' -#' The Kling-Gupta Efficiency is estimated as follows: -#' \deqn{ -#' KGE = 1 - \sqrt{(r - 1)^2 + (\alpha - 1)^2 + (\beta - 1)^2} -#' } -#' where: -#' \itemize{ -#' \item \eqn{r} is the linear Pearson correlation coefficient between -#' observed and simulated values -#' \item \eqn{\alpha = \sigma_{sim} / \sigma_{obs}} is the ratio of the -#' standard deviations (variability ratio) -#' \item \eqn{\beta = \mu_{sim} / \mu_{obs}} is the ratio of the -#' means (bias ratio) -#' } -#' -#' @note -#' Unlike the Nash–Sutcliffe Efficiency ([nse]), the KGE does not have an -#' inherent benchmark such as "mean flow", and \eqn{KGE = 0} does not -#' correspond to a baseline performance. -#' Therefore, KGE values should not be interpreted as "good" or "bad" based -#' solely on their sign or magnitude. -#' Instead, users are encouraged to examine the individual components -#' (\eqn{r}, \eqn{\alpha}, \eqn{\beta}) -#' to understand the nature of model performance and consider defining -#' explicit benchmarks based on the study context. -#' -#' For further discussion, see Knoben et al. (2019), who caution against -#' directly translating NSE-based interpretation thresholds to KGE. -#' -#' @family numeric metrics -#' @family accuracy metrics -#' @templateVar fn kge -#' @template return -#' -#' @param data A `data.frame` containing the columns specified by the `truth` -#' and `estimate` arguments. -#' -#' @param truth The column identifier for the true results -#' (that is `numeric`). 
This should be an unquoted column name although -#' this argument is passed by expression and supports -#' [quasiquotation][rlang::quasiquotation] (you can unquote column -#' names). For `_vec()` functions, a `numeric` vector. -#' -#' @param estimate The column identifier for the predicted -#' results (that is also `numeric`). As with `truth` this can be -#' specified different ways but the primary method is to use an -#' unquoted variable name. For `_vec()` functions, a `numeric` vector. -#' -#' @param na_rm A `logical` value indicating whether `NA` -#' values should be stripped before the computation proceeds. -#' -#' @param ... Not currently used. -#' -#' @references -#' Gupta, H.V.; Kling, H.; Yilmaz, K.K.; Martinez, G.F. (2009). -#' Decomposition of the mean squared error and kge performance criteria: -#' Implications for improving hydrological modelling. Journal of Hydrology, -#' 377(1-2), 80-91. \doi{10.1016/j.jhydrol.2009.08.003} -#' -#' Knoben, W. J. M., Freer, J. E., & Woods, R. A. (2019). -#' Technical note: Inherent benchmark or not? Comparing Nash–Sutcliffe and -#' Kling–Gupta efficiency scores. Hydrology and Earth System Sciences, 23, -#' 4323–4331. \doi{10.5194/hess-23-4323-2019} -#' -#' @template examples-numeric -#' -#' @export -#' -kge <- function(data, ...) { - UseMethod("kge") -} - -kge <- yardstick::new_numeric_metric( - kge, - direction = "maximize" -) - -#' @rdname kge -#' @export -kge.data.frame <- function( - data, - truth, - estimate, - na_rm = TRUE, - ... -) { - yardstick::numeric_metric_summarizer( - name = "kge", - fn = kge_vec, - data = data, - truth = !!rlang::enquo(truth), - estimate = !!rlang::enquo(estimate), - na_rm = na_rm - ) -} - -#' @rdname kge -#' @export -kge_vec <- function( - truth, - estimate, - na_rm = TRUE, - ... 
-) { - yardstick::check_numeric_metric(truth, estimate, case_weights = NULL) - - kge_cpp(truth, estimate, na_rm = na_rm, version = "2009") -} - -#' Modified Kling-Gupta Efficiency (KGE') -#' @keywords gof -#' -#' @description -#' Calculate the modified Kling-Gupta Efficiency (*Kling et al., 2012*), -#' aka \eqn{KGE'}. Dimensionless (from \eqn{-\infty} to 1). -#' `kge2012()` assesses the accuracy of -#' simulated data by considering correlation, bias, and variability relative -#' to observed data. -#' -#' @details -#' The Modified Kling-Gupta Efficiency is a composite metric that decomposes -#' model performance into three components: correlation (\eqn{r}), -#' bias ratio (\eqn{\beta}), and variability ratio (\eqn{\gamma}). -#' It improves upon the Kling-Gupta Efficiency (see [kge]) by replacing -#' standard deviation with Coefficient of Variation. This ensures that the -#' bias and variability ratios are not cross-correlated, -#' which otherwise may occur when e.g. the precipitation inputs are biased. -#' -#' The Modified Kling-Gupta Efficiency (\eqn{KGE'}) is estimated as follows: -#' \deqn{ -#' KGE' = 1 - \sqrt{(r - 1)^2 + (\beta - 1)^2 + (\gamma - 1)^2} -#' } -#' where: -#' \itemize{ -#' \item \eqn{r} is the linear Pearson correlation coefficient between -#' observed and simulated values -#' \item \eqn{\beta = \mu_{sim} / \mu_{obs}} is the ratio of the -#' means (bias ratio) -#' \item \eqn{ -#' \gamma = \frac{\sigma_{sim} / \mu_{sim}}{\sigma_{sim} / \mu_{sim}} -#' } is the ratio of the Coefficients of Variation (variability ratio) -#' } -#' -#' @note -#' Unlike the Nash–Sutcliffe Efficiency ([nse]), the KGE does not have an -#' inherent benchmark such as "mean flow", and \eqn{KGE' = 0} does not -#' correspond to a baseline performance. -#' Therefore, \eqn{KGE'} values should not be interpreted as "good" or "bad" -#' based solely on their sign or magnitude. 
-#' Instead, users are encouraged to examine the individual components -#' (\eqn{r}, \eqn{\beta}, \eqn{\gamma}) -#' to understand the nature of model performance and consider defining -#' explicit benchmarks based on the study context. -#' -#' For further discussion, see Knoben et al. (2019), who caution against -#' directly translating NSE-based interpretation thresholds to KGE. -#' -#' @family numeric metrics -#' @family accuracy metrics -#' @templateVar fn kge2012 -#' @template return -#' -#' @param data A `data.frame` containing the columns specified by the `truth` -#' and `estimate` arguments. -#' -#' @param truth The column identifier for the true results -#' (that is `numeric`). This should be an unquoted column name although -#' this argument is passed by expression and supports -#' [quasiquotation][rlang::quasiquotation] (you can unquote column -#' names). For `_vec()` functions, a `numeric` vector. -#' -#' @param estimate The column identifier for the predicted -#' results (that is also `numeric`). As with `truth` this can be -#' specified different ways but the primary method is to use an -#' unquoted variable name. For `_vec()` functions, a `numeric` vector. -#' -#' @param na_rm A `logical` value indicating whether `NA` -#' values should be stripped before the computation proceeds. -#' -#' @param ... Not currently used. -#' -#' @references -#' Kling, H., Fuchs, M., & Paulin, M. (2012). Runoff conditions in the upper -#' Danube basin under an ensemble of climate change scenarios. -#' Journal of Hydrology, 424–425, 264–277. -#' \doi{10.1016/j.jhydrol.2012.01.011} -#' -#' Knoben, W. J. M., Freer, J. E., & Woods, R. A. (2019). -#' Technical note: Inherent benchmark or not? Comparing Nash–Sutcliffe and -#' Kling–Gupta efficiency scores. Hydrology and Earth System Sciences, 23, -#' 4323–4331. \doi{10.5194/hess-23-4323-2019} -#' -#' @template examples-numeric -#' -#' @export -#' -kge2012 <- function(data, ...) 
{ - UseMethod("kge2012") -} - -kge2012 <- yardstick::new_numeric_metric( - kge2012, - direction = "maximize" -) - -#' @rdname kge2012 -#' @export -kge2012.data.frame <- function( - data, - truth, - estimate, - na_rm = TRUE, - ... -) { - yardstick::numeric_metric_summarizer( - name = "kge2012", - fn = kge2012_vec, - data = data, - truth = !!rlang::enquo(truth), - estimate = !!rlang::enquo(estimate), - na_rm = na_rm - ) -} - -#' @rdname kge2012 -#' @export -kge2012_vec <- function( - truth, - estimate, - na_rm = TRUE, - ... -) { - yardstick::check_numeric_metric(truth, estimate, case_weights = NULL) - - kge_cpp(truth, estimate, na_rm = na_rm, version = "2012") -} +#' Kling-Gupta Efficiency (KGE) +#' @keywords gof +#' +#' @description +#' Calculate the Kling-Gupta Efficiency (*Gupta et al., 2009*). +#' Dimensionless (from \eqn{-\infty} to 1). `kge()` assesses the accuracy of +#' simulated data by considering correlation, bias, and variability relative +#' to observed data. +#' +#' @details +#' The Kling-Gupta Efficiency is a composite metric that decomposes model +#' performance into three components: correlation (\eqn{r}), +#' variability ratio (\eqn{\alpha}), and bias ratio (\eqn{\beta}). +#' It improves upon the Nash-Sutcliffe Efficiency (see [nse]) +#' by explicitly accounting for each source of error (*Gupta et al., 2009*). 
+#' +#' The Kling-Gupta Efficiency is estimated as follows: +#' \deqn{ +#' KGE = 1 - \sqrt{(r - 1)^2 + (\alpha - 1)^2 + (\beta - 1)^2} +#' } +#' where: +#' \itemize{ +#' \item \eqn{r} is the linear Pearson correlation coefficient between +#' observed and simulated values +#' \item \eqn{\alpha = \sigma_{sim} / \sigma_{obs}} is the ratio of the +#' standard deviations (variability ratio) +#' \item \eqn{\beta = \mu_{sim} / \mu_{obs}} is the ratio of the +#' means (bias ratio) +#' } +#' +#' @note +#' Unlike the Nash–Sutcliffe Efficiency ([nse]), the KGE does not have an +#' inherent benchmark such as "mean flow", and \eqn{KGE = 0} does not +#' correspond to a baseline performance. +#' Therefore, KGE values should not be interpreted as "good" or "bad" based +#' solely on their sign or magnitude. +#' Instead, users are encouraged to examine the individual components +#' (\eqn{r}, \eqn{\alpha}, \eqn{\beta}) +#' to understand the nature of model performance and consider defining +#' explicit benchmarks based on the study context. +#' +#' For further discussion, see *Knoben et al.* (2019), who caution against +#' directly translating NSE-based interpretation thresholds to KGE. +#' +#' @family KGE variants +#' @templateVar fn kge +#' @template return +#' +#' @param data A `data.frame` containing the columns specified by the `truth` +#' and `estimate` arguments. +#' +#' @param truth The column identifier for the true results +#' (that is `numeric`). This should be an unquoted column name although +#' this argument is passed by expression and supports +#' [quasiquotation][rlang::quasiquotation] (you can unquote column +#' names). For `_vec()` functions, a `numeric` vector. +#' +#' @param estimate The column identifier for the predicted +#' results (that is also `numeric`). As with `truth` this can be +#' specified different ways but the primary method is to use an +#' unquoted variable name. For `_vec()` functions, a `numeric` vector. 
+#' +#' @param na_rm A `logical` value indicating whether `NA` +#' values should be stripped before the computation proceeds. +#' +#' @param ... Not currently used. +#' +#' @references +#' Gupta, H.V.; Kling, H.; Yilmaz, K.K.; Martinez, G.F. (2009). +#' Decomposition of the mean squared error and NSE performance criteria: +#' Implications for improving hydrological modelling. Journal of Hydrology, +#' 377(1-2), 80-91. \doi{10.1016/j.jhydrol.2009.08.003} +#' +#' Knoben, W. J. M., Freer, J. E., & Woods, R. A. (2019). +#' Technical note: Inherent benchmark or not? Comparing Nash–Sutcliffe and +#' Kling–Gupta efficiency scores. Hydrology and Earth System Sciences, 23, +#' 4323–4331. \doi{10.5194/hess-23-4323-2019} +#' +#' @template examples-numeric +#' +#' @export +#' +kge <- function(data, ...) { + UseMethod("kge") +} + +kge <- yardstick::new_numeric_metric( + kge, + direction = "maximize" +) + +#' @rdname kge +#' @export +kge.data.frame <- function( + data, + truth, + estimate, + na_rm = TRUE, + ... +) { + yardstick::numeric_metric_summarizer( + name = "kge", + fn = kge_vec, + data = data, + truth = !!rlang::enquo(truth), + estimate = !!rlang::enquo(estimate), + na_rm = na_rm + ) +} + +#' @rdname kge +#' @export +kge_vec <- function( + truth, + estimate, + na_rm = TRUE, + ... +) { + yardstick::check_numeric_metric(truth, estimate, case_weights = NULL) + + kge_cpp(truth, estimate, na_rm = na_rm, version = "2009") +} + +#' Modified Kling-Gupta Efficiency (KGE') +#' @keywords gof +#' +#' @description +#' Calculate the modified Kling-Gupta Efficiency (*Kling et al., 2012*), +#' aka \eqn{KGE'}. Dimensionless (from \eqn{-\infty} to 1). +#' `kge2012()` assesses the accuracy of +#' simulated data by considering correlation, bias, and variability relative +#' to observed data.
+#' +#' @details +#' The Modified Kling-Gupta Efficiency is a composite metric that decomposes +#' model performance into three components: correlation (\eqn{r}), +#' bias ratio (\eqn{\beta}), and variability ratio (\eqn{\gamma}). +#' It improves upon the Kling-Gupta Efficiency (see [kge]) by replacing +#' standard deviation with Coefficient of Variation. This ensures that the +#' bias and variability ratios are not cross-correlated, +#' which otherwise may occur when e.g. the precipitation inputs are biased. +#' +#' The Modified Kling-Gupta Efficiency (\eqn{KGE'}) is estimated as follows: +#' \deqn{ +#' KGE' = 1 - \sqrt{(r - 1)^2 + (\beta - 1)^2 + (\gamma - 1)^2} +#' } +#' where: +#' \itemize{ +#' \item \eqn{r} is the linear Pearson correlation coefficient between +#' observed and simulated values +#' \item \eqn{\beta = \mu_{sim} / \mu_{obs}} is the ratio of the +#' means (bias ratio) +#' \item \eqn{ +#' \gamma = \frac{\sigma_{sim} / \mu_{sim}}{\sigma_{obs} / \mu_{obs}} +#' } is the ratio of the Coefficients of Variation (variability ratio) +#' } +#' +#' @note +#' Unlike the Nash–Sutcliffe Efficiency ([nse]), the KGE does not have an +#' inherent benchmark such as "mean flow", and \eqn{KGE' = 0} does not +#' correspond to a baseline performance. +#' Therefore, \eqn{KGE'} values should not be interpreted as "good" or "bad" +#' based solely on their sign or magnitude. +#' Instead, users are encouraged to examine the individual components +#' (\eqn{r}, \eqn{\beta}, \eqn{\gamma}) +#' to understand the nature of model performance and consider defining +#' explicit benchmarks based on the study context. +#' +#' For further discussion, see *Knoben et al.* (2019), who caution against +#' directly translating NSE-based interpretation thresholds to KGE. +#' +#' @family KGE variants +#' @templateVar fn kge2012 +#' @template return +#' +#' @param data A `data.frame` containing the columns specified by the `truth` +#' and `estimate` arguments.
+#' +#' @param truth The column identifier for the true results +#' (that is `numeric`). This should be an unquoted column name although +#' this argument is passed by expression and supports +#' [quasiquotation][rlang::quasiquotation] (you can unquote column +#' names). For `_vec()` functions, a `numeric` vector. +#' +#' @param estimate The column identifier for the predicted +#' results (that is also `numeric`). As with `truth` this can be +#' specified different ways but the primary method is to use an +#' unquoted variable name. For `_vec()` functions, a `numeric` vector. +#' +#' @param na_rm A `logical` value indicating whether `NA` +#' values should be stripped before the computation proceeds. +#' +#' @param ... Not currently used. +#' +#' @references +#' Kling, H., Fuchs, M., & Paulin, M. (2012). Runoff conditions in the upper +#' Danube basin under an ensemble of climate change scenarios. +#' Journal of Hydrology, 424–425, 264–277. +#' \doi{10.1016/j.jhydrol.2012.01.011} +#' +#' Knoben, W. J. M., Freer, J. E., & Woods, R. A. (2019). +#' Technical note: Inherent benchmark or not? Comparing Nash–Sutcliffe and +#' Kling–Gupta efficiency scores. Hydrology and Earth System Sciences, 23, +#' 4323–4331. \doi{10.5194/hess-23-4323-2019} +#' +#' @template examples-numeric +#' +#' @export +#' +kge2012 <- function(data, ...) { + UseMethod("kge2012") +} + +kge2012 <- yardstick::new_numeric_metric( + kge2012, + direction = "maximize" +) + +#' @rdname kge2012 +#' @export +kge2012.data.frame <- function( + data, + truth, + estimate, + na_rm = TRUE, + ... +) { + yardstick::numeric_metric_summarizer( + name = "kge2012", + fn = kge2012_vec, + data = data, + truth = !!rlang::enquo(truth), + estimate = !!rlang::enquo(estimate), + na_rm = na_rm + ) +} + +#' @rdname kge2012 +#' @export +kge2012_vec <- function( + truth, + estimate, + na_rm = TRUE, + ... 
+) { + yardstick::check_numeric_metric(truth, estimate, case_weights = NULL) + + kge_cpp(truth, estimate, na_rm = na_rm, version = "2012") +} + +#' Log-transformed Modified Kling-Gupta Efficiency +#' @rdname kgelog +#' @keywords gof +#' +#' @description +#' Calculate the modified Kling-Gupta Efficiency (*Kling et al., 2012*) on +#' **log-transformed** data as proposed in *Mai* (2023), +#' namely \eqn{KGE_{log}}, \eqn{KGE_{logQ_{low}}} and \eqn{KGE_{logQ_{hi}}}. +#' All are dimensionless (from \eqn{-\infty} to 1). +#' +#' This metric is recommended for emphasising low flows. By transforming the +#' discharge data logarithmically, it gives more weight to smaller flow +#' values, which is important for understanding drought conditions or +#' baseflow behaviour (see *Mai 2023*; *Mizukami et al., 2019*). +#' +#' @details +#' While the `kgelog()` function proposes the log-transformed version of the +#' [kge2012], functions such as `kgelog_low()` and `kgelog_hi()` +#' also perform data subsetting according to conditions specified in +#' *Mai* (2023). +#' +#' The metrics `kgelog_low()` and `kgelog_hi()` are then the \eqn{KGE'} +#' of the log-transformed observed and simulated streamflow considering +#' only low-flow and high-flow time steps, respectively. 
+#' +#' A data point is considered in the derivation of `kgelog_low()` if the +#' observed streamflow (\eqn{\text{obs}}) for that time step satisfies +#' the following conditions: +#' +#' \deqn{ +#' 0.0 < \text{obs} \le min(\text{obs}) + 0.05 \times +#' (max(\text{obs}) - min(\text{obs})) +#' } +#' +#' A data point is considered in the derivation of `kgelog_hi()` if the +#' observed streamflow (\eqn{\text{obs}}) for that time step satisfies +#' the following conditions: +#' +#' \deqn{ +#' \text{obs} > min(\text{obs}) + 0.05 \times +#' (max(\text{obs}) - min(\text{obs})) +#' } +#' +#' @note +#' Please note that the decision if a time step is a low-flow or high-flow +#' time step is solely based on the observations which means it is always +#' the same time steps for a given basin and time period while being +#' independent of the simulation (*Mai*, 2023). +#' +#' Unlike the Nash–Sutcliffe Efficiency ([nse]), the KGE does not have an +#' inherent benchmark such as "mean flow", and \eqn{KGE' = 0} does not +#' correspond to a baseline performance. +#' Therefore, \eqn{KGE_{log}} values should not be interpreted as "good" +#' or "bad" based solely on their sign or magnitude. +#' Instead, users are encouraged to examine the individual components +#' (\eqn{r}, \eqn{\beta}, \eqn{\gamma}) +#' to understand the nature of model performance and consider defining +#' explicit benchmarks based on the study context. +#' +#' For further discussion, see *Knoben et al.* (2019), who caution against +#' directly translating NSE-based interpretation thresholds to KGE. +#' +#' @family KGE variants +#' @templateVar fn kgelog +#' @template return +#' +#' @param data A `data.frame` containing the columns specified by the `truth` +#' and `estimate` arguments. +#' +#' @param truth The column identifier for the true results +#' (that is `numeric`). 
This should be an unquoted column name although +#' this argument is passed by expression and supports +#' [quasiquotation][rlang::quasiquotation] (you can unquote column +#' names). For `_vec()` functions, a `numeric` vector. +#' +#' @param estimate The column identifier for the predicted +#' results (that is also `numeric`). As with `truth` this can be +#' specified different ways but the primary method is to use an +#' unquoted variable name. For `_vec()` functions, a `numeric` vector. +#' +#' @param na_rm A `logical` value indicating whether `NA` +#' values should be stripped before the computation proceeds. +#' +#' @param ... Not currently used. +#' +#' @references +#' Kling, H., Fuchs, M., & Paulin, M. (2012). Runoff conditions in the upper +#' Danube basin under an ensemble of climate change scenarios. +#' Journal of Hydrology, 424–425, 264–277. +#' \doi{10.1016/j.jhydrol.2012.01.011} +#' +#' Knoben, W. J. M., Freer, J. E., & Woods, R. A. (2019). +#' Technical note: Inherent benchmark or not? Comparing Nash–Sutcliffe and +#' Kling–Gupta efficiency scores. Hydrology and Earth System Sciences, 23, +#' 4323–4331. \doi{10.5194/hess-23-4323-2019} +#' +#' Mai, J. (2023). Ten strategies towards successful calibration of +#' environmental models. Journal of Hydrology, 620, 129414. +#' \doi{10.1016/j.jhydrol.2023.129414} +#' +#' Mizukami, N., Rakovec, O., Newman, A. J., Clark, M. P., Wood, A. W., +#' Gupta, H. V., & Kumar, R. (2019). On the choice of calibration metrics +#' for “high-flow” estimation using hydrologic models. +#' Hydrology and Earth System Sciences, 23(6), 2601–2614. +#' \doi{10.5194/hess-23-2601-2019} +#' +#' @templateVar fn kgelog +#' @template examples-numeric +#' +#' @export +#' +kgelog <- function(data, ...) { + UseMethod("kgelog") +} + +kgelog <- yardstick::new_numeric_metric( + kgelog, + direction = "maximize" +) + +#' @rdname kgelog +#' @export +kgelog.data.frame <- function( + data, + truth, + estimate, + na_rm = TRUE, + ... 
+) { + yardstick::numeric_metric_summarizer( + name = "kgelog", + fn = kgelog_vec, + data = data, + truth = !!rlang::enquo(truth), + estimate = !!rlang::enquo(estimate), + na_rm = na_rm + ) +} + +#' @rdname kgelog +#' @export +kgelog_vec <- function( + truth, + estimate, + na_rm = TRUE, + ... +) { + # checks + checkmate::assert_numeric( + truth, + lower = 1e-323 + ) + checkmate::assert_numeric( + estimate, + lower = 1e-323 + ) + + # Log-transform + truth_log <- log10(truth) + estimate_log <- log10(estimate) + + # More checks + yardstick::check_numeric_metric( + truth_log, + estimate_log, + case_weights = NULL + ) + + kge_cpp(truth_log, estimate_log, na_rm = na_rm, version = "2012") +} + +#' @rdname kgelog +#' @export +kgelog_low <- function(data, ...) { + UseMethod("kgelog_low") +} + +kgelog_low <- yardstick::new_numeric_metric( + kgelog_low, + direction = "maximize" +) + +#' @rdname kgelog +#' @export +kgelog_low.data.frame <- function( + data, + truth, + estimate, + na_rm = TRUE, + ... +) { + yardstick::numeric_metric_summarizer( + name = "kgelog_low", + fn = kgelog_low_vec, + data = data, + truth = !!rlang::enquo(truth), + estimate = !!rlang::enquo(estimate), + na_rm = na_rm + ) +} + +#' @rdname kgelog +#' @export +kgelog_low_vec <- function( + truth, + estimate, + na_rm = TRUE, + ... +) { + # checks + checkmate::assert_numeric( + truth, + lower = 1e-323 + ) + checkmate::assert_numeric( + estimate, + lower = 1e-323 + ) + + # Keep only low flows + min_q <- min(truth, na.rm = TRUE) + max_q <- max(truth, na.rm = TRUE) + threshold <- (min_q + 0.05 * (max_q - min_q)) + checks <- truth <= threshold + + # Log-transform + truth_log <- log10(truth[checks]) + estimate_log <- log10(estimate[checks]) + + # More checks + yardstick::check_numeric_metric( + truth_log, + estimate_log, + case_weights = NULL + ) + + kge_cpp( + truth_log, + estimate_log, + na_rm = na_rm, + version = "2012" + ) +} + +#' @rdname kgelog +#' @export +kgelog_hi <- function(data, ...) 
{ + UseMethod("kgelog_hi") +} + +kgelog_hi <- yardstick::new_numeric_metric( + kgelog_hi, + direction = "maximize" +) + +#' @rdname kgelog +#' @export +kgelog_hi.data.frame <- function( + data, + truth, + estimate, + na_rm = TRUE, + ... +) { + yardstick::numeric_metric_summarizer( + name = "kgelog_hi", + fn = kgelog_hi, + data = data, + truth = !!rlang::enquo(truth), + estimate = !!rlang::enquo(estimate), + na_rm = na_rm + ) +} + +#' @rdname kgelog +#' @export +kgelog_hi_vec <- function( + truth, + estimate, + na_rm = TRUE, + ... +) { + # checks + checkmate::assert_numeric( + truth, + lower = 1e-323 + ) + checkmate::assert_numeric( + estimate, + lower = 1e-323 + ) + + # Keep only high flows + min_q <- min(truth, na.rm = TRUE) + max_q <- max(truth, na.rm = TRUE) + threshold <- (min_q + 0.05 * (max_q - min_q)) + checks <- truth > threshold + + # Log-transform + truth_log <- log10(truth[checks]) + estimate_log <- log10(estimate[checks]) + + # More checks + yardstick::check_numeric_metric( + truth_log, + estimate_log, + case_weights = NULL + ) + + kge_cpp( + truth_log, + estimate_log, + na_rm = na_rm, + version = "2012" + ) +} diff --git a/R/mse.R b/R/mse.R index 5c8d938..fee84ec 100644 --- a/R/mse.R +++ b/R/mse.R @@ -1,100 +1,186 @@ -#' Mean Squared Error (MSE) -#' @keywords gof -#' -#' @description -#' The MSE is a metric that evaluates the goodness of fit between model -#' simulations and observations (*Fisher, 1920*). Measured in the squared -#' units of `truth` and `estimate` and can vary from \eqn{-\infty} to -#' \eqn{+\infty}.
-#' -#' @details -#' The MSE is estimated as follows (Clark et al., 2021): -#' \deqn{ -#' MSE = \frac{1}{n} \sum_{i=1}^{n}{(sim_i - obs_i)^2} -#' } -#' where: -#' \itemize{ -#' \item \eqn{sim} defines model simulations at time step \eqn{i} -#' \item \eqn{obs} defines model observations at time step \eqn{i} -#' } -#' -#' @family numeric metrics -#' @family accuracy metrics -#' @templateVar fn mse -#' @template return -#' -#' @param data A `data.frame` containing the columns specified by the `truth` -#' and `estimate` arguments. -#' -#' @param truth The column identifier for the true results -#' (that is `numeric`). This should be an unquoted column name although -#' this argument is passed by expression and supports -#' [quasiquotation][rlang::quasiquotation] (you can unquote column -#' names). For `_vec()` functions, a `numeric` vector. -#' -#' @param estimate The column identifier for the predicted -#' results (that is also `numeric`). As with `truth` this can be -#' specified different ways but the primary method is to use an -#' unquoted variable name. For `_vec()` functions, a `numeric` vector. -#' -#' @param na_rm A `logical` value indicating whether `NA` -#' values should be stripped before the computation proceeds. -#' -#' @param ... Not currently used. -#' -#' @references -#' Fisher, R. A. (1920). Accuracy of observation, a mathematical -#' examination of the methods of determining, by the mean error and -#' by the mean square error. Monthly Notices of the Royal Astronomical -#' Society, 80, 758–770. \doi{10.1093/mnras/80.8.758} -#' -#' Clark, M. P., Vogel, R. M., Lamontagne, J. R., Mizukami, N., -#' Knoben, W. J. M., Tang, G., Gharari, S., Freer, J. E., Whitfield, -#' P. H., Shook, K. R., & Papalexiou, S. M. (2021). The Abuse of Popular -#' Performance Metrics in Hydrologic Modeling. Water Resources Research, 57(9), -#' e2020WR029001. \doi{10.1029/2020WR029001} -#' -#' @template examples-numeric -#' -#' @export -#' -mse <- function(data, ...) 
{ - UseMethod("mse") -} - -mse <- yardstick::new_numeric_metric( - mse, - direction = "minimize" -) - -#' @rdname mse -#' @export -mse.data.frame <- function( - data, - truth, - estimate, - na_rm = TRUE, - ... -) { - yardstick::numeric_metric_summarizer( - name = "mse", - fn = mse_vec, - data = data, - truth = !!rlang::enquo(truth), - estimate = !!rlang::enquo(estimate), - na_rm = na_rm - ) -} - -#' @rdname mse -#' @export -mse_vec <- function( - truth, - estimate, - na_rm = TRUE, - ... -) { - yardstick::check_numeric_metric(truth, estimate, case_weights = NULL) - - mse_cpp(truth, estimate, na_rm = na_rm) -} +#' Mean Squared Error (MSE) +#' @keywords gof +#' +#' @description +#' The MSE is a metric that evaluates the goodness of fit between model +#' simulations and observations (*Fisher, 1920*). Measured in the squared +#' units of `truth` and `estimate` and can vary from 0 to +#' \eqn{+\infty}. +#' +#' @details +#' The MSE is estimated as follows (Clark et al., 2021): +#' \deqn{ +#' MSE = \frac{1}{n} \sum_{i=1}^{n}{(sim_i - obs_i)^2} +#' } +#' where: +#' \itemize{ +#' \item \eqn{sim} defines model simulations at time step \eqn{i} +#' \item \eqn{obs} defines model observations at time step \eqn{i} +#' } +#' +#' @family numeric metrics +#' @family accuracy metrics +#' @templateVar fn mse +#' @template return +#' +#' @param data A `data.frame` containing the columns specified by the `truth` +#' and `estimate` arguments. +#' +#' @param truth The column identifier for the true results +#' (that is `numeric`). This should be an unquoted column name although +#' this argument is passed by expression and supports +#' [quasiquotation][rlang::quasiquotation] (you can unquote column +#' names). For `_vec()` functions, a `numeric` vector. +#' +#' @param estimate The column identifier for the predicted +#' results (that is also `numeric`). As with `truth` this can be +#' specified different ways but the primary method is to use an +#' unquoted variable name.
For `_vec()` functions, a `numeric` vector. +#' +#' @param na_rm A `logical` value indicating whether `NA` +#' values should be stripped before the computation proceeds. +#' +#' @param ... Not currently used. +#' +#' @references +#' Fisher, R. A. (1920). Accuracy of observation, a mathematical +#' examination of the methods of determining, by the mean error and +#' by the mean square error. Monthly Notices of the Royal Astronomical +#' Society, 80, 758–770. \doi{10.1093/mnras/80.8.758} +#' +#' Clark, M. P., Vogel, R. M., Lamontagne, J. R., Mizukami, N., +#' Knoben, W. J. M., Tang, G., Gharari, S., Freer, J. E., Whitfield, +#' P. H., Shook, K. R., & Papalexiou, S. M. (2021). The Abuse of Popular +#' Performance Metrics in Hydrologic Modeling. Water Resources Research, 57(9), +#' e2020WR029001. \doi{10.1029/2020WR029001} +#' +#' @template examples-numeric +#' +#' @export +#' +mse <- function(data, ...) { + UseMethod("mse") +} + +mse <- yardstick::new_numeric_metric( + mse, + direction = "minimize" +) + +#' @rdname mse +#' @export +mse.data.frame <- function( + data, + truth, + estimate, + na_rm = TRUE, + ... +) { + yardstick::numeric_metric_summarizer( + name = "mse", + fn = mse_vec, + data = data, + truth = !!rlang::enquo(truth), + estimate = !!rlang::enquo(estimate), + na_rm = na_rm + ) +} + +#' @rdname mse +#' @export +mse_vec <- function( + truth, + estimate, + na_rm = TRUE, + ... 
+) { + yardstick::check_numeric_metric(truth, estimate, case_weights = NULL) + + mse_cpp(truth, estimate, na_rm = na_rm, sqrt = FALSE) +} + +# TODO: +# Add description and details + +#' Root Mean Squared Error (RMSE) +#' @keywords gof +#' +#' @details +#' The RMSE is estimated as follows: +#' \deqn{ +#' RMSE = \sqrt{\frac{1}{n} \sum_{i=1}^{n}{(sim_i - obs_i)^2}} +#' } +#' where: +#' \itemize{ +#' \item \eqn{sim} defines model simulations at time step \eqn{i} +#' \item \eqn{obs} defines model observations at time step \eqn{i} +#' } +#' +#' @family numeric metrics +#' @family accuracy metrics +#' @templateVar fn rmse +#' @template return +#' +#' @param data A `data.frame` containing the columns specified by the `truth` +#' and `estimate` arguments. +#' +#' @param truth The column identifier for the true results +#' (that is `numeric`). This should be an unquoted column name although +#' this argument is passed by expression and supports +#' [quasiquotation][rlang::quasiquotation] (you can unquote column +#' names). For `_vec()` functions, a `numeric` vector. +#' +#' @param estimate The column identifier for the predicted +#' results (that is also `numeric`). As with `truth` this can be +#' specified different ways but the primary method is to use an +#' unquoted variable name. For `_vec()` functions, a `numeric` vector. +#' +#' @param na_rm A `logical` value indicating whether `NA` +#' values should be stripped before the computation proceeds. +#' +#' @param ... Not currently used. +#' +#' @template examples-numeric +#' +#' @export +#' +rmse <- function(data, ...) { + UseMethod("rmse") +} + +rmse <- yardstick::new_numeric_metric( + rmse, + direction = "minimize" +) + +#' @rdname rmse +#' @export +rmse.data.frame <- function( + data, + truth, + estimate, + na_rm = TRUE, + ... 
+) { + yardstick::numeric_metric_summarizer( + name = "rmse", + fn = rmse_vec, + data = data, + truth = !!rlang::enquo(truth), + estimate = !!rlang::enquo(estimate), + na_rm = na_rm + ) +} + +#' @rdname rmse +#' @export +rmse_vec <- function( + truth, + estimate, + na_rm = TRUE, + ... +) { + yardstick::check_numeric_metric(truth, estimate, case_weights = NULL) + + mse_cpp(truth, estimate, na_rm = na_rm, sqrt = TRUE) +} diff --git a/R/nse.R b/R/nse.R index 805a25c..b6973f0 100644 --- a/R/nse.R +++ b/R/nse.R @@ -1,121 +1,120 @@ -#' Nash-Sutcliffe Efficiency (NSE) -#' @keywords gof -#' -#' @description -#' Calculate the Nash-Sutcliffe efficiency (*Nash & Sutcliffe, 1970*). -#' Dimensionless (from \eqn{-\infty} to 1). `nse()` indicates how well the plot -#' of observed versus simulated data fits the 1:1 line. -#' -#' @details -#' The Nash-Sutcliffe efficiency is a normalized statistic that determines -#' the relative magnitude of the residual variance ("noise") compared to the -#' measured data variance ("information"; *Nash and Sutcliffe, 1970*). -#' -#' The formula for NSE is: -#' -#' \deqn{ -#' NSE = 1 - \frac{ -#' \sum_{i=1}^{n}{(sim_i - obs_i)^2} -#' }{ -#' \sum_{i=1}^{n}{(obs_i - \mu_{obs})^2} -#' } -#' } -#' where: -#' \itemize{ -#' \item \eqn{sim} defines model simulations at time step \eqn{i} -#' \item \eqn{obs} defines model observations at time step \eqn{i} -#' \item \eqn{\mu_{obs}} defines mean of model observations -#' } -#' -#' According to Moriasi et al. (2015) the metric interpretation can be -#' as follows: -#' -#' - **Excellent**/**Very Good** -- `nse()` > 0.8 -#' - **Good** -- 0.6 <= `nse()` <= 0.8 -#' - **Satisfactory** -- 0.5 < `nse()` < 0.6 -#' - **Poor** -- `nse()` <= 0.5 -#' -#' @family numeric metrics -#' @family accuracy metrics -#' @templateVar fn nse -#' @template return -#' -#' @param data A `data.frame` containing the columns specified by the `truth` -#' and `estimate` arguments. 
-#' -#' @param truth The column identifier for the true results -#' (that is `numeric`). This should be an unquoted column name although -#' this argument is passed by expression and supports -#' [quasiquotation][rlang::quasiquotation] (you can unquote column -#' names). For `_vec()` functions, a `numeric` vector. -#' -#' @param estimate The column identifier for the predicted -#' results (that is also `numeric`). As with `truth` this can be -#' specified different ways but the primary method is to use an -#' unquoted variable name. For `_vec()` functions, a `numeric` vector. -#' -#' @param na_rm A `logical` value indicating whether `NA` -#' values should be stripped before the computation proceeds. -#' -#' @param performance The optional column, indicating should the `nse()` return -#' metric interpretation. See details. -#' -#' @param ... Not currently used. -#' -#' @references -#' Nash, J. E., & Sutcliffe, J. V. (1970). River flow forecasting through -#' conceptual models part I — A discussion of principles. Journal of Hydrology, -#' 10(3), 282–290. \doi{10.1016/0022-1694(70)90255-6} -#' -#' Moriasi, D. N., Gitau, M. W., Pai, N., & Daggupati, P. (2015). Hydrologic -#' and Water Quality Models: Performance Measures and Evaluation Criteria. -#' Transactions of the ASABE, 58(6), 1763–1785. -#' \doi{10.13031/trans.58.10715} -#' -#' @template examples-numeric -#' -#' @export -#' -nse <- function(data, ...) { - UseMethod("nse") -} - -nse <- yardstick::new_numeric_metric( - nse, - direction = "maximize" -) - -#' @rdname nse -#' @export -nse.data.frame <- function( - data, - truth, - estimate, - na_rm = TRUE, - performance = FALSE, - ... 
-) { - yardstick::numeric_metric_summarizer( - name = "nse", - fn = nse_vec, - data = data, - truth = !!rlang::enquo(truth), - estimate = !!rlang::enquo(estimate), - na_rm = na_rm, - fn_options = list(performance = performance) - ) -} - -#' @rdname nse -#' @export -nse_vec <- function( - truth, - estimate, - na_rm = TRUE, - performance = FALSE, - ... -) { - yardstick::check_numeric_metric(truth, estimate, case_weights = NULL) - - nse_cpp(truth, estimate, na_rm = na_rm, performance = performance) -} +#' Nash-Sutcliffe Efficiency (NSE) +#' @keywords gof +#' +#' @description +#' Calculate the Nash-Sutcliffe efficiency (*Nash & Sutcliffe, 1970*). +#' Dimensionless (from \eqn{-\infty} to 1). `nse()` indicates how well the plot +#' of observed versus simulated data fits the 1:1 line. +#' +#' @details +#' The Nash-Sutcliffe efficiency is a normalized statistic that determines +#' the relative magnitude of the residual variance ("noise") compared to the +#' measured data variance ("information"; *Nash and Sutcliffe, 1970*). +#' +#' The formula for NSE is: +#' +#' \deqn{ +#' NSE = 1 - \frac{ +#' \sum_{i=1}^{n}{(sim_i - obs_i)^2} +#' }{ +#' \sum_{i=1}^{n}{(obs_i - \mu_{obs})^2} +#' } +#' } +#' where: +#' \itemize{ +#' \item \eqn{sim} defines model simulations at time step \eqn{i} +#' \item \eqn{obs} defines model observations at time step \eqn{i} +#' \item \eqn{\mu_{obs}} defines mean of model observations +#' } +#' +#' According to Moriasi et al. (2015) the metric interpretation can be +#' as follows: +#' +#' - **Excellent**/**Very Good** -- `nse()` > 0.8 +#' - **Good** -- 0.6 <= `nse()` <= 0.8 +#' - **Satisfactory** -- 0.5 < `nse()` < 0.6 +#' - **Poor** -- `nse()` <= 0.5 +#' +#' @family NSE variants +#' @templateVar fn nse +#' @template return +#' +#' @param data A `data.frame` containing the columns specified by the `truth` +#' and `estimate` arguments. +#' +#' @param truth The column identifier for the true results +#' (that is `numeric`). 
This should be an unquoted column name although +#' this argument is passed by expression and supports +#' [quasiquotation][rlang::quasiquotation] (you can unquote column +#' names). For `_vec()` functions, a `numeric` vector. +#' +#' @param estimate The column identifier for the predicted +#' results (that is also `numeric`). As with `truth` this can be +#' specified different ways but the primary method is to use an +#' unquoted variable name. For `_vec()` functions, a `numeric` vector. +#' +#' @param na_rm A `logical` value indicating whether `NA` +#' values should be stripped before the computation proceeds. +#' +#' @param performance A `logical` value indicating whether `nse()` should +#' return the metric interpretation. See details. +#' +#' @param ... Not currently used. +#' +#' @references +#' Nash, J. E., & Sutcliffe, J. V. (1970). River flow forecasting through +#' conceptual models part I — A discussion of principles. Journal of Hydrology, +#' 10(3), 282–290. \doi{10.1016/0022-1694(70)90255-6} +#' +#' Moriasi, D. N., Gitau, M. W., Pai, N., & Daggupati, P. (2015). Hydrologic +#' and Water Quality Models: Performance Measures and Evaluation Criteria. +#' Transactions of the ASABE, 58(6), 1763–1785. +#' \doi{10.13031/trans.58.10715} +#' +#' @template examples-numeric +#' +#' @export +#' +nse <- function(data, ...) { + UseMethod("nse") +} + +nse <- yardstick::new_numeric_metric( + nse, + direction = "maximize" +) + +#' @rdname nse +#' @export +nse.data.frame <- function( + data, + truth, + estimate, + na_rm = TRUE, + performance = FALSE, + ... +) { + yardstick::numeric_metric_summarizer( + name = "nse", + fn = nse_vec, + data = data, + truth = !!rlang::enquo(truth), + estimate = !!rlang::enquo(estimate), + na_rm = na_rm, + fn_options = list(performance = performance) + ) +} + +#' @rdname nse +#' @export +nse_vec <- function( + truth, + estimate, + na_rm = TRUE, + performance = FALSE, + ... 
+) { + yardstick::check_numeric_metric(truth, estimate, case_weights = NULL) + + nse_cpp(truth, estimate, na_rm = na_rm, performance = performance) +} diff --git a/R/pbias.R b/R/pbias.R index 4c4f069..dd7f8f9 100644 --- a/R/pbias.R +++ b/R/pbias.R @@ -1,119 +1,118 @@ -#' Percent BIAS (pBIAS) -#' @keywords gof -#' -#' @description -#' \eqn{pBIAS} is the deviation of data being evaluated, expressed as a -#' percentage. It measures the average tendency of the simulated data to be -#' larger or smaller than their observed counterparts (*Moriasi et al., 2015*). -#' The optimal value of \eqn{pBIAS} is 0.0, with low-magnitude values -#' indicating accurate mode simulation. Positive values indicate model -#' underestimation bias, and negative values indicate model overestimation -#' bias (*Gupta et al., 1999*). -#' -#' @details -#' The formula for \eqn{pBIAS} is: -#' -#' \deqn{ -#' pBIAS = 100 \times \frac{\sum_{i=1}^{n}{(sim_i - obs_i)}} -#' {\sum_{i=1}^{n}{obs_i}} -#' } -#' -#' where: -#' \itemize{ -#' \item \eqn{sim} defines model simulations at time step \eqn{i} -#' \item \eqn{obs} defines model observations at time step \eqn{i} -#' } -#' -#' According to Moriasi et al. (2015) the metric interpretation can be as -#' follows: -#' -#' - **Excellent**/**Very Good** -- `pbias()` < ±5.0 -#' - **Good** -- ±5.0 <= `pbias()` < ±10.0 -#' - **Satisfactory** -- ±10.0 <= `pbias()` < ±15.0 -#' - **Poor** -- `pbias()` >= ±15.0 -#' -#' @family numeric metrics -#' @family accuracy metrics -#' @templateVar fn pbias -#' @template return -#' -#' @param data A `data.frame` containing the columns specified by the `truth` -#' and `estimate` arguments. -#' -#' @param truth The column identifier for the true results -#' (that is `numeric`). This should be an unquoted column name although -#' this argument is passed by expression and supports -#' [quasiquotation][rlang::quasiquotation] (you can unquote column -#' names). For `_vec()` functions, a `numeric` vector. 
-#' -#' @param estimate The column identifier for the predicted -#' results (that is also `numeric`). As with `truth` this can be -#' specified different ways but the primary method is to use an -#' unquoted variable name. For `_vec()` functions, a `numeric` vector. -#' -#' @param na_rm A `logical` value indicating whether `NA` -#' values should be stripped before the computation proceeds. -#' -#' @param performance The optional column, indicating should the `pbias()` -#' return metric interpretation. See details. -#' -#' @param ... Not currently used. -#' -#' @references -#' Moriasi, D. N., Gitau, M. W., Pai, N., & Daggupati, P. (2015). Hydrologic -#' and Water Quality Models: Performance Measures and Evaluation Criteria. -#' Transactions of the ASABE, 58(6), 1763–1785. -#' \doi{10.13031/trans.58.10715} -#' -#' Gupta, H. V., S. Sorooshian, and P. O. Yapo. (1999). -#' Status of automatic calibration for hydrologic models: Comparison with -#' multilevel expert calibration. J. Hydrologic Eng. 4(2): 135-143 -#' \doi{10.1061/(ASCE)1084-0699(1999)4:2(135)} -#' -#' @template examples-numeric -#' -#' @export -#' -pbias <- function(data, ...) { - UseMethod("pbias") -} - -pbias <- yardstick::new_numeric_metric( - pbias, - direction = "minimize" -) - -#' @rdname pbias -#' @export -pbias.data.frame <- function( - data, - truth, - estimate, - na_rm = TRUE, - performance = FALSE, - ... -) { - yardstick::numeric_metric_summarizer( - name = "pbias", - fn = pbias_vec, - data = data, - truth = !!rlang::enquo(truth), - estimate = !!rlang::enquo(estimate), - na_rm = na_rm, - fn_options = list(performance = performance) - ) -} - -#' @rdname pbias -#' @export -pbias_vec <- function( - truth, - estimate, - na_rm = TRUE, - performance = FALSE, - ... 
-) { - yardstick::check_numeric_metric(truth, estimate, case_weights = NULL) - - pbias_cpp(truth, estimate, na_rm = na_rm, performance = performance) -} +#' Percent BIAS (pBIAS) +#' @keywords gof +#' +#' @description +#' \eqn{pBIAS} is the deviation of data being evaluated, expressed as a +#' percentage. It measures the average tendency of the simulated data to be +#' larger or smaller than their observed counterparts (*Moriasi et al., 2015*). +#' The optimal value of \eqn{pBIAS} is 0.0, with low-magnitude values +#' indicating accurate model simulation. Positive values indicate model +#' underestimation bias, and negative values indicate model overestimation +#' bias (*Gupta et al., 1999*). +#' +#' @details +#' The formula for \eqn{pBIAS} is: +#' +#' \deqn{ +#' pBIAS = 100 \times \frac{\sum_{i=1}^{n}{(sim_i - obs_i)}} +#' {\sum_{i=1}^{n}{obs_i}} +#' } +#' +#' where: +#' \itemize{ +#' \item \eqn{sim} defines model simulations at time step \eqn{i} +#' \item \eqn{obs} defines model observations at time step \eqn{i} +#' } +#' +#' According to Moriasi et al. (2015) the metric interpretation can be as +#' follows: +#' +#' - **Excellent**/**Very Good** -- `pbias()` < ±5.0 +#' - **Good** -- ±5.0 <= `pbias()` < ±10.0 +#' - **Satisfactory** -- ±10.0 <= `pbias()` < ±15.0 +#' - **Poor** -- `pbias()` >= ±15.0 +#' +#' @family numeric metrics +#' @templateVar fn pbias +#' @template return +#' +#' @param data A `data.frame` containing the columns specified by the `truth` +#' and `estimate` arguments. +#' +#' @param truth The column identifier for the true results +#' (that is `numeric`). This should be an unquoted column name although +#' this argument is passed by expression and supports +#' [quasiquotation][rlang::quasiquotation] (you can unquote column +#' names). For `_vec()` functions, a `numeric` vector. +#' +#' @param estimate The column identifier for the predicted +#' results (that is also `numeric`). 
As with `truth` this can be +#' specified different ways but the primary method is to use an +#' unquoted variable name. For `_vec()` functions, a `numeric` vector. +#' +#' @param na_rm A `logical` value indicating whether `NA` +#' values should be stripped before the computation proceeds. +#' +#' @param performance A `logical` value indicating whether `pbias()` should +#' return the metric interpretation. See details. +#' +#' @param ... Not currently used. +#' +#' @references +#' Moriasi, D. N., Gitau, M. W., Pai, N., & Daggupati, P. (2015). Hydrologic +#' and Water Quality Models: Performance Measures and Evaluation Criteria. +#' Transactions of the ASABE, 58(6), 1763–1785. +#' \doi{10.13031/trans.58.10715} +#' +#' Gupta, H. V., S. Sorooshian, and P. O. Yapo. (1999). +#' Status of automatic calibration for hydrologic models: Comparison with +#' multilevel expert calibration. J. Hydrologic Eng. 4(2): 135-143 +#' \doi{10.1061/(ASCE)1084-0699(1999)4:2(135)} +#' +#' @template examples-numeric +#' +#' @export +#' +pbias <- function(data, ...) { + UseMethod("pbias") +} + +pbias <- yardstick::new_numeric_metric( + pbias, + direction = "minimize" +) + +#' @rdname pbias +#' @export +pbias.data.frame <- function( + data, + truth, + estimate, + na_rm = TRUE, + performance = FALSE, + ... +) { + yardstick::numeric_metric_summarizer( + name = "pbias", + fn = pbias_vec, + data = data, + truth = !!rlang::enquo(truth), + estimate = !!rlang::enquo(estimate), + na_rm = na_rm, + fn_options = list(performance = performance) + ) +} + +#' @rdname pbias +#' @export +pbias_vec <- function( + truth, + estimate, + na_rm = TRUE, + performance = FALSE, + ... 
+) { + yardstick::check_numeric_metric(truth, estimate, case_weights = NULL) + + pbias_cpp(truth, estimate, na_rm = na_rm, performance = performance) +} diff --git a/R/press.R b/R/press.R index 308ce6f..95e094a 100644 --- a/R/press.R +++ b/R/press.R @@ -1,112 +1,111 @@ -#' PRediction Error Sum of Squares (PRESS) -#' @keywords regression -#' -#' @description -#' \eqn{PRESS} is a measure of the quality of a regression model using -#' residuals. \eqn{PRESS} is a validation-type estimator of error that uses -#' the deleted residuals to provide an estimate of the prediction error. -#' When comparing alternate regression models, selecting the model with the -#' lowest value of the \eqn{PRESS} statistic is a good approach because it -#' means that the equation produces the least error when making new predictions -#' (see *Helsel et al., 2020*). -#' -#' It is particularly valuable in assessing multiple forms of multiple -#' linear regressions, but it is also useful for -#' simply comparing different options for a single explanatory variable in -#' single-variable regression models. -#' -#' @details -#' The \eqn{PRESS} is only relevant for comparisons to other regression models -#' with the same response variable units (*Rasmunsen et al., 2009*). -#' -#' It estimates as follows: -#' \deqn{ -#' PRESS = \sum_{i=1}^{n}{(sim_i - obs_i)^2} -#' } -#' -#' where: -#' \itemize{ -#' \item \eqn{sim} defines model simulations at time step \eqn{i} -#' \item \eqn{obs} defines model observations at time step \eqn{i} -#' } -#' -#' @note -#' The $PRESS$ statistic is not appropriate for comparison of models having -#' different transformations of response variable, e.g. linear regression and -#' log-transformed linear regression (*Helsel et al., 2020*). -#' -#' @family numeric metrics -#' @family accuracy metrics -#' @templateVar fn press -#' @template return -#' -#' @param data A `data.frame` containing the columns specified by the `truth` -#' and `estimate` arguments. 
-#' @param truth The column identifier for the true results -#' (that is `numeric`). This should be an unquoted column name although -#' this argument is passed by expression and supports -#' [quasiquotation][rlang::quasiquotation] (you can unquote column -#' names). For `_vec()` functions, a `numeric` vector. -#' @param estimate The column identifier for the predicted -#' results (that is also `numeric`). As with `truth` this can be -#' specified different ways but the primary method is to use an -#' unquoted variable name. For `_vec()` functions, a `numeric` vector. -#' @param na_rm A `logical` value indicating whether `NA` -#' values should be stripped before the computation proceeds. -#' @param ... Not currently used. -#' -#' @references -#' Rasmussen, P. P., Gray, J. R., Glysson, G. D. & Ziegler, A. C. -#' Guidelines and procedures for computing time-series suspended-sediment -#' concentrations and loads from in-stream turbidity-sensor and streamflow -#' data. in U.S. Geological Survey Techniques and Methods book 3, chap. -#' C4 53 (2009) \url{https://pubs.usgs.gov/tm/tm3c4/}. -#' -#' Helsel, D. R., Hirsch, R. M., Ryberg, K. R., Archfield, S. A. & -#' Gilroy, E. J. Statistical Methods in Water Resources. 484 (2020) -#' \doi{10.3133/tm4A3}. -#' -#' @template examples-numeric -#' -#' @export -#' -press <- function(data, ...) { - UseMethod("press") -} - -press <- yardstick::new_numeric_metric( - press, - direction = "minimize" -) - -#' @rdname press -#' @export -press.data.frame <- function( - data, - truth, - estimate, - na_rm = TRUE, - ... -) { - yardstick::numeric_metric_summarizer( - name = "press", - fn = press_vec, - data = data, - truth = !!rlang::enquo(truth), - estimate = !!rlang::enquo(estimate), - na_rm = na_rm - ) -} - -#' @rdname press -#' @export -press_vec <- function( - truth, - estimate, - na_rm = TRUE, - ... 
-) { - yardstick::check_numeric_metric(truth, estimate, case_weights = NULL) - - press_cpp(truth, estimate, na_rm = na_rm) -} +#' PRediction Error Sum of Squares (PRESS) +#' @keywords regression +#' +#' @description +#' \eqn{PRESS} is a measure of the quality of a regression model using +#' residuals. \eqn{PRESS} is a validation-type estimator of error that uses +#' the deleted residuals to provide an estimate of the prediction error. +#' When comparing alternate regression models, selecting the model with the +#' lowest value of the \eqn{PRESS} statistic is a good approach because it +#' means that the equation produces the least error when making new predictions +#' (see *Helsel et al., 2020*). +#' +#' It is particularly valuable in assessing multiple forms of multiple +#' linear regressions, but it is also useful for +#' simply comparing different options for a single explanatory variable in +#' single-variable regression models. +#' +#' @details +#' The \eqn{PRESS} is only relevant for comparisons to other regression models +#' with the same response variable units (*Rasmussen et al., 2009*). +#' +#' It is estimated as follows: +#' \deqn{ +#' PRESS = \sum_{i=1}^{n}{(sim_i - obs_i)^2} +#' } +#' +#' where: +#' \itemize{ +#' \item \eqn{sim} defines model simulations at time step \eqn{i} +#' \item \eqn{obs} defines model observations at time step \eqn{i} +#' } +#' +#' @note +#' The \eqn{PRESS} statistic is not appropriate for comparison of models having +#' different transformations of response variable, e.g. linear regression and +#' log-transformed linear regression (*Helsel et al., 2020*). +#' +#' @family regression metrics +#' @templateVar fn press +#' @template return +#' +#' @param data A `data.frame` containing the columns specified by the `truth` +#' and `estimate` arguments. +#' @param truth The column identifier for the true results +#' (that is `numeric`). 
This should be an unquoted column name although +#' this argument is passed by expression and supports +#' [quasiquotation][rlang::quasiquotation] (you can unquote column +#' names). For `_vec()` functions, a `numeric` vector. +#' @param estimate The column identifier for the predicted +#' results (that is also `numeric`). As with `truth` this can be +#' specified different ways but the primary method is to use an +#' unquoted variable name. For `_vec()` functions, a `numeric` vector. +#' @param na_rm A `logical` value indicating whether `NA` +#' values should be stripped before the computation proceeds. +#' @param ... Not currently used. +#' +#' @references +#' Rasmussen, P. P., Gray, J. R., Glysson, G. D. & Ziegler, A. C. +#' Guidelines and procedures for computing time-series suspended-sediment +#' concentrations and loads from in-stream turbidity-sensor and streamflow +#' data. in U.S. Geological Survey Techniques and Methods book 3, chap. +#' C4 53 (2009) \url{https://pubs.usgs.gov/tm/tm3c4/}. +#' +#' Helsel, D. R., Hirsch, R. M., Ryberg, K. R., Archfield, S. A. & +#' Gilroy, E. J. Statistical Methods in Water Resources. 484 (2020) +#' \doi{10.3133/tm4A3}. +#' +#' @template examples-numeric +#' +#' @export +#' +press <- function(data, ...) { + UseMethod("press") +} + +press <- yardstick::new_numeric_metric( + press, + direction = "minimize" +) + +#' @rdname press +#' @export +press.data.frame <- function( + data, + truth, + estimate, + na_rm = TRUE, + ... +) { + yardstick::numeric_metric_summarizer( + name = "press", + fn = press_vec, + data = data, + truth = !!rlang::enquo(truth), + estimate = !!rlang::enquo(estimate), + na_rm = na_rm + ) +} + +#' @rdname press +#' @export +press_vec <- function( + truth, + estimate, + na_rm = TRUE, + ... 
+) { + yardstick::check_numeric_metric(truth, estimate, case_weights = NULL) + + press_cpp(truth, estimate, na_rm = na_rm) +} diff --git a/R/sfe.R b/R/sfe.R index 85e4639..d851d87 100644 --- a/R/sfe.R +++ b/R/sfe.R @@ -1,101 +1,100 @@ -#' Standard Factorial Error (SFE) -#' @keywords regression -#' -#' @description -#' Prediction standard factorial error estimated -#' using standard regression methods (see *Herschy, 1978*). -#' -#' @details -#' The metric is widely used for assessing Sediment Rating Curves -#' (e.g., Hicks et al. 2020). The model is usually considered 'unacceptable' -#' if the \eqn{SFE > 2}, see Hicks et al. (2011). -#' -#' It is estimated as follows: -#' \deqn{SFE = \exp\left(\sqrt{\frac{1}{n} \sum_{i=1}^{n} -#' \left( \log\left(\frac{obs_i}{sim_i} \right) \right)^2 }\right)} -#' where: -#' \itemize{ -#' \item \eqn{sim} defines model simulations at time step \eqn{i} -#' \item \eqn{obs} defines model observations at time step \eqn{i} -#' } -#' -#' @family numeric metrics -#' @family accuracy metrics -#' @templateVar fn sfe -#' @template return -#' -#' @param data A `data.frame` containing the columns specified by the `truth` -#' and `estimate` arguments. -#' @param truth The column identifier for the true results -#' (that is `numeric`). This should be an unquoted column name although -#' this argument is passed by expression and supports -#' [quasiquotation][rlang::quasiquotation] (you can unquote column -#' names). For `_vec()` functions, a `numeric` vector. -#' @param estimate The column identifier for the predicted -#' results (that is also `numeric`). As with `truth` this can be -#' specified different ways but the primary method is to use an -#' unquoted variable name. For `_vec()` functions, a `numeric` vector. -#' @param na_rm A `logical` value indicating whether `NA` -#' values should be stripped before the computation proceeds. -#' @param ... Not currently used. -#' -#' @references -#' Herschy, R.W. 1978: Accuracy. 
Chapter 10 In: Herschy, R.W. (ed.) -#' Hydrometry - principles and practices. John Wiley and Sons, Chichester, -#' 511 p. -#' -#' Hicks, D. M., Shankar, U., McKerchar, A. I., Basher, L., Lynn, I., -#' Page, M., & Jessen, M. (2011). Suspended Sediment Yields from New Zealand -#' Rivers. Journal of Hydrology (New Zealand), 50(1), 81–142. -#' \doi{10.3316/informit.315190637227597} -#' -#' Hicks, M., Doyle, M., Watson, J., Holwerda, N., Lynch, B., Wyatt, J., -#' Jones, H., & Hill, R. (2020). Measurement of Fluvial Suspended Sediment -#' Load and its Composition (No. 1.0.0; National Environmental Monitoring -#' Standards, p. 138). -#' \url{https://www.nems.org.nz/documents/suspended-sediment} -#' -#' @template examples-numeric -#' -#' @export -#' -sfe <- function(data, ...) { - UseMethod("sfe") -} - -sfe <- yardstick::new_numeric_metric( - sfe, - direction = "minimize" -) - -#' @rdname sfe -#' @export -sfe.data.frame <- function( - data, - truth, - estimate, - na_rm = TRUE, - ... -) { - yardstick::numeric_metric_summarizer( - name = "sfe", - fn = sfe_vec, - data = data, - truth = !!rlang::enquo(truth), - estimate = !!rlang::enquo(estimate), - na_rm = na_rm - ) -} - -#' @rdname sfe -#' @export -sfe_vec <- function( - truth, - estimate, - na_rm = TRUE, - ... -) { - yardstick::check_numeric_metric(truth, estimate, case_weights = NULL) - - sfe_cpp(truth, estimate, na_rm = na_rm) -} +#' Standard Factorial Error (SFE) +#' @keywords regression +#' +#' @description +#' Prediction standard factorial error estimated +#' using standard regression methods (see *Herschy, 1978*). +#' +#' @details +#' The metric is widely used for assessing Sediment Rating Curves +#' (e.g., Hicks et al. 2020). The model is usually considered 'unacceptable' +#' if the \eqn{SFE > 2}, see Hicks et al. (2011). 
+#' +#' It is estimated as follows: +#' \deqn{SFE = \exp\left(\sqrt{\frac{1}{n} \sum_{i=1}^{n} +#' \left( \log\left(\frac{obs_i}{sim_i} \right) \right)^2 }\right)} +#' where: +#' \itemize{ +#' \item \eqn{sim} defines model simulations at time step \eqn{i} +#' \item \eqn{obs} defines model observations at time step \eqn{i} +#' } +#' +#' @family regression metrics +#' @templateVar fn sfe +#' @template return +#' +#' @param data A `data.frame` containing the columns specified by the `truth` +#' and `estimate` arguments. +#' @param truth The column identifier for the true results +#' (that is `numeric`). This should be an unquoted column name although +#' this argument is passed by expression and supports +#' [quasiquotation][rlang::quasiquotation] (you can unquote column +#' names). For `_vec()` functions, a `numeric` vector. +#' @param estimate The column identifier for the predicted +#' results (that is also `numeric`). As with `truth` this can be +#' specified different ways but the primary method is to use an +#' unquoted variable name. For `_vec()` functions, a `numeric` vector. +#' @param na_rm A `logical` value indicating whether `NA` +#' values should be stripped before the computation proceeds. +#' @param ... Not currently used. +#' +#' @references +#' Herschy, R.W. 1978: Accuracy. Chapter 10 In: Herschy, R.W. (ed.) +#' Hydrometry - principles and practices. John Wiley and Sons, Chichester, +#' 511 p. +#' +#' Hicks, D. M., Shankar, U., McKerchar, A. I., Basher, L., Lynn, I., +#' Page, M., & Jessen, M. (2011). Suspended Sediment Yields from New Zealand +#' Rivers. Journal of Hydrology (New Zealand), 50(1), 81–142. +#' \doi{10.3316/informit.315190637227597} +#' +#' Hicks, M., Doyle, M., Watson, J., Holwerda, N., Lynch, B., Wyatt, J., +#' Jones, H., & Hill, R. (2020). Measurement of Fluvial Suspended Sediment +#' Load and its Composition (No. 1.0.0; National Environmental Monitoring +#' Standards, p. 138). 
+#' \url{https://www.nems.org.nz/documents/suspended-sediment} +#' +#' @template examples-numeric +#' +#' @export +#' +sfe <- function(data, ...) { + UseMethod("sfe") +} + +sfe <- yardstick::new_numeric_metric( + sfe, + direction = "minimize" +) + +#' @rdname sfe +#' @export +sfe.data.frame <- function( + data, + truth, + estimate, + na_rm = TRUE, + ... +) { + yardstick::numeric_metric_summarizer( + name = "sfe", + fn = sfe_vec, + data = data, + truth = !!rlang::enquo(truth), + estimate = !!rlang::enquo(estimate), + na_rm = na_rm + ) +} + +#' @rdname sfe +#' @export +sfe_vec <- function( + truth, + estimate, + na_rm = TRUE, + ... +) { + yardstick::check_numeric_metric(truth, estimate, case_weights = NULL) + + sfe_cpp(truth, estimate, na_rm = na_rm) +} diff --git a/R/tidyhydro-package.R b/R/tidyhydro-package.R index b8144fe..0cc9deb 100644 --- a/R/tidyhydro-package.R +++ b/R/tidyhydro-package.R @@ -1,2 +1,2 @@ -#' @keywords internal -"_PACKAGE" +#' @keywords internal +"_PACKAGE" diff --git a/R/variability.R b/R/variability.R new file mode 100644 index 0000000..e1f6608 --- /dev/null +++ b/R/variability.R @@ -0,0 +1,72 @@ +# TODO: +# - Add tests +# - Add description + +#' Coefficient of Variation (Cv) +#' @keywords summary_stats +#' +#' @family descriptive statistics +#' @templateVar fn cv +#' @template return +#' +#' @param data A `data.frame` containing the column specified by the `truth` +#' argument. +#' +#' @param truth The column identifier for the values to be summarized +#' (that is `numeric`). This should be an unquoted column name although +#' this argument is passed by expression and supports +#' [quasiquotation][rlang::quasiquotation] (you can unquote column +#' names). For `_vec()` functions, a `numeric` vector. +#' +#' @param na_rm A `logical` value indicating whether `NA` +#' values should be stripped before the computation proceeds. +#' +#' @param ... Not currently used. 
+#' +#' @template examples-description +#' +#' @export +#' + +cv <- function(data, ...) { + UseMethod("cv") +} + +cv <- new_var_measure(cv) + +#' @rdname cv +#' @export +cv.data.frame <- function( + data, + truth, + na_rm = TRUE, + ... +) { + yardstick::numeric_metric_summarizer( + name = "cv", + fn = cv_vec, + data = data, + truth = !!rlang::enquo(truth), + estimate = !!rlang::enquo(truth), + na_rm = na_rm + ) +} + +#' @rdname cv +#' @export +cv_vec <- function( + truth, + na_rm = TRUE, + ... +) { + yardstick::check_numeric_metric(truth, truth, case_weights = NULL) + + if (na_rm) { + truth <- truth[!is.na(truth)] + } + + x0 <- mean(truth) + k <- truth / x0 + + sqrt(sum((k - 1)^2) / (length(truth) - 1)) +} diff --git a/README.Rmd b/README.Rmd index 425e7f3..012439b 100644 --- a/README.Rmd +++ b/README.Rmd @@ -1,103 +1,114 @@ ---- -output: - github_document: - html_preview: false ---- - - - -```{r, include = FALSE} -knitr::opts_chunk$set( - collapse = TRUE, - comment = "#>", - fig.path = "man/figures/README-", - out.width = "100%" -) - -requireNamespace("hydroGOF", quietly = TRUE) -requireNamespace("bench", quietly = TRUE) -``` - -# tidyhydro - - -

- - - - - - - -

- - -The `tidyhydro` package provides a set of commonly used metrics in hydrology (such as _NSE_, _KGE_, _pBIAS_) for use within a [`tidymodels`](https://www.tidymodels.org/) infrastructure. Originally inspired by the [`yardstick`](https://github.com/tidymodels/yardstick/tree/main) and [`hydroGOF`](https://github.com/hzambran/hydroGOF) packages, this library is mainly written in C++ and provides a very quick estimation of desired goodness-of-fit criteria. - -Additionally, you'll find here a C++ implementation of lesser-known yet powerful metrics used in reports from the United States Geological Survey (USGS) and the National Environmental Monitoring Standards (NEMS) guidelines. Examples include _PRESS_ (Prediction Error Sum of Squares), _SFE_ (Standard Factorial Error), and _MSPE_ (Model Standard Percentage Error) and others. Based on the equations from _Helsel et al._ ([2020](https://pubs.usgs.gov/publication/tm4A3)), _Rasmunsen et al._ ([2008](https://pubs.usgs.gov/tm/tm3c4/)), _Hicks et al._ ([2020](https://www.nems.org.nz/documents/suspended-sediment)) and etc. (see documentation for details). - -## Example -The `tidyhydro` package follows the philosophy of [`yardstick`](https://github.com/tidymodels/yardstick/tree/main) and provides S3 class methods for vectors and data frames. For example, one can estimate `KGE`, `NSE` or `pBIAS` for a data frame like this: - -```{r example} -library(tidyhydro) -str(avacha) - -kge(avacha, obs, sim) -``` - -or create a [`metric_set`](https://yardstick.tidymodels.org/reference/metric_set.html) and estimate several parameters at once like this: - -```{r metricset} -hydro_metrics <- yardstick::metric_set(nse, pbias) - -hydro_metrics(avacha, obs, sim) -``` - -We do understand that sometimes one needs a qualitative interpretation of the model. Therefore, we populated some functions with a `performance` argument. When `performance = TRUE`, the metric interpretation will be returned according to Moriasi et al. 
([2015](https://elibrary.asabe.org/abstract.asp?aid=46548&t=3&dabs=Y&redir=&redirType=)). - -```{r interpretation} -hydro_metrics(avacha, obs, sim, performance = TRUE) -``` - -## Installation - -You can install the development version of `tidyhydro` from [GitHub](https://github.com/atsyplenkov/tidyhydro) with: - -``` r -# install.packages("pak") -pak::pak("atsyplenkov/tidyhydro") -``` - -## Benchmarking -Since the package uses `Rcpp` in the background, it performs slightly faster than base R and other R packages (see [benchmarks](https://atsyplenkov.github.io/tidyhydro/articles/benchmarks.html)). This is particularly noticeable with large datasets: -```{r benchmarking} -set.seed(12234) -x <- runif(10^6) -y <- runif(10^6) - -nse <- function(truth, estimate, na_rm = TRUE) { - #fmt: skip - 1 - (sum((truth - estimate)^2, na.rm = na_rm) / - sum((truth - mean(truth, na.rm = na_rm))^2, na.rm = na_rm)) -} - -bench::mark( - tidyhydro = tidyhydro::nse_vec(truth = x, estimate = y), - hydroGOF = hydroGOF::NSE(sim = y, obs = x), - baseR = nse(truth = x, estimate = y), - check = TRUE, - relative = TRUE, - filter_gc = FALSE, - iterations = 50L -) -``` - -## Code of Conduct - -Please note that the tidyhydro project is released with a [Contributor Code of Conduct](https://atsyplenkov.github.io/tidyhydro/CODE_OF_CONDUCT.html). By contributing to this project, you agree to abide by its terms. - -## See also -* [`hydroGOF`](https://github.com/hzambran/hydroGOF) - Goodness-of-fit functions for comparison of simulated and observed hydrological time series. -* [`yardstick`](https://github.com/tidymodels/yardstick/tree/main) - tidy methods for models performance assessment. 
+--- +output: + github_document: + html_preview: false +--- + + + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>", + fig.path = "man/figures/README-", + out.width = "100%" +) + +requireNamespace("hydroGOF", quietly = TRUE) +requireNamespace("bench", quietly = TRUE) +``` + +# tidyhydro + + +

+ + + + + + + +

+ + +The `tidyhydro` package provides a set of commonly used metrics in hydrology (such as _NSE_, _KGE_, _pBIAS_) for use within a [`tidymodels`](https://www.tidymodels.org/) infrastructure. Originally inspired by the [`yardstick`](https://github.com/tidymodels/yardstick/tree/main) and [`hydroGOF`](https://github.com/hzambran/hydroGOF) packages, this library is mainly written in C++ and provides a very quick estimation of desired goodness-of-fit criteria. + +Additionally, you'll find here a C++ implementation of lesser-known yet powerful metrics and descriptive statistics recommended in the United States Geological Survey (USGS) and the National Environmental Monitoring Standards (NEMS) guidelines. Examples include _PRESS_ (Prediction Error Sum of Squares), _SFE_ (Standard Factorial Error), _MSPE_ (Model Standard Percentage Error) and others. They are based on the equations from _Helsel et al._ ([2020](https://pubs.usgs.gov/publication/tm4A3)), _Rasmussen et al._ ([2009](https://pubs.usgs.gov/tm/tm3c4/)), _Hicks et al._ ([2020](https://www.nems.org.nz/documents/suspended-sediment)), etc. (see documentation for details). + +## Performance metrics +The `tidyhydro` package follows the philosophy of [`yardstick`](https://github.com/tidymodels/yardstick/tree/main) and provides S3 class methods for vectors and data frames. For example, one can estimate `KGE`, `NSE` or `pBIAS` for a data frame like this: + +```{r example} +library(tidyhydro) +str(avacha) + +kge(avacha, obs, sim) +``` + +or create a [`metric_set`](https://yardstick.tidymodels.org/reference/metric_set.html) and estimate several parameters at once like this: + +```{r metricset} +hydro_metrics <- yardstick::metric_set(nse, pbias) + +hydro_metrics(avacha, obs, sim) +``` + +We do understand that sometimes one needs a qualitative interpretation of the model. Therefore, we populated some functions with a `performance` argument. 
When `performance = TRUE`, the metric interpretation will be returned according to Moriasi et al. ([2015](https://elibrary.asabe.org/abstract.asp?aid=46548&t=3&dabs=Y&redir=&redirType=)). + +```{r interpretation} +hydro_metrics(avacha, obs, sim, performance = TRUE) +``` + +## Descriptive statistics +In addition to `metric`, inherited from `yardstick`, the `tidyhydro` introduces the `measure` objects. It aims to calculate descriptive statistics of a single dataset, such as `cv()` — coefficient of variation (a measure of variability) or `gm()` — geometric mean (a measure of central tendency): + +```{r measureset} +# Coefficient of Variation +cv(avacha, obs) + +# Geometric mean +gm_vec(avacha$obs) +``` + +## Installation + +You can install the development version of `tidyhydro` from [GitHub](https://github.com/atsyplenkov/tidyhydro) with: + +``` r +# install.packages("pak") +pak::pak("atsyplenkov/tidyhydro") +``` + +## Benchmarking +Since the package uses `Rcpp` in the background, it performs slightly faster than base R and other R packages (see [benchmarks](https://atsyplenkov.github.io/tidyhydro/articles/benchmarks.html)). This is particularly noticeable with large datasets: +```{r benchmarking} +set.seed(12234) +x <- runif(10^6) +y <- runif(10^6) + +nse <- function(truth, estimate, na_rm = TRUE) { + #fmt: skip + 1 - (sum((truth - estimate)^2, na.rm = na_rm) / + sum((truth - mean(truth, na.rm = na_rm))^2, na.rm = na_rm)) +} + +bench::mark( + tidyhydro = tidyhydro::nse_vec(truth = x, estimate = y), + hydroGOF = hydroGOF::NSE(sim = y, obs = x), + baseR = nse(truth = x, estimate = y), + check = TRUE, + relative = TRUE, + filter_gc = FALSE, + iterations = 50L +) +``` + +## Code of Conduct + +Please note that the tidyhydro project is released with a [Contributor Code of Conduct](https://atsyplenkov.github.io/tidyhydro/CODE_OF_CONDUCT.html). By contributing to this project, you agree to abide by its terms. 
+ +## See also +* [`hydroGOF`](https://github.com/hzambran/hydroGOF) - Goodness-of-fit functions for comparison of simulated and observed hydrological time series. +* [`yardstick`](https://github.com/tidymodels/yardstick/tree/main) - tidy methods for models performance assessment. diff --git a/README.md b/README.md index 1da867f..e733c2b 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,9 @@ # tidyhydro +

+ The `tidyhydro` package provides a set of commonly used metrics in @@ -26,17 +29,18 @@ library is mainly written in C++ and provides a very quick estimation of desired goodness-of-fit criteria. Additionally, you’ll find here a C++ implementation of lesser-known yet -powerful metrics used in reports from the United States Geological -Survey (USGS) and the National Environmental Monitoring Standards (NEMS) -guidelines. Examples include *PRESS* (Prediction Error Sum of Squares), -*SFE* (Standard Factorial Error), and *MSPE* (Model Standard Percentage -Error) and others. Based on the equations from *Helsel et al.* +powerful metrics and descriptive statistics recommended in the United +States Geological Survey (USGS) and the National Environmental +Monitoring Standards (NEMS) guidelines. Examples include *PRESS* +(Prediction Error Sum of Squares), *SFE* (Standard Factorial Error), +*MSPE* (Model Standard Percentage Error) and others. Based on the +equations from *Helsel et al.* ([2020](https://pubs.usgs.gov/publication/tm4A3)), *Rasmunsen et al.* ([2008](https://pubs.usgs.gov/tm/tm3c4/)), *Hicks et al.* ([2020](https://www.nems.org.nz/documents/suspended-sediment)) and etc. (see documentation for details). -## Example +## Performance metrics The `tidyhydro` package follows the philosophy of [`yardstick`](https://github.com/tidymodels/yardstick/tree/main) and @@ -88,6 +92,27 @@ hydro_metrics(avacha, obs, sim, performance = TRUE) #> 2 pbias standard Excellent ``` +## Descriptive statistics + +In addition to `metric`, inherited from `yardstick`, the `tidyhydro` +introduces the `measure` objects. 
It aims to calculate descriptive +statistics of a single dataset, such as `cv()` — coefficient of +variation (a measure of variability) or `gm()` — geometric mean (a +measure of central tendency): + +``` r +# Coefficient of Variation +cv(avacha, obs) +#> # A tibble: 1 × 3 +#> .metric .estimator .estimate +#> +#> 1 cv standard 0.533 + +# Geometric mean +gm_vec(avacha$obs) +#> [1] 128.9476 +``` + ## Installation You can install the development version of `tidyhydro` from @@ -128,9 +153,9 @@ bench::mark( #> # A tibble: 3 × 6 #> expression min median `itr/sec` mem_alloc `gc/sec` #> -#> 1 tidyhydro 1 1 22.7 NaN NaN -#> 2 hydroGOF 15.2 19.1 1 Inf Inf -#> 3 baseR 8.66 10.6 2.44 Inf Inf +#> 1 tidyhydro 1 1 13.3 NaN NaN +#> 2 hydroGOF 9.69 8.63 1 Inf Inf +#> 3 baseR 5.80 5.54 2.27 Inf Inf ``` ## Code of Conduct @@ -142,8 +167,8 @@ By contributing to this project, you agree to abide by its terms. ## See also -- [`hydroGOF`](https://github.com/hzambran/hydroGOF) - Goodness-of-fit - functions for comparison of simulated and observed hydrological time - series. -- [`yardstick`](https://github.com/tidymodels/yardstick/tree/main) - - tidy methods for models performance assessment. +- [`hydroGOF`](https://github.com/hzambran/hydroGOF) - Goodness-of-fit + functions for comparison of simulated and observed hydrological time + series. +- [`yardstick`](https://github.com/tidymodels/yardstick/tree/main) - + tidy methods for models performance assessment.
diff --git a/_pkgdown.yml b/_pkgdown.yml index ffde41a..1cfad57 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -1,38 +1,42 @@ -url: https://atsyplenkov.github.io/tidyhydro/ - -repo: - url: - home: https://github.com/atsyplenkov/tidyhydro - source: https://github.com/atsyplenkov/tidyhydro/tree/master - issues: https://github.com/atsyplenkov/tidyhydro/issues - -template: - bootstrap: 5 - math-rendering: mathjax - bslib: - base_font: {google: "Roboto"} - heading_font: {google: "Inter Tight"} - code_font: {google: "JetBrains Mono"} - -footer: - structure: - left: [developed_by] - right: [legal] - components: - legal: "[MIT](https://opensource.org/license/mit) License | Copyright (c) 2025 tidyhydro authors" - developed_by: Developed by [Anatolii Tsyplenkov](https://github.com/atsyplenkov) - -reference: -- title: "Goodness-of-fit (GOF) criteria" - desc: "Commonly used GOF metrics in hydrological models" - contents: - - has_keyword("gof") - -- title: "Regression" - desc: "Metrics and coefficients recommended by USGS and NEMS for regression analysis of hydrological variables" - contents: - - has_keyword("regression") - -- title: "Example datasets" - contents: - - has_keyword("data") +url: https://atsyplenkov.github.io/tidyhydro/ + +repo: + url: + home: https://github.com/atsyplenkov/tidyhydro + source: https://github.com/atsyplenkov/tidyhydro/tree/master + issues: https://github.com/atsyplenkov/tidyhydro/issues + +template: + bootstrap: 5 + math-rendering: mathjax + bslib: + base_font: {google: "Roboto"} + heading_font: {google: "Inter Tight"} + code_font: {google: "JetBrains Mono"} + +footer: + structure: + left: [developed_by] + right: [legal] + components: + legal: "[MIT](https://opensource.org/license/mit) License | Copyright (c) 2025 tidyhydro authors" + developed_by: Developed by [Anatolii Tsyplenkov](https://github.com/atsyplenkov) + +reference: +- title: "Goodness-of-fit (GOF) criteria" + desc: "Commonly used GOF metrics in hydrological models" + contents: + - 
has_keyword("gof") + +- title: "Regression" + desc: "Metrics and coefficients recommended by USGS and NEMS for regression analysis of hydrological variables" + contents: + - has_keyword("regression") + +- title: "Summary statistics" + contents: + - has_keyword("summary_stats") + +- title: "Example datasets" + contents: + - has_keyword("data") diff --git a/codemeta.json b/codemeta.json index 760184e..8e4daaf 100644 --- a/codemeta.json +++ b/codemeta.json @@ -8,13 +8,13 @@ "codeRepository": "https://github.com/atsyplenkov/tidyhydro", "issueTracker": "https://github.com/atsyplenkov/tidyhydro/issues", "license": "https://spdx.org/licenses/MIT", - "version": "0.1.1.9000", + "version": "0.1.2", "programmingLanguage": { "@type": "ComputerLanguage", "name": "R", "url": "https://r-project.org" }, - "runtimePlatform": "R version 4.5.1 Patched (2025-06-14 r88315)", + "runtimePlatform": "R version 4.5.1 Patched (2025-06-20 r88332 ucrt)", "author": [ { "@type": "Person", @@ -80,18 +80,6 @@ "url": "https://cran.r-project.org" }, "sameAs": "https://CRAN.R-project.org/package=quickcheck" - }, - { - "@type": "SoftwareApplication", - "identifier": "quarto", - "name": "quarto", - "provider": { - "@id": "https://cran.r-project.org", - "@type": "Organization", - "name": "Comprehensive R Archive Network (CRAN)", - "url": "https://cran.r-project.org" - }, - "sameAs": "https://CRAN.R-project.org/package=quarto" } ], "softwareRequirements": { @@ -140,9 +128,22 @@ }, "sameAs": "https://CRAN.R-project.org/package=yardstick" }, + "5": { + "@type": "SoftwareApplication", + "identifier": "checkmate", + "name": "checkmate", + "version": ">= 2.3.1", + "provider": { + "@id": "https://cran.r-project.org", + "@type": "Organization", + "name": "Comprehensive R Archive Network (CRAN)", + "url": "https://cran.r-project.org" + }, + "sameAs": "https://CRAN.R-project.org/package=checkmate" + }, "SystemRequirements": null }, - "fileSize": "103.052KB", + "fileSize": "7300.242KB", "releaseNotes": 
"https://github.com/atsyplenkov/tidyhydro/blob/master/NEWS.md", "readme": "https://github.com/atsyplenkov/tidyhydro/blob/master/README.md", "keywords": ["r", "r-package", "rstats"] diff --git a/man-roxygen/examples-description.R b/man-roxygen/examples-description.R new file mode 100644 index 0000000..097ae9c --- /dev/null +++ b/man-roxygen/examples-description.R @@ -0,0 +1,10 @@ +#' @examples +#' library(tidyhydro) +#' +#' <%=fn %> +#' +#' # Supply truth as bare column names +#' <%=fn %>(avacha, obs) +#' +#' # Or as numeric vectors +#' <%=fn %>_vec(avacha$obs) diff --git a/man-roxygen/examples-numeric.R b/man-roxygen/examples-numeric.R index 3067b21..fc0a832 100644 --- a/man-roxygen/examples-numeric.R +++ b/man-roxygen/examples-numeric.R @@ -1,9 +1,8 @@ -#' @examples -#' library(tidyhydro) -#' data(avacha) -#' -#' # Supply truth and predictions as bare column names -#' <%=fn %>(avacha, obs, sim) -#' -#' # Or as numeric vectors -#' <%=fn %>_vec(avacha$obs, avacha$sim) +#' @examples +#' library(tidyhydro) +#' +#' # Supply truth and predictions as bare column names +#' <%=fn %>(avacha, obs, sim) +#' +#' # Or as numeric vectors +#' <%=fn %>_vec(avacha$obs, avacha$sim) diff --git a/man-roxygen/return.R b/man-roxygen/return.R index d97b027..a111a0b 100644 --- a/man-roxygen/return.R +++ b/man-roxygen/return.R @@ -1,9 +1,9 @@ -#' @return -#' -#' A `tibble` with columns `.metric`, `.estimator`, -#' and `.estimate` and 1 row of values. -#' -#' For grouped data frames, the number of rows returned will be the same as -#' the number of groups. -#' -#' For `<%=fn %>_vec()`, a single `numeric` value (or `NA`). +#' @return +#' +#' A `tibble` with columns `.metric`, `.estimator`, +#' and `.estimate` and 1 row of values. +#' +#' For grouped data frames, the number of rows returned will be the same as +#' the number of groups. +#' +#' For `<%=fn %>_vec()`, a single `numeric` value (or `NA`). 
diff --git a/man/avacha.Rd b/man/avacha.Rd index d3375ab..9951d21 100644 --- a/man/avacha.Rd +++ b/man/avacha.Rd @@ -1,35 +1,36 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/data.R -\docType{data} -\name{avacha} -\alias{avacha} -\title{Mean Daily Water Discharge At Avacha River (Elizovo City)} -\source{ -\itemize{ -\item \url{https://gmvo.skniivh.ru/} -\item \url{https://ewds.climate.copernicus.eu/datasets/cems-glofas-historical} -} -} -\value{ -\item{avacha}{a data frame} -} -\description{ -Mean Daily Water Discharge At Avacha River (Elizovo City) -} -\details{ -These data contain the measured (\code{obs}) mean daily water discharge -values (in \eqn{m^3/s}) at the Avacha River -- Elizovo City state gauging -station for the 2022 calendar year. They are accompanied by the GloFAS v4.0 -reanalysis water discharge values for the last 24 hours (\code{sim}), derived -from -\url{https://ewds.climate.copernicus.eu/datasets/cems-glofas-historical}. - -Read more about GloFAS Water Discharge reanalysis -- -\url{https://confluence.ecmwf.int/display/CEMS/GloFAS+v4.0} -} -\examples{ -data(avacha) -str(avacha) -} -\keyword{data} -\keyword{datasets} +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{avacha} +\alias{avacha} +\title{Mean daily water discharge at the Avacha River (Elizovo City)} +\source{ +\itemize{ +\item observed water discharge \url{https://gmvo.skniivh.ru/} +\item simulated water discharge +\url{https://ewds.climate.copernicus.eu/datasets/cems-glofas-historical} +} +} +\value{ +\item{avacha}{a data frame} +} +\description{ +Mean daily water discharge at the Avacha River (Elizovo City) +} +\details{ +These data contain the measured (\code{obs}) mean daily water discharge +values (in \eqn{m^3/s}) at the Avacha River streamgage near Elizovo City, +Russia, 2022 calendar year. 
They are accompanied by the GloFAS v4.0 +reanalysis water discharge values for the last 24 hours (\code{sim}), derived +from +\url{https://ewds.climate.copernicus.eu/datasets/cems-glofas-historical}. + +Read more about GloFAS Water Discharge reanalysis -- +\url{https://confluence.ecmwf.int/display/CEMS/GloFAS+v4.0} +} +\examples{ +data(avacha) +str(avacha) +} +\keyword{data} +\keyword{datasets} diff --git a/man/cv.Rd b/man/cv.Rd new file mode 100644 index 0000000..e657124 --- /dev/null +++ b/man/cv.Rd @@ -0,0 +1,58 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/variability.R +\name{cv} +\alias{cv} +\alias{cv.data.frame} +\alias{cv_vec} +\title{Coefficient of Variation (Cv)} +\usage{ +cv(data, ...) + +\method{cv}{data.frame}(data, truth, na_rm = TRUE, ...) + +cv_vec(truth, na_rm = TRUE, ...) +} +\arguments{ +\item{data}{A \code{data.frame} containing the columns specified by the \code{truth} +and \code{estimate} arguments.} + +\item{...}{Not currently used.} + +\item{truth}{The column identifier for the true results +(that is \code{numeric}). This should be an unquoted column name although +this argument is passed by expression and supports +\link[rlang:topic-inject]{quasiquotation} (you can unquote column +names). For \verb{_vec()} functions, a \code{numeric} vector.} + +\item{na_rm}{A \code{logical} value indicating whether \code{NA} +values should be stripped before the computation proceeds.} +} +\value{ +A \code{tibble} with columns \code{.metric}, \code{.estimator}, +and \code{.estimate} and 1 row of values. + +For grouped data frames, the number of rows returned will be the same as +the number of groups. + +For \code{cv_vec()}, a single \code{numeric} value (or \code{NA}). 
+} +\description{ +Coefficient of Variation (Cv) +} +\examples{ +library(tidyhydro) + +cv + +# Supply truth as bare column names +cv(avacha, obs) + +# Or as numeric vectors +cv_vec(avacha$obs) +} +\seealso{ +Other descriptive statistics: +\code{\link{gm}()} +} +\concept{descriptive statistics} +\keyword{summary_stats} diff --git a/man/gm.Rd b/man/gm.Rd new file mode 100644 index 0000000..cabfd2d --- /dev/null +++ b/man/gm.Rd @@ -0,0 +1,58 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/central-tendency.R +\name{gm} +\alias{gm} +\alias{gm.data.frame} +\alias{gm_vec} +\title{Geometric Mean (GM)} +\usage{ +gm(data, ...) + +\method{gm}{data.frame}(data, truth, na_rm = TRUE, ...) + +gm_vec(truth, na_rm = TRUE, ...) +} +\arguments{ +\item{data}{A \code{data.frame} containing the columns specified by the \code{truth} +and \code{estimate} arguments.} + +\item{...}{Not currently used.} + +\item{truth}{The column identifier for the true results +(that is \code{numeric}). This should be an unquoted column name although +this argument is passed by expression and supports +\link[rlang:topic-inject]{quasiquotation} (you can unquote column +names). For \verb{_vec()} functions, a \code{numeric} vector.} + +\item{na_rm}{A \code{logical} value indicating whether \code{NA} +values should be stripped before the computation proceeds.} +} +\value{ +A \code{tibble} with columns \code{.metric}, \code{.estimator}, +and \code{.estimate} and 1 row of values. + +For grouped data frames, the number of rows returned will be the same as +the number of groups. + +For \code{gm_vec()}, a single \code{numeric} value (or \code{NA}). 
+} +\description{ +Geometric Mean (GM) +} +\examples{ +library(tidyhydro) + +gm + +# Supply truth as bare column names +gm(avacha, obs) + +# Or as numeric vectors +gm_vec(avacha$obs) +} +\seealso{ +Other descriptive statistics: +\code{\link{cv}()} +} +\concept{descriptive statistics} +\keyword{summary_stats} diff --git a/man/kge.Rd b/man/kge.Rd index b618f5d..286f11c 100644 --- a/man/kge.Rd +++ b/man/kge.Rd @@ -1,125 +1,111 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/kge.R -\name{kge} -\alias{kge} -\alias{kge.data.frame} -\alias{kge_vec} -\title{Kling-Gupta Efficiency (KGE)} -\usage{ -kge(data, ...) - -\method{kge}{data.frame}(data, truth, estimate, na_rm = TRUE, ...) - -kge_vec(truth, estimate, na_rm = TRUE, ...) -} -\arguments{ -\item{data}{A \code{data.frame} containing the columns specified by the \code{truth} -and \code{estimate} arguments.} - -\item{...}{Not currently used.} - -\item{truth}{The column identifier for the true results -(that is \code{numeric}). This should be an unquoted column name although -this argument is passed by expression and supports -\link[rlang:topic-inject]{quasiquotation} (you can unquote column -names). For \verb{_vec()} functions, a \code{numeric} vector.} - -\item{estimate}{The column identifier for the predicted -results (that is also \code{numeric}). As with \code{truth} this can be -specified different ways but the primary method is to use an -unquoted variable name. For \verb{_vec()} functions, a \code{numeric} vector.} - -\item{na_rm}{A \code{logical} value indicating whether \code{NA} -values should be stripped before the computation proceeds.} -} -\value{ -A \code{tibble} with columns \code{.metric}, \code{.estimator}, -and \code{.estimate} and 1 row of values. - -For grouped data frames, the number of rows returned will be the same as -the number of groups. - -For \code{kge_vec()}, a single \code{numeric} value (or \code{NA}). 
-} -\description{ -Calculate the Kling-Gupta Efficiency (\emph{Gupta et al., 2009}). -Dimensionless (from \eqn{-\infty} to 1). \code{kge()} assesses the accuracy of -simulated data by considering correlation, bias, and variability relative -to observed data. -} -\details{ -The Kling-Gupta Efficiency is a composite metric that decomposes model -performance into three components: correlation (\eqn{r}), -variability ratio (\eqn{\alpha}), and bias ratio (\eqn{\beta}). -It improves upon the Nash-Sutcliffe Efficiency (see \link{nse}) -by explicitly accounting for each source of error (\emph{Gupta et al., 2009}). - -The Kling-Gupta Efficiency is estimated as follows: -\deqn{ -KGE = 1 - \sqrt{(r - 1)^2 + (\alpha - 1)^2 + (\beta - 1)^2} -} -where: -\itemize{ -\item \eqn{r} is the linear Pearson correlation coefficient between -observed and simulated values -\item \eqn{\alpha = \sigma_{sim} / \sigma_{obs}} is the ratio of the -standard deviations (variability ratio) -\item \eqn{\beta = \mu_{sim} / \mu_{obs}} is the ratio of the -means (bias ratio) -} -} -\note{ -Unlike the Nash–Sutcliffe Efficiency (\link{nse}), the KGE does not have an -inherent benchmark such as "mean flow", and \eqn{KGE = 0} does not -correspond to a baseline performance. -Therefore, KGE values should not be interpreted as "good" or "bad" based -solely on their sign or magnitude. -Instead, users are encouraged to examine the individual components -(\eqn{r}, \eqn{\alpha}, \eqn{\beta}) -to understand the nature of model performance and consider defining -explicit benchmarks based on the study context. - -For further discussion, see Knoben et al. (2019), who caution against -directly translating NSE-based interpretation thresholds to KGE. -} -\examples{ -library(tidyhydro) -data(avacha) - -# Supply truth and predictions as bare column names -kge(avacha, obs, sim) - -# Or as numeric vectors -kge_vec(avacha$obs, avacha$sim) -} -\references{ -Gupta, H.V.; Kling, H.; Yilmaz, K.K.; Martinez, G.F. (2009). 
-Decomposition of the mean squared error and kge performance criteria: -Implications for improving hydrological modelling. Journal of Hydrology, -377(1-2), 80-91. \doi{10.1016/j.jhydrol.2009.08.003} - -Knoben, W. J. M., Freer, J. E., & Woods, R. A. (2019). -Technical note: Inherent benchmark or not? Comparing Nash–Sutcliffe and -Kling–Gupta efficiency scores. Hydrology and Earth System Sciences, 23, -4323–4331. \doi{10.5194/hess-23-4323-2019} -} -\seealso{ -Other numeric metrics: -\code{\link{kge2012}()}, -\code{\link{mse}()}, -\code{\link{nse}()}, -\code{\link{pbias}()}, -\code{\link{press}()}, -\code{\link{sfe}()} - -Other accuracy metrics: -\code{\link{kge2012}()}, -\code{\link{mse}()}, -\code{\link{nse}()}, -\code{\link{pbias}()}, -\code{\link{press}()}, -\code{\link{sfe}()} -} -\concept{accuracy metrics} -\concept{numeric metrics} -\keyword{gof} +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/kge.R +\name{kge} +\alias{kge} +\alias{kge.data.frame} +\alias{kge_vec} +\title{Kling-Gupta Efficiency (KGE)} +\usage{ +kge(data, ...) + +\method{kge}{data.frame}(data, truth, estimate, na_rm = TRUE, ...) + +kge_vec(truth, estimate, na_rm = TRUE, ...) +} +\arguments{ +\item{data}{A \code{data.frame} containing the columns specified by the \code{truth} +and \code{estimate} arguments.} + +\item{...}{Not currently used.} + +\item{truth}{The column identifier for the true results +(that is \code{numeric}). This should be an unquoted column name although +this argument is passed by expression and supports +\link[rlang:topic-inject]{quasiquotation} (you can unquote column +names). For \verb{_vec()} functions, a \code{numeric} vector.} + +\item{estimate}{The column identifier for the predicted +results (that is also \code{numeric}). As with \code{truth} this can be +specified different ways but the primary method is to use an +unquoted variable name. 
For \verb{_vec()} functions, a \code{numeric} vector.} + +\item{na_rm}{A \code{logical} value indicating whether \code{NA} +values should be stripped before the computation proceeds.} +} +\value{ +A \code{tibble} with columns \code{.metric}, \code{.estimator}, +and \code{.estimate} and 1 row of values. + +For grouped data frames, the number of rows returned will be the same as +the number of groups. + +For \code{kge_vec()}, a single \code{numeric} value (or \code{NA}). +} +\description{ +Calculate the Kling-Gupta Efficiency (\emph{Gupta et al., 2009}). +Dimensionless (from \eqn{-\infty} to 1). \code{kge()} assesses the accuracy of +simulated data by considering correlation, bias, and variability relative +to observed data. +} +\details{ +The Kling-Gupta Efficiency is a composite metric that decomposes model +performance into three components: correlation (\eqn{r}), +variability ratio (\eqn{\alpha}), and bias ratio (\eqn{\beta}). +It improves upon the Nash-Sutcliffe Efficiency (see \link{nse}) +by explicitly accounting for each source of error (\emph{Gupta et al., 2009}). + +The Kling-Gupta Efficiency is estimated as follows: +\deqn{ +KGE = 1 - \sqrt{(r - 1)^2 + (\alpha - 1)^2 + (\beta - 1)^2} +} +where: +\itemize{ +\item \eqn{r} is the linear Pearson correlation coefficient between +observed and simulated values +\item \eqn{\alpha = \sigma_{sim} / \sigma_{obs}} is the ratio of the +standard deviations (variability ratio) +\item \eqn{\beta = \mu_{sim} / \mu_{obs}} is the ratio of the +means (bias ratio) +} +} +\note{ +Unlike the Nash–Sutcliffe Efficiency (\link{nse}), the KGE does not have an +inherent benchmark such as "mean flow", and \eqn{KGE = 0} does not +correspond to a baseline performance. +Therefore, KGE values should not be interpreted as "good" or "bad" based +solely on their sign or magnitude. 
+Instead, users are encouraged to examine the individual components +(\eqn{r}, \eqn{\alpha}, \eqn{\beta}) +to understand the nature of model performance and consider defining +explicit benchmarks based on the study context. + +For further discussion, see \emph{Knoben et al.} (2019), who caution against +directly translating NSE-based interpretation thresholds to KGE. +} +\examples{ +library(tidyhydro) + +# Supply truth and predictions as bare column names +kge(avacha, obs, sim) + +# Or as numeric vectors +kge_vec(avacha$obs, avacha$sim) +} +\references{ +Gupta, H.V.; Kling, H.; Yilmaz, K.K.; Martinez, G.F. (2009). +Decomposition of the mean squared error and kge performance criteria: +Implications for improving hydrological modelling. Journal of Hydrology, +377(1-2), 80-91. \doi{10.1016/j.jhydrol.2009.08.003} + +Knoben, W. J. M., Freer, J. E., & Woods, R. A. (2019). +Technical note: Inherent benchmark or not? Comparing Nash–Sutcliffe and +Kling–Gupta efficiency scores. Hydrology and Earth System Sciences, 23, +4323–4331. \doi{10.5194/hess-23-4323-2019} +} +\seealso{ +Other KGE variants: +\code{\link{kge2012}()}, +\code{\link{kgelog}()} +} +\concept{KGE variants} +\keyword{gof} diff --git a/man/kge2012.Rd b/man/kge2012.Rd index a628735..42db04c 100644 --- a/man/kge2012.Rd +++ b/man/kge2012.Rd @@ -1,129 +1,115 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/kge.R -\name{kge2012} -\alias{kge2012} -\alias{kge2012.data.frame} -\alias{kge2012_vec} -\title{Modified Kling-Gupta Efficiency (KGE')} -\usage{ -kge2012(data, ...) - -\method{kge2012}{data.frame}(data, truth, estimate, na_rm = TRUE, ...) - -kge2012_vec(truth, estimate, na_rm = TRUE, ...) -} -\arguments{ -\item{data}{A \code{data.frame} containing the columns specified by the \code{truth} -and \code{estimate} arguments.} - -\item{...}{Not currently used.} - -\item{truth}{The column identifier for the true results -(that is \code{numeric}). 
This should be an unquoted column name although -this argument is passed by expression and supports -\link[rlang:topic-inject]{quasiquotation} (you can unquote column -names). For \verb{_vec()} functions, a \code{numeric} vector.} - -\item{estimate}{The column identifier for the predicted -results (that is also \code{numeric}). As with \code{truth} this can be -specified different ways but the primary method is to use an -unquoted variable name. For \verb{_vec()} functions, a \code{numeric} vector.} - -\item{na_rm}{A \code{logical} value indicating whether \code{NA} -values should be stripped before the computation proceeds.} -} -\value{ -A \code{tibble} with columns \code{.metric}, \code{.estimator}, -and \code{.estimate} and 1 row of values. - -For grouped data frames, the number of rows returned will be the same as -the number of groups. - -For \code{kge2012_vec()}, a single \code{numeric} value (or \code{NA}). -} -\description{ -Calculate the modified Kling-Gupta Efficiency (\emph{Kling et al., 2012}), -aka \eqn{KGE'}. Dimensionless (from \eqn{-\infty} to 1). -\code{kge2012()} assesses the accuracy of -simulated data by considering correlation, bias, and variability relative -to observed data. -} -\details{ -The Modified Kling-Gupta Efficiency is a composite metric that decomposes -model performance into three components: correlation (\eqn{r}), -bias ratio (\eqn{\beta}), and variability ratio (\eqn{\gamma}). -It improves upon the Kling-Gupta Efficiency (see \link{kge}) by replacing -standard deviation with Coefficient of Variation. This ensures that the -bias and variability ratios are not cross-correlated, -which otherwise may occur when e.g. the precipitation inputs are biased. 
- -The Modified Kling-Gupta Efficiency (\eqn{KGE'}) is estimated as follows: -\deqn{ -KGE' = 1 - \sqrt{(r - 1)^2 + (\beta - 1)^2 + (\gamma - 1)^2} -} -where: -\itemize{ -\item \eqn{r} is the linear Pearson correlation coefficient between -observed and simulated values -\item \eqn{\beta = \mu_{sim} / \mu_{obs}} is the ratio of the -means (bias ratio) -\item \eqn{ - \gamma = \frac{\sigma_{sim} / \mu_{sim}}{\sigma_{sim} / \mu_{sim}} - } is the ratio of the Coefficients of Variation (variability ratio) -} -} -\note{ -Unlike the Nash–Sutcliffe Efficiency (\link{nse}), the KGE does not have an -inherent benchmark such as "mean flow", and \eqn{KGE' = 0} does not -correspond to a baseline performance. -Therefore, \eqn{KGE'} values should not be interpreted as "good" or "bad" -based solely on their sign or magnitude. -Instead, users are encouraged to examine the individual components -(\eqn{r}, \eqn{\beta}, \eqn{\gamma}) -to understand the nature of model performance and consider defining -explicit benchmarks based on the study context. - -For further discussion, see Knoben et al. (2019), who caution against -directly translating NSE-based interpretation thresholds to KGE. -} -\examples{ -library(tidyhydro) -data(avacha) - -# Supply truth and predictions as bare column names -kge2012(avacha, obs, sim) - -# Or as numeric vectors -kge2012_vec(avacha$obs, avacha$sim) -} -\references{ -Kling, H., Fuchs, M., & Paulin, M. (2012). Runoff conditions in the upper -Danube basin under an ensemble of climate change scenarios. -Journal of Hydrology, 424–425, 264–277. -\doi{10.1016/j.jhydrol.2012.01.011} - -Knoben, W. J. M., Freer, J. E., & Woods, R. A. (2019). -Technical note: Inherent benchmark or not? Comparing Nash–Sutcliffe and -Kling–Gupta efficiency scores. Hydrology and Earth System Sciences, 23, -4323–4331. 
\doi{10.5194/hess-23-4323-2019} -} -\seealso{ -Other numeric metrics: -\code{\link{kge}()}, -\code{\link{mse}()}, -\code{\link{nse}()}, -\code{\link{pbias}()}, -\code{\link{press}()}, -\code{\link{sfe}()} - -Other accuracy metrics: -\code{\link{kge}()}, -\code{\link{mse}()}, -\code{\link{nse}()}, -\code{\link{pbias}()}, -\code{\link{press}()}, -\code{\link{sfe}()} -} -\concept{accuracy metrics} -\concept{numeric metrics} -\keyword{gof} +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/kge.R +\name{kge2012} +\alias{kge2012} +\alias{kge2012.data.frame} +\alias{kge2012_vec} +\title{Modified Kling-Gupta Efficiency (KGE')} +\usage{ +kge2012(data, ...) + +\method{kge2012}{data.frame}(data, truth, estimate, na_rm = TRUE, ...) + +kge2012_vec(truth, estimate, na_rm = TRUE, ...) +} +\arguments{ +\item{data}{A \code{data.frame} containing the columns specified by the \code{truth} +and \code{estimate} arguments.} + +\item{...}{Not currently used.} + +\item{truth}{The column identifier for the true results +(that is \code{numeric}). This should be an unquoted column name although +this argument is passed by expression and supports +\link[rlang:topic-inject]{quasiquotation} (you can unquote column +names). For \verb{_vec()} functions, a \code{numeric} vector.} + +\item{estimate}{The column identifier for the predicted +results (that is also \code{numeric}). As with \code{truth} this can be +specified different ways but the primary method is to use an +unquoted variable name. For \verb{_vec()} functions, a \code{numeric} vector.} + +\item{na_rm}{A \code{logical} value indicating whether \code{NA} +values should be stripped before the computation proceeds.} +} +\value{ +A \code{tibble} with columns \code{.metric}, \code{.estimator}, +and \code{.estimate} and 1 row of values. + +For grouped data frames, the number of rows returned will be the same as +the number of groups. + +For \code{kge2012_vec()}, a single \code{numeric} value (or \code{NA}). 
+} +\description{ +Calculate the modified Kling-Gupta Efficiency (\emph{Kling et al., 2012}), +aka \eqn{KGE'}. Dimensionless (from \eqn{-\infty} to 1). +\code{kge2012()} assesses the accuracy of +simulated data by considering correlation, bias, and variability relative +to observed data. +} +\details{ +The Modified Kling-Gupta Efficiency is a composite metric that decomposes +model performance into three components: correlation (\eqn{r}), +bias ratio (\eqn{\beta}), and variability ratio (\eqn{\gamma}). +It improves upon the Kling-Gupta Efficiency (see \link{kge}) by replacing +standard deviation with Coefficient of Variation. This ensures that the +bias and variability ratios are not cross-correlated, +which otherwise may occur when e.g. the precipitation inputs are biased. + +The Modified Kling-Gupta Efficiency (\eqn{KGE'}) is estimated as follows: +\deqn{ +KGE' = 1 - \sqrt{(r - 1)^2 + (\beta - 1)^2 + (\gamma - 1)^2} +} +where: +\itemize{ +\item \eqn{r} is the linear Pearson correlation coefficient between +observed and simulated values +\item \eqn{\beta = \mu_{sim} / \mu_{obs}} is the ratio of the +means (bias ratio) +\item \eqn{ + \gamma = \frac{\sigma_{sim} / \mu_{sim}}{\sigma_{obs} / \mu_{obs}} + } is the ratio of the Coefficients of Variation (variability ratio) +} +} +\note{ +Unlike the Nash–Sutcliffe Efficiency (\link{nse}), the KGE does not have an +inherent benchmark such as "mean flow", and \eqn{KGE' = 0} does not +correspond to a baseline performance. +Therefore, \eqn{KGE'} values should not be interpreted as "good" or "bad" +based solely on their sign or magnitude. +Instead, users are encouraged to examine the individual components +(\eqn{r}, \eqn{\beta}, \eqn{\gamma}) +to understand the nature of model performance and consider defining +explicit benchmarks based on the study context. + +For further discussion, see \emph{Knoben et al.} (2019), who caution against +directly translating NSE-based interpretation thresholds to KGE.
+} +\examples{ +library(tidyhydro) + +# Supply truth and predictions as bare column names +kge2012(avacha, obs, sim) + +# Or as numeric vectors +kge2012_vec(avacha$obs, avacha$sim) +} +\references{ +Kling, H., Fuchs, M., & Paulin, M. (2012). Runoff conditions in the upper +Danube basin under an ensemble of climate change scenarios. +Journal of Hydrology, 424–425, 264–277. +\doi{10.1016/j.jhydrol.2012.01.011} + +Knoben, W. J. M., Freer, J. E., & Woods, R. A. (2019). +Technical note: Inherent benchmark or not? Comparing Nash–Sutcliffe and +Kling–Gupta efficiency scores. Hydrology and Earth System Sciences, 23, +4323–4331. \doi{10.5194/hess-23-4323-2019} +} +\seealso{ +Other KGE variants: +\code{\link{kge}()}, +\code{\link{kgelog}()} +} +\concept{KGE variants} +\keyword{gof} diff --git a/man/kgelog.Rd b/man/kgelog.Rd new file mode 100644 index 0000000..2078ace --- /dev/null +++ b/man/kgelog.Rd @@ -0,0 +1,156 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/kge.R +\name{kgelog} +\alias{kgelog} +\alias{kgelog.data.frame} +\alias{kgelog_vec} +\alias{kgelog_low} +\alias{kgelog_low.data.frame} +\alias{kgelog_low_vec} +\alias{kgelog_hi} +\alias{kgelog_hi.data.frame} +\alias{kgelog_hi_vec} +\title{Log-transformed Modified Kling-Gupta Efficiency} +\usage{ +kgelog(data, ...) + +\method{kgelog}{data.frame}(data, truth, estimate, na_rm = TRUE, ...) + +kgelog_vec(truth, estimate, na_rm = TRUE, ...) + +kgelog_low(data, ...) + +\method{kgelog_low}{data.frame}(data, truth, estimate, na_rm = TRUE, ...) + +kgelog_low_vec(truth, estimate, na_rm = TRUE, ...) + +kgelog_hi(data, ...) + +\method{kgelog_hi}{data.frame}(data, truth, estimate, na_rm = TRUE, ...) + +kgelog_hi_vec(truth, estimate, na_rm = TRUE, ...) 
+} +\arguments{ +\item{data}{A \code{data.frame} containing the columns specified by the \code{truth} +and \code{estimate} arguments.} + +\item{...}{Not currently used.} + +\item{truth}{The column identifier for the true results +(that is \code{numeric}). This should be an unquoted column name although +this argument is passed by expression and supports +\link[rlang:topic-inject]{quasiquotation} (you can unquote column +names). For \verb{_vec()} functions, a \code{numeric} vector.} + +\item{estimate}{The column identifier for the predicted +results (that is also \code{numeric}). As with \code{truth} this can be +specified different ways but the primary method is to use an +unquoted variable name. For \verb{_vec()} functions, a \code{numeric} vector.} + +\item{na_rm}{A \code{logical} value indicating whether \code{NA} +values should be stripped before the computation proceeds.} +} +\value{ +A \code{tibble} with columns \code{.metric}, \code{.estimator}, +and \code{.estimate} and 1 row of values. + +For grouped data frames, the number of rows returned will be the same as +the number of groups. + +For \code{kgelog_vec()}, a single \code{numeric} value (or \code{NA}). +} +\description{ +Calculate the modified Kling-Gupta Efficiency (\emph{Kling et al., 2012}) on +\strong{log-transformed} data as proposed in \emph{Mai} (2023), +namely \eqn{KGE_{log}}, \eqn{KGE_{logQ_{low}}} and \eqn{KGE_{logQ_{hi}}}. +All are dimensionless (from \eqn{-\infty} to 1). + +This metric is recommended for emphasising low flows. By transforming the +discharge data logarithmically, it gives more weight to smaller flow +values, which is important for understanding drought conditions or +baseflow behaviour (see \emph{Mai 2023}; \emph{Mizukami et al., 2019}). 
+} +\details{ +While the \code{kgelog()} function proposes the log-transformed version of the +\link{kge2012}, functions such as \code{kgelog_low()} and \code{kgelog_hi()} +also perform data subsetting according to conditions specified in +\emph{Mai} (2023). + +The metrics \code{kgelog_low()} and \code{kgelog_hi()} are then the \eqn{KGE'} +of the log-transformed observed and simulated streamflow considering +only low-flow and high-flow time steps, respectively. + +A data point is considered in the derivation of \code{kgelog_low()} if the +observed streamflow (\eqn{\text{obs}}) for that time step satisfies +the following conditions: + +\deqn{ +0.0 < \text{obs} \le min(\text{obs}) + 0.05 \times +(max(\text{obs}) - min(\text{obs})) +} + +A data point is considered in the derivation of \code{kgelog_hi()} if the +observed streamflow (\eqn{\text{obs}}) for that time step satisfies +the following conditions: + +\deqn{ +\text{obs} > min(\text{obs}) + 0.05 \times +(max(\text{obs}) - min(\text{obs})) +} +} +\note{ +Please note that the decision if a time step is a low-flow or high-flow +time step is solely based on the observations which means it is always +the same time steps for a given basin and time period while being +independent of the simulation (\emph{Mai}, 2023). + +Unlike the Nash–Sutcliffe Efficiency (\link{nse}), the KGE does not have an +inherent benchmark such as "mean flow", and \eqn{KGE' = 0} does not +correspond to a baseline performance. +Therefore, \eqn{KGE_{log}} values should not be interpreted as "good" +or "bad" based solely on their sign or magnitude. +Instead, users are encouraged to examine the individual components +(\eqn{r}, \eqn{\beta}, \eqn{\gamma}) +to understand the nature of model performance and consider defining +explicit benchmarks based on the study context. + +For further discussion, see \emph{Knoben et al.} (2019), who caution against +directly translating NSE-based interpretation thresholds to KGE. 
+} +\examples{ +library(tidyhydro) + +# Supply truth and predictions as bare column names +kgelog(avacha, obs, sim) + +# Or as numeric vectors +kgelog_vec(avacha$obs, avacha$sim) +} +\references{ +Kling, H., Fuchs, M., & Paulin, M. (2012). Runoff conditions in the upper +Danube basin under an ensemble of climate change scenarios. +Journal of Hydrology, 424–425, 264–277. +\doi{10.1016/j.jhydrol.2012.01.011} + +Knoben, W. J. M., Freer, J. E., & Woods, R. A. (2019). +Technical note: Inherent benchmark or not? Comparing Nash–Sutcliffe and +Kling–Gupta efficiency scores. Hydrology and Earth System Sciences, 23, +4323–4331. \doi{10.5194/hess-23-4323-2019} + +Mai, J. (2023). Ten strategies towards successful calibration of +environmental models. Journal of Hydrology, 620, 129414. +\doi{10.1016/j.jhydrol.2023.129414} + +Mizukami, N., Rakovec, O., Newman, A. J., Clark, M. P., Wood, A. W., +Gupta, H. V., & Kumar, R. (2019). On the choice of calibration metrics +for “high-flow” estimation using hydrologic models. +Hydrology and Earth System Sciences, 23(6), 2601–2614. +\doi{10.5194/hess-23-2601-2019} +} +\seealso{ +Other KGE variants: +\code{\link{kge}()}, +\code{\link{kge2012}()} +} +\concept{KGE variants} +\keyword{gof} diff --git a/man/mse.Rd b/man/mse.Rd index c869a6a..aca1c4d 100644 --- a/man/mse.Rd +++ b/man/mse.Rd @@ -1,102 +1,92 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/mse.R -\name{mse} -\alias{mse} -\alias{mse.data.frame} -\alias{mse_vec} -\title{Mean Squared Error (MSE)} -\usage{ -mse(data, ...) - -\method{mse}{data.frame}(data, truth, estimate, na_rm = TRUE, ...) - -mse_vec(truth, estimate, na_rm = TRUE, ...) -} -\arguments{ -\item{data}{A \code{data.frame} containing the columns specified by the \code{truth} -and \code{estimate} arguments.} - -\item{...}{Not currently used.} - -\item{truth}{The column identifier for the true results -(that is \code{numeric}). 
This should be an unquoted column name although -this argument is passed by expression and supports -\link[rlang:topic-inject]{quasiquotation} (you can unquote column -names). For \verb{_vec()} functions, a \code{numeric} vector.} - -\item{estimate}{The column identifier for the predicted -results (that is also \code{numeric}). As with \code{truth} this can be -specified different ways but the primary method is to use an -unquoted variable name. For \verb{_vec()} functions, a \code{numeric} vector.} - -\item{na_rm}{A \code{logical} value indicating whether \code{NA} -values should be stripped before the computation proceeds.} -} -\value{ -A \code{tibble} with columns \code{.metric}, \code{.estimator}, -and \code{.estimate} and 1 row of values. - -For grouped data frames, the number of rows returned will be the same as -the number of groups. - -For \code{mse_vec()}, a single \code{numeric} value (or \code{NA}). -} -\description{ -The MSE is a metric that evaluates the goodness of fit between model -simulations and observations (\emph{Fisher, 1920}). Measured in the squared -units of \code{truth} and \code{estimate} and can vary from \eqn{-\infty} to -\eqn{+\infty}. -} -\details{ -The MSE is estimated as follows (Clark et al., 2021): -\deqn{ -MSE = \frac{1}{n} \sum_{i=1}^{n}{(sim_i - obs_i)^2} -} -where: -\itemize{ -\item \eqn{sim} defines model simulations at time step \eqn{i} -\item \eqn{obs} defines model observations at time step \eqn{i} -} -} -\examples{ -library(tidyhydro) -data(avacha) - -# Supply truth and predictions as bare column names -mse(avacha, obs, sim) - -# Or as numeric vectors -mse_vec(avacha$obs, avacha$sim) -} -\references{ -Fisher, R. A. (1920). Accuracy of observation, a mathematical -examination of the methods of determining, by the mean error and -by the mean square error. Monthly Notices of the Royal Astronomical -Society, 80, 758–770. \doi{10.1093/mnras/80.8.758} - -Clark, M. P., Vogel, R. M., Lamontagne, J. R., Mizukami, N., -Knoben, W. J. 
M., Tang, G., Gharari, S., Freer, J. E., Whitfield, -P. H., Shook, K. R., & Papalexiou, S. M. (2021). The Abuse of Popular -Performance Metrics in Hydrologic Modeling. Water Resources Research, 57(9), -e2020WR029001. \doi{10.1029/2020WR029001} -} -\seealso{ -Other numeric metrics: -\code{\link{kge}()}, -\code{\link{kge2012}()}, -\code{\link{nse}()}, -\code{\link{pbias}()}, -\code{\link{press}()}, -\code{\link{sfe}()} - -Other accuracy metrics: -\code{\link{kge}()}, -\code{\link{kge2012}()}, -\code{\link{nse}()}, -\code{\link{pbias}()}, -\code{\link{press}()}, -\code{\link{sfe}()} -} -\concept{accuracy metrics} -\concept{numeric metrics} -\keyword{gof} +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/mse.R +\name{mse} +\alias{mse} +\alias{mse.data.frame} +\alias{mse_vec} +\title{Mean Squared Error (MSE)} +\usage{ +mse(data, ...) + +\method{mse}{data.frame}(data, truth, estimate, na_rm = TRUE, ...) + +mse_vec(truth, estimate, na_rm = TRUE, ...) +} +\arguments{ +\item{data}{A \code{data.frame} containing the columns specified by the \code{truth} +and \code{estimate} arguments.} + +\item{...}{Not currently used.} + +\item{truth}{The column identifier for the true results +(that is \code{numeric}). This should be an unquoted column name although +this argument is passed by expression and supports +\link[rlang:topic-inject]{quasiquotation} (you can unquote column +names). For \verb{_vec()} functions, a \code{numeric} vector.} + +\item{estimate}{The column identifier for the predicted +results (that is also \code{numeric}). As with \code{truth} this can be +specified different ways but the primary method is to use an +unquoted variable name. For \verb{_vec()} functions, a \code{numeric} vector.} + +\item{na_rm}{A \code{logical} value indicating whether \code{NA} +values should be stripped before the computation proceeds.} +} +\value{ +A \code{tibble} with columns \code{.metric}, \code{.estimator}, +and \code{.estimate} and 1 row of values. 
+ +For grouped data frames, the number of rows returned will be the same as +the number of groups. + +For \code{mse_vec()}, a single \code{numeric} value (or \code{NA}). +} +\description{ +The MSE is a metric that evaluates the goodness of fit between model +simulations and observations (\emph{Fisher, 1920}). Measured in the squared +units of \code{truth} and \code{estimate} and can vary from 0 to +\eqn{+\infty}. +} +\details{ +The MSE is estimated as follows (Clark et al., 2021): +\deqn{ +MSE = \frac{1}{n} \sum_{i=1}^{n}{(sim_i - obs_i)^2} +} +where: +\itemize{ +\item \eqn{sim} defines model simulations at time step \eqn{i} +\item \eqn{obs} defines model observations at time step \eqn{i} +} +} +\examples{ +library(tidyhydro) + +# Supply truth and predictions as bare column names +mse(avacha, obs, sim) + +# Or as numeric vectors +mse_vec(avacha$obs, avacha$sim) +} +\references{ +Fisher, R. A. (1920). Accuracy of observation, a mathematical +examination of the methods of determining, by the mean error and +by the mean square error. Monthly Notices of the Royal Astronomical +Society, 80, 758–770. \doi{10.1093/mnras/80.8.758} + +Clark, M. P., Vogel, R. M., Lamontagne, J. R., Mizukami, N., +Knoben, W. J. M., Tang, G., Gharari, S., Freer, J. E., Whitfield, +P. H., Shook, K. R., & Papalexiou, S. M. (2021). The Abuse of Popular +Performance Metrics in Hydrologic Modeling. Water Resources Research, 57(9), +e2020WR029001. 
\doi{10.1029/2020WR029001} +} +\seealso{ +Other numeric metrics: +\code{\link{pbias}()}, +\code{\link{rmse}()} + +Other accuracy metrics: +\code{\link{rmse}()} +} +\concept{accuracy metrics} +\concept{numeric metrics} +\keyword{gof} diff --git a/man/new-measure.Rd b/man/new-measure.Rd new file mode 100644 index 0000000..36acd76 --- /dev/null +++ b/man/new-measure.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/aaa-new.R +\name{new-measure} +\alias{new-measure} +\alias{new_tendency_measure} +\alias{new_var_measure} +\alias{new_sym_measure} +\title{Construct a new measure function} +\usage{ +new_tendency_measure(fn) + +new_var_measure(fn) + +new_sym_measure(fn) +} +\arguments{ +\item{fn}{A function. The measure function to attach a measure-specific class} +} +\description{ +These functions provide convenient wrappers to create the three types of +measure functions in \code{tidyhydro}: measures of central tendency, variability +and symmetry. They add a measure-specific class to \code{fn} and +mimic a behaviour of \link[yardstick:metric_set]{metric_set}. These features +are used by measure_set. + +See \href{https://www.tidymodels.org/learn/develop/metrics/}{Custom performance metrics} for more +information about creating custom metrics. +} +\keyword{summary_stats} diff --git a/man/nse.Rd b/man/nse.Rd index a1a1aad..23113b5 100644 --- a/man/nse.Rd +++ b/man/nse.Rd @@ -1,121 +1,102 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/nse.R -\name{nse} -\alias{nse} -\alias{nse.data.frame} -\alias{nse_vec} -\title{Nash-Sutcliffe Efficiency (NSE)} -\usage{ -nse(data, ...) - -\method{nse}{data.frame}(data, truth, estimate, na_rm = TRUE, performance = FALSE, ...) - -nse_vec(truth, estimate, na_rm = TRUE, performance = FALSE, ...) 
-} -\arguments{ -\item{data}{A \code{data.frame} containing the columns specified by the \code{truth} -and \code{estimate} arguments.} - -\item{...}{Not currently used.} - -\item{truth}{The column identifier for the true results -(that is \code{numeric}). This should be an unquoted column name although -this argument is passed by expression and supports -\link[rlang:topic-inject]{quasiquotation} (you can unquote column -names). For \verb{_vec()} functions, a \code{numeric} vector.} - -\item{estimate}{The column identifier for the predicted -results (that is also \code{numeric}). As with \code{truth} this can be -specified different ways but the primary method is to use an -unquoted variable name. For \verb{_vec()} functions, a \code{numeric} vector.} - -\item{na_rm}{A \code{logical} value indicating whether \code{NA} -values should be stripped before the computation proceeds.} - -\item{performance}{The optional column, indicating should the \code{nse()} return -metric interpretation. See details.} -} -\value{ -A \code{tibble} with columns \code{.metric}, \code{.estimator}, -and \code{.estimate} and 1 row of values. - -For grouped data frames, the number of rows returned will be the same as -the number of groups. - -For \code{nse_vec()}, a single \code{numeric} value (or \code{NA}). -} -\description{ -Calculate the Nash-Sutcliffe efficiency (\emph{Nash & Sutcliffe, 1970}). -Dimensionless (from \eqn{-\infty} to 1). \code{nse()} indicates how well the plot -of observed versus simulated data fits the 1:1 line. -} -\details{ -The Nash-Sutcliffe efficiency is a normalized statistic that determines -the relative magnitude of the residual variance ("noise") compared to the -measured data variance ("information"; \emph{Nash and Sutcliffe, 1970}). 
- -The formula for NSE is: - -\deqn{ - NSE = 1 - \frac{ - \sum_{i=1}^{n}{(sim_i - obs_i)^2} - }{ - \sum_{i=1}^{n}{(obs_i - \mu_{obs})^2} - } -} -where: -\itemize{ -\item \eqn{sim} defines model simulations at time step \eqn{i} -\item \eqn{obs} defines model observations at time step \eqn{i} -\item \eqn{\mu_{obs}} defines mean of model observations -} - -According to Moriasi et al. (2015) the metric interpretation can be -as follows: -\itemize{ -\item \strong{Excellent}/\strong{Very Good} -- \code{nse()} > 0.8 -\item \strong{Good} -- 0.6 <= \code{nse()} <= 0.8 -\item \strong{Satisfactory} -- 0.5 < \code{nse()} < 0.6 -\item \strong{Poor} -- \code{nse()} <= 0.5 -} -} -\examples{ -library(tidyhydro) -data(avacha) - -# Supply truth and predictions as bare column names -nse(avacha, obs, sim) - -# Or as numeric vectors -nse_vec(avacha$obs, avacha$sim) -} -\references{ -Nash, J. E., & Sutcliffe, J. V. (1970). River flow forecasting through -conceptual models part I — A discussion of principles. Journal of Hydrology, -10(3), 282–290. \doi{10.1016/0022-1694(70)90255-6} - -Moriasi, D. N., Gitau, M. W., Pai, N., & Daggupati, P. (2015). Hydrologic -and Water Quality Models: Performance Measures and Evaluation Criteria. -Transactions of the ASABE, 58(6), 1763–1785. -\doi{10.13031/trans.58.10715} -} -\seealso{ -Other numeric metrics: -\code{\link{kge}()}, -\code{\link{kge2012}()}, -\code{\link{mse}()}, -\code{\link{pbias}()}, -\code{\link{press}()}, -\code{\link{sfe}()} - -Other accuracy metrics: -\code{\link{kge}()}, -\code{\link{kge2012}()}, -\code{\link{mse}()}, -\code{\link{pbias}()}, -\code{\link{press}()}, -\code{\link{sfe}()} -} -\concept{accuracy metrics} -\concept{numeric metrics} -\keyword{gof} +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/nse.R +\name{nse} +\alias{nse} +\alias{nse.data.frame} +\alias{nse_vec} +\title{Nash-Sutcliffe Efficiency (NSE)} +\usage{ +nse(data, ...) 
+ +\method{nse}{data.frame}(data, truth, estimate, na_rm = TRUE, performance = FALSE, ...) + +nse_vec(truth, estimate, na_rm = TRUE, performance = FALSE, ...) +} +\arguments{ +\item{data}{A \code{data.frame} containing the columns specified by the \code{truth} +and \code{estimate} arguments.} + +\item{...}{Not currently used.} + +\item{truth}{The column identifier for the true results +(that is \code{numeric}). This should be an unquoted column name although +this argument is passed by expression and supports +\link[rlang:topic-inject]{quasiquotation} (you can unquote column +names). For \verb{_vec()} functions, a \code{numeric} vector.} + +\item{estimate}{The column identifier for the predicted +results (that is also \code{numeric}). As with \code{truth} this can be +specified different ways but the primary method is to use an +unquoted variable name. For \verb{_vec()} functions, a \code{numeric} vector.} + +\item{na_rm}{A \code{logical} value indicating whether \code{NA} +values should be stripped before the computation proceeds.} + +\item{performance}{An optional \code{logical} value indicating whether \code{nse()} +should return the metric interpretation. See details.} +} +\value{ +A \code{tibble} with columns \code{.metric}, \code{.estimator}, +and \code{.estimate} and 1 row of values. + +For grouped data frames, the number of rows returned will be the same as +the number of groups. + +For \code{nse_vec()}, a single \code{numeric} value (or \code{NA}). +} +\description{ +Calculate the Nash-Sutcliffe efficiency (\emph{Nash & Sutcliffe, 1970}). +Dimensionless (from \eqn{-\infty} to 1). \code{nse()} indicates how well the plot +of observed versus simulated data fits the 1:1 line. +} +\details{ +The Nash-Sutcliffe efficiency is a normalized statistic that determines +the relative magnitude of the residual variance ("noise") compared to the +measured data variance ("information"; \emph{Nash and Sutcliffe, 1970}). 
+ +The formula for NSE is: + +\deqn{ + NSE = 1 - \frac{ + \sum_{i=1}^{n}{(sim_i - obs_i)^2} + }{ + \sum_{i=1}^{n}{(obs_i - \mu_{obs})^2} + } +} +where: +\itemize{ +\item \eqn{sim} defines model simulations at time step \eqn{i} +\item \eqn{obs} defines model observations at time step \eqn{i} +\item \eqn{\mu_{obs}} defines mean of model observations +} + +According to Moriasi et al. (2015) the metric interpretation can be +as follows: +\itemize{ +\item \strong{Excellent}/\strong{Very Good} -- \code{nse()} > 0.8 +\item \strong{Good} -- 0.6 <= \code{nse()} <= 0.8 +\item \strong{Satisfactory} -- 0.5 < \code{nse()} < 0.6 +\item \strong{Poor} -- \code{nse()} <= 0.5 +} +} +\examples{ +library(tidyhydro) + +# Supply truth and predictions as bare column names +nse(avacha, obs, sim) + +# Or as numeric vectors +nse_vec(avacha$obs, avacha$sim) +} +\references{ +Nash, J. E., & Sutcliffe, J. V. (1970). River flow forecasting through +conceptual models part I — A discussion of principles. Journal of Hydrology, +10(3), 282–290. \doi{10.1016/0022-1694(70)90255-6} + +Moriasi, D. N., Gitau, M. W., Pai, N., & Daggupati, P. (2015). Hydrologic +and Water Quality Models: Performance Measures and Evaluation Criteria. +Transactions of the ASABE, 58(6), 1763–1785. +\doi{10.13031/trans.58.10715} +} +\concept{NSE variants} +\keyword{gof} diff --git a/man/pbias.Rd b/man/pbias.Rd index 91351d7..688716d 100644 --- a/man/pbias.Rd +++ b/man/pbias.Rd @@ -1,119 +1,105 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pbias.R -\name{pbias} -\alias{pbias} -\alias{pbias.data.frame} -\alias{pbias_vec} -\title{Percent BIAS (pBIAS)} -\usage{ -pbias(data, ...) - -\method{pbias}{data.frame}(data, truth, estimate, na_rm = TRUE, performance = FALSE, ...) - -pbias_vec(truth, estimate, na_rm = TRUE, performance = FALSE, ...) 
-} -\arguments{ -\item{data}{A \code{data.frame} containing the columns specified by the \code{truth} -and \code{estimate} arguments.} - -\item{...}{Not currently used.} - -\item{truth}{The column identifier for the true results -(that is \code{numeric}). This should be an unquoted column name although -this argument is passed by expression and supports -\link[rlang:topic-inject]{quasiquotation} (you can unquote column -names). For \verb{_vec()} functions, a \code{numeric} vector.} - -\item{estimate}{The column identifier for the predicted -results (that is also \code{numeric}). As with \code{truth} this can be -specified different ways but the primary method is to use an -unquoted variable name. For \verb{_vec()} functions, a \code{numeric} vector.} - -\item{na_rm}{A \code{logical} value indicating whether \code{NA} -values should be stripped before the computation proceeds.} - -\item{performance}{The optional column, indicating should the \code{pbias()} -return metric interpretation. See details.} -} -\value{ -A \code{tibble} with columns \code{.metric}, \code{.estimator}, -and \code{.estimate} and 1 row of values. - -For grouped data frames, the number of rows returned will be the same as -the number of groups. - -For \code{pbias_vec()}, a single \code{numeric} value (or \code{NA}). -} -\description{ -\eqn{pBIAS} is the deviation of data being evaluated, expressed as a -percentage. It measures the average tendency of the simulated data to be -larger or smaller than their observed counterparts (\emph{Moriasi et al., 2015}). -The optimal value of \eqn{pBIAS} is 0.0, with low-magnitude values -indicating accurate mode simulation. Positive values indicate model -underestimation bias, and negative values indicate model overestimation -bias (\emph{Gupta et al., 1999}). 
-} -\details{ -The formula for \eqn{pBIAS} is: - -\deqn{ - pBIAS = 100 \times \frac{\sum_{i=1}^{n}{(sim_i - obs_i)}} - {\sum_{i=1}^{n}{obs_i}} -} - -where: -\itemize{ -\item \eqn{sim} defines model simulations at time step \eqn{i} -\item \eqn{obs} defines model observations at time step \eqn{i} -} - -According to Moriasi et al. (2015) the metric interpretation can be as -follows: -\itemize{ -\item \strong{Excellent}/\strong{Very Good} -- \code{pbias()} < ±5.0 -\item \strong{Good} -- ±5.0 <= \code{pbias()} < ±10.0 -\item \strong{Satisfactory} -- ±10.0 <= \code{pbias()} < ±15.0 -\item \strong{Poor} -- \code{pbias()} >= ±15.0 -} -} -\examples{ -library(tidyhydro) -data(avacha) - -# Supply truth and predictions as bare column names -pbias(avacha, obs, sim) - -# Or as numeric vectors -pbias_vec(avacha$obs, avacha$sim) -} -\references{ -Moriasi, D. N., Gitau, M. W., Pai, N., & Daggupati, P. (2015). Hydrologic -and Water Quality Models: Performance Measures and Evaluation Criteria. -Transactions of the ASABE, 58(6), 1763–1785. -\doi{10.13031/trans.58.10715} - -Gupta, H. V., S. Sorooshian, and P. O. Yapo. (1999). -Status of automatic calibration for hydrologic models: Comparison with -multilevel expert calibration. J. Hydrologic Eng. 4(2): 135-143 -\doi{10.1061/(ASCE)1084-0699(1999)4:2(135)} -} -\seealso{ -Other numeric metrics: -\code{\link{kge}()}, -\code{\link{kge2012}()}, -\code{\link{mse}()}, -\code{\link{nse}()}, -\code{\link{press}()}, -\code{\link{sfe}()} - -Other accuracy metrics: -\code{\link{kge}()}, -\code{\link{kge2012}()}, -\code{\link{mse}()}, -\code{\link{nse}()}, -\code{\link{press}()}, -\code{\link{sfe}()} -} -\concept{accuracy metrics} -\concept{numeric metrics} -\keyword{gof} +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/pbias.R +\name{pbias} +\alias{pbias} +\alias{pbias.data.frame} +\alias{pbias_vec} +\title{Percent BIAS (pBIAS)} +\usage{ +pbias(data, ...) 
+ +\method{pbias}{data.frame}(data, truth, estimate, na_rm = TRUE, performance = FALSE, ...) + +pbias_vec(truth, estimate, na_rm = TRUE, performance = FALSE, ...) +} +\arguments{ +\item{data}{A \code{data.frame} containing the columns specified by the \code{truth} +and \code{estimate} arguments.} + +\item{...}{Not currently used.} + +\item{truth}{The column identifier for the true results +(that is \code{numeric}). This should be an unquoted column name although +this argument is passed by expression and supports +\link[rlang:topic-inject]{quasiquotation} (you can unquote column +names). For \verb{_vec()} functions, a \code{numeric} vector.} + +\item{estimate}{The column identifier for the predicted +results (that is also \code{numeric}). As with \code{truth} this can be +specified different ways but the primary method is to use an +unquoted variable name. For \verb{_vec()} functions, a \code{numeric} vector.} + +\item{na_rm}{A \code{logical} value indicating whether \code{NA} +values should be stripped before the computation proceeds.} + +\item{performance}{An optional \code{logical} value indicating whether \code{pbias()} +should return the metric interpretation. See details.} +} +\value{ +A \code{tibble} with columns \code{.metric}, \code{.estimator}, +and \code{.estimate} and 1 row of values. + +For grouped data frames, the number of rows returned will be the same as +the number of groups. + +For \code{pbias_vec()}, a single \code{numeric} value (or \code{NA}). +} +\description{ +\eqn{pBIAS} is the deviation of data being evaluated, expressed as a +percentage. It measures the average tendency of the simulated data to be +larger or smaller than their observed counterparts (\emph{Moriasi et al., 2015}). +The optimal value of \eqn{pBIAS} is 0.0, with low-magnitude values +indicating accurate model simulation. Positive values indicate model +underestimation bias, and negative values indicate model overestimation +bias (\emph{Gupta et al., 1999}). 
+} +\details{ +The formula for \eqn{pBIAS} is: + +\deqn{ + pBIAS = 100 \times \frac{\sum_{i=1}^{n}{(sim_i - obs_i)}} + {\sum_{i=1}^{n}{obs_i}} +} + +where: +\itemize{ +\item \eqn{sim} defines model simulations at time step \eqn{i} +\item \eqn{obs} defines model observations at time step \eqn{i} +} + +According to Moriasi et al. (2015) the metric interpretation can be as +follows: +\itemize{ +\item \strong{Excellent}/\strong{Very Good} -- \code{pbias()} < ±5.0 +\item \strong{Good} -- ±5.0 <= \code{pbias()} < ±10.0 +\item \strong{Satisfactory} -- ±10.0 <= \code{pbias()} < ±15.0 +\item \strong{Poor} -- \code{pbias()} >= ±15.0 +} +} +\examples{ +library(tidyhydro) + +# Supply truth and predictions as bare column names +pbias(avacha, obs, sim) + +# Or as numeric vectors +pbias_vec(avacha$obs, avacha$sim) +} +\references{ +Moriasi, D. N., Gitau, M. W., Pai, N., & Daggupati, P. (2015). Hydrologic +and Water Quality Models: Performance Measures and Evaluation Criteria. +Transactions of the ASABE, 58(6), 1763–1785. +\doi{10.13031/trans.58.10715} + +Gupta, H. V., S. Sorooshian, and P. O. Yapo. (1999). +Status of automatic calibration for hydrologic models: Comparison with +multilevel expert calibration. J. Hydrologic Eng. 4(2): 135-143 +\doi{10.1061/(ASCE)1084-0699(1999)4:2(135)} +} +\seealso{ +Other numeric metrics: +\code{\link{mse}()}, +\code{\link{rmse}()} +} +\concept{numeric metrics} +\keyword{gof} diff --git a/man/press.Rd b/man/press.Rd index 5519c98..feaa185 100644 --- a/man/press.Rd +++ b/man/press.Rd @@ -1,118 +1,103 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/press.R -\name{press} -\alias{press} -\alias{press.data.frame} -\alias{press_vec} -\title{PRediction Error Sum of Squares (PRESS)} -\usage{ -press(data, ...) - -\method{press}{data.frame}(data, truth, estimate, na_rm = TRUE, ...) - -press_vec(truth, estimate, na_rm = TRUE, ...) 
-} -\arguments{ -\item{data}{A \code{data.frame} containing the columns specified by the \code{truth} -and \code{estimate} arguments.} - -\item{...}{Not currently used.} - -\item{truth}{The column identifier for the true results -(that is \code{numeric}). This should be an unquoted column name although -this argument is passed by expression and supports -\link[rlang:topic-inject]{quasiquotation} (you can unquote column -names). For \verb{_vec()} functions, a \code{numeric} vector.} - -\item{estimate}{The column identifier for the predicted -results (that is also \code{numeric}). As with \code{truth} this can be -specified different ways but the primary method is to use an -unquoted variable name. For \verb{_vec()} functions, a \code{numeric} vector.} - -\item{na_rm}{A \code{logical} value indicating whether \code{NA} -values should be stripped before the computation proceeds.} -} -\value{ -A \code{tibble} with columns \code{.metric}, \code{.estimator}, -and \code{.estimate} and 1 row of values. - -For grouped data frames, the number of rows returned will be the same as -the number of groups. - -For \code{press_vec()}, a single \code{numeric} value (or \code{NA}). -} -\description{ -\eqn{PRESS} is a measure of the quality of a regression model using -residuals. \eqn{PRESS} is a validation-type estimator of error that uses -the deleted residuals to provide an estimate of the prediction error. -When comparing alternate regression models, selecting the model with the -lowest value of the \eqn{PRESS} statistic is a good approach because it -means that the equation produces the least error when making new predictions -(see \emph{Helsel et al., 2020}). - -It is particularly valuable in assessing multiple forms of multiple -linear regressions, but it is also useful for -simply comparing different options for a single explanatory variable in -single-variable regression models. 
-} -\details{ -The \eqn{PRESS} is only relevant for comparisons to other regression models -with the same response variable units (\emph{Rasmunsen et al., 2009}). - -It estimates as follows: -\deqn{ - PRESS = \sum_{i=1}^{n}{(sim_i - obs_i)^2} -} - -where: -\itemize{ -\item \eqn{sim} defines model simulations at time step \eqn{i} -\item \eqn{obs} defines model observations at time step \eqn{i} -} -} -\note{ -The $PRESS$ statistic is not appropriate for comparison of models having -different transformations of response variable, e.g. linear regression and -log-transformed linear regression (\emph{Helsel et al., 2020}). -} -\examples{ -library(tidyhydro) -data(avacha) - -# Supply truth and predictions as bare column names -press(avacha, obs, sim) - -# Or as numeric vectors -press_vec(avacha$obs, avacha$sim) -} -\references{ -Rasmussen, P. P., Gray, J. R., Glysson, G. D. & Ziegler, A. C. -Guidelines and procedures for computing time-series suspended-sediment -concentrations and loads from in-stream turbidity-sensor and streamflow -data. in U.S. Geological Survey Techniques and Methods book 3, chap. -C4 53 (2009) \url{https://pubs.usgs.gov/tm/tm3c4/}. - -Helsel, D. R., Hirsch, R. M., Ryberg, K. R., Archfield, S. A. & -Gilroy, E. J. Statistical Methods in Water Resources. 484 (2020) -\doi{10.3133/tm4A3}. -} -\seealso{ -Other numeric metrics: -\code{\link{kge}()}, -\code{\link{kge2012}()}, -\code{\link{mse}()}, -\code{\link{nse}()}, -\code{\link{pbias}()}, -\code{\link{sfe}()} - -Other accuracy metrics: -\code{\link{kge}()}, -\code{\link{kge2012}()}, -\code{\link{mse}()}, -\code{\link{nse}()}, -\code{\link{pbias}()}, -\code{\link{sfe}()} -} -\concept{accuracy metrics} -\concept{numeric metrics} -\keyword{regression} +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/press.R +\name{press} +\alias{press} +\alias{press.data.frame} +\alias{press_vec} +\title{PRediction Error Sum of Squares (PRESS)} +\usage{ +press(data, ...) 
+ +\method{press}{data.frame}(data, truth, estimate, na_rm = TRUE, ...) + +press_vec(truth, estimate, na_rm = TRUE, ...) +} +\arguments{ +\item{data}{A \code{data.frame} containing the columns specified by the \code{truth} +and \code{estimate} arguments.} + +\item{...}{Not currently used.} + +\item{truth}{The column identifier for the true results +(that is \code{numeric}). This should be an unquoted column name although +this argument is passed by expression and supports +\link[rlang:topic-inject]{quasiquotation} (you can unquote column +names). For \verb{_vec()} functions, a \code{numeric} vector.} + +\item{estimate}{The column identifier for the predicted +results (that is also \code{numeric}). As with \code{truth} this can be +specified different ways but the primary method is to use an +unquoted variable name. For \verb{_vec()} functions, a \code{numeric} vector.} + +\item{na_rm}{A \code{logical} value indicating whether \code{NA} +values should be stripped before the computation proceeds.} +} +\value{ +A \code{tibble} with columns \code{.metric}, \code{.estimator}, +and \code{.estimate} and 1 row of values. + +For grouped data frames, the number of rows returned will be the same as +the number of groups. + +For \code{press_vec()}, a single \code{numeric} value (or \code{NA}). +} +\description{ +\eqn{PRESS} is a measure of the quality of a regression model using +residuals. \eqn{PRESS} is a validation-type estimator of error that uses +the deleted residuals to provide an estimate of the prediction error. +When comparing alternate regression models, selecting the model with the +lowest value of the \eqn{PRESS} statistic is a good approach because it +means that the equation produces the least error when making new predictions +(see \emph{Helsel et al., 2020}). 
+ +It is particularly valuable in assessing multiple forms of multiple +linear regressions, but it is also useful for +simply comparing different options for a single explanatory variable in +single-variable regression models. +} +\details{ +The \eqn{PRESS} is only relevant for comparisons to other regression models +with the same response variable units (\emph{Rasmussen et al., 2009}). + +It is estimated as follows: +\deqn{ + PRESS = \sum_{i=1}^{n}{(sim_i - obs_i)^2} +} + +where: +\itemize{ +\item \eqn{sim} defines model simulations at time step \eqn{i} +\item \eqn{obs} defines model observations at time step \eqn{i} +} +} +\note{ +The \eqn{PRESS} statistic is not appropriate for comparison of models having +different transformations of response variable, e.g. linear regression and +log-transformed linear regression (\emph{Helsel et al., 2020}). +} +\examples{ +library(tidyhydro) + +# Supply truth and predictions as bare column names +press(avacha, obs, sim) + +# Or as numeric vectors +press_vec(avacha$obs, avacha$sim) +} +\references{ +Rasmussen, P. P., Gray, J. R., Glysson, G. D. & Ziegler, A. C. +Guidelines and procedures for computing time-series suspended-sediment +concentrations and loads from in-stream turbidity-sensor and streamflow +data. in U.S. Geological Survey Techniques and Methods book 3, chap. +C4 53 (2009) \url{https://pubs.usgs.gov/tm/tm3c4/}. + +Helsel, D. R., Hirsch, R. M., Ryberg, K. R., Archfield, S. A. & +Gilroy, E. J. Statistical Methods in Water Resources. 484 (2020) +\doi{10.3133/tm4A3}. 
+} +\seealso{ +Other regression metrics: +\code{\link{sfe}()} +} +\concept{regression metrics} +\keyword{regression} diff --git a/man/rmse.Rd b/man/rmse.Rd new file mode 100644 index 0000000..010f6e2 --- /dev/null +++ b/man/rmse.Rd @@ -0,0 +1,77 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/mse.R +\name{rmse} +\alias{rmse} +\alias{rmse.data.frame} +\alias{rmse_vec} +\title{Root Mean Squared Error (RMSE)} +\usage{ +rmse(data, ...) + +\method{rmse}{data.frame}(data, truth, estimate, na_rm = TRUE, ...) + +rmse_vec(truth, estimate, na_rm = TRUE, ...) +} +\arguments{ +\item{data}{A \code{data.frame} containing the columns specified by the \code{truth} +and \code{estimate} arguments.} + +\item{...}{Not currently used.} + +\item{truth}{The column identifier for the true results +(that is \code{numeric}). This should be an unquoted column name although +this argument is passed by expression and supports +\link[rlang:topic-inject]{quasiquotation} (you can unquote column +names). For \verb{_vec()} functions, a \code{numeric} vector.} + +\item{estimate}{The column identifier for the predicted +results (that is also \code{numeric}). As with \code{truth} this can be +specified different ways but the primary method is to use an +unquoted variable name. For \verb{_vec()} functions, a \code{numeric} vector.} + +\item{na_rm}{A \code{logical} value indicating whether \code{NA} +values should be stripped before the computation proceeds.} +} +\value{ +A \code{tibble} with columns \code{.metric}, \code{.estimator}, +and \code{.estimate} and 1 row of values. + +For grouped data frames, the number of rows returned will be the same as +the number of groups. + +For \code{rmse_vec()}, a single \code{numeric} value (or \code{NA}). 
+} +\description{ +Root Mean Squared Error (RMSE) +} +\details{ +The RMSE is estimated as follows: +\deqn{ +RMSE = \sqrt{\frac{1}{n} \sum_{i=1}^{n}{(sim_i - obs_i)^2}} +} +where: +\itemize{ +\item \eqn{sim} defines model simulations at time step \eqn{i} +\item \eqn{obs} defines model observations at time step \eqn{i} +} +} +\examples{ +library(tidyhydro) + +# Supply truth and predictions as bare column names +rmse(avacha, obs, sim) + +# Or as numeric vectors +rmse_vec(avacha$obs, avacha$sim) +} +\seealso{ +Other numeric metrics: +\code{\link{mse}()}, +\code{\link{pbias}()} + +Other accuracy metrics: +\code{\link{mse}()} +} +\concept{accuracy metrics} +\concept{numeric metrics} +\keyword{gof} diff --git a/man/sfe.Rd b/man/sfe.Rd index 182cf68..70de7c6 100644 --- a/man/sfe.Rd +++ b/man/sfe.Rd @@ -1,107 +1,92 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/sfe.R -\name{sfe} -\alias{sfe} -\alias{sfe.data.frame} -\alias{sfe_vec} -\title{Standard Factorial Error (SFE)} -\usage{ -sfe(data, ...) - -\method{sfe}{data.frame}(data, truth, estimate, na_rm = TRUE, ...) - -sfe_vec(truth, estimate, na_rm = TRUE, ...) -} -\arguments{ -\item{data}{A \code{data.frame} containing the columns specified by the \code{truth} -and \code{estimate} arguments.} - -\item{...}{Not currently used.} - -\item{truth}{The column identifier for the true results -(that is \code{numeric}). This should be an unquoted column name although -this argument is passed by expression and supports -\link[rlang:topic-inject]{quasiquotation} (you can unquote column -names). For \verb{_vec()} functions, a \code{numeric} vector.} - -\item{estimate}{The column identifier for the predicted -results (that is also \code{numeric}). As with \code{truth} this can be -specified different ways but the primary method is to use an -unquoted variable name. 
For \verb{_vec()} functions, a \code{numeric} vector.} - -\item{na_rm}{A \code{logical} value indicating whether \code{NA} -values should be stripped before the computation proceeds.} -} -\value{ -A \code{tibble} with columns \code{.metric}, \code{.estimator}, -and \code{.estimate} and 1 row of values. - -For grouped data frames, the number of rows returned will be the same as -the number of groups. - -For \code{sfe_vec()}, a single \code{numeric} value (or \code{NA}). -} -\description{ -Prediction standard factorial error estimated -using standard regression methods (see \emph{Herschy, 1978}). -} -\details{ -The metric is widely used for assessing Sediment Rating Curves -(e.g., Hicks et al. 2020). The model is usually considered 'unacceptable' -if the \eqn{SFE > 2}, see Hicks et al. (2011). - -It is estimated as follows: -\deqn{SFE = \exp\left(\sqrt{\frac{1}{n} \sum_{i=1}^{n} -\left( \log\left(\frac{obs_i}{sim_i} \right) \right)^2 }\right)} -where: -\itemize{ -\item \eqn{sim} defines model simulations at time step \eqn{i} -\item \eqn{obs} defines model observations at time step \eqn{i} -} -} -\examples{ -library(tidyhydro) -data(avacha) - -# Supply truth and predictions as bare column names -sfe(avacha, obs, sim) - -# Or as numeric vectors -sfe_vec(avacha$obs, avacha$sim) -} -\references{ -Herschy, R.W. 1978: Accuracy. Chapter 10 In: Herschy, R.W. (ed.) -Hydrometry - principles and practices. John Wiley and Sons, Chichester, -511 p. - -Hicks, D. M., Shankar, U., McKerchar, A. I., Basher, L., Lynn, I., -Page, M., & Jessen, M. (2011). Suspended Sediment Yields from New Zealand -Rivers. Journal of Hydrology (New Zealand), 50(1), 81–142. -\doi{10.3316/informit.315190637227597} - -Hicks, M., Doyle, M., Watson, J., Holwerda, N., Lynch, B., Wyatt, J., -Jones, H., & Hill, R. (2020). Measurement of Fluvial Suspended Sediment -Load and its Composition (No. 1.0.0; National Environmental Monitoring -Standards, p. 138). 
-\url{https://www.nems.org.nz/documents/suspended-sediment} -} -\seealso{ -Other numeric metrics: -\code{\link{kge}()}, -\code{\link{kge2012}()}, -\code{\link{mse}()}, -\code{\link{nse}()}, -\code{\link{pbias}()}, -\code{\link{press}()} - -Other accuracy metrics: -\code{\link{kge}()}, -\code{\link{kge2012}()}, -\code{\link{mse}()}, -\code{\link{nse}()}, -\code{\link{pbias}()}, -\code{\link{press}()} -} -\concept{accuracy metrics} -\concept{numeric metrics} -\keyword{regression} +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sfe.R +\name{sfe} +\alias{sfe} +\alias{sfe.data.frame} +\alias{sfe_vec} +\title{Standard Factorial Error (SFE)} +\usage{ +sfe(data, ...) + +\method{sfe}{data.frame}(data, truth, estimate, na_rm = TRUE, ...) + +sfe_vec(truth, estimate, na_rm = TRUE, ...) +} +\arguments{ +\item{data}{A \code{data.frame} containing the columns specified by the \code{truth} +and \code{estimate} arguments.} + +\item{...}{Not currently used.} + +\item{truth}{The column identifier for the true results +(that is \code{numeric}). This should be an unquoted column name although +this argument is passed by expression and supports +\link[rlang:topic-inject]{quasiquotation} (you can unquote column +names). For \verb{_vec()} functions, a \code{numeric} vector.} + +\item{estimate}{The column identifier for the predicted +results (that is also \code{numeric}). As with \code{truth} this can be +specified different ways but the primary method is to use an +unquoted variable name. For \verb{_vec()} functions, a \code{numeric} vector.} + +\item{na_rm}{A \code{logical} value indicating whether \code{NA} +values should be stripped before the computation proceeds.} +} +\value{ +A \code{tibble} with columns \code{.metric}, \code{.estimator}, +and \code{.estimate} and 1 row of values. + +For grouped data frames, the number of rows returned will be the same as +the number of groups. + +For \code{sfe_vec()}, a single \code{numeric} value (or \code{NA}). 
+} +\description{ +Prediction standard factorial error estimated +using standard regression methods (see \emph{Herschy, 1978}). +} +\details{ +The metric is widely used for assessing Sediment Rating Curves +(e.g., Hicks et al. 2020). The model is usually considered 'unacceptable' +if the \eqn{SFE > 2}, see Hicks et al. (2011). + +It is estimated as follows: +\deqn{SFE = \exp\left(\sqrt{\frac{1}{n} \sum_{i=1}^{n} +\left( \log\left(\frac{obs_i}{sim_i} \right) \right)^2 }\right)} +where: +\itemize{ +\item \eqn{sim} defines model simulations at time step \eqn{i} +\item \eqn{obs} defines model observations at time step \eqn{i} +} +} +\examples{ +library(tidyhydro) + +# Supply truth and predictions as bare column names +sfe(avacha, obs, sim) + +# Or as numeric vectors +sfe_vec(avacha$obs, avacha$sim) +} +\references{ +Herschy, R.W. 1978: Accuracy. Chapter 10 In: Herschy, R.W. (ed.) +Hydrometry - principles and practices. John Wiley and Sons, Chichester, +511 p. + +Hicks, D. M., Shankar, U., McKerchar, A. I., Basher, L., Lynn, I., +Page, M., & Jessen, M. (2011). Suspended Sediment Yields from New Zealand +Rivers. Journal of Hydrology (New Zealand), 50(1), 81–142. +\doi{10.3316/informit.315190637227597} + +Hicks, M., Doyle, M., Watson, J., Holwerda, N., Lynch, B., Wyatt, J., +Jones, H., & Hill, R. (2020). Measurement of Fluvial Suspended Sediment +Load and its Composition (No. 1.0.0; National Environmental Monitoring +Standards, p. 138). 
+\url{https://www.nems.org.nz/documents/suspended-sediment} +} +\seealso{ +Other regression metrics: +\code{\link{press}()} +} +\concept{regression metrics} +\keyword{regression} diff --git a/man/tidyhydro-package.Rd b/man/tidyhydro-package.Rd index a44dc51..a52488d 100644 --- a/man/tidyhydro-package.Rd +++ b/man/tidyhydro-package.Rd @@ -1,24 +1,24 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/tidyhydro-package.R -\docType{package} -\name{tidyhydro-package} -\alias{tidyhydro} -\alias{tidyhydro-package} -\title{tidyhydro: Tidy Metrics for Assessing Hydrological Models Performance} -\description{ -Provides tidy tools for comparing simulated and observed hydrological time series. Includes compatibility with the 'yardstick' package for model performance evaluation using commonly used metrics such as the Nash–Sutcliffe Efficiency (NSE), Kling–Gupta Efficiency (KGE), percent bias (pBIAS) and etc. -} -\seealso{ -Useful links: -\itemize{ - \item \url{https://github.com/atsyplenkov/tidyhydro} - \item \url{https://atsyplenkov.github.io/tidyhydro/} - \item Report bugs at \url{https://github.com/atsyplenkov/tidyhydro/issues} -} - -} -\author{ -\strong{Maintainer}: Anatoly Tsyplenkov \email{atsyplenkov@fastmail.com} (\href{https://orcid.org/0000-0003-4144-8402}{ORCID}) [copyright holder] - -} -\keyword{internal} +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/tidyhydro-package.R +\docType{package} +\name{tidyhydro-package} +\alias{tidyhydro} +\alias{tidyhydro-package} +\title{tidyhydro: Tidy Metrics for Assessing Hydrological Models Performance} +\description{ +Provides tidy tools to measure the characteristics of hydrological time series and to assess the performance of hydrological models. Includes compatibility with the 'yardstick' package for model performance evaluation using commonly used metrics such as the Nash–Sutcliffe Efficiency (NSE), Kling–Gupta Efficiency (KGE), percent bias (pBIAS), etc. 
Additionally provides a set of measures to calculate the descriptive statistics of a single dataset in accordance with Helsel et al. (2020). Helsel DR, Hirsch RM, Ryberg KR, Archfield SA, Gilroy EJ. Statistical methods in water resources. Reston, VA: 2020. \url{https://doi.org/10.3133/tm4A3}. +} +\seealso{ +Useful links: +\itemize{ + \item \url{https://github.com/atsyplenkov/tidyhydro} + \item \url{https://atsyplenkov.github.io/tidyhydro/} + \item Report bugs at \url{https://github.com/atsyplenkov/tidyhydro/issues} +} + +} +\author{ +\strong{Maintainer}: Anatoly Tsyplenkov \email{atsyplenkov@fastmail.com} (\href{https://orcid.org/0000-0003-4144-8402}{ORCID}) [copyright holder] + +} +\keyword{internal} diff --git a/paper/main.qmd b/paper/main.qmd new file mode 100644 index 0000000..f2ca109 --- /dev/null +++ b/paper/main.qmd @@ -0,0 +1,72 @@ +--- +title: Bridging R and Whitebox Workflows +subtitle: A proposal for the 2025 ISC Grant Program +short-title: wbw +code-repo: "Access the code, data, and analysis at " +author: + - name: Anatolii Tsyplenkov + email: tsyplenkova@landcareresearch.co.nz + orcid: 0000-0003-4144-8402 + affiliations: + - "Manaaki Whenua – Landcare Research, 4410 Palmerston North, New Zealand" + +date: today +date-format: iso +bibliography: references.bib + +fig-dpi: 500 + +highlight-style: tango + +execute: + echo: false + output: true + warning: false + +format: + hikmah-pdf: + papersize: A4 + linestretch: 1.1 + # Font settings + sansfont: "Jost" + mainfont: "Inter" + monofont: "Fira Code" + # Page size + geometry: + - top=0.5cm + - right=1cm + - bottom=1.5cm + - left=1.25cm + # Use biblatex-chicago + biblatex-chicago: true + biblio-style: authordate + biblatexoptions: + - backend=biber + - autolang=hyphen + - isbn=false + - uniquename=false + +header-includes: | + \usepackage{xcolor} + \definecolor{codebg}{gray}{0.9} + \let\oldtexttt\texttt + \renewcommand{\texttt}[1]{\colorbox{codebg}{\oldtexttt{#1}}} + +--- + +{{< include 
proposal/signatories.qmd >}} + +{{< include proposal/problemdefinition.qmd >}} + +{{< include proposal/proposal.qmd >}} + +{{< include proposal/timeline.qmd >}} + +{{< include proposal/requirements.qmd >}} + +{{< include proposal/success.qmd >}} + +# References + +::: refs +::: \ No newline at end of file diff --git a/paper/references.bib b/paper/references.bib new file mode 100644 index 0000000..2f81d39 --- /dev/null +++ b/paper/references.bib @@ -0,0 +1,362 @@ +@article{aertsLargesampleAssessmentVarying2022, + title = {Large-Sample Assessment of Varying Spatial Resolution on the Streamflow Estimates of the Wflow\_sbm Hydrological Model}, + author = {Aerts, Jerom P. M. and Hut, Rolf W. and {van de Giesen}, Nick C. and Drost, Niels and {van Verseveld}, Willem J. and Weerts, Albrecht H. and Hazenberg, Pieter}, + year = {2022}, + month = aug, + journal = {Hydrology and Earth System Sciences}, + volume = {26}, + number = {16}, + pages = {4407--4430}, + publisher = {Copernicus GmbH}, + issn = {1027-5606}, + doi = {10.5194/hess-26-4407-2022}, + url = {https://hess.copernicus.org/articles/26/4407/2022/}, + urldate = {2025-06-24}, + abstract = {Distributed hydrological modelling moves into the realm of hyper-resolution modelling. This results in a plethora of scaling-related challenges that remain unsolved. To the user, in light of model result interpretation, finer-resolution output might imply an increase in understanding of the complex interplay of heterogeneity within the hydrological system. Here we investigate spatial scaling in the form of varying spatial resolution by evaluating the streamflow estimates of the distributed wflow\_sbm hydrological model based on 454 basins from the large-sample CAMELS data set. Model instances are derived at three spatial resolutions, namely 3 km, 1 km, and 200 m. The results show that a finer spatial resolution does not necessarily lead to better streamflow estimates at the basin outlet. 
Statistical testing of the objective function distributions (Kling--Gupta efficiency (KGE) score) of the three model instances resulted in only a statistical difference between the 3 km and 200 m streamflow estimates. However, an assessment of sampling uncertainty shows high uncertainties surrounding the KGE score throughout the domain. This makes the conclusion based on the statistical testing inconclusive. The results do indicate strong locality in the differences between model instances expressed by differences in KGE scores of on average 0.22 with values larger than 0.5. The results of this study open up research paths that can investigate the changes in flux and state partitioning due to spatial scaling. This will help to further understand the challenges that need to be resolved for hyper-resolution hydrological modelling.}, + langid = {english}, + file = {C:\Users\TsyplenkovA\OneDrive - MWLR\ATS\Personal\zotero-library\Aerts et al.\Aerts et al._2022_Large-sample assessment of varying spatial resolut.pdf} +} + +@article{clarkAbusePopularPerformance2021, + title = {The {{Abuse}} of {{Popular Performance Metrics}} in {{Hydrologic Modeling}}}, + author = {Clark, Martyn P. and Vogel, Richard M. and Lamontagne, Jonathan R. and Mizukami, Naoki and Knoben, Wouter J. M. and Tang, Guoqiang and Gharari, Shervan and Freer, Jim E. and Whitfield, Paul H. and Shook, Kevin R. and Papalexiou, Simon Michael}, + year = {2021}, + journal = {Water Resources Research}, + volume = {57}, + number = {9}, + pages = {e2020WR029001}, + issn = {1944-7973}, + doi = {10.1029/2020WR029001}, + url = {https://onlinelibrary.wiley.com/doi/abs/10.1029/2020WR029001}, + urldate = {2025-06-22}, + abstract = {The goal of this commentary is to critically evaluate the use of popular performance metrics in hydrologic modeling. We focus on the Nash-Sutcliffe Efficiency (NSE) and the Kling-Gupta Efficiency (KGE) metrics, which are both widely used in hydrologic research and practice around the world. 
Our specific objectives are: (a) to provide tools that quantify the sampling uncertainty in popular performance metrics; (b) to quantify sampling uncertainty in popular performance metrics across a large sample of catchments; and (c) to prescribe the further research that is, needed to improve the estimation, interpretation, and use of popular performance metrics in hydrologic modeling. Our large-sample analysis demonstrates that there is substantial sampling uncertainty in the NSE and KGE estimators. This occurs because the probability distribution of squared errors between model simulations and observations has heavy tails, meaning that performance metrics can be heavily influenced by just a few data points. Our results highlight obvious (yet ignored) abuses of performance metrics that contaminate the conclusions of many hydrologic modeling studies: It is essential to quantify the sampling uncertainty in performance metrics when justifying the use of a model for a specific purpose and when comparing the performance of competing models.}, + copyright = {{\copyright} 2021. The Authors.}, + langid = {english}, + file = {C:\Users\TsyplenkovA\OneDrive - MWLR\ATS\Personal\zotero-library\Clark et al.\Clark et al._2021_The Abuse of Popular Performance Metrics in Hydrol.pdf} +} + +@article{ducSignalprocessingbasedInterpretationNash2023, + title = {A Signal-Processing-Based Interpretation of the {{Nash}}--{{Sutcliffe}} Efficiency}, + author = {Duc, Le and Sawada, Yohei}, + year = {2023}, + month = may, + journal = {Hydrology and Earth System Sciences}, + volume = {27}, + number = {9}, + pages = {1827--1839}, + publisher = {Copernicus GmbH}, + issn = {1027-5606}, + doi = {10.5194/hess-27-1827-2023}, + url = {https://hess.copernicus.org/articles/27/1827/2023/}, + urldate = {2025-06-22}, + abstract = {The Nash--Sutcliffe efficiency (NSE) is a widely used score in hydrology, but it is not common in the other environmental sciences. 
One of the reasons for its unpopularity is that its scientific meaning is somehow unclear in the literature. This study attempts to establish a solid foundation for the NSE from the viewpoint of signal progressing. Thus, a simulation is viewed as a received signal containing a wanted signal (observations) contaminated by an unwanted signal (noise). This view underlines an important role of the error model between simulations and observations. By assuming an additive error model, it is easy to point out that the NSE is equivalent to an important quantity in signal processing: the signal-to-noise ratio. Moreover, the NSE and the Kling--Gupta efficiency (KGE) are shown to be equivalent, at least when there are no biases, in the sense that they measure the relative magnitude of the power of noise to the power of the variation in observations. The scientific meaning of the NSE suggests a natural way to define NSE=0 as the threshold for good or bad model distinction, and this has no relation to the benchmark simulation that is equal to the observed mean. Corresponding to NSE=0, the threshold of the KGE is given by approximately 0.5. In the general cases, when the additive error model is replaced by a mixed additive--multiplicative error model, the traditional NSE is shown to be prone to contradiction in model evaluations. Therefore, an extension of the NSE is derived, which only requires one to divide the traditional noise-to-signal ratio by the multiplicative bias. This has a practical implication: if the multiplicative bias is not considered, the traditional NSE and KGE underestimate or overestimate the generalized NSE and KGE when the multiplicative bias is greater or smaller than one, respectively. 
In particular, the observed mean turns out to be the worst simulation from the viewpoint of the generalized NSE.}, + langid = {english}, + file = {C:\Users\TsyplenkovA\OneDrive - MWLR\ATS\Personal\zotero-library\Duc and Sawada\Duc and Sawada_2023_A signal-processing-based interpretation of the Na.pdf} +} + +@article{guptaDecompositionMeanSquared2009, + title = {Decomposition of the Mean Squared Error and {{NSE}} Performance Criteria: {{Implications}} for Improving Hydrological Modelling}, + shorttitle = {Decomposition of the Mean Squared Error and {{NSE}} Performance Criteria}, + author = {Gupta, Hoshin V. and Kling, Harald and Yilmaz, Koray K. and Martinez, Guillermo F.}, + year = {2009}, + month = oct, + journal = {Journal of Hydrology}, + volume = {377}, + number = {1}, + pages = {80--91}, + issn = {0022-1694}, + doi = {10.1016/j.jhydrol.2009.08.003}, + url = {http://www.sciencedirect.com/science/article/pii/S0022169409004843}, + urldate = {2020-09-26}, + abstract = {The mean squared error (MSE) and the related normalization, the Nash--Sutcliffe efficiency (NSE), are the two criteria most widely used for calibration and evaluation of hydrological models with observed data. Here, we present a diagnostically interesting decomposition of NSE (and hence MSE), which facilitates analysis of the relative importance of its different components in the context of hydrological modelling, and show how model calibration problems can arise due to interactions among these components. The analysis is illustrated by calibrating a simple conceptual precipitation-runoff model to daily data for a number of Austrian basins having a broad range of hydro-meteorological characteristics. Evaluation of the results clearly demonstrates the problems that can be associated with any calibration based on the NSE (or MSE) criterion. 
While we propose and test an alternative criterion that can help to reduce model calibration problems, the primary purpose of this study is not to present an improved measure of model performance. Instead, we seek to show that there are systematic problems inherent with any optimization based on formulations related to the MSE. The analysis and results have implications to the manner in which we calibrate and evaluate environmental models; we discuss these and suggest possible ways forward that may move us towards an improved and diagnostically meaningful approach to model performance evaluation and identification.}, + langid = {english}, + file = {C:\Users\TsyplenkovA\OneDrive - MWLR\ATS\Personal\zotero-library\Gupta et al\gupta_et_al_2009_decomposition_of_the_mean_squared_error_and_nse_performance_criteria_-.pdf} +} + +@techreport{helselStatisticalMethodsWater2002, + type = {{{USGS Numbered Series}}}, + title = {Statistical Methods in Water Resources}, + author = {Helsel, Dennis R. and Hirsch, Robert M.}, + year = {2002}, + journal = {Statistical methods in water resources}, + series = {Techniques of {{Water-Resources Investigations}}}, + number = {04-A3}, + address = {Reston, VA}, + institution = {U.S. Geological Survey}, + doi = {10.3133/twri04A3}, + url = {http://pubs.er.usgs.gov/publication/twri04A3}, + urldate = {2021-02-14}, + abstract = {PrefaceThis book began as class notes for a course we teach on applied statistical methods to hydrologists of the Water Resources Division, U. S. Geological Survey (USGS). It reflects our attempts to teach statistical methods which are appropriate for analysis of water resources data. As interest in this course has grown outside of the USGS, incentive grew to develop the material into a textbook. The topics covered are those we feel are of greatest usefulness to the practicing water resources scientist. 
Yet all topics can be directly applied to many other types of environmental data.This book is not a stand-alone text on statistics, or a text on statistical hydrology. For example, in addition to this material we use a textbook on introductory statistics in the USGS training course. As a consequence, discussions of topics such as probability theory required in a general statistics textbook will not be found here. Derivations of most equations are not presented. Important tables included in all general statistics texts, such as quantiles of the normal distribution, are not found here. Neither are details of how statistical distributions should be fitted to flood data -- these are adequately covered in numerous books on statistical hydrology.We have instead chosen to emphasize topics not always found in introductory statistics textbooks, and often not adequately covered in statistical textbooks for scientists and engineers. Tables included here, for example, are those found more often in books on nonparametric statistics than in books likely to have been used in college courses for engineers. This book points the environmental and water resources scientist to robust and nonparametric statistics, and to exploratory data analysis. We believe that the characteristics of environmental (and perhaps most other 'real') data drive analysis methods towards use of robust and nonparametric methods.Exercises are included at the end of chapters. In our course, students compute each type of analysis (t-test, regression, etc.) the first time by hand. We choose the smaller, simpler examples for hand computation. In this way the mechanics of the process are fully understood, and computer software is seen as less mysterious.We wish to acknowledge and thank several other scientists at the U. S. Geological Survey for contributing ideas to this book. In particular, we thank those who have served as the other instructors at the USGS training course. 
Ed Gilroy has critiqued and improved much of the material found in this book. Tim Cohn has contributed in several areas, particularly to the sections on bias correction in regression, and methods for data below the reporting limit. Richard Alexander has added to the trend analysis chapter, and Charles Crawford has contributed ideas for regression and ANOVA. Their work has undoubtedly made its way into this book}, + keywords = {fundamentals-of-earth-science}, + file = {C:\Users\TsyplenkovA\OneDrive - MWLR\ATS\Personal\zotero-library\Helsel_Hirsch\Helsel_Hirsch_2002_Statistical methods in water resources.pdf} +} + +@techreport{helselStatisticalMethodsWater2020, + type = {Report}, + title = {Statistical Methods in Water Resources}, + author = {Helsel, Dennis R. and Hirsch, Robert M. and Ryberg, Karen R. and Archfield, Stacey A. and Gilroy, Edward J.}, + year = {2020}, + number = {4-A3}, + pages = {484}, + address = {Reston, VA}, + doi = {10.3133/tm4A3}, + url = {http://pubs.er.usgs.gov/publication/tm4A3}, + langid = {english}, + keywords = {fundamentals-of-earth-science}, + file = {C:\Users\TsyplenkovA\OneDrive - MWLR\ATS\Personal\zotero-library\Helsel et al\Helsel et al_2020_Statistical methods in water resources.pdf} +} + +@techreport{hicksSuspendedSedimentMeasurement2025, + title = {Suspended {{Sediment}}. 
{{Measurement}} of {{Fluvial Suspended Sediment Load}} and Its {{Composition}}}, + author = {Hicks, Murray and Doyle, Martin and Watson, Jeff and Holwerda, Nicholas and Lynch, Barry and Wyatt, Justin and Jones, Haydon and Hill, Reece}, + year = {2025}, + month = may, + number = {2.0.0}, + pages = {138}, + url = {https://www.nems.org.nz/documents/suspended-sediment}, + langid = {english}, + file = {C:\Users\TsyplenkovA\OneDrive - MWLR\ATS\Personal\zotero-library\Hicks et al.\Hicks et al._2025_Measurement of Fluvial Suspended Sediment Load and.pdf} +} + +@article{klingRunoffConditionsUpper2012, + title = {Runoff Conditions in the Upper {{Danube}} Basin under an Ensemble of Climate Change Scenarios}, + author = {Kling, Harald and Fuchs, Martin and Paulin, Maria}, + year = {2012}, + month = mar, + journal = {Journal of Hydrology}, + volume = {424--425}, + pages = {264--277}, + issn = {0022-1694}, + doi = {10.1016/j.jhydrol.2012.01.011}, + url = {https://www.sciencedirect.com/science/article/pii/S0022169412000431}, + urldate = {2025-06-26}, + abstract = {Runoff conditions are strongly controlled by climate. Therefore, any uncertainties in the projections about future climate directly translate to uncertainties in future runoff. If several climate models are applied with the same emission scenario, there may be large differences in the climate projections due to model related biases and natural climate variability. To address this issue, an ensemble modelling approach -- which considers a set of climate models -- is applied in this study with a monthly, conceptual hydrological model for assessing future runoff conditions in the upper Danube basin (101,810km2). Observed data of the past 120years of the HISTALP data-set are used to evaluate runoff simulations under historic climate variations as well as to test the delta-change method for bias correction of climate data. Uncertainties caused by the hydrological model or by the method for bias correction appear to be small. 
Projections about future climate are obtained from 21 regional climate models (RCMs) of the ENSEMBLES project for the A1B emission scenario. Evaluation and ranking of the RCMs reveals that some of the models have considerable biases in simulation of spatio-temporal patterns of historic precipitation and temperature. There is however, no systematic relationship between historic performance and projected future change. Even for the better performing RCMs the differences in the simulation results are large. This is a strong argument for using an ensemble modelling approach, which yields a range of future runoff conditions instead of a deterministic projection. In general, a strong decrease of summer runoff is simulated, whereas there is no clear signal for changes in winter runoff. The spread between different RCMs in future seasonal runoff is larger than for the monthly flow duration curve. Overall, the projected changes in future runoff conditions become more pronounced towards the end of the 21st century.}, + file = {C\:\\Users\\TsyplenkovA\\OneDrive - MWLR\\ATS\\Personal\\zotero-library\\Kling et al.\\Kling et al._2012_Runoff conditions in the upper Danube basin under.pdf;C\:\\Users\\TsyplenkovA\\OneDrive - MWLR\\ATS\\Personal\\zotero-library\\Kling et al.\\Kling et al._2012_Runoff conditions in the upper Danube basin under.pdf} +} + +@article{knobenTechnicalNoteInherent2019, + title = {Technical Note: {{Inherent}} Benchmark or Not? {{Comparing Nash}}--{{Sutcliffe}} and {{Kling}}--{{Gupta}} Efficiency Scores}, + shorttitle = {Technical Note}, + author = {Knoben, Wouter J. M. and Freer, Jim E. 
and Woods, Ross A.}, + year = {2019}, + month = oct, + journal = {Hydrology and Earth System Sciences}, + volume = {23}, + number = {10}, + pages = {4323--4331}, + publisher = {Copernicus GmbH}, + issn = {1027-5606}, + doi = {10.5194/hess-23-4323-2019}, + url = {https://hess.copernicus.org/articles/23/4323/2019/}, + urldate = {2025-06-22}, + abstract = {A traditional metric used in hydrology to summarize model performance is the Nash--Sutcliffe efficiency (NSE). Increasingly an alternative metric, the Kling--Gupta efficiency (KGE), is used instead. When NSE is used, NSE\ =\ 0 corresponds to using the mean flow as a benchmark predictor. The same reasoning is applied in various studies that use KGE as a metric: negative KGE values are viewed as bad model performance, and only positive values are seen as good model performance. Here we show that using the mean flow as a predictor does not result in KGE\ =\ 0, but instead KGE\ =1-{\textsurd}2{$\approx$}-0.41. Thus, KGE values greater than -0.41 indicate that a model improves upon the mean flow benchmark -- even if the model's KGE value is negative. NSE and KGE values cannot be directly compared, because their relationship is non-unique and depends in part on the coefficient of variation of the observed time series. Therefore, modellers who use the KGE metric should not let their understanding of NSE values guide them in interpreting KGE values and instead develop new understanding based on the constitutive parts of the KGE metric and the explicit use of benchmark values to compare KGE scores against. 
More generally, a strong case can be made for moving away from ad hoc use of aggregated efficiency metrics and towards a framework based on purpose-dependent evaluation metrics and benchmarks that allows for more robust model adequacy assessment.}, + langid = {english}, + file = {C:\Users\TsyplenkovA\OneDrive - MWLR\ATS\Personal\zotero-library\Knoben et al.\Knoben et al._2019_Technical note Inherent benchmark or not Compari.pdf} +} + +@article{kongPhenofitPackageExtracting2022, + title = {Phenofit: {{An R}} Package for Extracting Vegetation Phenology from Time Series Remote Sensing}, + shorttitle = {Phenofit}, + author = {Kong, Dongdong and McVicar, Tim R. and Xiao, Mingzhong and Zhang, Yongqiang and {Pe{\~n}a-Arancibia}, Jorge L. and Filippa, Gianluca and Xie, Yuxuan and Gu, Xihui}, + year = {2022}, + journal = {Methods in Ecology and Evolution}, + volume = {13}, + number = {7}, + pages = {1508--1527}, + issn = {2041-210X}, + doi = {10.1111/2041-210X.13870}, + url = {https://onlinelibrary.wiley.com/doi/abs/10.1111/2041-210X.13870}, + urldate = {2023-02-24}, + abstract = {Satellite-derived vegetation indices (VIs) provide a way to analyse vegetation phenology over decades globally. However, these data are often contaminated by different kinds of optical noise (e.g. cloud, cloud shadow, snow, aerosol), making accurate phenology extraction challenging. We present an open-source state-of-the-art R package called phenofit\$\$ phenofit \$\$ to extract vegetation phenological information from satellite-derived VIs. phenofit\$\$ phenofit \$\$ adopts state-of-the-art phenology extraction methods, such as a weight updating function for reducing optical noise contamination, a growing season division function for separating the VI time series into different vegetation cycles, and rough and fine fitting functions for reconstructing VI time series. They work together to make phenology extraction from frequently contaminated VIs easier and more accurate. 
Compared against other widely used phenology extraction tools, for example, TIMESAT\$\$ {\textbackslash}mathrmTIMESAT \$\$ and phenopix\$\$ phenopix \$\$, phenofit\$\$ phenofit \$\$ provides flexible input and output options, a practical growing season division function, rich curve fitting and phenology extraction functions, and robust performance under different kinds of optical noise. In addition to working with VIs from mesoscale satellites (e.g. MODIS and AVHRR), phenofit\$\$ phenofit \$\$ can also reconstruct vegetation time series and extract phenology using other sources, such as VIs from high-resolution optical satellites (e.g. Sentinel-2 and Landsat) and radar satellites (e.g. Sentinel-1), vegetation greenness indices from digital cameras and gross primary production estimations from eddy-covariance sites. As such, phenofit\$\$ phenofit \$\$ can contribute to the study of ecological process dynamics and assist in effective modelling of global change impacts on vegetation, as caused by climate variability and human intervention. Code and data of case studies are available at https://zenodo.org/record/6425745 (Kong, 2022a).}, + langid = {english}, + keywords = {landuse-change,rstats,savitzky-golay,timeseries,whittaker}, + file = {C:\Users\TsyplenkovA\OneDrive - MWLR\ATS\Personal\zotero-library\Kong et al\Kong et al_2022_phenofit.pdf} +} + +@article{maiTenStrategiesSuccessful2023, + title = {Ten Strategies towards Successful Calibration of Environmental Models}, + author = {Mai, Juliane}, + year = {2023}, + month = may, + journal = {Journal of Hydrology}, + volume = {620}, + pages = {129414}, + issn = {0022-1694}, + doi = {10.1016/j.jhydrol.2023.129414}, + url = {https://www.sciencedirect.com/science/article/pii/S0022169423003566}, + urldate = {2025-07-10}, + abstract = {Model calibration is the procedure of finding model settings such that simulated model outputs best match the observed data. 
Model calibration is necessary when the model parameters cannot directly be measured as is the case with a wide range of environmental models where parameters are conceptually describing upscaled and effective physical processes. Model calibration is therefore an important step of environmental modeling as the model might otherwise provide random outputs if never compared to a ground truth. Model calibration itself is often referred to be an art due to its plenitude of intertwined steps and necessary decisions along the way before a calibration can be carried out or can be regarded successful. This work provides a general guide specifying which steps a modeler needs to undertake, how to diagnose the success of each step, and how to identify the right action to revise steps that were not successful. The procedure is formalized into ten iterative steps generally appearing in calibration experiments. Each step of this ``calibration life cycle'' is either illustrated with an exemplary calibration experiment or providing an explicit checklist the modeler can follow. These ten strategies are: (1) using sensitivity information to guide the calibration, (2) handling of parameters with constraints, (3) handling of data ranging orders of magnitude, (4) choosing the data to base the calibration on, (5) presenting various methods to sample model parameters, (6) finding appropriate parameter ranges, (7) choosing objective functions, (8) selecting a calibration algorithm, (9) determining the success and quality of a multi-objective calibration, and (10) providing a checklist to diagnose calibration performance using ideas introduced in the previous steps. 
The formal definition of strategies through the calibration process is providing an overview while shedding a light on connections between these main ingredients to calibrate an environmental model and will therefore enable especially novice modelers to succeed.}, + file = {C:\Users\TsyplenkovA\OneDrive - MWLR\ATS\Personal\zotero-library\Mai\Mai_2023_Ten strategies towards successful calibration of e.pdf} +} + +@article{makowskiMethodsAlgorithmsCorrelation2020, + title = {Methods and {{Algorithms}} for {{Correlation Analysis}} in {{R}}}, + author = {Makowski, Dominique and {Ben-Shachar}, Mattan S. and Patil, Indrajeet and Ludecke, Daniel}, + year = {2020}, + journal = {Journal of Open Source Software}, + volume = {5}, + number = {51}, + pages = {2306}, + doi = {10.21105/joss.02306}, + keywords = {rstats}, + file = {C:\Users\TsyplenkovA\OneDrive - MWLR\ATS\Personal\zotero-library\Makowski et al.\Makowski et al._2020_Methods and Algorithms for Correlation Analysis in.pdf} +} + +@article{markowskaRangrPackageMechanistic2025, + title = {Rangr: {{An R}} Package for Mechanistic, Spatially Explicit Simulation of Species Range Dynamics}, + shorttitle = {Rangr}, + author = {Markowska, Katarzyna and Malinowska, Katarzyna and Kuczy{\'n}ski, Lechos{\l}aw}, + year = {2025}, + journal = {Methods in Ecology and Evolution}, + volume = {16}, + number = {3}, + pages = {468--476}, + issn = {2041-210X}, + doi = {10.1111/2041-210X.14475}, + url = {https://onlinelibrary.wiley.com/doi/abs/10.1111/2041-210X.14475}, + urldate = {2025-07-10}, + abstract = {Global change driven by human activities is causing profound shifts in species distributions. Understanding the mechanisms that influence these dynamics is crucial for biodiversity management. Several mechanistic, spatially explicit models have been proposed to address this issue, but they do not cover the full range of potential functionalities. 
We present a new open-source R package called rangr, which integrates population dynamics and dispersal into a mechanistic virtual species simulator. The package can be used to study the effects of environmental change on population growth and range shifts. It extends the capabilities of previously available simulators by allowing simple and straightforward definition of population dynamics (including positive density dependence), extensive possibilities for defining dispersal kernels and the ability to generate virtual ecologist data. We showcased rangr functionality by simulating the invasion of the collared dove (Streptopelia decaocto). First, we demonstrated how to set up a simulation with different dispersal kernels by investigating the role of long-distance dispersal events on colonisation outcome. Second, we showed the use of rangr to assess the potential of an Allee effect to impede biological invasion. Finally, we used the virtual ecologist framework to determine the timeframe required to detect the spread of an invasive species. The rangr package, which comes with extensive documentation and vignettes, is easy to set up, flexible, fast, fully configurable and capable of emulating the observation process. These features make rangr particularly well suited to generating data that replicate existing wildlife monitoring programmes.}, + copyright = {{\copyright} 2025 The Author(s). Methods in Ecology and Evolution published by John Wiley \& Sons Ltd on behalf of British Ecological Society.}, + langid = {english}, + file = {C:\Users\TsyplenkovA\OneDrive - MWLR\ATS\Personal\zotero-library\Markowska et al.\Markowska et al._2025_rangr An R package for mechanistic, spatially exp.pdf} +} + +@article{melsenRiseNashSutcliffeEfficiency2025, + title = {The Rise of the {{Nash-Sutcliffe}} Efficiency in Hydrology}, + author = {Melsen, Lieke A. and Puy, Arnald and Torfs, Paul J. J. F. 
and Saltelli, Andrea}, + year = {2025}, + month = jun, + journal = {Hydrological Sciences Journal}, + volume = {70}, + number = {8}, + pages = {1248--1259}, + publisher = {Taylor \& Francis}, + issn = {0262-6667}, + doi = {10.1080/02626667.2025.2475105}, + url = {https://doi.org/10.1080/02626667.2025.2475105}, + urldate = {2025-07-12}, + abstract = {The Nash-Sutcliffe efficiency (NSE) is commonly used as a model evaluation metric in hydrology, but its prominence is often taken for granted. This study explores the social factors behind its adoption. Introduced in 1970, the NSE gained traction as computational advancements spurred the growth of hydrological models and evaluation metrics. This, in turn, led to the need to converge on broadly accepted metrics. In 1990, a committee recommended the NSE alongside two other metrics. One of the main developers of SWAT, a widely used hydrological model, adopted only the NSE part of this recommendation, solidifying the NSE's dominance. This storyline shows that the NSE's primacy appears to be derived more from tradition than from any demonstration of technical superiority. To date, path dependence is visible in on-going research efforts resulting from the popularity of the NSE. This historical perspective highlights how social processes have shaped the way hydrological models are evaluated.}, + file = {C:\Users\TsyplenkovA\OneDrive - MWLR\ATS\Personal\zotero-library\Melsen et al.\Melsen et al._2025_The rise of the Nash-Sutcliffe efficiency in hydro.pdf} +} + +@article{mizukamiChoiceCalibrationMetrics2019, + title = {On the Choice of Calibration Metrics for ``High-Flow'' Estimation Using Hydrologic Models}, + author = {Mizukami, Naoki and Rakovec, Oldrich and Newman, Andrew J. and Clark, Martyn P. and Wood, Andrew W. and Gupta, Hoshin V. 
and Kumar, Rohini}, + year = {2019}, + month = jun, + journal = {Hydrology and Earth System Sciences}, + volume = {23}, + number = {6}, + pages = {2601--2614}, + publisher = {Copernicus GmbH}, + issn = {1027-5606}, + doi = {10.5194/hess-23-2601-2019}, + url = {https://hess.copernicus.org/articles/23/2601/2019/}, + urldate = {2025-07-10}, + abstract = {Calibration is an essential step for improving the accuracy of simulations generated using hydrologic models. A key modeling decision is selecting the performance metric to be optimized. It has been common to use squared error performance metrics, or normalized variants such as Nash--Sutcliffe efficiency (NSE), based on the idea that their squared-error nature will emphasize the estimates of high flows. However, we conclude that NSE-based model calibrations actually result in poor reproduction of high-flow events, such as the annual peak flows that are used for flood frequency estimation. Using three different types of performance metrics, we calibrate two hydrological models at a daily step, the Variable Infiltration Capacity (VIC) model and the mesoscale Hydrologic Model (mHM), and evaluate their ability to simulate high-flow events for 492 basins throughout the contiguous United States. The metrics investigated are (1) NSE, (2) Kling--Gupta efficiency (KGE) and its variants, and (3) annual peak flow bias (APFB), where the latter is an application-specific metric that focuses on annual peak flows. As expected, the APFB metric produces the best annual peak flow estimates; however, performance on other high-flow-related metrics is poor. In contrast, the use of NSE results in annual peak flow estimates that are more than 20\ \% worse, primarily due to the tendency of NSE to underestimate observed flow variability. 
On the other hand, the use of KGE results in annual peak flow estimates that are better than from NSE, owing to improved flow time series metrics (mean and variance), with only a slight degradation in performance with respect to other related metrics, particularly when a non-standard weighting of the components of KGE is used. Stochastically generated ensemble simulations based on model residuals show the ability to improve the high-flow metrics, regardless of the deterministic performances. However, we emphasize that improving the fidelity of streamflow dynamics from deterministically calibrated models is still important, as it may improve high-flow metrics (for the right reasons). Overall, this work highlights the need for a deeper understanding of performance metric behavior and design in relation to the desired goals of model calibration.}, + langid = {english}, + file = {C:\Users\TsyplenkovA\OneDrive - MWLR\ATS\Personal\zotero-library\Mizukami et al.\Mizukami et al._2019_On the choice of calibration metrics for “high-flo.pdf} +} + +@article{moriasiHydrologicWaterQuality2015, + title = {Hydrologic and {{Water Quality Models}}: {{Performance Measures}} and {{Evaluation Criteria}}}, + shorttitle = {Hydrologic and {{Water Quality Models}}}, + author = {Moriasi, Daniel N. and Gitau, Margaret W. and Pai, Naresh and Daggupati, Prasad}, + year = {2015}, + month = dec, + journal = {Transactions of the ASABE}, + volume = {58}, + number = {6}, + pages = {1763--1785}, + issn = {21510032, 21510040}, + doi = {10.13031/trans.58.10715}, + url = {http://elibrary.asabe.org/abstract.asp?aid=46548&t=3&dabs=Y&redir=&redirType=}, + urldate = {2024-03-06}, + abstract = {Performance measures (PMs) and corresponding performance evaluation criteria (PEC) are important aspects of calibrating and validating hydrologic and water quality models and should be updated with advances in modeling science. 
We synthesized PMs and PEC from a previous special collection, performed a meta-analysis of performance data reported in recent peer-reviewed literature for three widely published watershed-scale models (SWAT, HSPF, WARMF), and one field-scale model (ADAPT), and provided guidelines for model performance evaluation. Based on the synthesis, meta-analysis, and personal modeling experiences, we recommend the coefficient of determination (R2; in conjunction with the gradient and intercept of the corresponding regression line), Nash-Sutcliffe efficiency (NSE), index of agreement (d), root mean square error (RMSE; alongside the ratio of RMSE and standard deviation of measured data, RSR), percent bias (PBIAS), and several graphical PMs to evaluate model performance. We recommend that model performance can be judged satisfactory for flow simulations if monthly R2 {$>$} 0.70 and d {$>$} 0.75 for field-scale models, and daily, monthly, or annual R2 {$>$} 0.60, NSE {$>$} 0.50, and PBIAS {$\leq$} 15\% for watershed-scale models. Model performance at the watershed scale can be evaluated as satisfactory if monthly R2 {$>$} 0.40 and NSE {$>$} 0.45 and daily, monthly, or annual PBIAS {$\leq$} 20\% for sediment; monthly R2 {$>$} 0.40 and NSE {$>$} 0.35 and daily, monthly, or annual PBIAS {$\leq$} 30\% for phosphorus (P); and monthly R2 {$>$} 0.30 and NSE {$>$} 0.35 and daily, monthly, or annual PBIAS {$\leq$} 30\% for nitrogen (N). For RSR, we recommend that previously published PEC be used as detailed in this article. We also recommend that these PEC be used primarily for the four models for which there were adequate data, and used only with caution for other models. These PEC can be adjusted within acceptable bounds based on additional considerations, such as the quality and quantity of available measured data, spatial and temporal scales, and project scope and magnitude, and updated based on the framework presented herein. 
This initial meta-analysis sets the stage for a more comprehensive meta-analysis to revise PEC as new PMs and more data become available.}, + file = {C:\Users\TsyplenkovA\OneDrive - MWLR\ATS\Personal\zotero-library\Moriasi et al\Moriasi et al_2015_Hydrologic and Water Quality Models.pdf} +} + +@article{nashRiverFlowForecasting1970, + title = {River Flow Forecasting through Conceptual Models Part {{I}} --- {{A}} Discussion of Principles}, + author = {Nash, J. E. and Sutcliffe, J. V.}, + year = {1970}, + month = apr, + journal = {Journal of Hydrology}, + volume = {10}, + number = {3}, + pages = {282--290}, + issn = {0022-1694}, + doi = {10.1016/0022-1694(70)90255-6}, + url = {https://www.sciencedirect.com/science/article/pii/0022169470902556}, + urldate = {2022-10-01}, + abstract = {The principles governing the application of the conceptual model technique to river flow forecasting are discussed. The necessity for a systematic approach to the development and testing of the model is explained and some preliminary ideas suggested.}, + langid = {english}, + file = {C:\Users\TsyplenkovA\OneDrive - MWLR\ATS\Personal\zotero-library\Nash and Sutcliffe\Nash and Sutcliffe_1970_River flow forecasting through conceptual models p.pdf} +} + +@article{oliverTutorialGuideGeostatistics2014, + title = {A Tutorial Guide to Geostatistics: {{Computing}} and Modelling Variograms and Kriging}, + shorttitle = {A Tutorial Guide to Geostatistics}, + author = {Oliver, M. A. and Webster, R.}, + year = {2014}, + month = feb, + journal = {CATENA}, + volume = {113}, + pages = {56--69}, + issn = {0341-8162}, + doi = {10.1016/j.catena.2013.09.006}, + url = {https://www.sciencedirect.com/science/article/pii/S0341816213002385}, + urldate = {2025-06-28}, + abstract = {Many environmental scientists are analysing spatial data by geostatistical methods and interpolating from sparse sample data by kriging to make maps. They recognize its merits in providing unbiased estimates with minimum variance. 
Several statistical packages now have the facilities they require, as do some geographic information systems. In the latter kriging is an option for interpolation that can be done at the press of a few buttons. Unfortunately, the ease conferred by this allows one to krige without understanding and to produce unreliable and even misleading results. Crucial for sound kriging is a plausible function for the spatial covariances or, more widely, of the variogram. The variogram must be estimated reliably and then modelled with valid mathematical functions. This requires an understanding of the assumptions in the underlying theory of random processes on which geostatistics is based. Here we guide readers through computing the sample variogram and modelling it by weighted least-squares fitting. We explain how to choose the most suitable functions by a combination of graphics and statistical diagnostics. Ordinary kriging follows straightforwardly from the model, but small changes in the model function and its parameters can affect the kriging error variances. When kriging is automated these effects remain unknown. We explain the choices to be made when kriging, i.e. whether the support is at points or over blocks, and whether the predictions are global or within moving windows.}, + file = {C\:\\Users\\TsyplenkovA\\OneDrive - MWLR\\ATS\\Personal\\zotero-library\\Oliver and Webster\\Oliver and Webster_2014_A tutorial guide to geostatistics Computing and m.pdf;C\:\\Users\\TsyplenkovA\\OneDrive - MWLR\\ATS\\Personal\\zotero-library\\Oliver and Webster\\Oliver and Webster_2014_A tutorial guide to geostatistics Computing and m.pdf} +} + +@incollection{rasmussenGuidelinesProceduresComputing2009, + title = {Guidelines and Procedures for Computing Time-Series Suspended-Sediment Concentrations and Loads from in-Stream Turbidity-Sensor and Streamflow Data}, + booktitle = {U.{{S}}. {{Geological Survey Techniques}} and {{Methods}} Book 3, Chap. {{C4}}}, + author = {Rasmussen, P. P. 
and Gray, J. R. and Glysson, G. D. and Ziegler, A. C.}, + year = {2009}, + pages = {53}, + url = {https://pubs.usgs.gov/tm/tm3c4/}, + isbn = {978-1-4113-2410-7}, + langid = {english}, + file = {C:\Users\TsyplenkovA\OneDrive - MWLR\ATS\Personal\zotero-library\Rasmussen et al.\Rasmussen et al._2009_Guidelines and procedures for computing time-serie.pdf} +} + +@article{sciainiNLMRLandscapetoolsIntegrated2018, + title = {{{NLMR}} and Landscapetools: {{An}} Integrated Environment for Simulating and Modifying Neutral Landscape Models in {{R}}}, + shorttitle = {{{NLMR}} and Landscapetools}, + author = {Sciaini, Marco and Fritsch, Matthias and Scherer, C{\'e}dric and Simpkins, Craig Eric}, + year = {2018}, + journal = {Methods in Ecology and Evolution}, + volume = {9}, + number = {11}, + pages = {2240--2248}, + issn = {2041-210X}, + doi = {10.1111/2041-210X.13076}, + url = {https://onlinelibrary.wiley.com/doi/abs/10.1111/2041-210X.13076}, + urldate = {2025-07-10}, + abstract = {Neutral landscape models (NLMs) simulate landscape patterns based on theoretical distributions and can be used to systematically study the effect of landscape structure on ecological processes. NLMs are commonly used in landscape ecology to enhance the findings of field studies as well as in simulation studies to provide an underlying landscape. However, their creation so far has been limited to software that is platform dependent, does not allow a reproducible workflow or is not embedded in R, the prevailing programming language used by ecologists. Here, we present two complementary R packages NLMR and landscapetools, that allow users to generate and manipulate NLMs in a single environment. They grant the simulation of the widest collection of NLMs found in any single piece of software thus far while allowing for easy manipulation in a self-contained and reproducible workflow. The combination of both packages should stimulate a wider usage of NLMs in ecology. 
NLMR is a comprehensive collection of algorithms with which to simulate NLMs. landscapetools provides a utility toolbox which facilitates an easy workflow with simulated neutral landscapes and other raster data. We show two example applications that illustrate potential use cases for NLMR and landscapetools: First, an agent-based simulation study in which the effect of spatial structure on disease persistence was studied. The second example shows how increases in spatial scaling can introduce biases in calculated landscape metrics. Simplifying the workflow around generating and handling NLMs should encourage an uptake in the usage of NLMs. NLMR and landscapetools are both generic frameworks that can be used in a variety of applications and are a further step to having a unified simulation environment in R for answering spatial research questions.}, + copyright = {{\copyright} 2018 The Authors. Methods in Ecology and Evolution {\copyright} 2018 British Ecological Society}, + langid = {english}, + file = {C:\Users\TsyplenkovA\OneDrive - MWLR\ATS\Personal\zotero-library\Sciaini et al.\Sciaini et al._2018_NLMR and landscapetools An integrated environment.pdf} +} + +@article{zizkaCoordinateCleanerStandardizedCleaning2019, + title = {{{CoordinateCleaner}}: {{Standardized}} Cleaning of Occurrence Records from Biological Collection Databases}, + shorttitle = {{{CoordinateCleaner}}}, + author = {Zizka, Alexander and Silvestro, Daniele and Andermann, Tobias and Azevedo, Josu{\'e} and Duarte Ritter, Camila and Edler, Daniel and Farooq, Harith and Herdean, Andrei and Ariza, Mar{\'i}a and Scharn, Ruud and Svantesson, Sten and Wengstr{\"o}m, Niklas and Zizka, Vera and Antonelli, Alexandre}, + year = {2019}, + journal = {Methods in Ecology and Evolution}, + volume = {10}, + number = {5}, + pages = {744--751}, + issn = {2041-210X}, + doi = {10.1111/2041-210X.13152}, + url = {https://onlinelibrary.wiley.com/doi/abs/10.1111/2041-210X.13152}, + urldate = {2025-07-10}, + abstract = 
{Species occurrence records from online databases are an indispensable resource in ecological, biogeographical and palaeontological research. However, issues with data quality, especially incorrect geo-referencing or dating, can diminish their usefulness. Manual cleaning is time-consuming, error prone, difficult to reproduce and limited to known geographical areas and taxonomic groups, making it impractical for datasets with thousands or millions of records. Here, we present CoordinateCleaner, an r-package to scan datasets of species occurrence records for geo-referencing and dating imprecisions and data entry errors in a standardized and reproducible way. CoordinateCleaner is tailored to problems common in biological and palaeontological databases and can handle datasets with millions of records. The software includes (a) functions to flag potentially problematic coordinate records based on geographical gazetteers, (b) a global database of 9,691 geo-referenced biodiversity institutions to identify records that are likely from horticulture or captivity, (c) novel algorithms to identify datasets with rasterized data, conversion errors and strong decimal rounding and (d) spatio-temporal tests for fossils. We describe the individual functions available in CoordinateCleaner and demonstrate them on more than 90 million occurrences of flowering plants from the Global Biodiversity Information Facility (GBIF) and 19,000 fossil occurrences from the Palaeobiology Database (PBDB). We find that in GBIF more than 3.4 million records (3.7\%) are potentially problematic and that 179 of the tested contributing datasets (18.5\%) might be biased by rasterized coordinates. In PBDB, 1205 records (6.3\%) are potentially problematic. All cleaning functions and the biodiversity institution database are open-source and available within the CoordinateCleaner r-package.}, + copyright = {{\copyright} 2019 The Authors. 
Methods in Ecology and Evolution published by John Wiley \& Sons Ltd on behalf of British Ecological Society.}, + langid = {english}, + file = {C:\Users\TsyplenkovA\OneDrive - MWLR\ATS\Personal\zotero-library\Zizka et al.\Zizka et al._2019_CoordinateCleaner Standardized cleaning of occurr.pdf} +} diff --git a/paper/src/comparison.qmd b/paper/src/comparison.qmd new file mode 100644 index 0000000..27cb559 --- /dev/null +++ b/paper/src/comparison.qmd @@ -0,0 +1,57 @@ + +```{r} +library(hydroGOF) +library(ggplot2) +devtools::load_all() + +theme_set(theme_minimal()) +``` + +```{r} +nse_base <- function(truth, estimate, na_rm = TRUE) { + #fmt: skip + 1 - (sum((truth - estimate)^2, na.rm = na_rm) / + sum((truth - mean(truth, na.rm = na_rm))^2, na.rm = na_rm)) +} +``` + +```{r} +# Benchmark +numpoints <- seq(25, 10^4, by = 50) +df <- list() + +for (i in seq_along(numpoints)) { + set.seed(1234) + x <- rnorm(n = numpoints[i], mean = 100, sd = 25) + y <- rnorm(n = numpoints[i], mean = 110, sd = 25) + df[[as.character(numpoints[i])]] <- + bench::mark( + tidyhydro = nse_vec(x, y), + hydroGOF = hydroGOF::NSE(y, x), + base = nse_base(x, y), + iterations = 9L, + check = TRUE, + time_unit = "us" + ) +} +``` + + +```{r} +dplyr::bind_rows(df, .id = "numpoints") |> + dplyr::mutate(numpoints = as.numeric(numpoints)) |> + ggplot(aes(x = numpoints, y = median, color = as.character(expression))) + + geom_point(alpha = 0.3) + + geom_smooth(se = FALSE) + + scale_y_continuous( + limits = c(0, 250), + expand = expansion(mult = c(0, 0)), + labels = scales::label_number(big.mark = ",") + ) + + scale_x_continuous( + limits = c(0, 10000), + expand = expansion(mult = c(0, 0)), + labels = scales::label_number(big.mark = ",") + ) + +``` \ No newline at end of file diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index 0b7f8fc..5b1ce17 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -25,15 +25,16 @@ BEGIN_RCPP END_RCPP } // mse_cpp -SEXP mse_cpp(NumericVector truth, NumericVector 
estimate, bool na_rm); -RcppExport SEXP _tidyhydro_mse_cpp(SEXP truthSEXP, SEXP estimateSEXP, SEXP na_rmSEXP) { +SEXP mse_cpp(NumericVector truth, NumericVector estimate, bool na_rm, bool sqrt); +RcppExport SEXP _tidyhydro_mse_cpp(SEXP truthSEXP, SEXP estimateSEXP, SEXP na_rmSEXP, SEXP sqrtSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< NumericVector >::type truth(truthSEXP); Rcpp::traits::input_parameter< NumericVector >::type estimate(estimateSEXP); Rcpp::traits::input_parameter< bool >::type na_rm(na_rmSEXP); - rcpp_result_gen = Rcpp::wrap(mse_cpp(truth, estimate, na_rm)); + Rcpp::traits::input_parameter< bool >::type sqrt(sqrtSEXP); + rcpp_result_gen = Rcpp::wrap(mse_cpp(truth, estimate, na_rm, sqrt)); return rcpp_result_gen; END_RCPP } @@ -94,7 +95,7 @@ END_RCPP static const R_CallMethodDef CallEntries[] = { {"_tidyhydro_kge_cpp", (DL_FUNC) &_tidyhydro_kge_cpp, 4}, - {"_tidyhydro_mse_cpp", (DL_FUNC) &_tidyhydro_mse_cpp, 3}, + {"_tidyhydro_mse_cpp", (DL_FUNC) &_tidyhydro_mse_cpp, 4}, {"_tidyhydro_nse_cpp", (DL_FUNC) &_tidyhydro_nse_cpp, 4}, {"_tidyhydro_pbias_cpp", (DL_FUNC) &_tidyhydro_pbias_cpp, 4}, {"_tidyhydro_press_cpp", (DL_FUNC) &_tidyhydro_press_cpp, 3}, diff --git a/src/kge.cpp b/src/kge.cpp index b0557bd..f58d04e 100644 --- a/src/kge.cpp +++ b/src/kge.cpp @@ -1,88 +1,88 @@ -#include -#include -#ifdef _OPENMP -#include -#endif -using namespace Rcpp; - -// [[Rcpp::export]] -SEXP kge_cpp(NumericVector obs, NumericVector sim, bool na_rm = true, std::string version = "2012") { - if (obs.size() != sim.size()) { - stop("'obs' and 'sim' must have the same length"); - } - if (version != "2009" && version != "2012") { - stop("Only versions '2009' and '2012' are supported"); - } - - const int n = obs.size(); - double mean_obs = 0.0, mean_sim = 0.0; - double var_obs = 0.0, var_sim = 0.0; - double r_num = 0.0; - int count = 0; - - // First pass: compute means - if (na_rm) { - double sum_obs = 0.0, 
sum_sim = 0.0; - #pragma omp parallel for reduction(+:sum_obs, sum_sim, count) schedule(static) if(n > 1000) - for (int i = 0; i < n; i++) { - if (!ISNAN(obs[i]) && !ISNAN(sim[i])) { - sum_obs += obs[i]; - sum_sim += sim[i]; - count++; - } - } - if (count == 0) return wrap(NA_REAL); - mean_obs = sum_obs / count; - mean_sim = sum_sim / count; - } else { - for (int i = 0; i < n; i++) { - mean_obs += obs[i]; - mean_sim += sim[i]; - } - mean_obs /= n; - mean_sim /= n; - count = n; - } - - // Second pass: compute variances and correlation numerator - if (na_rm) { - #pragma omp parallel for reduction(+:var_obs, var_sim, r_num) schedule(static) if(n > 1000) - for (int i = 0; i < n; i++) { - if (!ISNAN(obs[i]) && !ISNAN(sim[i])) { - const double d_obs = obs[i] - mean_obs; - const double d_sim = sim[i] - mean_sim; - var_obs += d_obs * d_obs; - var_sim += d_sim * d_sim; - r_num += d_obs * d_sim; - } - } - } else { - for (int i = 0; i < n; i++) { - const double d_obs = obs[i] - mean_obs; - const double d_sim = sim[i] - mean_sim; - var_obs += d_obs * d_obs; - var_sim += d_sim * d_sim; - r_num += d_obs * d_sim; - } - } - - if (var_obs == 0.0 || var_sim == 0.0) return wrap(NA_REAL); - - double r = r_num / std::sqrt(var_obs * var_sim); - double beta = mean_sim / mean_obs; - - double result; - if (version == "2009") { - double alpha = std::sqrt(var_sim / var_obs); - result = 1.0 - std::sqrt(std::pow(r - 1.0, 2.0) + - std::pow(alpha - 1.0, 2.0) + - std::pow(beta - 1.0, 2.0)); - } else { // version == "2012" - double gamma = (std::sqrt(var_sim)/mean_sim)/(std::sqrt(var_obs)/mean_obs); - result = 1.0 - std::sqrt(std::pow(r - 1.0, 2.0) + - std::pow(gamma - 1.0, 2.0) + - std::pow(beta - 1.0, 2.0)); - } - - return wrap(result); -} +#include +#include +#ifdef _OPENMP +#include +#endif +using namespace Rcpp; + +// [[Rcpp::export]] +SEXP kge_cpp(NumericVector obs, NumericVector sim, bool na_rm = true, std::string version = "2012") { + if (obs.size() != sim.size()) { + stop("'obs' and 'sim' 
must have the same length"); + } + if (version != "2009" && version != "2012") { + stop("Only versions '2009' and '2012' are supported"); + } + + const int n = obs.size(); + double mean_obs = 0.0, mean_sim = 0.0; + double var_obs = 0.0, var_sim = 0.0; + double r_num = 0.0; + int count = 0; + + // First pass: compute means + if (na_rm) { + double sum_obs = 0.0, sum_sim = 0.0; + #pragma omp parallel for reduction(+:sum_obs, sum_sim, count) schedule(static) if(n > 1000) + for (int i = 0; i < n; i++) { + if (!ISNAN(obs[i]) && !ISNAN(sim[i])) { + sum_obs += obs[i]; + sum_sim += sim[i]; + count++; + } + } + if (count == 0) return wrap(NA_REAL); + mean_obs = sum_obs / count; + mean_sim = sum_sim / count; + } else { + for (int i = 0; i < n; i++) { + mean_obs += obs[i]; + mean_sim += sim[i]; + } + mean_obs /= n; + mean_sim /= n; + count = n; + } + + // Second pass: compute variances and correlation numerator + if (na_rm) { + #pragma omp parallel for reduction(+:var_obs, var_sim, r_num) schedule(static) if(n > 1000) + for (int i = 0; i < n; i++) { + if (!ISNAN(obs[i]) && !ISNAN(sim[i])) { + const double d_obs = obs[i] - mean_obs; + const double d_sim = sim[i] - mean_sim; + var_obs += d_obs * d_obs; + var_sim += d_sim * d_sim; + r_num += d_obs * d_sim; + } + } + } else { + for (int i = 0; i < n; i++) { + const double d_obs = obs[i] - mean_obs; + const double d_sim = sim[i] - mean_sim; + var_obs += d_obs * d_obs; + var_sim += d_sim * d_sim; + r_num += d_obs * d_sim; + } + } + + if (var_obs == 0.0 || var_sim == 0.0) return wrap(NA_REAL); + + double r = r_num / std::sqrt(var_obs * var_sim); + double beta = mean_sim / mean_obs; + + double result; + if (version == "2009") { + double alpha = std::sqrt(var_sim / var_obs); + result = 1.0 - std::sqrt(std::pow(r - 1.0, 2.0) + + std::pow(alpha - 1.0, 2.0) + + std::pow(beta - 1.0, 2.0)); + } else { // version == "2012" + double gamma = (std::sqrt(var_sim)/mean_sim)/(std::sqrt(var_obs)/mean_obs); + result = 1.0 - std::sqrt(std::pow(r - 
1.0, 2.0) + + std::pow(gamma - 1.0, 2.0) + + std::pow(beta - 1.0, 2.0)); + } + + return wrap(result); +} diff --git a/src/mse.cpp b/src/mse.cpp index fa2eef2..0923672 100644 --- a/src/mse.cpp +++ b/src/mse.cpp @@ -1,42 +1,49 @@ -#include -// [[Rcpp::plugins(openmp)]] -#ifdef _OPENMP -#include -#endif -using namespace Rcpp; - -// [[Rcpp::export]] -SEXP mse_cpp(NumericVector truth, NumericVector estimate, bool na_rm = true) { - if (truth.size() != estimate.size()) { - stop("'truth' and 'estimate' must have the same length"); - } - - const int n = truth.length(); - const double* t = REAL(truth); - const double* e = REAL(estimate); - double num = 0.0; // numerator - double den = 0.0; // denominator - - // Second pass: calculate numerator and denominator - if (na_rm) { - #pragma omp parallel for reduction(+:num,den) schedule(static) if(n > 1000) - for (int i = 0; i < n; i++) { - if (!ISNAN(t[i]) && !ISNAN(e[i])) { - const double diff1 = t[i] - e[i]; - num += diff1 * diff1; - den += 1.0; - } - } - } else { - #pragma omp parallel for simd reduction(+:num,den) schedule(static) if(n > 1000) - for (int i = 0; i < n; i++) { - const double diff1 = t[i] - e[i]; - num += diff1 * diff1; - den += 1.0; - } - } - - double metric = num / den; - - return wrap(metric); -} +#include +// [[Rcpp::plugins(openmp)]] +#ifdef _OPENMP +#include +#endif +using namespace Rcpp; + +// [[Rcpp::export]] +SEXP mse_cpp(NumericVector truth, NumericVector estimate, bool na_rm = true, bool sqrt = true) { + if (truth.size() != estimate.size()) { + stop("'truth' and 'estimate' must have the same length"); + } + + const int n = truth.length(); + const double* t = REAL(truth); + const double* e = REAL(estimate); + double num = 0.0; // numerator + double den = 0.0; // denominator + + if (na_rm) { + #pragma omp parallel for reduction(+:num,den) schedule(static) if(n > 1000) + for (int i = 0; i < n; i++) { + if (!ISNAN(t[i]) && !ISNAN(e[i])) { + const double diff1 = t[i] - e[i]; + num += diff1 * diff1; + den += 
1.0; + } + } + } else { + #pragma omp parallel for simd reduction(+:num,den) schedule(static) if(n > 1000) + for (int i = 0; i < n; i++) { + const double diff1 = t[i] - e[i]; + num += diff1 * diff1; + den += 1.0; + } + } + + // Whether to return MSE or RMSE + double metric; + if (sqrt){ + // Root Mean Squared Error + metric = std::sqrt(num / den); + } else { + // Mean Squared Error + metric = num / den; + } + + return wrap(metric); +} diff --git a/src/nse.cpp b/src/nse.cpp index cf90028..b523a57 100644 --- a/src/nse.cpp +++ b/src/nse.cpp @@ -1,84 +1,84 @@ -#include -// [[Rcpp::plugins(openmp)]] -#ifdef _OPENMP -#include -#endif -using namespace Rcpp; - -// [[Rcpp::export]] -SEXP nse_cpp(NumericVector truth, NumericVector estimate, bool performance = false, bool na_rm = true) { - if (truth.size() != estimate.size()) { - stop("'truth' and 'estimate' must have the same length"); - } - - const int n = truth.length(); - const double* t = REAL(truth); - const double* e = REAL(estimate); - double num = 0.0; // numerator - double den = 0.0; // denominator - double t_mean = 0.0; // mean of truth - - // First pass: calculate mean and handle NAs - if (na_rm) { - double sum = 0.0; - int count = 0; - - #pragma omp parallel for reduction(+:sum,count) schedule(static) if(n > 1000) - for (int i = 0; i < n; i++) { - if (!ISNAN(t[i]) && !ISNAN(e[i])) { - sum += t[i]; - count++; - } - } - - t_mean = sum / count; - } else { - #pragma omp parallel for reduction(+:t_mean) schedule(static) if(n > 1000) - for (int i = 0; i < n; i++) { - t_mean += t[i]; - } - t_mean /= n; - } - - // Second pass: calculate numerator and denominator - if (na_rm) { - #pragma omp parallel for reduction(+:num,den) schedule(static) if(n > 1000) - for (int i = 0; i < n; i++) { - if (!ISNAN(t[i]) && !ISNAN(e[i])) { - const double diff1 = t[i] - e[i]; - const double diff2 = t[i] - t_mean; - num += diff1 * diff1; - den += diff2 * diff2; - } - } - } else { - #pragma omp parallel for simd reduction(+:num,den)
schedule(static) if(n > 1000) - for (int i = 0; i < n; i++) { - const double diff1 = t[i] - e[i]; - const double diff2 = t[i] - t_mean; - num += diff1 * diff1; - den += diff2 * diff2; - } - } - - double metric = 1.0 - (num / den); - - if (!performance) { - return wrap(metric); - } else { - CharacterVector metric_values = CharacterVector::create("Poor", "Satisfactory", "Good", "Excellent"); - CharacterVector result(1); - - if (metric <= 0.5) { - result[0] = metric_values[0]; - } else if (metric > 0.5 && metric < 0.6) { - result[0] = metric_values[1]; - } else if (metric >= 0.6 && metric <= 0.8) { - result[0] = metric_values[2]; - } else { - result[0] = metric_values[3]; - } - - return result; - } -} +#include +// [[Rcpp::plugins(openmp)]] +#ifdef _OPENMP +#include +#endif +using namespace Rcpp; + +// [[Rcpp::export]] +SEXP nse_cpp(NumericVector truth, NumericVector estimate, bool performance = false, bool na_rm = true) { + if (truth.size() != estimate.size()) { + stop("'truth' and 'estimate' must have the same length"); + } + + const int n = truth.length(); + const double* t = REAL(truth); + const double* e = REAL(estimate); + double num = 0.0; // numerator + double den = 0.0; // denominator + double t_mean = 0.0; // mean of truth + + // First pass: calculate mean and handle NAs + if (na_rm) { + double sum = 0.0; + int count = 0; + + #pragma omp parallel for reduction(+:sum,count) schedule(static) if(n > 1000) + for (int i = 0; i < n; i++) { + if (!ISNAN(t[i]) && !ISNAN(e[i])) { + sum += t[i]; + count++; + } + } + + t_mean = sum / count; + } else { + #pragma omp parallel for reduction(+:t_mean) schedule(static) if(n > 1000) + for (int i = 0; i < n; i++) { + t_mean += t[i]; + } + t_mean /= n; + } + + // Second pass: calculate numerator and denominator + if (na_rm) { + #pragma omp parallel for reduction(+:num,den) schedule(static) if(n > 1000) + for (int i = 0; i < n; i++) { + if (!ISNAN(t[i]) && !ISNAN(e[i])) { + const double diff1 = t[i] - e[i]; + const double diff2 
= t[i] - t_mean; + num += diff1 * diff1; + den += diff2 * diff2; + } + } + } else { + #pragma omp parallel for simd reduction(+:num,den) schedule(static) if(n > 1000) + for (int i = 0; i < n; i++) { + const double diff1 = t[i] - e[i]; + const double diff2 = t[i] - t_mean; + num += diff1 * diff1; + den += diff2 * diff2; + } + } + + double metric = 1.0 - (num / den); + + if (!performance) { + return wrap(metric); + } else { + CharacterVector metric_values = CharacterVector::create("Poor", "Satisfactory", "Good", "Excellent"); + CharacterVector result(1); + + if (metric <= 0.5) { + result[0] = metric_values[0]; + } else if (metric > 0.5 && metric < 0.6) { + result[0] = metric_values[1]; + } else if (metric >= 0.6 && metric <= 0.8) { + result[0] = metric_values[2]; + } else { + result[0] = metric_values[3]; + } + + return result; + } +} diff --git a/src/pbias.cpp b/src/pbias.cpp index 4f2b4b2..8e56bea 100644 --- a/src/pbias.cpp +++ b/src/pbias.cpp @@ -1,59 +1,59 @@ -#include -// [[Rcpp::plugins(openmp)]] -#ifdef _OPENMP -#include -#endif -using namespace Rcpp; - -// [[Rcpp::export]] -SEXP pbias_cpp(NumericVector truth, NumericVector estimate, bool performance = false, bool na_rm = true) -{ - if (truth.size() != estimate.size()) { - stop("'truth' and 'estimate' must have the same length"); - } - - const int n = truth.length(); - const double* t = REAL(truth); - const double* e = REAL(estimate); - double num = 0.0; // sum(estimate - truth) - double den = 0.0; // sum(truth) - - if (na_rm) { - #pragma omp parallel for reduction(+:num,den) schedule(static) if(n > 1000) - for (int i = 0; i < n; i++) { - if (!ISNAN(t[i]) && !ISNAN(e[i])) { - num += e[i] - t[i]; - den += t[i]; - } - } - } else { - #pragma omp parallel for simd reduction(+:num,den) schedule(static) if(n > 1000) - for (int i = 0; i < n; i++) { - num += e[i] - t[i]; - den += t[i]; - } - } - - double metric = 100.0 * (num / den); - - if (!performance) { - return wrap(metric); - } else { - CharacterVector 
metric_values = CharacterVector::create("Poor", "Satisfactory", "Good", "Excellent"); - CharacterVector result(1); - - const double abs_metric = std::abs(metric); - - if (abs_metric >= 15.0) { - result[0] = metric_values[0]; - } else if (abs_metric >= 10.0) { - result[0] = metric_values[1]; - } else if (abs_metric >= 5.0) { - result[0] = metric_values[2]; - } else { - result[0] = metric_values[3]; - } - - return result; - } -} +#include +// [[Rcpp::plugins(openmp)]] +#ifdef _OPENMP +#include +#endif +using namespace Rcpp; + +// [[Rcpp::export]] +SEXP pbias_cpp(NumericVector truth, NumericVector estimate, bool performance = false, bool na_rm = true) +{ + if (truth.size() != estimate.size()) { + stop("'truth' and 'estimate' must have the same length"); + } + + const int n = truth.length(); + const double* t = REAL(truth); + const double* e = REAL(estimate); + double num = 0.0; // sum(estimate - truth) + double den = 0.0; // sum(truth) + + if (na_rm) { + #pragma omp parallel for reduction(+:num,den) schedule(static) if(n > 1000) + for (int i = 0; i < n; i++) { + if (!ISNAN(t[i]) && !ISNAN(e[i])) { + num += e[i] - t[i]; + den += t[i]; + } + } + } else { + #pragma omp parallel for simd reduction(+:num,den) schedule(static) if(n > 1000) + for (int i = 0; i < n; i++) { + num += e[i] - t[i]; + den += t[i]; + } + } + + double metric = 100.0 * (num / den); + + if (!performance) { + return wrap(metric); + } else { + CharacterVector metric_values = CharacterVector::create("Poor", "Satisfactory", "Good", "Excellent"); + CharacterVector result(1); + + const double abs_metric = std::abs(metric); + + if (abs_metric >= 15.0) { + result[0] = metric_values[0]; + } else if (abs_metric >= 10.0) { + result[0] = metric_values[1]; + } else if (abs_metric >= 5.0) { + result[0] = metric_values[2]; + } else { + result[0] = metric_values[3]; + } + + return result; + } +} diff --git a/tests/testthat/helper-numeric.R b/tests/testthat/helper-numeric.R index 9e97614..6ef4d06 100644 --- 
a/tests/testthat/helper-numeric.R +++ b/tests/testthat/helper-numeric.R @@ -1,15 +1,15 @@ -# Imported from {yardstick} package -# github.com/tidymodels/yardstick/blob/main/tests/testthat/helper-numeric.R - -generate_numeric_test_data <- function() { - set.seed(1812) - out <- data.frame(obs = rnorm(1000)) - out$pred <- .2 + 1.1 * out$obs + rnorm(1000, sd = 0.5) - out$pred_na <- out$pred - ind <- (1:100) * 10 - out$pred_na[ind] <- NA - out$rand <- sample(out$pred) - out$rand_na <- out$rand - out$rand_na[ind] <- NA - out -} +# Imported from {yardstick} package +# github.com/tidymodels/yardstick/blob/main/tests/testthat/helper-numeric.R + +generate_numeric_test_data <- function() { + set.seed(1812) + out <- data.frame(obs = rnorm(1000)) + out$pred <- .2 + 1.1 * out$obs + rnorm(1000, sd = 0.5) + out$pred_na <- out$pred + ind <- (1:100) * 10 + out$pred_na[ind] <- NA + out$rand <- sample(out$pred) + out$rand_na <- out$rand + out$rand_na[ind] <- NA + out +} diff --git a/tests/testthat/test-hydrogof.R b/tests/testthat/test-hydrogof.R index ef1872e..a6846b2 100644 --- a/tests/testthat/test-hydrogof.R +++ b/tests/testthat/test-hydrogof.R @@ -1,266 +1,377 @@ -# Property-based testing -# https://www.etiennebacher.com/posts/2024-10-01-using-property-testing-in-r - -options( - quickcheck.tests = 20L, - quickcheck.shrinks = 10L, - quickcheck.discards = 10L -) - -test_that("nse", { - skip_if_not_installed("quickcheck") - skip_if_not_installed("hydroGOF") - - # With NA - # !NB: two tests to ensure that the C++ functions return the - # same results as hydroGOF with both NA values present and without - quickcheck::for_all( - obs = quickcheck::double_bounded( - left = -1000, - right = 1000, - len = 50, - any_na = TRUE - ), - sim = quickcheck::double_bounded( - left = -1000, - right = 1000, - len = 50, - any_na = TRUE - ), - property = function(obs, sim) { - new <- nse_vec(truth = obs, estimate = sim, na_rm = TRUE) - old <- hydroGOF::NSE(sim = sim, obs = obs, na.rm = TRUE) - - 
expect_equal(new, old) - } - ) - - # Without NA - quickcheck::for_all( - obs = quickcheck::double_bounded( - left = -1000, - right = 1000, - len = 50, - any_na = FALSE - ), - sim = quickcheck::double_bounded( - left = -1000, - right = 1000, - len = 50, - any_na = FALSE - ), - property = function(obs, sim) { - new <- nse_vec(truth = obs, estimate = sim, na_rm = TRUE) - old <- hydroGOF::NSE(sim = sim, obs = obs, na.rm = TRUE) - - expect_equal(new, old) - } - ) -}) - -test_that("kge", { - skip_if_not_installed("quickcheck") - skip_if_not_installed("hydroGOF") - - # With NA - quickcheck::for_all( - obs = quickcheck::double_bounded( - left = -1000, - right = 1000, - len = 50, - any_na = TRUE - ), - sim = quickcheck::double_bounded( - left = -1000, - right = 1000, - len = 50, - any_na = TRUE - ), - property = function(obs, sim) { - new <- kge_vec(truth = obs, estimate = sim, na_rm = TRUE) - old <- hydroGOF::KGE(sim = sim, obs = obs, na.rm = TRUE) - - expect_equal(new, old) - } - ) - - # Without NA - quickcheck::for_all( - obs = quickcheck::double_bounded( - left = -1000, - right = 1000, - len = 50, - any_na = FALSE - ), - sim = quickcheck::double_bounded( - left = -1000, - right = 1000, - len = 50, - any_na = FALSE - ), - property = function(obs, sim) { - new <- kge_vec(truth = obs, estimate = sim, na_rm = TRUE) - old <- hydroGOF::KGE(sim = sim, obs = obs, na.rm = TRUE) - - expect_equal(new, old) - } - ) -}) - - -test_that("kge2012", { - skip_if_not_installed("quickcheck") - skip_if_not_installed("hydroGOF") - - # With NA - quickcheck::for_all( - obs = quickcheck::double_bounded( - left = -1000, - right = 1000, - len = 50, - any_na = TRUE - ), - sim = quickcheck::double_bounded( - left = -1000, - right = 1000, - len = 50, - any_na = TRUE - ), - property = function(obs, sim) { - new <- kge2012_vec(truth = obs, estimate = sim, na_rm = TRUE) - old <- hydroGOF::KGE( - sim = sim, - obs = obs, - na.rm = TRUE, - method = "2012" - ) - - expect_equal(new, old) - } - ) - - # 
Without NA - quickcheck::for_all( - obs = quickcheck::double_bounded( - left = -1000, - right = 1000, - len = 50, - any_na = FALSE - ), - sim = quickcheck::double_bounded( - left = -1000, - right = 1000, - len = 50, - any_na = FALSE - ), - property = function(obs, sim) { - new <- kge2012_vec(truth = obs, estimate = sim, na_rm = TRUE) - old <- hydroGOF::KGE( - sim = sim, - obs = obs, - na.rm = TRUE, - method = "2012" - ) - - expect_equal(new, old) - } - ) -}) - -test_that("pbias", { - skip_if_not_installed("quickcheck") - skip_if_not_installed("hydroGOF") - - # With NA - quickcheck::for_all( - obs = quickcheck::double_bounded( - left = -1000, - right = 1000, - len = 50, - any_na = TRUE - ), - sim = quickcheck::double_bounded( - left = -1000, - right = 1000, - len = 50, - any_na = TRUE - ), - property = function(obs, sim) { - new <- pbias_vec(truth = obs, estimate = sim, na_rm = TRUE) - old <- hydroGOF::pbias(sim = sim, obs = obs, na.rm = TRUE, dec = 9) - - expect_equal(new, old) - } - ) - - # Without NA - quickcheck::for_all( - obs = quickcheck::double_bounded( - left = -1000, - right = 1000, - len = 50, - any_na = FALSE - ), - sim = quickcheck::double_bounded( - left = -1000, - right = 1000, - len = 50, - any_na = FALSE - ), - property = function(obs, sim) { - new <- pbias_vec(truth = obs, estimate = sim, na_rm = TRUE) - old <- hydroGOF::pbias(sim = sim, obs = obs, na.rm = TRUE, dec = 9) - - expect_equal(new, old) - } - ) -}) - -test_that("mse", { - skip_if_not_installed("quickcheck") - skip_if_not_installed("hydroGOF") - - # With NA - quickcheck::for_all( - obs = quickcheck::double_bounded( - left = -1000, - right = 1000, - len = 50, - any_na = TRUE - ), - sim = quickcheck::double_bounded( - left = -1000, - right = 1000, - len = 50, - any_na = TRUE - ), - property = function(obs, sim) { - new <- mse_vec(truth = obs, estimate = sim, na_rm = TRUE) - old <- hydroGOF::mse(sim = sim, obs = obs, na.rm = TRUE) - - expect_equal(new, old) - } - ) - - # Without NA - 
quickcheck::for_all( - obs = quickcheck::double_bounded( - left = -1000, - right = 1000, - len = 50, - any_na = FALSE - ), - sim = quickcheck::double_bounded( - left = -1000, - right = 1000, - len = 50, - any_na = FALSE - ), - property = function(obs, sim) { - new <- mse_vec(truth = obs, estimate = sim, na_rm = TRUE) - old <- hydroGOF::mse(sim = sim, obs = obs, na.rm = TRUE) - - expect_equal(new, old) - } - ) -}) +# Property-based testing +# https://www.etiennebacher.com/posts/2024-10-01-using-property-testing-in-r + +# Tests to ensure an accordance in results between hydroGOF and +# tidyhydro packages. + +options( + quickcheck.tests = 20L, + quickcheck.shrinks = 10L, + quickcheck.discards = 10L +) + +test_that("nse", { + skip_if_not_installed("quickcheck") + skip_if_not_installed("hydroGOF") + + # With NA + # !NB: two tests to ensure that the C++ functions return the + # same results as hydroGOF with both NA values present and without + quickcheck::for_all( + obs = quickcheck::double_bounded( + left = -2000, + right = 2000, + len = 50, + any_na = TRUE + ), + sim = quickcheck::double_bounded( + left = -2000, + right = 2000, + len = 50, + any_na = TRUE + ), + property = function(obs, sim) { + new <- nse_vec(truth = obs, estimate = sim, na_rm = TRUE) + old <- hydroGOF::NSE(sim = sim, obs = obs, na.rm = TRUE) + + expect_equal(new, old) + } + ) + + # Without NA + quickcheck::for_all( + obs = quickcheck::double_bounded( + left = -2000, + right = 2000, + len = 50, + any_na = FALSE + ), + sim = quickcheck::double_bounded( + left = -2000, + right = 2000, + len = 50, + any_na = FALSE + ), + property = function(obs, sim) { + new <- nse_vec(truth = obs, estimate = sim, na_rm = TRUE) + old <- hydroGOF::NSE(sim = sim, obs = obs, na.rm = TRUE) + + expect_equal(new, old) + } + ) +}) + +test_that("kge", { + skip_if_not_installed("quickcheck") + skip_if_not_installed("hydroGOF") + + # With NA + quickcheck::for_all( + obs = quickcheck::double_bounded( + left = -2000, + right = 2000, 
+ len = 50, + any_na = TRUE + ), + sim = quickcheck::double_bounded( + left = -2000, + right = 2000, + len = 50, + any_na = TRUE + ), + property = function(obs, sim) { + new <- kge_vec(truth = obs, estimate = sim, na_rm = TRUE) + old <- hydroGOF::KGE(sim = sim, obs = obs, na.rm = TRUE) + + expect_equal(new, old) + } + ) + + # Without NA + quickcheck::for_all( + obs = quickcheck::double_bounded( + left = -2000, + right = 2000, + len = 50, + any_na = FALSE + ), + sim = quickcheck::double_bounded( + left = -2000, + right = 2000, + len = 50, + any_na = FALSE + ), + property = function(obs, sim) { + new <- kge_vec(truth = obs, estimate = sim, na_rm = TRUE) + old <- hydroGOF::KGE(sim = sim, obs = obs, na.rm = TRUE) + + expect_equal(new, old) + } + ) +}) + + +test_that("kge2012", { + skip_if_not_installed("quickcheck") + skip_if_not_installed("hydroGOF") + + # With NA + quickcheck::for_all( + obs = quickcheck::double_bounded( + left = -2000, + right = 2000, + len = 50, + any_na = TRUE + ), + sim = quickcheck::double_bounded( + left = -2000, + right = 2000, + len = 50, + any_na = TRUE + ), + property = function(obs, sim) { + new <- kge2012_vec(truth = obs, estimate = sim, na_rm = TRUE) + old <- hydroGOF::KGE( + sim = sim, + obs = obs, + na.rm = TRUE, + method = "2012" + ) + + expect_equal(new, old) + } + ) + + # Without NA + quickcheck::for_all( + obs = quickcheck::double_bounded( + left = -2000, + right = 2000, + len = 50, + any_na = FALSE + ), + sim = quickcheck::double_bounded( + left = -2000, + right = 2000, + len = 50, + any_na = FALSE + ), + property = function(obs, sim) { + new <- kge2012_vec(truth = obs, estimate = sim, na_rm = TRUE) + old <- hydroGOF::KGE( + sim = sim, + obs = obs, + na.rm = TRUE, + method = "2012" + ) + + expect_equal(new, old) + } + ) +}) + +test_that("kgelog", { + skip_if_not_installed("quickcheck") + skip_if_not_installed("hydroGOF") + + # With NA + quickcheck::for_all( + obs = quickcheck::double_bounded( + left = 1, + right = 2000, + len = 
50, + any_na = TRUE + ), + sim = quickcheck::double_bounded( + left = 1, + right = 2000, + len = 50, + any_na = TRUE + ), + property = function(obs, sim) { + new <- kgelog_vec(truth = obs, estimate = sim, na_rm = TRUE) + old <- hydroGOF::KGE( + sim = log10(sim), + obs = log10(obs), + na.rm = TRUE, + method = "2012" + ) + + expect_equal(new, old) + } + ) + + # Without NA + quickcheck::for_all( + obs = quickcheck::double_bounded( + left = 1, + right = 2000, + len = 50, + any_na = FALSE + ), + sim = quickcheck::double_bounded( + left = 1, + right = 2000, + len = 50, + any_na = FALSE + ), + property = function(obs, sim) { + new <- kgelog_vec(truth = obs, estimate = sim, na_rm = TRUE) + old <- hydroGOF::KGE( + sim = log10(sim), + obs = log10(obs), + na.rm = TRUE, + method = "2012" + ) + + expect_equal(new, old) + } + ) +}) + +test_that("pbias", { + skip_if_not_installed("quickcheck") + skip_if_not_installed("hydroGOF") + + # With NA + quickcheck::for_all( + obs = quickcheck::double_bounded( + left = -2000, + right = 2000, + len = 50, + any_na = TRUE + ), + sim = quickcheck::double_bounded( + left = -2000, + right = 2000, + len = 50, + any_na = TRUE + ), + property = function(obs, sim) { + new <- pbias_vec(truth = obs, estimate = sim, na_rm = TRUE) + old <- hydroGOF::pbias(sim = sim, obs = obs, na.rm = TRUE, dec = 9) + + expect_equal(new, old) + } + ) + + # Without NA + quickcheck::for_all( + obs = quickcheck::double_bounded( + left = -2000, + right = 2000, + len = 50, + any_na = FALSE + ), + sim = quickcheck::double_bounded( + left = -2000, + right = 2000, + len = 50, + any_na = FALSE + ), + property = function(obs, sim) { + new <- pbias_vec(truth = obs, estimate = sim, na_rm = TRUE) + old <- hydroGOF::pbias(sim = sim, obs = obs, na.rm = TRUE, dec = 9) + + expect_equal(new, old) + } + ) +}) + +test_that("mse", { + skip_if_not_installed("quickcheck") + skip_if_not_installed("hydroGOF") + + # With NA + quickcheck::for_all( + obs = quickcheck::double_bounded( + left = 
-2000, + right = 2000, + len = 50, + any_na = TRUE + ), + sim = quickcheck::double_bounded( + left = -2000, + right = 2000, + len = 50, + any_na = TRUE + ), + property = function(obs, sim) { + new <- mse_vec(truth = obs, estimate = sim, na_rm = TRUE) + old <- hydroGOF::mse(sim = sim, obs = obs, na.rm = TRUE) + + expect_equal(new, old) + } + ) + + # Without NA + quickcheck::for_all( + obs = quickcheck::double_bounded( + left = -2000, + right = 2000, + len = 50, + any_na = FALSE + ), + sim = quickcheck::double_bounded( + left = -2000, + right = 2000, + len = 50, + any_na = FALSE + ), + property = function(obs, sim) { + new <- mse_vec(truth = obs, estimate = sim, na_rm = TRUE) + old <- hydroGOF::mse(sim = sim, obs = obs, na.rm = TRUE) + + expect_equal(new, old) + } + ) +}) + +test_that("rmse", { + skip_if_not_installed("quickcheck") + skip_if_not_installed("hydroGOF") + + # With NA + quickcheck::for_all( + obs = quickcheck::double_bounded( + left = -2000, + right = 2000, + len = 50, + any_na = TRUE + ), + sim = quickcheck::double_bounded( + left = -2000, + right = 2000, + len = 50, + any_na = TRUE + ), + property = function(obs, sim) { + new <- rmse_vec(truth = obs, estimate = sim, na_rm = TRUE) + old <- hydroGOF::rmse(sim = sim, obs = obs, na.rm = TRUE) + + expect_equal(new, old) + } + ) + + # Without NA + quickcheck::for_all( + obs = quickcheck::double_bounded( + left = -2000, + right = 2000, + len = 50, + any_na = FALSE + ), + sim = quickcheck::double_bounded( + left = -2000, + right = 2000, + len = 50, + any_na = FALSE + ), + property = function(obs, sim) { + new <- rmse_vec(truth = obs, estimate = sim, na_rm = TRUE) + old <- hydroGOF::rmse(sim = sim, obs = obs, na.rm = TRUE) + + expect_equal(new, old) + } + ) +}) diff --git a/tests/testthat/test-kge.R b/tests/testthat/test-kge.R index a204896..43dd9e7 100644 --- a/tests/testthat/test-kge.R +++ b/tests/testthat/test-kge.R @@ -1,68 +1,83 @@ -test_that("na_rm works as expected", { - ex_dat <- 
generate_numeric_test_data() - not_na <- !is.na(ex_dat$pred_na) - - # No missing data, na.rm = FALSE - expect_equal( - kge_vec(truth = ex_dat$obs, estimate = ex_dat$pred, na_rm = FALSE), - #fmt: skip - 1 - sqrt( - (cor(ex_dat$pred, ex_dat$obs) - 1)^2 + - (sd(ex_dat$pred) / sd(ex_dat$obs) - 1)^2 + - (mean(ex_dat$pred) / mean(ex_dat$obs) - 1)^2 - ) - ) - - # No missing data, na.rm = TRUE - expect_equal( - kge_vec(truth = ex_dat$obs, estimate = ex_dat$pred, na_rm = TRUE), - #fmt: skip - 1 - sqrt( - (cor(ex_dat$pred, ex_dat$obs) - 1)^2 + - (sd(ex_dat$pred) / sd(ex_dat$obs) - 1)^2 + - (mean(ex_dat$pred) / mean(ex_dat$obs) - 1)^2 - ) - ) - - # Missing data is present, na.rm = FALSE - expect_equal( - kge_vec(truth = ex_dat$obs, estimate = ex_dat$pred_na, na_rm = FALSE), - NA_real_ - ) - - # Missing data is present, na.rm = TRUE - expect_equal( - kge(ex_dat, truth = obs, estimate = pred_na, na_rm = TRUE)[[".estimate"]], - #fmt: skip - 1 - sqrt( - (cor(ex_dat$pred[not_na], ex_dat$obs[not_na]) - 1)^2 + - (sd(ex_dat$pred[not_na]) / sd(ex_dat$obs[not_na]) - 1)^2 + - (mean(ex_dat$pred[not_na]) / mean(ex_dat$obs[not_na]) - 1)^2 - ), - tolerance = 0.0001 - ) -}) - -test_that("Integer columns are allowed", { - ex_dat <- generate_numeric_test_data() - ex_dat$obs <- as.integer(ex_dat$obs) - - expect_equal( - kge(ex_dat, truth = "obs", estimate = "pred", na_rm = FALSE)[[".estimate"]], - #fmt: skip - 1 - sqrt( - (cor(ex_dat$pred, ex_dat$obs) - 1)^2 + - (sd(ex_dat$pred) / sd(ex_dat$obs) - 1)^2 + - (mean(ex_dat$pred) / mean(ex_dat$obs) - 1)^2 - ) - ) -}) - -test_that("Internal function works as expected", { - ex_dat <- generate_numeric_test_data() - - expect_true( - kge_cpp(ex_dat$obs, ex_dat$pred, na_rm = TRUE, version = "2009") != - kge_cpp(ex_dat$obs, ex_dat$pred, na_rm = TRUE, version = "2012") - ) -}) +test_that("na_rm works as expected", { + ex_dat <- generate_numeric_test_data() + not_na <- !is.na(ex_dat$pred_na) + + # No missing data, na.rm = FALSE + expect_equal( + kge_cpp( + 
obs = ex_dat$obs, + sim = ex_dat$pred, + na_rm = FALSE, + version = "2009" + ), + #fmt: skip + 1 - sqrt( + (cor(ex_dat$pred, ex_dat$obs) - 1)^2 + + (sd(ex_dat$pred) / sd(ex_dat$obs) - 1)^2 + + (mean(ex_dat$pred) / mean(ex_dat$obs) - 1)^2 + ) + ) + + # No missing data, na.rm = TRUE + expect_equal( + kge_cpp( + obs = ex_dat$obs, + sim = ex_dat$pred, + na_rm = TRUE, + version = "2009" + ), + #fmt: skip + 1 - sqrt( + (cor(ex_dat$pred, ex_dat$obs) - 1)^2 + + (sd(ex_dat$pred) / sd(ex_dat$obs) - 1)^2 + + (mean(ex_dat$pred) / mean(ex_dat$obs) - 1)^2 + ) + ) + + # Missing data is present, na.rm = FALSE + expect_equal( + kge_cpp( + obs = ex_dat$obs, + sim = ex_dat$pred_na, + na_rm = FALSE, + version = "2009" + ), + NA_real_ + ) + + # Missing data is present, na.rm = TRUE + expect_equal( + kge_cpp(ex_dat$obs, ex_dat$pred_na, na_rm = TRUE, version = "2009"), + #fmt: skip + 1 - sqrt( + (cor(ex_dat$pred[not_na], ex_dat$obs[not_na]) - 1)^2 + + (sd(ex_dat$pred[not_na]) / sd(ex_dat$obs[not_na]) - 1)^2 + + (mean(ex_dat$pred[not_na]) / mean(ex_dat$obs[not_na]) - 1)^2 + ), + tolerance = 0.0001 + ) +}) + +test_that("Integer columns are allowed", { + ex_dat <- generate_numeric_test_data() + ex_dat$obs <- as.integer(ex_dat$obs) + + expect_equal( + kge_cpp(ex_dat$obs, ex_dat$pred, na_rm = FALSE, version = "2009"), + #fmt: skip + 1 - sqrt( + (cor(ex_dat$pred, ex_dat$obs) - 1)^2 + + (sd(ex_dat$pred) / sd(ex_dat$obs) - 1)^2 + + (mean(ex_dat$pred) / mean(ex_dat$obs) - 1)^2 + ) + ) +}) + +test_that("Internal function works as expected", { + ex_dat <- generate_numeric_test_data() + + expect_true( + kge_cpp(ex_dat$obs, ex_dat$pred, na_rm = TRUE, version = "2009") != + kge_cpp(ex_dat$obs, ex_dat$pred, na_rm = TRUE, version = "2012") + ) +}) diff --git a/tests/testthat/test-mse.R b/tests/testthat/test-mse.R index 3334913..b9da4a7 100644 --- a/tests/testthat/test-mse.R +++ b/tests/testthat/test-mse.R @@ -1,37 +1,58 @@ -test_that("na_rm works as expected", { - ex_dat <- generate_numeric_test_data() 
- not_na <- !is.na(ex_dat$pred_na) - - # No missing data, na.rm = FALSE - expect_equal( - mse_vec(truth = ex_dat$obs, estimate = ex_dat$pred, na_rm = FALSE), - mean((ex_dat$pred - ex_dat$obs)^2) - ) - # No missing data, na.rm = TRUE - expect_equal( - mse_vec(truth = ex_dat$obs, estimate = ex_dat$pred, na_rm = TRUE), - mean((ex_dat$pred - ex_dat$obs)^2) - ) - - # Missing data is present, na.rm = FALSE - expect_equal( - mse_vec(truth = ex_dat$obs, estimate = ex_dat$pred_na, na_rm = FALSE), - NA_real_ - ) - - # Missing data is present, na.rm = TRUE - expect_equal( - mse(ex_dat, truth = obs, estimate = pred_na, na_rm = TRUE)[[".estimate"]], - mean((ex_dat$pred_na - ex_dat$obs)^2, na.rm = TRUE) - ) -}) - -test_that("Integer columns are allowed", { - ex_dat <- generate_numeric_test_data() - ex_dat$obs <- as.integer(ex_dat$obs) - - expect_equal( - mse(ex_dat, truth = "obs", estimate = "pred", na_rm = FALSE)[[".estimate"]], - mean((ex_dat$pred - ex_dat$obs)^2, na.rm = FALSE) - ) -}) +test_that("na_rm works as expected", { + ex_dat <- generate_numeric_test_data() + not_na <- !is.na(ex_dat$pred_na) + + # No missing data, na.rm = FALSE + expect_equal( + mse_vec(truth = ex_dat$obs, estimate = ex_dat$pred, na_rm = FALSE), + mean((ex_dat$pred - ex_dat$obs)^2) + ) + expect_equal( + rmse_vec(truth = ex_dat$obs, estimate = ex_dat$pred, na_rm = FALSE), + sqrt(mean((ex_dat$pred - ex_dat$obs)^2)) + ) + + # No missing data, na.rm = TRUE + expect_equal( + mse_vec(truth = ex_dat$obs, estimate = ex_dat$pred, na_rm = TRUE), + mean((ex_dat$pred - ex_dat$obs)^2) + ) + expect_equal( + rmse_vec(truth = ex_dat$obs, estimate = ex_dat$pred, na_rm = TRUE), + sqrt(mean((ex_dat$pred - ex_dat$obs)^2)) + ) + + # Missing data is present, na.rm = FALSE + expect_equal( + mse_vec(truth = ex_dat$obs, estimate = ex_dat$pred_na, na_rm = FALSE), + NA_real_ + ) + expect_equal( + rmse_vec(truth = ex_dat$obs, estimate = ex_dat$pred_na, na_rm = FALSE), + NA_real_ + ) + + # Missing data is present, na.rm = TRUE + 
expect_equal( + mse(ex_dat, truth = obs, estimate = pred_na, na_rm = TRUE)[[".estimate"]], + mean((ex_dat$pred_na - ex_dat$obs)^2, na.rm = TRUE) + ) + expect_equal( + rmse(ex_dat, truth = obs, estimate = pred_na, na_rm = TRUE)[[".estimate"]], + sqrt(mean((ex_dat$pred_na - ex_dat$obs)^2, na.rm = TRUE)) + ) +}) + +test_that("Integer columns are allowed", { + ex_dat <- generate_numeric_test_data() + ex_dat$obs <- as.integer(ex_dat$obs) + + expect_equal( + mse(ex_dat, truth = "obs", estimate = "pred", na_rm = FALSE)[[".estimate"]], + mean((ex_dat$pred - ex_dat$obs)^2, na.rm = FALSE) + ) + expect_equal( + rmse(ex_dat, truth = "obs", estimate = "pred", na_rm = FALSE)[[".estimate"]], + sqrt(mean((ex_dat$pred - ex_dat$obs)^2, na.rm = FALSE)) + ) +}) diff --git a/tests/testthat/test-nse.R b/tests/testthat/test-nse.R index e74dfcb..9b33d7d 100644 --- a/tests/testthat/test-nse.R +++ b/tests/testthat/test-nse.R @@ -1,54 +1,54 @@ -test_that("na_rm works as expected", { - ex_dat <- generate_numeric_test_data() - not_na <- !is.na(ex_dat$pred_na) - - # No missing data, na.rm = FALSE - expect_equal( - nse_vec(truth = ex_dat$obs, estimate = ex_dat$pred, na_rm = FALSE), - #fmt:skip - 1 - (sum((ex_dat$obs - ex_dat$pred)^2) / - sum((ex_dat$obs - mean(ex_dat$obs))^2)) - ) - # No missing data, na.rm = TRUE - expect_equal( - nse_vec(truth = ex_dat$obs, estimate = ex_dat$pred, na_rm = TRUE), - #fmt:skip - 1 - (sum((ex_dat$obs - ex_dat$pred)^2) / - sum((ex_dat$obs - mean(ex_dat$obs))^2)) - ) - - # Missing data is present, na.rm = FALSE - expect_equal( - nse_vec(truth = ex_dat$obs, estimate = ex_dat$pred_na, na_rm = FALSE), - NA_real_ - ) - - # Missing data is present, na.rm = TRUE - expect_equal( - nse(ex_dat, truth = obs, estimate = pred_na, na_rm = TRUE)[[".estimate"]], - #fmt:skip - 1 - (sum((ex_dat$obs[not_na] - ex_dat$pred[not_na])^2) / - sum((ex_dat$obs[not_na] - mean(ex_dat$obs[not_na]))^2)) - ) -}) - -test_that("Integer columns are allowed", { - ex_dat <- 
generate_numeric_test_data() - ex_dat$obs <- as.integer(ex_dat$obs) - - expect_equal( - nse(ex_dat, truth = "obs", estimate = "pred", na_rm = FALSE)[[".estimate"]], - #fmt:skip - 1 - (sum((ex_dat$obs - ex_dat$pred)^2) / - sum((ex_dat$obs - mean(ex_dat$obs))^2)) - ) -}) - -test_that("Result interpretation is returned", { - ex_dat <- generate_numeric_test_data() - - expect_equal( - nse_vec(truth = ex_dat$obs, estimate = ex_dat$pred, performance = TRUE), - "Good" - ) -}) +test_that("na_rm works as expected", { + ex_dat <- generate_numeric_test_data() + not_na <- !is.na(ex_dat$pred_na) + + # No missing data, na.rm = FALSE + expect_equal( + nse_vec(truth = ex_dat$obs, estimate = ex_dat$pred, na_rm = FALSE), + #fmt:skip + 1 - (sum((ex_dat$obs - ex_dat$pred)^2) / + sum((ex_dat$obs - mean(ex_dat$obs))^2)) + ) + # No missing data, na.rm = TRUE + expect_equal( + nse_vec(truth = ex_dat$obs, estimate = ex_dat$pred, na_rm = TRUE), + #fmt:skip + 1 - (sum((ex_dat$obs - ex_dat$pred)^2) / + sum((ex_dat$obs - mean(ex_dat$obs))^2)) + ) + + # Missing data is present, na.rm = FALSE + expect_equal( + nse_vec(truth = ex_dat$obs, estimate = ex_dat$pred_na, na_rm = FALSE), + NA_real_ + ) + + # Missing data is present, na.rm = TRUE + expect_equal( + nse(ex_dat, truth = obs, estimate = pred_na, na_rm = TRUE)[[".estimate"]], + #fmt:skip + 1 - (sum((ex_dat$obs[not_na] - ex_dat$pred[not_na])^2) / + sum((ex_dat$obs[not_na] - mean(ex_dat$obs[not_na]))^2)) + ) +}) + +test_that("Integer columns are allowed", { + ex_dat <- generate_numeric_test_data() + ex_dat$obs <- as.integer(ex_dat$obs) + + expect_equal( + nse(ex_dat, truth = "obs", estimate = "pred", na_rm = FALSE)[[".estimate"]], + #fmt:skip + 1 - (sum((ex_dat$obs - ex_dat$pred)^2) / + sum((ex_dat$obs - mean(ex_dat$obs))^2)) + ) +}) + +test_that("Result interpretation is returned", { + ex_dat <- generate_numeric_test_data() + + expect_equal( + nse_vec(truth = ex_dat$obs, estimate = ex_dat$pred, performance = TRUE), + "Good" + ) +}) diff --git 
a/tests/testthat/test-pbias.R b/tests/testthat/test-pbias.R index 40a02a5..340094c 100644 --- a/tests/testthat/test-pbias.R +++ b/tests/testthat/test-pbias.R @@ -1,73 +1,73 @@ -test_that("na_rm works as expected", { - ex_dat <- generate_numeric_test_data() - not_na <- !is.na(ex_dat$pred_na) - - # No missing data, na.rm = FALSE - expect_equal( - pbias_vec(truth = ex_dat$obs, estimate = ex_dat$pred, na_rm = FALSE), - #fmt:skip - 100 * (sum(ex_dat$pred - ex_dat$obs) / sum(ex_dat$obs)) - ) - # No missing data, na.rm = TRUE - expect_equal( - pbias_vec(truth = ex_dat$obs, estimate = ex_dat$pred, na_rm = TRUE), - #fmt:skip - 100 * (sum(ex_dat$pred - ex_dat$obs) / sum(ex_dat$obs)) - ) - - # Missing data is present, na.rm = FALSE - expect_equal( - pbias_vec(truth = ex_dat$obs, estimate = ex_dat$pred_na, na_rm = FALSE), - NA_real_ - ) - - # Missing data is present, na.rm = TRUE - expect_equal( - pbias(ex_dat, truth = obs, estimate = pred_na, na_rm = TRUE)[[".estimate"]], - #fmt:skip - 100 * - (sum(ex_dat$pred[not_na] - ex_dat$obs[not_na]) / sum(ex_dat$obs[not_na])), - tolerance = 0.0001 - ) -}) - -test_that("Integer columns are allowed", { - ex_dat <- generate_numeric_test_data() - ex_dat$obs <- as.integer(ex_dat$obs) - - expect_equal( - pbias_vec(truth = ex_dat$obs, estimate = ex_dat$pred, na_rm = FALSE), - #fmt:skip - 100 * (sum(ex_dat$pred - ex_dat$obs) / sum(ex_dat$obs)) - ) -}) - -test_that("Result similar to {hydroGOF} package", { - skip_if_not_installed("hydroGOF") - - ex_dat <- generate_numeric_test_data() - not_na <- !is.na(ex_dat$pred_na) - - # General case - expect_equal( - pbias_vec(truth = ex_dat$obs, estimate = ex_dat$pred, na_rm = FALSE), - hydroGOF::pbias(obs = ex_dat$obs, sim = ex_dat$pred), - tolerance = 0.1 - ) - - # With missing data - expect_equal( - pbias_vec(truth = ex_dat$obs, estimate = ex_dat$pred_na, na_rm = TRUE), - hydroGOF::pbias(obs = ex_dat$obs, sim = ex_dat$pred_na, na.rm = TRUE), - tolerance = 0.0001 - ) -}) - -test_that("Result 
interpretation is returned", { - ex_dat <- generate_numeric_test_data() - - expect_equal( - pbias_vec(truth = ex_dat$obs, estimate = ex_dat$pred, performance = TRUE), - "Poor" - ) -}) +test_that("na_rm works as expected", { + ex_dat <- generate_numeric_test_data() + not_na <- !is.na(ex_dat$pred_na) + + # No missing data, na.rm = FALSE + expect_equal( + pbias_vec(truth = ex_dat$obs, estimate = ex_dat$pred, na_rm = FALSE), + #fmt:skip + 100 * (sum(ex_dat$pred - ex_dat$obs) / sum(ex_dat$obs)) + ) + # No missing data, na.rm = TRUE + expect_equal( + pbias_vec(truth = ex_dat$obs, estimate = ex_dat$pred, na_rm = TRUE), + #fmt:skip + 100 * (sum(ex_dat$pred - ex_dat$obs) / sum(ex_dat$obs)) + ) + + # Missing data is present, na.rm = FALSE + expect_equal( + pbias_vec(truth = ex_dat$obs, estimate = ex_dat$pred_na, na_rm = FALSE), + NA_real_ + ) + + # Missing data is present, na.rm = TRUE + expect_equal( + pbias(ex_dat, truth = obs, estimate = pred_na, na_rm = TRUE)[[".estimate"]], + #fmt:skip + 100 * + (sum(ex_dat$pred[not_na] - ex_dat$obs[not_na]) / sum(ex_dat$obs[not_na])), + tolerance = 0.0001 + ) +}) + +test_that("Integer columns are allowed", { + ex_dat <- generate_numeric_test_data() + ex_dat$obs <- as.integer(ex_dat$obs) + + expect_equal( + pbias_vec(truth = ex_dat$obs, estimate = ex_dat$pred, na_rm = FALSE), + #fmt:skip + 100 * (sum(ex_dat$pred - ex_dat$obs) / sum(ex_dat$obs)) + ) +}) + +test_that("Result similar to {hydroGOF} package", { + skip_if_not_installed("hydroGOF") + + ex_dat <- generate_numeric_test_data() + not_na <- !is.na(ex_dat$pred_na) + + # General case + expect_equal( + pbias_vec(truth = ex_dat$obs, estimate = ex_dat$pred, na_rm = FALSE), + hydroGOF::pbias(obs = ex_dat$obs, sim = ex_dat$pred), + tolerance = 0.1 + ) + + # With missing data + expect_equal( + pbias_vec(truth = ex_dat$obs, estimate = ex_dat$pred_na, na_rm = TRUE), + hydroGOF::pbias(obs = ex_dat$obs, sim = ex_dat$pred_na, na.rm = TRUE), + tolerance = 0.0001 + ) +}) + +test_that("Result 
interpretation is returned", { + ex_dat <- generate_numeric_test_data() + + expect_equal( + pbias_vec(truth = ex_dat$obs, estimate = ex_dat$pred, performance = TRUE), + "Poor" + ) +}) diff --git a/tests/testthat/test-press.R b/tests/testthat/test-press.R index 565456a..d4f8e9f 100644 --- a/tests/testthat/test-press.R +++ b/tests/testthat/test-press.R @@ -1,23 +1,23 @@ -test_that("press", { - ex_dat <- generate_numeric_test_data() - not_na <- !is.na(ex_dat$pred_na) - - expect_equal( - press( - ex_dat, - truth = "obs", - estimate = "pred", - na_rm = FALSE - )[[".estimate"]], - sum((ex_dat$obs - ex_dat$pred)^2) - ) - expect_equal( - press( - ex_dat, - truth = obs, - estimate = "pred_na", - na_rm = TRUE - )[[".estimate"]], - sum((ex_dat$obs[not_na] - ex_dat$pred[not_na])^2) - ) -}) +test_that("press", { + ex_dat <- generate_numeric_test_data() + not_na <- !is.na(ex_dat$pred_na) + + expect_equal( + press( + ex_dat, + truth = "obs", + estimate = "pred", + na_rm = FALSE + )[[".estimate"]], + sum((ex_dat$obs - ex_dat$pred)^2) + ) + expect_equal( + press( + ex_dat, + truth = obs, + estimate = "pred_na", + na_rm = TRUE + )[[".estimate"]], + sum((ex_dat$obs[not_na] - ex_dat$pred[not_na])^2) + ) +}) diff --git a/tests/testthat/test-system-crash.R b/tests/testthat/test-system-crash.R index 9312a85..536690f 100644 --- a/tests/testthat/test-system-crash.R +++ b/tests/testthat/test-system-crash.R @@ -1,141 +1,217 @@ -# Property-based testing -# https://www.etiennebacher.com/posts/2024-10-01-using-property-testing-in-r - -options( - quickcheck.tests = 20L, - quickcheck.shrinks = 10L, - quickcheck.discards = 10L -) - -test_that("nse", { - skip_if_not_installed("quickcheck") - - quickcheck::for_all( - obs = quickcheck::any_atomic(any_na = TRUE), - sim = quickcheck::any_atomic(any_na = TRUE), - na_flag = quickcheck::logical_(any_na = FALSE), - property = function(obs, sim, na_flag) { - suppressWarnings( - try( - nse_vec(truth = obs, estimate = sim, na_rm = na_flag), - silent = TRUE - 
) - ) - expect_true(TRUE) - } - ) -}) - -test_that("mse", { - skip_if_not_installed("quickcheck") - - quickcheck::for_all( - obs = quickcheck::any_atomic(any_na = TRUE), - sim = quickcheck::any_atomic(any_na = TRUE), - na_flag = quickcheck::logical_(any_na = FALSE), - property = function(obs, sim, na_flag) { - suppressWarnings( - try( - mse_vec(truth = obs, estimate = sim, na_rm = na_flag), - silent = TRUE - ) - ) - expect_true(TRUE) - } - ) -}) - -test_that("kge", { - skip_if_not_installed("quickcheck") - - quickcheck::for_all( - obs = quickcheck::any_atomic(any_na = TRUE), - sim = quickcheck::any_atomic(any_na = TRUE), - na_flag = quickcheck::logical_(any_na = FALSE), - property = function(obs, sim, na_flag) { - suppressWarnings( - try( - kge_vec(truth = obs, estimate = sim, na_rm = na_flag), - silent = TRUE - ) - ) - expect_true(TRUE) - } - ) -}) - -test_that("kge2012", { - skip_if_not_installed("quickcheck") - - quickcheck::for_all( - obs = quickcheck::any_atomic(any_na = TRUE), - sim = quickcheck::any_atomic(any_na = TRUE), - na_flag = quickcheck::logical_(any_na = FALSE), - property = function(obs, sim, na_flag) { - suppressWarnings( - try( - kge2012_vec(truth = obs, estimate = sim, na_rm = na_flag), - silent = TRUE - ) - ) - expect_true(TRUE) - } - ) -}) - -test_that("pbias", { - skip_if_not_installed("quickcheck") - - quickcheck::for_all( - obs = quickcheck::any_atomic(any_na = TRUE), - sim = quickcheck::any_atomic(any_na = TRUE), - na_flag = quickcheck::logical_(any_na = FALSE), - property = function(obs, sim, na_flag) { - suppressWarnings( - try( - pbias_vec(truth = obs, estimate = sim, na_rm = na_flag), - silent = TRUE - ) - ) - expect_true(TRUE) - } - ) -}) - -test_that("sfe", { - skip_if_not_installed("quickcheck") - - quickcheck::for_all( - obs = quickcheck::any_atomic(any_na = TRUE), - sim = quickcheck::any_atomic(any_na = TRUE), - na_flag = quickcheck::logical_(any_na = FALSE), - property = function(obs, sim, na_flag) { - suppressWarnings( - try( - 
sfe_vec(truth = obs, estimate = sim, na_rm = na_flag), - silent = TRUE - ) - ) - expect_true(TRUE) - } - ) -}) - -test_that("press", { - skip_if_not_installed("quickcheck") - - quickcheck::for_all( - obs = quickcheck::any_atomic(any_na = TRUE), - sim = quickcheck::any_atomic(any_na = TRUE), - na_flag = quickcheck::logical_(any_na = FALSE), - property = function(obs, sim, na_flag) { - suppressWarnings( - try( - press_vec(truth = obs, estimate = sim, na_rm = na_flag), - silent = TRUE - ) - ) - expect_true(TRUE) - } - ) -}) +# Property-based testing +# https://www.etiennebacher.com/posts/2024-10-01-using-property-testing-in-r + +options( + quickcheck.tests = 20L, + quickcheck.shrinks = 10L, + quickcheck.discards = 10L +) + +test_that("nse", { + skip_if_not_installed("quickcheck") + + quickcheck::for_all( + obs = quickcheck::any_atomic(any_na = TRUE), + sim = quickcheck::any_atomic(any_na = TRUE), + na_flag = quickcheck::logical_(any_na = FALSE), + property = function(obs, sim, na_flag) { + suppressWarnings( + try( + nse_vec(truth = obs, estimate = sim, na_rm = na_flag), + silent = TRUE + ) + ) + expect_true(TRUE) + } + ) +}) + +test_that("mse", { + skip_if_not_installed("quickcheck") + + quickcheck::for_all( + obs = quickcheck::any_atomic(any_na = TRUE), + sim = quickcheck::any_atomic(any_na = TRUE), + na_flag = quickcheck::logical_(any_na = FALSE), + property = function(obs, sim, na_flag) { + suppressWarnings( + try( + mse_vec(truth = obs, estimate = sim, na_rm = na_flag), + silent = TRUE + ) + ) + expect_true(TRUE) + } + ) +}) + +test_that("rmse", { + skip_if_not_installed("quickcheck") + + quickcheck::for_all( + obs = quickcheck::any_atomic(any_na = TRUE), + sim = quickcheck::any_atomic(any_na = TRUE), + na_flag = quickcheck::logical_(any_na = FALSE), + property = function(obs, sim, na_flag) { + suppressWarnings( + try( + rmse_vec(truth = obs, estimate = sim, na_rm = na_flag), + silent = TRUE + ) + ) + expect_true(TRUE) + } + ) +}) + +test_that("kge", { + 
skip_if_not_installed("quickcheck") + + quickcheck::for_all( + obs = quickcheck::any_atomic(any_na = TRUE), + sim = quickcheck::any_atomic(any_na = TRUE), + na_flag = quickcheck::logical_(any_na = FALSE), + property = function(obs, sim, na_flag) { + suppressWarnings( + try( + kge_vec(truth = obs, estimate = sim, na_rm = na_flag), + silent = TRUE + ) + ) + expect_true(TRUE) + } + ) +}) + +test_that("kge2012", { + skip_if_not_installed("quickcheck") + + quickcheck::for_all( + obs = quickcheck::any_atomic(any_na = TRUE), + sim = quickcheck::any_atomic(any_na = TRUE), + na_flag = quickcheck::logical_(any_na = FALSE), + property = function(obs, sim, na_flag) { + suppressWarnings( + try( + kge2012_vec(truth = obs, estimate = sim, na_rm = na_flag), + silent = TRUE + ) + ) + expect_true(TRUE) + } + ) +}) + +test_that("kgelog", { + skip_if_not_installed("quickcheck") + + quickcheck::for_all( + obs = quickcheck::any_atomic(any_na = TRUE), + sim = quickcheck::any_atomic(any_na = TRUE), + na_flag = quickcheck::logical_(any_na = FALSE), + property = function(obs, sim, na_flag) { + suppressWarnings( + try( + kgelog_vec(truth = obs, estimate = sim, na_rm = na_flag), + silent = TRUE + ) + ) + expect_true(TRUE) + } + ) +}) + +test_that("kgelog_low", { + skip_if_not_installed("quickcheck") + + quickcheck::for_all( + obs = quickcheck::any_atomic(any_na = TRUE), + sim = quickcheck::any_atomic(any_na = TRUE), + na_flag = quickcheck::logical_(any_na = FALSE), + property = function(obs, sim, na_flag) { + suppressWarnings( + try( + kgelog_low_vec(truth = obs, estimate = sim, na_rm = na_flag), + silent = TRUE + ) + ) + expect_true(TRUE) + } + ) +}) + +test_that("kgelog_hi", { + skip_if_not_installed("quickcheck") + + quickcheck::for_all( + obs = quickcheck::any_atomic(any_na = TRUE), + sim = quickcheck::any_atomic(any_na = TRUE), + na_flag = quickcheck::logical_(any_na = FALSE), + property = function(obs, sim, na_flag) { + suppressWarnings( + try( + kgelog_hi_vec(truth = obs, estimate = 
sim, na_rm = na_flag), + silent = TRUE + ) + ) + expect_true(TRUE) + } + ) +}) + +test_that("pbias", { + skip_if_not_installed("quickcheck") + + quickcheck::for_all( + obs = quickcheck::any_atomic(any_na = TRUE), + sim = quickcheck::any_atomic(any_na = TRUE), + na_flag = quickcheck::logical_(any_na = FALSE), + property = function(obs, sim, na_flag) { + suppressWarnings( + try( + pbias_vec(truth = obs, estimate = sim, na_rm = na_flag), + silent = TRUE + ) + ) + expect_true(TRUE) + } + ) +}) + +test_that("sfe", { + skip_if_not_installed("quickcheck") + + quickcheck::for_all( + obs = quickcheck::any_atomic(any_na = TRUE), + sim = quickcheck::any_atomic(any_na = TRUE), + na_flag = quickcheck::logical_(any_na = FALSE), + property = function(obs, sim, na_flag) { + suppressWarnings( + try( + sfe_vec(truth = obs, estimate = sim, na_rm = na_flag), + silent = TRUE + ) + ) + expect_true(TRUE) + } + ) +}) + +test_that("press", { + skip_if_not_installed("quickcheck") + + quickcheck::for_all( + obs = quickcheck::any_atomic(any_na = TRUE), + sim = quickcheck::any_atomic(any_na = TRUE), + na_flag = quickcheck::logical_(any_na = FALSE), + property = function(obs, sim, na_flag) { + suppressWarnings( + try( + press_vec(truth = obs, estimate = sim, na_rm = na_flag), + silent = TRUE + ) + ) + expect_true(TRUE) + } + ) +}) diff --git a/vignettes/.gitignore b/vignettes/.gitignore index ebd7da3..075b254 100644 --- a/vignettes/.gitignore +++ b/vignettes/.gitignore @@ -1 +1 @@ -/.quarto/ +/.quarto/ diff --git a/vignettes/articles/.gitignore b/vignettes/articles/.gitignore index ffdf081..fbda3f7 100644 --- a/vignettes/articles/.gitignore +++ b/vignettes/articles/.gitignore @@ -1,5 +1,5 @@ -*.html -*.R -*_files - -/.quarto/ +*.html +*.R +*_files + +/.quarto/ diff --git a/vignettes/articles/benchmarks.qmd b/vignettes/articles/benchmarks.qmd index e2f52fe..027fdec 100644 --- a/vignettes/articles/benchmarks.qmd +++ b/vignettes/articles/benchmarks.qmd @@ -20,7 +20,7 @@ library(hydroGOF) ```{r} # 
NSE bench::mark( - tidyhydro = nse_vec(truth = avacha$obs, estimate = avacha$sim), + tidyhydro = tidyhydro::nse_vec(truth = avacha$obs, estimate = avacha$sim), hydroGOF = hydroGOF::NSE(sim = avacha$sim, obs = avacha$obs), relative = TRUE, check = TRUE, @@ -30,7 +30,7 @@ bench::mark( # KGE bench::mark( - tidyhydro = kge_vec(truth = avacha$obs, estimate = avacha$sim), + tidyhydro = tidyhydro::kge_vec(truth = avacha$obs, estimate = avacha$sim), hydroGOF = hydroGOF::KGE(sim = avacha$sim, obs = avacha$obs, method = "2009"), relative = TRUE, check = TRUE, @@ -40,7 +40,7 @@ bench::mark( # KGE' bench::mark( - tidyhydro = kge2012_vec(truth = avacha$obs, estimate = avacha$sim), + tidyhydro = tidyhydro::kge2012_vec(truth = avacha$obs, estimate = avacha$sim), hydroGOF = hydroGOF::KGE(sim = avacha$sim, obs = avacha$obs, method = "2012"), relative = TRUE, check = TRUE, @@ -50,7 +50,7 @@ bench::mark( # pBIAS bench::mark( - tidyhydro = pbias_vec(truth = avacha$obs, estimate = avacha$sim), + tidyhydro = tidyhydro::pbias_vec(truth = avacha$obs, estimate = avacha$sim), hydroGOF = hydroGOF::pbias(sim = avacha$sim, obs = avacha$obs, dec = 9), relative = TRUE, check = TRUE, @@ -60,11 +60,23 @@ bench::mark( # MSE bench::mark( - tidyhydro = mse_vec(truth = avacha$obs, estimate = avacha$sim), + tidyhydro = tidyhydro::mse_vec(truth = avacha$obs, estimate = avacha$sim), hydroGOF = hydroGOF::mse(sim = avacha$sim, obs = avacha$obs), relative = TRUE, check = TRUE, iterations = 25L, filter_gc = FALSE ) + +# RMSE +bench::mark( + tidyhydro = tidyhydro::rmse_vec(truth = avacha$obs, estimate = avacha$sim), + hydroGOF = hydroGOF::rmse(sim = avacha$sim, obs = avacha$obs), + yardstick = yardstick::rmse_vec(truth = avacha$obs, estimate = avacha$sim), + relative = TRUE, + check = TRUE, + iterations = 25L, + filter_gc = FALSE +) + ``` \ No newline at end of file