diff --git a/.Rbuildignore b/.Rbuildignore index 01ddd28..e6e6215 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -18,3 +18,4 @@ ^codemeta\.json$ ^CODE_OF_CONDUCT\.md$ ^src/.*\.o$ +^dev\.R$ diff --git a/DESCRIPTION b/DESCRIPTION index f42f776..af23c5d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -11,7 +11,8 @@ Depends: R (>= 4.1.0) Imports: Rcpp (>= 1.0.12), rlang (>= 1.1.0), - yardstick (>= 1.3.1) + yardstick (>= 1.3.1), + checkmate (>= 2.3.1) LinkingTo: Rcpp Roxygen: list(markdown = TRUE) RoxygenNote: 7.3.2 @@ -26,3 +27,16 @@ URL: https://github.com/atsyplenkov/tidyhydro, https://atsyplenkov.github.io/tid BugReports: https://github.com/atsyplenkov/tidyhydro/issues LazyData: true Config/Needs/website: bench, ggplot2, quarto, lubridate, dplyr +Collate: + 'RcppExports.R' + 'aaa-new.R' + 'central-tendency.R' + 'data.R' + 'kge.R' + 'mse.R' + 'nse.R' + 'pbias.R' + 'press.R' + 'sfe.R' + 'tidyhydro-package.R' + 'variability.R' diff --git a/NAMESPACE b/NAMESPACE index 28f3986..1e63bf2 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,15 +2,10 @@ useDynLib(tidyhydro, .registration = TRUE) importFrom(Rcpp, evalCpp) # exportPattern("^[[:alpha:]]+") -S3method(nse, data.frame) -S3method(kge, data.frame) -S3method(kge2012, data.frame) -S3method(mse, data.frame) -S3method(rmse, data.frame) -S3method(pbias, data.frame) -S3method(press, data.frame) -S3method(cv, data.frame) -S3method(sfe, data.frame) +S3method(print, measure) +S3method(format, measure) + +# general functions export(nse) export(kge) export(kge2012) @@ -20,6 +15,21 @@ export(pbias) export(press) export(sfe) export(cv) +export(gm) + +# data.frame methods +S3method(nse, data.frame) +S3method(kge, data.frame) +S3method(kge2012, data.frame) +S3method(mse, data.frame) +S3method(rmse, data.frame) +S3method(pbias, data.frame) +S3method(press, data.frame) +S3method(sfe, data.frame) +S3method(cv, data.frame) +S3method(gm, data.frame) + +# vector functions export(nse_vec) export(kge_vec) export(kge2012_vec) @@ -29,3 +39,4 @@ 
export(pbias_vec)
 export(press_vec)
 export(sfe_vec)
 export(cv_vec)
+export(gm_vec)
diff --git a/R/aaa-new.R b/R/aaa-new.R
new file mode 100644
index 0000000..01064be
--- /dev/null
+++ b/R/aaa-new.R
@@ -0,0 +1,81 @@
+# Modified after https://github.com/tidymodels/yardstick/blob/main/R/aaa-new.R
+
+#' Construct a new measure function
+#' @keywords summary_stats
+#'
+#' @description
+#' These functions provide convenient wrappers to create the three types of
+#' measure functions in `tidyhydro`: measures of central tendency, variability
+#' and symmetry. They add a measure-specific class to `fn` and
+#' mimic a behaviour of [metric_set][yardstick::metric_set]. These features
+#' are used by measure_set.
+#'
+#' See [Custom performance
+#' metrics](https://www.tidymodels.org/learn/develop/metrics/) for more
+#' information about creating custom metrics.
+#'
+#' @param fn A function. The measure function to attach a measure-specific class
+#'
+#' @name new-measure
+NULL
+
+#' @rdname new-measure
+#' @export
+new_tendency_measure <- function(fn) {
+  new_measure(fn, class = "tendency_measure")
+}
+
+#' @rdname new-measure
+#' @export
+new_var_measure <- function(fn) {
+  new_measure(fn, class = "var_measure")
+}
+
+#' @rdname new-measure
+#' @export
+new_sym_measure <- function(fn) {
+  new_measure(fn, class = "sym_measure")
+}
+
+# Internal constructor shared by the new_*_measure() wrappers: validates `fn`
+# and puts the measure-specific class ahead of "measure" and "function".
+new_measure <- function(fn, class = NULL) {
+  # `fn` must be a function that has a `data` argument
+  checkmate::assert_function(fn, args = "data")
+
+  class <- c(class, "measure", "function")
+
+  structure(fn, class = class)
+}
+
+# TRUE when `x` inherits from the "measure" class.
+is_measure <- function(x) {
+  inherits(x, "measure")
+}
+
+#' @noRd
+#' @export
+print.measure <- function(x, ...) {
+  # format() only builds the label; writeLines() emits it with a newline
+  writeLines(format(x))
+  invisible(x)
+}
+
+#' @noRd
+#' @export
+format.measure <- function(x, ...) {
+  # new_measure() puts the measure-specific class first; any other leading
+  # class falls through to the generic "measure" label
+  first_class <- class(x)[[1]]
+  measure_type <-
+    switch(
+      first_class,
+      "tendency_measure" = "Measure of Central Tendency",
+      "var_measure" = "Measure of Variability",
+      "sym_measure" = "Measure of Distribution Symmetry",
+      "measure"
+    )
+
+  # Return the text rather than cat() it, so format() has no side effects
+  paste("A", measure_type)
+}
diff --git a/R/central-tendency.R b/R/central-tendency.R
new file mode 100644
index 0000000..749c094
--- /dev/null
+++ b/R/central-tendency.R
@@ -0,0 +1,72 @@
+#' Geometric Mean (GM)
+#' @keywords summary_stats
+#'
+#' @family descriptive statistics
+#' @templateVar fn gm
+#' @template return
+#'
+#' @param data A `data.frame` containing the columns specified by the `truth`
+#' and `estimate` arguments.
+#'
+#' @param truth The column identifier for the true results
+#' (that is `numeric`). This should be an unquoted column name although
+#' this argument is passed by expression and supports
+#' [quasiquotation][rlang::quasiquotation] (you can unquote column
+#' names). For `_vec()` functions, a `numeric` vector.
+#'
+#' @param na_rm A `logical` value indicating whether `NA`
+#' values should be stripped before the computation proceeds.
+#'
+#' @param ... Not currently used.
+#'
+#' @template examples-description
+#'
+#' @export
+#'
+
+# TODO:
+# Add tests
+
+gm <- function(data, ...) {
+  UseMethod("gm")
+}
+
+# Tag the generic with the "tendency_measure" class
+gm <- new_tendency_measure(gm)
+
+#' @rdname gm
+#' @export
+gm.data.frame <- function(
+  data,
+  truth,
+  na_rm = TRUE,
+  ...
+) {
+  # gm is computed from a single column, so that column is passed as both
+  # `truth` and `estimate`; gm_vec() takes any extra arguments through `...`
+  # NOTE(review): presumably the summarizer requires `estimate` - confirm
+  yardstick::numeric_metric_summarizer(
+    name = "gm",
+    fn = gm_vec,
+    data = data,
+    truth = !!rlang::enquo(truth),
+    estimate = !!rlang::enquo(truth),
+    na_rm = na_rm
+  )
+}
+
+#' @rdname gm
+#' @export
+gm_vec <- function(
+  truth,
+  na_rm = TRUE,
+  ...
+) {
+  # Only values >= 1e-323 pass, ruling out zeros and negatives before log()
+  checkmate::assert_numeric(
+    truth,
+    lower = 1e-323
+  )
+  # Geometric mean computed as exp(mean(log(x)))
+  exp(mean(log(truth), na.rm = na_rm))
+}
diff --git a/R/kge.R b/R/kge.R
index d126ff2..70ff4fa 100644
--- a/R/kge.R
+++ b/R/kge.R
@@ -42,8 +42,7 @@
 #' For further discussion, see Knoben et al.
(2019), who caution against #' directly translating NSE-based interpretation thresholds to KGE. #' -#' @family numeric metrics -#' @family accuracy metrics +#' @family KGE variants #' @templateVar fn kge #' @template return #' @@ -170,8 +169,7 @@ kge_vec <- function( #' For further discussion, see Knoben et al. (2019), who caution against #' directly translating NSE-based interpretation thresholds to KGE. #' -#' @family numeric metrics -#' @family accuracy metrics +#' @family KGE variants #' @templateVar fn kge2012 #' @template return #' diff --git a/R/nse.R b/R/nse.R index 805a25c..bf1c330 100644 --- a/R/nse.R +++ b/R/nse.R @@ -35,8 +35,7 @@ #' - **Satisfactory** -- 0.5 < `nse()` < 0.6 #' - **Poor** -- `nse()` <= 0.5 #' -#' @family numeric metrics -#' @family accuracy metrics +#' @family NSE variants #' @templateVar fn nse #' @template return #' diff --git a/R/pbias.R b/R/pbias.R index 4c4f069..03bcf7b 100644 --- a/R/pbias.R +++ b/R/pbias.R @@ -33,7 +33,6 @@ #' - **Poor** -- `pbias()` >= ±15.0 #' #' @family numeric metrics -#' @family accuracy metrics #' @templateVar fn pbias #' @template return #' diff --git a/R/press.R b/R/press.R index 308ce6f..975e7ae 100644 --- a/R/press.R +++ b/R/press.R @@ -35,8 +35,7 @@ #' different transformations of response variable, e.g. linear regression and #' log-transformed linear regression (*Helsel et al., 2020*). 
#' -#' @family numeric metrics -#' @family accuracy metrics +#' @family regression metrics #' @templateVar fn press #' @template return #' diff --git a/R/sfe.R b/R/sfe.R index 85e4639..072fbc3 100644 --- a/R/sfe.R +++ b/R/sfe.R @@ -19,8 +19,7 @@ #' \item \eqn{obs} defines model observations at time step \eqn{i} #' } #' -#' @family numeric metrics -#' @family accuracy metrics +#' @family regression metrics #' @templateVar fn sfe #' @template return #' diff --git a/R/variability.R b/R/variability.R index 1cf353b..c06f7c4 100644 --- a/R/variability.R +++ b/R/variability.R @@ -1,8 +1,7 @@ #' Coefficient of Variation (Cv) -#' @keywords summary +#' @keywords summary_stats #' -#' @family numeric metrics -#' @family accuracy metrics +#' @family descriptive statistics #' @templateVar fn cv #' @template return #' @@ -25,14 +24,14 @@ #' @export #' +# TODO: +# Add tests + cv <- function(data, ...) { UseMethod("cv") } -cv <- yardstick::new_numeric_metric( - cv, - direction = "minimize" -) +cv <- new_var_measure(cv) #' @rdname cv #' @export diff --git a/_pkgdown.yml b/_pkgdown.yml index ffde41a..9110cb5 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -33,6 +33,10 @@ reference: contents: - has_keyword("regression") +- title: "Summary statistics" + contents: + - has_keyword("summary_stats") + - title: "Example datasets" contents: - has_keyword("data") diff --git a/man-roxygen/examples-description.R b/man-roxygen/examples-description.R index bdf06e4..68a1e2f 100644 --- a/man-roxygen/examples-description.R +++ b/man-roxygen/examples-description.R @@ -1,8 +1,9 @@ #' @examples #' library(tidyhydro) -#' data(avacha) #' -#' # Supply truth and predictions as bare column names +#' <%=fn %> +#' +#' # Supply truth as bare column names #' <%=fn %>(avacha, obs) #' #' # Or as numeric vectors diff --git a/man-roxygen/examples-numeric.R b/man-roxygen/examples-numeric.R index 3067b21..471a729 100644 --- a/man-roxygen/examples-numeric.R +++ b/man-roxygen/examples-numeric.R @@ -1,6 +1,5 @@ #' 
@examples #' library(tidyhydro) -#' data(avacha) #' #' # Supply truth and predictions as bare column names #' <%=fn %>(avacha, obs, sim) diff --git a/man/cv.Rd b/man/cv.Rd index f7ff7d5..e657124 100644 --- a/man/cv.Rd +++ b/man/cv.Rd @@ -41,35 +41,18 @@ Coefficient of Variation (Cv) } \examples{ library(tidyhydro) -data(avacha) -# Supply truth and predictions as bare column names +cv + +# Supply truth as bare column names cv(avacha, obs) # Or as numeric vectors cv_vec(avacha$obs) } \seealso{ -Other numeric metrics: -\code{\link{kge}()}, -\code{\link{kge2012}()}, -\code{\link{mse}()}, -\code{\link{nse}()}, -\code{\link{pbias}()}, -\code{\link{press}()}, -\code{\link{rmse}()}, -\code{\link{sfe}()} - -Other accuracy metrics: -\code{\link{kge}()}, -\code{\link{kge2012}()}, -\code{\link{mse}()}, -\code{\link{nse}()}, -\code{\link{pbias}()}, -\code{\link{press}()}, -\code{\link{rmse}()}, -\code{\link{sfe}()} +Other descriptive statistics: +\code{\link{gm}()} } -\concept{accuracy metrics} -\concept{numeric metrics} -\keyword{summary} +\concept{descriptive statistics} +\keyword{summary_stats} diff --git a/man/gm.Rd b/man/gm.Rd new file mode 100644 index 0000000..cabfd2d --- /dev/null +++ b/man/gm.Rd @@ -0,0 +1,58 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/central-tendency.R +\name{gm} +\alias{gm} +\alias{gm.data.frame} +\alias{gm_vec} +\title{Geometric Mean (GM)} +\usage{ +gm(data, ...) + +\method{gm}{data.frame}(data, truth, na_rm = TRUE, ...) + +gm_vec(truth, na_rm = TRUE, ...) +} +\arguments{ +\item{data}{A \code{data.frame} containing the columns specified by the \code{truth} +and \code{estimate} arguments.} + +\item{...}{Not currently used.} + +\item{truth}{The column identifier for the true results +(that is \code{numeric}). This should be an unquoted column name although +this argument is passed by expression and supports +\link[rlang:topic-inject]{quasiquotation} (you can unquote column +names). 
For \verb{_vec()} functions, a \code{numeric} vector.} + +\item{na_rm}{A \code{logical} value indicating whether \code{NA} +values should be stripped before the computation proceeds.} +} +\value{ +A \code{tibble} with columns \code{.metric}, \code{.estimator}, +and \code{.estimate} and 1 row of values. + +For grouped data frames, the number of rows returned will be the same as +the number of groups. + +For \code{gm_vec()}, a single \code{numeric} value (or \code{NA}). +} +\description{ +Geometric Mean (GM) +} +\examples{ +library(tidyhydro) + +gm + +# Supply truth as bare column names +gm(avacha, obs) + +# Or as numeric vectors +gm_vec(avacha$obs) +} +\seealso{ +Other descriptive statistics: +\code{\link{cv}()} +} +\concept{descriptive statistics} +\keyword{summary_stats} diff --git a/man/kge.Rd b/man/kge.Rd index 422c1b7..77439d8 100644 --- a/man/kge.Rd +++ b/man/kge.Rd @@ -84,7 +84,6 @@ directly translating NSE-based interpretation thresholds to KGE. } \examples{ library(tidyhydro) -data(avacha) # Supply truth and predictions as bare column names kge(avacha, obs, sim) @@ -104,26 +103,8 @@ Kling–Gupta efficiency scores. Hydrology and Earth System Sciences, 23, 4323–4331. \doi{10.5194/hess-23-4323-2019} } \seealso{ -Other numeric metrics: -\code{\link{cv}()}, -\code{\link{kge2012}()}, -\code{\link{mse}()}, -\code{\link{nse}()}, -\code{\link{pbias}()}, -\code{\link{press}()}, -\code{\link{rmse}()}, -\code{\link{sfe}()} - -Other accuracy metrics: -\code{\link{cv}()}, -\code{\link{kge2012}()}, -\code{\link{mse}()}, -\code{\link{nse}()}, -\code{\link{pbias}()}, -\code{\link{press}()}, -\code{\link{rmse}()}, -\code{\link{sfe}()} +Other KGE variants: +\code{\link{kge2012}()} } -\concept{accuracy metrics} -\concept{numeric metrics} +\concept{KGE variants} \keyword{gof} diff --git a/man/kge2012.Rd b/man/kge2012.Rd index 784b553..157c0ee 100644 --- a/man/kge2012.Rd +++ b/man/kge2012.Rd @@ -88,7 +88,6 @@ directly translating NSE-based interpretation thresholds to KGE. 
} \examples{ library(tidyhydro) -data(avacha) # Supply truth and predictions as bare column names kge2012(avacha, obs, sim) @@ -108,26 +107,8 @@ Kling–Gupta efficiency scores. Hydrology and Earth System Sciences, 23, 4323–4331. \doi{10.5194/hess-23-4323-2019} } \seealso{ -Other numeric metrics: -\code{\link{cv}()}, -\code{\link{kge}()}, -\code{\link{mse}()}, -\code{\link{nse}()}, -\code{\link{pbias}()}, -\code{\link{press}()}, -\code{\link{rmse}()}, -\code{\link{sfe}()} - -Other accuracy metrics: -\code{\link{cv}()}, -\code{\link{kge}()}, -\code{\link{mse}()}, -\code{\link{nse}()}, -\code{\link{pbias}()}, -\code{\link{press}()}, -\code{\link{rmse}()}, -\code{\link{sfe}()} +Other KGE variants: +\code{\link{kge}()} } -\concept{accuracy metrics} -\concept{numeric metrics} +\concept{KGE variants} \keyword{gof} diff --git a/man/mse.Rd b/man/mse.Rd index 699612f..86fab4f 100644 --- a/man/mse.Rd +++ b/man/mse.Rd @@ -60,7 +60,6 @@ where: } \examples{ library(tidyhydro) -data(avacha) # Supply truth and predictions as bare column names mse(avacha, obs, sim) @@ -82,24 +81,11 @@ e2020WR029001. 
\doi{10.1029/2020WR029001} } \seealso{ Other numeric metrics: -\code{\link{cv}()}, -\code{\link{kge}()}, -\code{\link{kge2012}()}, -\code{\link{nse}()}, \code{\link{pbias}()}, -\code{\link{press}()}, -\code{\link{rmse}()}, -\code{\link{sfe}()} +\code{\link{rmse}()} Other accuracy metrics: -\code{\link{cv}()}, -\code{\link{kge}()}, -\code{\link{kge2012}()}, -\code{\link{nse}()}, -\code{\link{pbias}()}, -\code{\link{press}()}, -\code{\link{rmse}()}, -\code{\link{sfe}()} +\code{\link{rmse}()} } \concept{accuracy metrics} \concept{numeric metrics} diff --git a/man/new-measure.Rd b/man/new-measure.Rd new file mode 100644 index 0000000..4ef32fc --- /dev/null +++ b/man/new-measure.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/aaa-new.R +\name{new-measure} +\alias{new-measure} +\alias{new_tendency_measure} +\alias{new_var_measure} +\alias{new_sym_measure} +\title{Construct a new measure function} +\usage{ +new_tendency_measure(fn) + +new_var_measure(fn) + +new_sym_measure(fn) +} +\arguments{ +\item{fn}{A function. The measure function to attach a measure-specific class} +} +\description{ +These functions provide convenient wrappers to create the three types of +measure functions in \code{tidyhydro}: measures of central tendency, variability +and symmetry. They add a measure-specific class to \code{fn} and +mimic a behaviour of \link[yardstick:metric_set]{metric_set}. These features +are used by \link{measure_set}. + +See \href{https://www.tidymodels.org/learn/develop/metrics/}{Custom performance metrics} for more +information about creating custom metrics. +} +\keyword{summary_stats} diff --git a/man/nse.Rd b/man/nse.Rd index 5d1a878..8840d29 100644 --- a/man/nse.Rd +++ b/man/nse.Rd @@ -81,7 +81,6 @@ as follows: } \examples{ library(tidyhydro) -data(avacha) # Supply truth and predictions as bare column names nse(avacha, obs, sim) @@ -99,27 +98,5 @@ and Water Quality Models: Performance Measures and Evaluation Criteria. 
Transactions of the ASABE, 58(6), 1763–1785. \doi{10.13031/trans.58.10715} } -\seealso{ -Other numeric metrics: -\code{\link{cv}()}, -\code{\link{kge}()}, -\code{\link{kge2012}()}, -\code{\link{mse}()}, -\code{\link{pbias}()}, -\code{\link{press}()}, -\code{\link{rmse}()}, -\code{\link{sfe}()} - -Other accuracy metrics: -\code{\link{cv}()}, -\code{\link{kge}()}, -\code{\link{kge2012}()}, -\code{\link{mse}()}, -\code{\link{pbias}()}, -\code{\link{press}()}, -\code{\link{rmse}()}, -\code{\link{sfe}()} -} -\concept{accuracy metrics} -\concept{numeric metrics} +\concept{NSE variants} \keyword{gof} diff --git a/man/pbias.Rd b/man/pbias.Rd index 47214d0..82b29a4 100644 --- a/man/pbias.Rd +++ b/man/pbias.Rd @@ -78,7 +78,6 @@ follows: } \examples{ library(tidyhydro) -data(avacha) # Supply truth and predictions as bare column names pbias(avacha, obs, sim) @@ -99,25 +98,8 @@ multilevel expert calibration. J. Hydrologic Eng. 4(2): 135-143 } \seealso{ Other numeric metrics: -\code{\link{cv}()}, -\code{\link{kge}()}, -\code{\link{kge2012}()}, \code{\link{mse}()}, -\code{\link{nse}()}, -\code{\link{press}()}, -\code{\link{rmse}()}, -\code{\link{sfe}()} - -Other accuracy metrics: -\code{\link{cv}()}, -\code{\link{kge}()}, -\code{\link{kge2012}()}, -\code{\link{mse}()}, -\code{\link{nse}()}, -\code{\link{press}()}, -\code{\link{rmse}()}, -\code{\link{sfe}()} +\code{\link{rmse}()} } -\concept{accuracy metrics} \concept{numeric metrics} \keyword{gof} diff --git a/man/press.Rd b/man/press.Rd index 505e780..97d1c71 100644 --- a/man/press.Rd +++ b/man/press.Rd @@ -77,7 +77,6 @@ log-transformed linear regression (\emph{Helsel et al., 2020}). } \examples{ library(tidyhydro) -data(avacha) # Supply truth and predictions as bare column names press(avacha, obs, sim) @@ -97,26 +96,8 @@ Gilroy, E. J. Statistical Methods in Water Resources. 484 (2020) \doi{10.3133/tm4A3}. 
} \seealso{ -Other numeric metrics: -\code{\link{cv}()}, -\code{\link{kge}()}, -\code{\link{kge2012}()}, -\code{\link{mse}()}, -\code{\link{nse}()}, -\code{\link{pbias}()}, -\code{\link{rmse}()}, -\code{\link{sfe}()} - -Other accuracy metrics: -\code{\link{cv}()}, -\code{\link{kge}()}, -\code{\link{kge2012}()}, -\code{\link{mse}()}, -\code{\link{nse}()}, -\code{\link{pbias}()}, -\code{\link{rmse}()}, +Other regression metrics: \code{\link{sfe}()} } -\concept{accuracy metrics} -\concept{numeric metrics} +\concept{regression metrics} \keyword{regression} diff --git a/man/rmse.Rd b/man/rmse.Rd index b6d9e8d..010f6e2 100644 --- a/man/rmse.Rd +++ b/man/rmse.Rd @@ -57,7 +57,6 @@ where: } \examples{ library(tidyhydro) -data(avacha) # Supply truth and predictions as bare column names rmse(avacha, obs, sim) @@ -67,24 +66,11 @@ rmse_vec(avacha$obs, avacha$sim) } \seealso{ Other numeric metrics: -\code{\link{cv}()}, -\code{\link{kge}()}, -\code{\link{kge2012}()}, \code{\link{mse}()}, -\code{\link{nse}()}, -\code{\link{pbias}()}, -\code{\link{press}()}, -\code{\link{sfe}()} +\code{\link{pbias}()} Other accuracy metrics: -\code{\link{cv}()}, -\code{\link{kge}()}, -\code{\link{kge2012}()}, -\code{\link{mse}()}, -\code{\link{nse}()}, -\code{\link{pbias}()}, -\code{\link{press}()}, -\code{\link{sfe}()} +\code{\link{mse}()} } \concept{accuracy metrics} \concept{numeric metrics} diff --git a/man/sfe.Rd b/man/sfe.Rd index 1925558..aab2c19 100644 --- a/man/sfe.Rd +++ b/man/sfe.Rd @@ -61,7 +61,6 @@ where: } \examples{ library(tidyhydro) -data(avacha) # Supply truth and predictions as bare column names sfe(avacha, obs, sim) @@ -86,26 +85,8 @@ Standards, p. 138). 
\url{https://www.nems.org.nz/documents/suspended-sediment} } \seealso{ -Other numeric metrics: -\code{\link{cv}()}, -\code{\link{kge}()}, -\code{\link{kge2012}()}, -\code{\link{mse}()}, -\code{\link{nse}()}, -\code{\link{pbias}()}, -\code{\link{press}()}, -\code{\link{rmse}()} - -Other accuracy metrics: -\code{\link{cv}()}, -\code{\link{kge}()}, -\code{\link{kge2012}()}, -\code{\link{mse}()}, -\code{\link{nse}()}, -\code{\link{pbias}()}, -\code{\link{press}()}, -\code{\link{rmse}()} +Other regression metrics: +\code{\link{press}()} } -\concept{accuracy metrics} -\concept{numeric metrics} +\concept{regression metrics} \keyword{regression}