diff --git a/NAMESPACE b/NAMESPACE index b987108..60ae0d2 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -6,10 +6,12 @@ export(compute_fisher) export(compute_kolmogorov_smirnoff) export(compute_prop_test) export(compute_ttest) +export(compute_unpaired_ttest) export(compute_wilcox) export(emergency_dates) export(generate_indicator_schema) export(get_associated_cpv_from_emergency) +export(get_country_id_from_name) export(grab_cpv) export(ind_1) export(ind_2) @@ -20,6 +22,8 @@ export(ind_6) export(ind_7) export(ind_8) export(ind_9) +export(test_set_1) +export(test_set_2) import(rlang) importFrom(DescTools,Gini) importFrom(dplyr,across) diff --git a/R/01-winning-rate-across-crisis.R b/R/01-winning-rate-across-crisis.R index 97b90fb..a2f7b5a 100644 --- a/R/01-winning-rate-across-crisis.R +++ b/R/01-winning-rate-across-crisis.R @@ -1,77 +1,3 @@ -#' compute Fisher-exact test https://en.wikipedia.org/wiki/Fisher%27s_exact_test -#' @description compute fisher test pvalue and estimate in piped expression -#' @keywords internal -#' @export -compute_fisher <- function(a, b, c, d) { - if (any(is.na(list(a, b, c, d)))) { - stop("All inputs must be non-missing") - } - - data <- matrix(c(a, b, c, d), ncol = 2) - c( - p_value = round(fisher.test(data, alternative = "greater")$p.value, 3), - estimate = round(fisher.test(data, alternative = "greater")$estimate, 3) - ) -} - -#' compute Barnard test https://en.wikipedia.org/wiki/Barnard%27s_test -#' @description compute Barnard test pvalue and estimate in piped expression -#' @keywords internal -#' @export -compute_barnard <- function(a, b, c, d, method = "boschloo") { - if (any(is.na(list(a, b, c, d)))) { - stop("All inputs must be non-missing") - } - # only pre - if ((a + b) > 0 & (c + d) == 0) { - 1 - } - # only post - else if ((a + b) == 0 & (c + d) > 0) { - 0 - } else { - data <- matrix(c(d, b, c, a), ncol = 2) - out_barn <- DescTools::BarnardTest(data, alternative = "greater", method = "boschloo") %>% - suppressWarnings() - c( - p_value = round(out_barn$p.value, 5), - estimate = round(out_barn$estimate, 3) - ) - } -} - -#' compute Z-test proportional -#' @description compute Z-test pvalue and estimate in piped expression -#' @keywords internal -#' @export -compute_prop_test <- function(a, b, c, d, correct = FALSE) { - if (any(is.na(list(a, b, c, d)))) { - stop("All inputs must be non-missing") - } - - m_1 <- a + b - m_2 <- c + d - p_1 <- b / m_1 - p_2 <- d / m_2 - diff_p2_p1 <- p_2 - p_1 - - c( - p_value = stats::prop.test( - x = c(d, b), - n = c(m_2, m_1), - correct = correct, - alternative = "greater" - )$p.value %>% suppressWarnings(), - estimate = stats::prop.test( - x = c(d, b), - n = c(m_2, m_1), - correct = correct, - alternative = "greater" - )$estimate %>% suppressWarnings() - ) -} - - #' Compute Winning rate across the crisis indicator #' #' @description @@ -90,6 +16,8 @@ compute_prop_test <- function(a, b, c, d, correct = FALSE) { #' @param emergency_name emergency name character string for which you wish to calculate the indicator for, e.g. "Coronavirus" "Terremoto Aquila" #' @param test_type character vector string to identifying the test type you want to apply, available alternatives are c("barnard", "fisher", "z-test") #' @param stat_unit statistical unit of measurement, aggregation variable, the indicator target +#' @param cpvs a vector of cpv on which contracts are filtered +#' @param ... other parameters for generate_indicator_schema as country_name #' @return indicator schema as from [generate_indicator_schema()] #' @examples #' \dontrun{ @@ -118,28 +46,19 @@ ind_1 <- function(data, publication_date, emergency_name, stat_unit, - test_type) { + test_type, + cpvs, + ...) { indicator_id <- 1 indicator_name <- "Winning rate across the crisis" aggregation_type <- quo_squash(enquo(stat_unit)) - emergency_scenario <- emergency_dates(emergency_name) - cpvs <- get_associated_cpv_from_emergency(emergency_scenario$em_name) - cpv_col <- grab_cpv(data = data) - test <- function(a, b, c, d, test_type) { - switch(test_type, - "barnard" = { - compute_barnard(a, b, c, d) - }, - "fisher" = { - compute_fisher(a, b, c, d) - }, - "z-test" = { - compute_prop_test(a, b, c, d) - }, - stop(paste0("No handler for ", test_type)) - ) + + emergency_scenario <- emergency_dates(emergency_name) + if (missing(cpvs)) { + cpvs <- get_associated_cpv_from_emergency(emergency_scenario$em_name) } + cpv_col <- grab_cpv(data = data) data %>% dplyr::mutate( @@ -173,7 +92,7 @@ ind_1 <- function(data, dplyr::mutate( ## apply test tab = paste(n_11, n_12, n_21, n_22, sep = "-"), - test = test(n_11, n_12, n_21, n_22, test_type)[1], + test = test_set_1(n_11, n_12, n_21, n_22, test_type)[1], # new companies --> at risk test = dplyr::if_else(m_1 == 0 & n_22 > 0, true = 0, @@ -187,7 +106,8 @@ ind_1 <- function(data, indicator_value = 1 - test, # 1 - pvalue aggregation_name = {{ stat_unit }}, aggregation_type = as_string(aggregation_type), - emergency = emergency_scenario + emergency = emergency_scenario, + ... ) %>% return() } diff --git a/R/02-awd-eco-value-across-crisis.R b/R/02-awd-eco-value-across-crisis.R index d3949ed..7709c39 100644 --- a/R/02-awd-eco-value-across-crisis.R +++ b/R/02-awd-eco-value-across-crisis.R @@ -1,34 +1,3 @@ -#' compute Wilcoxon-Mann-Whitney test in dplyr https://it.wikipedia.org/wiki/Test_di_Wilcoxon-Mann-Whitney -#' @description compute Wilcoxon-Mann-Whitney test pvalue -#' @keywords internal -#' @export -compute_wilcox <- function(data, var, group, exact = TRUE, alternative = "greater") { - test_res <- data %>% - wilcox.test(var ~ group, data = ., exact = exact, alternative = alternative) - c( - p_value = round(test_res$p.value, 3), - estimate = round(test_res$statistic, 3) - ) -} - -#' compute Kolmogorov Smirnov test in dplyr https://it.wikipedia.org/wiki/Test_di_Kolmogorov-Smirnov -#' @description compute Kolmogorov Smirnov test pvalue -#' @keywords internal -#' @export -compute_kolmogorov_smirnoff <- function(data, var, group, alternative = "less") { - test_res <- suppressWarnings({ - data %>% - ks.test(var ~ group, data = ., alternative = alternative) - }) - - c( - p_value = round(test_res$p.value, 3), - estimate = round(test_res$statistic, 3) - ) -} - - - #' Compute Awarded economic value across the crisis indicator #' #' @description @@ -47,7 +16,9 @@ compute_kolmogorov_smirnoff <- function(data, var, group, alternative = "less") #' @param stat_unit statistical target unit of measurement, aggregation variable, the indicator target #' @param publication_date the date in which the tender was published #' @param test_type character vector identifying the type of test you want to execute, alternatives are c("ks", "wilcoxon") +#' @param cpvs a vector of cpv on which contracts are filtered #' @param emergency_name emergency name character string for which you want to evaluate the indicator, e.g. "Coronavirus" "Terremoto Aquila" +#' @param ... other parameters for generate_indicator_schema as country_name #' @return indicator schema as from `generate_indicator_schema()` rows determined by aggregation level and `indicator_value` based on statistical test performed in `ind_2` #' @examples #' \dontrun{ @@ -79,34 +50,22 @@ ind_2 <- function(data, publication_date, emergency_name, stat_unit, - test_type) { + test_type, + cpvs, + ...) { indicator_id <- 2 indicator_name <- "Awarded economic value across the crisis" aggregation_type <- quo_squash(enquo(stat_unit)) + emergency_scenario <- emergency_dates(emergency_name) - cpvs <- get_associated_cpv_from_emergency(emergency_scenario$em_name) - cpv_col <- grab_cpv(data = data) - test <- function(data, var, group, test_type) { - # temporary: if two levels in group are not found: - if (length(unique(group)) != 2) { - # print("999") - 999 - } else { - # print("test") - switch(test_type, - "ks" = { - compute_kolmogorov_smirnoff(data, var, group) - }, - "wilcoxon" = { - compute_wilcox(data, var, group) - }, - stop(paste0("No handler for ", test_type)) - ) - } + if (missing(cpvs)) { + cpvs <- get_associated_cpv_from_emergency(emergency_scenario$em_name) } + cpv_col <- grab_cpv(data = data) + data %>% dplyr::mutate( prepost = dplyr::if_else(lubridate::ymd({{ publication_date }}) >= emergency_scenario$em_date, @@ -136,7 +95,7 @@ ind_2 <- function(data, npre > 0 & npost == 0 ~ 1, # not at risk, pvalue=1 npre == 0 & npost > 0 ~ 0, # at risk, pvalue=0 # npre > 0 & npost > 0 ~ test(var = {{ contract_value }}, group = prepost, data = ., test_type)[1], - TRUE ~ test(var = {{ contract_value }}, group = prepost, data = ., test_type)[1] + TRUE ~ test_set_2(var = {{ contract_value }}, group = prepost, data = ., test_type)[1] ) ) %>% generate_indicator_schema( @@ -145,7 +104,8 @@ ind_2 <- function(data, indicator_value = 1 - test, # 1 - pvalue aggregation_name = {{ stat_unit }}, aggregation_type = as_string(aggregation_type), - emergency = emergency_scenario + emergency = emergency_scenario, + ... ) %>% return() } diff --git a/R/03-ec-dev-across-crisis.R b/R/03-ec-dev-across-crisis.R index 7fb652d..154610a 100644 --- a/R/03-ec-dev-across-crisis.R +++ b/R/03-ec-dev-across-crisis.R @@ -15,8 +15,11 @@ #' @param award_value The date when the tender was awarded #' @param sums_paid The amount paid by the C.A. #' @param stat_unit the statistical unit of measurement (can be a vector of grouping variables), i.e. variable to group by +#' @param cpvs a vector of cpv on which contracts are filtered #' @param emergency_name emergency name character string for which you want to evaluate the indicator, e.g. "Coronavirus" "Terremoto Aquila" +#' @param test_type test type belonging to set 2 i.e. "ks", "wilcoxon" #' @param publication_date The date when the tender was published +#' @param ... other parameters for generate_indicator_schema as country_name #' @return indicator schema as from `generate_indicator_schema` #' @details DETAILS #' @examples @@ -28,6 +31,7 @@ #' award_value = importo_aggiudicazione, #' sums_paid = importo_lotto, #' stat_unit = cf_amministrazione_appaltante, +#' test_type = "wilcoxon", #' emergency_name = "coronavirus" #' ) #' } @@ -44,14 +48,23 @@ ind_3 <- function(data, sums_paid, stat_unit, emergency_name, - publication_date) { + publication_date, + test_type, + cpvs, + ...) { indicator_id <- 3 indicator_name <- "Economic deviation across the crisis" aggregation_type <- quo_squash(enquo(stat_unit)) emergency_scenario <- emergency_dates(emergency_name) - cpvs <- get_associated_cpv_from_emergency(emergency_scenario$em_name) + if (missing(cpvs)) { + cpvs <- get_associated_cpv_from_emergency(emergency_scenario$em_name) + } cpv_col <- grab_cpv(data = data) + if (missing(test_type)) { + test_type <- "wilcoxon" + } + data %>% dplyr::filter(!is.na({{ award_value }}) & !is.na({{ sums_paid }}) & @@ -80,7 +93,7 @@ ind_3 <- function(data, mean_post = mean(ratio[prepost == "post"]), median_pre = median(ratio[prepost == "pre"]), median_post = median(ratio[prepost == "post"]), - ind_3 = compute_kolmogorov_smirnoff(var = ratio, group = prepost, data = .)[1] + ind_3 = test_set_2(var = ratio, group = prepost, data = ., test_type)[1] ) %>% generate_indicator_schema( indicator_id = indicator_id, diff --git a/R/04-len-dev-across-crisis.R b/R/04-len-dev-across-crisis.R index bf9acfd..4543041 100644 --- a/R/04-len-dev-across-crisis.R +++ b/R/04-len-dev-across-crisis.R @@ -16,8 +16,11 @@ #' @param eff_end Effective end of the execution of the contract #' @param eff_start Effective contract signature #' @param stat_unit the statistical unit of measurement (can be a vector of grouping variables), i.e. variable to group by +#' @param cpvs a vector of cpv on which contracts are filtered #' @param emergency_name emergency name character string for which you want to evaluate the indicator, e.g. "Coronavirus" "Terremoto Aquila" +#' @param test_type test type belonging to set 2 i.e. "ks", "wilcoxon" #' @param publication_date The date when the tender was published +#' @param ... other parameters for generate_indicator_schema as country_name #' @return indicator schema as from `generate_indicator_schema` #' @details DETAILS #' @examples @@ -30,6 +33,7 @@ #' eff_end = data_effettiva_ultimazione, #' eff_start = data_stipula_contratto, #' stat_unit = cf_amministrazione_appaltante, +#' test_type = "wilcoxon", #' emergency_name = "coronavirus" #' ) #' } @@ -47,14 +51,22 @@ ind_4 <- function(data, eff_end, stat_unit, emergency_name, - publication_date) { + publication_date, + test_type, + cpvs, + ...) { indicator_id <- 4 indicator_name <- "Length deviation across the crisis" aggregation_type <- quo_squash(enquo(stat_unit)) emergency_scenario <- emergency_dates(emergency_name) - cpvs <- get_associated_cpv_from_emergency(emergency_scenario$em_name) + if (missing(cpvs)) { + cpvs <- get_associated_cpv_from_emergency(emergency_scenario$em_name) + } cpv_col <- grab_cpv(data = data) + if (missing(test_type)) { + test_type <- "wilcoxon" + } data %>% dplyr::filter( @@ -87,7 +99,7 @@ ind_4 <- function(data, mean_post = mean(ratio[prepost == "post"]), median_pre = median(ratio[prepost == "pre"]), median_post = median(ratio[prepost == "post"]), - ind_4 = compute_kolmogorov_smirnoff(var = ratio, group = prepost, data = .)[1] + ind_4 = test_set_2(var = ratio, group = prepost, data = ., test_type)[1] ) %>% generate_indicator_schema( indicator_id = indicator_id, @@ -95,7 +107,8 @@ ind_4 <- function(data, indicator_value = 1 - ind_4, # 1 - pvalue aggregation_name = {{ stat_unit }}, aggregation_type = as_string(aggregation_type), - emergency = emergency_scenario + emergency = emergency_scenario, + ... ) %>% return() } diff --git a/R/05-win-share-issuer-across-crisis.R b/R/05-win-share-issuer-across-crisis.R index 1627a54..3a808d5 100644 --- a/R/05-win-share-issuer-across-crisis.R +++ b/R/05-win-share-issuer-across-crisis.R @@ -14,7 +14,9 @@ #' @param publication_date The date when the tender was published #' @param stat_unit Column of entities who decide who win the public procurement #' @param winners Column of winning companies +#' @param cpvs a vector of cpv on which contracts are filtered #' @param emergency_name emergency name character string for which you want to evaluate the indicator, e.g. "Coronavirus" "Terremoto Aquila" +#' @param ... other parameters for generate_indicator_schema as country_name #' @return indicator schema as from `generate_indicator_schema` #' @details DETAILS #' @examples @@ -42,12 +44,16 @@ ind_5 <- function(data, stat_unit, publication_date, winners, - emergency_name) { + emergency_name, + cpvs, + ...) { indicator_id <- 5 indicator_name <- "Winner's share of issuer's contract across the crisis" aggregation_type <- quo_squash(enquo(stat_unit)) emergency_scenario <- emergency_dates(emergency_name) - cpvs <- get_associated_cpv_from_emergency(emergency_scenario$em_name) + if (missing(cpvs)) { + cpvs <- get_associated_cpv_from_emergency(emergency_scenario$em_name) + } cpv_col <- grab_cpv(data = data) data %>% @@ -104,7 +110,8 @@ ind_5 <- function(data, indicator_value = ind5, # no test aggregation_name = {{ stat_unit }}, aggregation_type = as_string(aggregation_type), - emergency = emergency_scenario + emergency = emergency_scenario, + ... ) %>% return() } diff --git a/R/06-communication-def-across-crisis.R b/R/06-communication-def-across-crisis.R index ce2a95c..26dbd58 100644 --- a/R/06-communication-def-across-crisis.R +++ b/R/06-communication-def-across-crisis.R @@ -16,7 +16,9 @@ #' @param emergency_name emergency name character string for which you want to evaluate the indicator, e.g. "Coronavirus" "Terremoto Aquila" #' @param award_col column indentifying id for that contract award #' @param stat_unit statistical unit of measurement and stat_uniting +#' @param cpvs a vector of cpv on which contracts are filtered #' @param test_type type of the test we would like to apply +#' @param ... other parameters for generate_indicator_schema as country_name #' @return indicator schema as from `generate_indicator_schema` #' @examples #' \dontrun{ @@ -47,28 +49,17 @@ ind_6 <- function(data, emergency_name, award_col, stat_unit, - test_type) { + test_type, + cpvs, + ...) { indicator_id <- 6 indicator_name <- "Communication default across the crisis" aggregation_type <- quo_squash(enquo(stat_unit)) emergency_scenario <- emergency_dates(emergency_name) - cpvs <- get_associated_cpv_from_emergency(emergency_scenario$em_name) - cpv_col <- grab_cpv(data = data) - - test <- function(a, b, c, d, test_type) { - switch(test_type, - "barnard" = { - compute_barnard(d, b, c, a) - }, - "fisher" = { - compute_fisher(a, b, c, d) - }, - "z-test" = { - compute_prop_test(a, b, c, d) - }, - stop(paste0("No handler for ", test_type)) - ) + if (missing(cpvs)) { + cpvs <- get_associated_cpv_from_emergency(emergency_scenario$em_name) } + cpv_col <- grab_cpv(data = data) data %>% @@ -106,7 +97,7 @@ ind_6 <- function(data, dplyr::rowwise() %>% dplyr::mutate( ## apply test - test = test(n_11, n_12, n_21, n_22, test_type)[1], + test = test_set_1(n_11, n_12, n_21, n_22, test_type)[1], ) %>% generate_indicator_schema( indicator_id = indicator_id, @@ -114,7 +105,8 @@ ind_6 <- function(data, indicator_value = 1 - test, # 1 - pvalue aggregation_name = {{ stat_unit }}, aggregation_type = as_string(aggregation_type), - emergency = emergency_scenario + emergency = emergency_scenario, + ... ) %>% return() } diff --git a/R/07-os-opportunistic-company.R b/R/07-os-opportunistic-company.R index c6117fa..1cc6edd 100644 --- a/R/07-os-opportunistic-company.R +++ b/R/07-os-opportunistic-company.R @@ -15,7 +15,9 @@ #' @param stat_unit The unique ID Code that identifies the awarded company (ex. VAT or Tax Number) #' @param final_award_date Date of award, as per the minutes #' @param emergency_name emergency name character string for which you want to evaluate the indicator, e.g. "Coronavirus" "Terremoto Aquila" +#' @param cpvs a vector of cpv on which contracts are filtered #' @param years_before int how many years we have to +#' @param ... other parameters for generate_indicator_schema as country_name #' @return indicator schema as from `generate_indicator_schema` #' @examples #' \dontrun{ @@ -43,12 +45,16 @@ ind_7 <- function(data, final_award_date, emergency_name, stat_unit, - years_before) { + years_before, + cpvs, + ...) { indicator_id <- 7 indicator_name <- "One-shot opportunistic companies over the crisis" aggregation_type <- quo_squash(enquo(stat_unit)) emergency_scenario <- emergency_dates(emergency_name) - cpvs <- get_associated_cpv_from_emergency(emergency_scenario$em_name) + if (missing(cpvs)) { + cpvs <- get_associated_cpv_from_emergency(emergency_scenario$em_name) + } cpv_col <- grab_cpv(data = data) data %>% @@ -88,7 +94,8 @@ ind_7 <- function(data, indicator_value = flag_oneshot, # no test aggregation_name = {{ stat_unit }}, aggregation_type = as_string(aggregation_type), - emergency = emergency_scenario + emergency = emergency_scenario, + ... ) %>% return() } diff --git a/R/08-pre-exist-contracts-ext-after-emerg-outb.R b/R/08-pre-exist-contracts-ext-after-emerg-outb.R index 3e3d1d3..d13bd79 100644 --- a/R/08-pre-exist-contracts-ext-after-emerg-outb.R +++ b/R/08-pre-exist-contracts-ext-after-emerg-outb.R @@ -16,7 +16,9 @@ #' @param stat_unit tatistical unit of measurement, aggregation variable, the indicator target. In this case the identifier of agency or winners. #' @param variant_date Date of contract variation #' @param months_win time window for variation to be happening +#' @param cpvs a vector of cpv on which contracts are filtered #' @param emergency_name emergency name character string for which you want to evaluate the indicator, e.g. "Coronavirus" "Terremoto Aquila" +#' @param ... other parameters for generate_indicator_schema as country_name #' @return indicator schema as from `generate_indicator_schema` #' @examples #' \dontrun{ @@ -44,12 +46,16 @@ ind_8 <- function(data, stat_unit, variant_date, emergency_name, - months_win = 6) { + months_win = 6, + cpvs, + ...) { indicator_id <- 8 indicator_name <- "Pre-existing contracts modified after the crisis" aggregation_type <- quo_squash(enquo(stat_unit)) emergency_scenario <- emergency_dates(emergency_name) - cpvs <- get_associated_cpv_from_emergency(emergency_scenario$em_name) + if (missing(cpvs)) { + cpvs <- get_associated_cpv_from_emergency(emergency_scenario$em_name) + } cpv_col <- grab_cpv(data = data) data %>% @@ -98,7 +104,8 @@ ind_8 <- function(data, indicator_value = rf_value, # no test aggregation_name = {{ stat_unit }}, aggregation_type = as_string(aggregation_type), - emergency = emergency_scenario + emergency = emergency_scenario, + ... ) %>% return() } diff --git a/R/09-lengthy-contract.R b/R/09-lengthy-contract.R index c84c819..c7353ed 100644 --- a/R/09-lengthy-contract.R +++ b/R/09-lengthy-contract.R @@ -25,7 +25,9 @@ compute_ttest <- function(mean_to_compare, ground_mean) { #' @param stat_unit the statistical unit of measurement (can be a vector of grouping variables), i.e. variable to group by #' @param eff_start Effective start of the execution of the contract #' @param eff_end Effective end of the execution of the contract +#' @param cpvs a vector of cpv on which contracts are filtered #' @param emergency_name emergency name character string for which you want to evaluate the indicator, e.g. "Coronavirus" "Terremoto Aquila" +#' @param ... other parameters for generate_indicator_schema as country_name #' @return indicator schema as from `generate_indicator_schema` #' @examples #' \dontrun{ @@ -59,12 +61,16 @@ ind_9 <- function(data, stat_unit, eff_start, eff_end, - emergency_name) { + emergency_name, + cpvs, + ...) { indicator_id <- 9 indicator_name <- "Lengthy contracts" aggregation_type <- rlang::quo_squash(rlang::enquo(stat_unit)) emergency_scenario <- emergency_dates(emergency_name) - cpvs <- get_associated_cpv_from_emergency(emergency_scenario$em_name) + if (missing(cpvs)) { + cpvs <- get_associated_cpv_from_emergency(emergency_scenario$em_name) + } cpv_col <- grab_cpv(data = data) data_out <- data %>% @@ -110,7 +116,8 @@ ind_9 <- function(data, indicator_value = 1 - wilctest, # 1 - pvalue aggregation_name = {{ stat_unit }}, aggregation_type = rlang::as_string(aggregation_type), - emergency = emergency_scenario + emergency = emergency_scenario, + ... ) %>% return() } diff --git a/R/statistical_tests.R b/R/statistical_tests.R new file mode 100644 index 0000000..55d8607 --- /dev/null +++ b/R/statistical_tests.R @@ -0,0 +1,162 @@ +#' compute Wilcoxon-Mann-Whitney test in dplyr https://it.wikipedia.org/wiki/Test_di_Wilcoxon-Mann-Whitney +#' @description compute Wilcoxon-Mann-Whitney test pvalue +#' @keywords internal +#' @export +compute_wilcox <- function(data, var, group, exact = TRUE, alternative = "greater", paired = FALSE) { + test_res <- data %>% + wilcox.test(var ~ group, data = ., exact = exact, alternative = alternative) + c( + p_value = round(test_res$p.value, 3), + estimate = round(test_res$statistic, 3) + ) +} + +#' compute Kolmogorov Smirnov test in dplyr https://it.wikipedia.org/wiki/Test_di_Kolmogorov-Smirnov +#' @description compute Kolmogorov Smirnov test pvalue +#' @keywords internal +#' @export +compute_kolmogorov_smirnoff <- function(data, var, group, alternative = "less") { + test_res <- suppressWarnings({ + data %>% + ks.test(var ~ group, data = ., alternative = alternative) + }) + + c( + p_value = round(test_res$p.value, 3), + estimate = round(test_res$statistic, 3) + ) +} + + + +#' compute Fisher-exact test https://en.wikipedia.org/wiki/Fisher%27s_exact_test +#' @description compute fisher test pvalue and estimate in piped expression +#' @keywords internal +#' @export +compute_fisher <- function(a, b, c, d) { + if (any(is.na(list(a, b, c, d)))) { + stop("All inputs must be non-missing") + } + + data <- matrix(c(a, b, c, d), ncol = 2) + c( + p_value = round(fisher.test(data, alternative = "greater")$p.value, 3), + estimate = round(fisher.test(data, alternative = "greater")$estimate, 3) + ) +} + +#' compute Barnard test https://en.wikipedia.org/wiki/Barnard%27s_test +#' @description compute Barnard test pvalue and estimate in piped expression +#' @keywords internal +#' @export +compute_barnard <- function(a, b, c, d, method = "boschloo") { + if (any(is.na(list(a, b, c, d)))) { + stop("All inputs must be non-missing") + } + # only pre + if ((a + b) > 0 & (c + d) == 0) { + 1 + } + # only post + else if ((a + b) == 0 & (c + d) > 0) { + 0 + } else { + data <- matrix(c(d, b, c, a), ncol = 2) + out_barn <- DescTools::BarnardTest(data, alternative = "greater", method = "boschloo") %>% + suppressWarnings() + c( + p_value = round(out_barn$p.value, 5), + estimate = round(out_barn$estimate, 3) + ) + } +} + +#' compute Z-test proportional +#' @description compute Z-test pvalue and estimate in piped expression +#' @keywords internal +#' @export +compute_prop_test <- function(a, b, c, d, correct = FALSE) { + if (any(is.na(list(a, b, c, d)))) { + stop("All inputs must be non-missing") + } + + m_1 <- a + b + m_2 <- c + d + p_1 <- b / m_1 + p_2 <- d / m_2 + diff_p2_p1 <- p_2 - p_1 + + c( + p_value = stats::prop.test( + x = c(d, b), + n = c(m_2, m_1), + correct = correct, + alternative = "greater" + )$p.value %>% suppressWarnings(), + estimate = stats::prop.test( + x = c(d, b), + n = c(m_2, m_1), + correct = correct, + alternative = "greater" + )$estimate %>% suppressWarnings() + ) +} + + +#' compute unpaired t-test test +#' @description compute unpaired t test +#' @keywords internal +#' @export +compute_unpaired_ttest <- function(.data, var, group, alternative = "less", paired = FALSE) { + test_res <- suppressWarnings({ + .data %>% + t.test(formula = .[var] ~ .[group], data = ., alternative = alternative, paired = paired) + }) + + c( + p_value = round(test_res$p.value, 3), + estimate = round(test_res$statistic, 3) + ) +} + +#' switch test wrt statistical circumstances set 1 (indicators 1, ...) +#' @description switch test wrt statistical circumstances +#' @keywords internal +#' @export +test_set_1 <- function(a, b, c, d, test_type) { + switch(test_type, + "barnard" = { + compute_barnard(a, b, c, d) + }, + "fisher" = { + compute_fisher(a, b, c, d) + }, + "z-test" = { + compute_prop_test(a, b, c, d) + }, + stop(paste0("No handler for ", test_type)) + ) +} + +#' switch test wrt statistical circumstances set 2 (indicators 2,3,4 ...) +#' @description switch test wrt statistical circumstances +#' @keywords internal +#' @export +test_set_2 <- function(data, var, group, test_type) { + # temporary: if two levels in group are not found: + if (length(unique(group)) != 2) { + # print("999") + 999 + } else { + # print("test") + switch(test_type, + "ks" = { + compute_kolmogorov_smirnoff(data, var, group) + }, + "wilcoxon" = { + compute_wilcox(data, var, group) + }, + stop(paste0("No handler for ", test_type)) + ) + } +} diff --git a/R/utils.R b/R/utils.R index 10663f9..38a6632 100644 --- a/R/utils.R +++ b/R/utils.R @@ -97,10 +97,37 @@ get_associated_cpv_from_emergency <- function(emergency_name) { return(cpv_match) } + +#' mapping from country name to id (this is used for frontend purposes) +#' @keywords internal +#' @export +get_country_id_from_name <- function(country_name) { + associated_id_list <- list( + "Italy" = 1, + "Spain" = 2, + "Portugal" = 3, + "Ireland" = 4 + ) + country_match <- agrep(country_name, names(associated_id_list), max.distance = 0.3) + + if (length(country_match) > 0) { + country <- list() + country$name <- names(associated_id_list[country_match[1]]) + country$id <- associated_id_list[country_match[1]][[1]] + return(country) + } else { + # if no matches were found, return an error message + return("Error: Country specified not found in list. Country partners avaialble are 'Italy','Spain', 'Portugal', 'Ireland'. If you are willing to partecipate and generate indicators for your country please write email at @maintaner ") + } +} + + + + #' generate indicator schema #' @keywords internal #' @export -generate_indicator_schema <- function(.data, indicator_id, aggregation_type, emergency, indicator_name, ...) { # ... +generate_indicator_schema <- function(.data, indicator_id, aggregation_type, emergency, indicator_name, country_name = "Italy", ...) { # ... common_schema <- .data %>% dplyr::transmute( indicator_id = indicator_id, @@ -111,8 +138,8 @@ generate_indicator_schema <- function(.data, indicator_id, aggregation_type, eme emergency_id = emergency$em_id, # emergency_type = emergency$em_type, emergency_name = emergency$em_name, - country_id = "1", - country_name = "Italy", + country_id = get_country_id_from_name(country_name)$id, + country_name = get_country_id_from_name(country_name)$name, indicator_last_update = lubridate::now(), data_last_update = lubridate::now() ) diff --git a/man/compute_barnard.Rd b/man/compute_barnard.Rd index 2f35057..4ecc75b 100644 --- a/man/compute_barnard.Rd +++ b/man/compute_barnard.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/01-winning-rate-across-crisis.R +% Please edit documentation in R/statistical_tests.R \name{compute_barnard} \alias{compute_barnard} \title{compute Barnard test https://en.wikipedia.org/wiki/Barnard\%27s_test} diff --git a/man/compute_fisher.Rd b/man/compute_fisher.Rd index 8c03853..553c63f 100644 --- a/man/compute_fisher.Rd +++ b/man/compute_fisher.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/01-winning-rate-across-crisis.R +% Please edit documentation in R/statistical_tests.R \name{compute_fisher} \alias{compute_fisher} \title{compute Fisher-exact test https://en.wikipedia.org/wiki/Fisher\%27s_exact_test} diff --git a/man/compute_kolmogorov_smirnoff.Rd b/man/compute_kolmogorov_smirnoff.Rd index 9a9eda0..94efe6d 100644 --- a/man/compute_kolmogorov_smirnoff.Rd +++ b/man/compute_kolmogorov_smirnoff.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/02-awd-eco-value-across-crisis.R +% Please edit documentation in R/statistical_tests.R \name{compute_kolmogorov_smirnoff} \alias{compute_kolmogorov_smirnoff} \title{compute Kolmogorov Smirnov test in dplyr https://it.wikipedia.org/wiki/Test_di_Kolmogorov-Smirnov} diff --git a/man/compute_prop_test.Rd b/man/compute_prop_test.Rd index 938cff0..5efdf77 100644 --- a/man/compute_prop_test.Rd +++ b/man/compute_prop_test.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/01-winning-rate-across-crisis.R +% Please edit documentation in R/statistical_tests.R \name{compute_prop_test} \alias{compute_prop_test} \title{compute Z-test proportional} diff --git a/man/compute_unpaired_ttest.Rd b/man/compute_unpaired_ttest.Rd new file mode 100644 index 0000000..b2ce46a --- /dev/null +++ b/man/compute_unpaired_ttest.Rd @@ -0,0 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/statistical_tests.R +\name{compute_unpaired_ttest} +\alias{compute_unpaired_ttest} +\title{compute unpaired t-test test} +\usage{ +compute_unpaired_ttest(.data, var, group, alternative = "less", paired = FALSE) +} +\description{ +compute unpaired t test +} +\keyword{internal} diff --git a/man/compute_wilcox.Rd b/man/compute_wilcox.Rd index 1760045..7c28142 100644 --- a/man/compute_wilcox.Rd +++ b/man/compute_wilcox.Rd @@ -1,10 +1,17 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/02-awd-eco-value-across-crisis.R +% Please edit documentation in R/statistical_tests.R \name{compute_wilcox} \alias{compute_wilcox} \title{compute Wilcoxon-Mann-Whitney test in dplyr https://it.wikipedia.org/wiki/Test_di_Wilcoxon-Mann-Whitney} \usage{ -compute_wilcox(data, var, group, exact = TRUE, alternative = "greater") +compute_wilcox( + data, + var, + group, + exact = TRUE, + alternative = "greater", + paired = FALSE +) } \description{ compute Wilcoxon-Mann-Whitney test pvalue diff --git a/man/generate_indicator_schema.Rd b/man/generate_indicator_schema.Rd index e3d6671..cc9ba70 100644 --- a/man/generate_indicator_schema.Rd +++ b/man/generate_indicator_schema.Rd @@ -10,6 +10,7 @@ generate_indicator_schema( aggregation_type, emergency, indicator_name, + country_name = "Italy", ... ) } diff --git a/man/get_country_id_from_name.Rd b/man/get_country_id_from_name.Rd new file mode 100644 index 0000000..5f89f7b --- /dev/null +++ b/man/get_country_id_from_name.Rd @@ -0,0 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{get_country_id_from_name} +\alias{get_country_id_from_name} +\title{mapping from country name to id (this is used for frontend purposes)} +\usage{ +get_country_id_from_name(country_name) +} +\description{ +mapping from country name to id (this is used for frontend purposes) +} +\keyword{internal} diff --git a/man/ind_1.Rd b/man/ind_1.Rd index e317412..66649a5 100644 --- a/man/ind_1.Rd +++ b/man/ind_1.Rd @@ -4,7 +4,7 @@ \alias{ind_1} \title{Compute Winning rate across the crisis indicator} \usage{ -ind_1(data, publication_date, emergency_name, stat_unit, test_type) +ind_1(data, publication_date, emergency_name, stat_unit, test_type, cpvs, ...) } \arguments{ \item{data}{bndcp data} @@ -16,6 +16,10 @@ ind_1(data, publication_date, emergency_name, stat_unit, test_type) \item{stat_unit}{statistical unit of measurement, aggregation variable, the indicator target} \item{test_type}{character vector string to identifying the test type you want to apply, available alternatives are c("barnard", "fisher", "z-test")} + +\item{cpvs}{a vector of cpv on which contracts are filtered} + +\item{...}{other parameters for generate_indicator_schema as country_name} } \value{ indicator schema as from \code{\link[=generate_indicator_schema]{generate_indicator_schema()}} diff --git a/man/ind_2.Rd b/man/ind_2.Rd index 3bf4e75..9f4533f 100644 --- a/man/ind_2.Rd +++ b/man/ind_2.Rd @@ -10,7 +10,9 @@ ind_2( publication_date, emergency_name, stat_unit, - test_type + test_type, + cpvs, + ... ) } \arguments{ @@ -25,6 +27,10 @@ ind_2( \item{stat_unit}{statistical target unit of measurement, aggregation variable, the indicator target} \item{test_type}{character vector identifying the type of test you want to execute, alternatives are c("ks", "wilcoxon")} + +\item{cpvs}{a vector of cpv on which contracts are filtered} + +\item{...}{other parameters for generate_indicator_schema as country_name} } \value{ indicator schema as from \code{generate_indicator_schema()} rows determined by aggregation level and \code{indicator_value} based on statistical test performed in \code{ind_2} diff --git a/man/ind_3.Rd b/man/ind_3.Rd index 7db7b6f..0ffa66e 100644 --- a/man/ind_3.Rd +++ b/man/ind_3.Rd @@ -10,7 +10,10 @@ ind_3( sums_paid, stat_unit, emergency_name, - publication_date + publication_date, + test_type, + cpvs, + ... ) } \arguments{ @@ -25,6 +28,12 @@ ind_3( \item{emergency_name}{emergency name character string for which you want to evaluate the indicator, e.g. "Coronavirus" "Terremoto Aquila"} \item{publication_date}{The date when the tender was published} + +\item{test_type}{test type belonging to set 2 i.e. "ks", "wilcoxon"} + +\item{cpvs}{a vector of cpv on which contracts are filtered} + +\item{...}{other parameters for generate_indicator_schema as country_name} } \value{ indicator schema as from \code{generate_indicator_schema} @@ -58,6 +67,7 @@ if (interactive()) { award_value = importo_aggiudicazione, sums_paid = importo_lotto, stat_unit = cf_amministrazione_appaltante, + test_type = "wilcoxon", emergency_name = "coronavirus" ) } diff --git a/man/ind_4.Rd b/man/ind_4.Rd index 29c0d42..4a8de2a 100644 --- a/man/ind_4.Rd +++ b/man/ind_4.Rd @@ -11,7 +11,10 @@ ind_4( eff_end, stat_unit, emergency_name, - publication_date + publication_date, + test_type, + cpvs, + ... ) } \arguments{ @@ -28,6 +31,12 @@ ind_4( \item{emergency_name}{emergency name character string for which you want to evaluate the indicator, e.g. "Coronavirus" "Terremoto Aquila"} \item{publication_date}{The date when the tender was published} + +\item{test_type}{test type belonging to set 2 i.e. "ks", "wilcoxon"} + +\item{cpvs}{a vector of cpv on which contracts are filtered} + +\item{...}{other parameters for generate_indicator_schema as country_name} } \value{ indicator schema as from \code{generate_indicator_schema} @@ -62,6 +71,7 @@ if (interactive()) { eff_end = data_effettiva_ultimazione, eff_start = data_stipula_contratto, stat_unit = cf_amministrazione_appaltante, + test_type = "wilcoxon", emergency_name = "coronavirus" ) } diff --git a/man/ind_5.Rd b/man/ind_5.Rd index 99241b3..b9fce4d 100644 --- a/man/ind_5.Rd +++ b/man/ind_5.Rd @@ -4,7 +4,7 @@ \alias{ind_5} \title{Compute Winner's share of issuer's contract across the crisis indicator} \usage{ -ind_5(data, stat_unit, publication_date, winners, emergency_name) +ind_5(data, stat_unit, publication_date, winners, emergency_name, cpvs, ...) } \arguments{ \item{data}{data to be passed, expects tibble} @@ -16,6 +16,10 @@ ind_5(data, stat_unit, publication_date, winners, emergency_name) \item{winners}{Column of winning companies} \item{emergency_name}{emergency name character string for which you want to evaluate the indicator, e.g. "Coronavirus" "Terremoto Aquila"} + +\item{cpvs}{a vector of cpv on which contracts are filtered} + +\item{...}{other parameters for generate_indicator_schema as country_name} } \value{ indicator schema as from \code{generate_indicator_schema} diff --git a/man/ind_6.Rd b/man/ind_6.Rd index 9e1cecc..88ef991 100644 --- a/man/ind_6.Rd +++ b/man/ind_6.Rd @@ -4,7 +4,16 @@ \alias{ind_6} \title{Compute Communication default across the crisis indicator} \usage{ -ind_6(data, publication_date, emergency_name, award_col, stat_unit, test_type) +ind_6( + data, + publication_date, + emergency_name, + award_col, + stat_unit, + test_type, + cpvs, + ... +) } \arguments{ \item{data}{mock_data_core example bdncp data} @@ -18,6 +27,10 @@ ind_6(data, publication_date, emergency_name, award_col, stat_unit, test_type) \item{stat_unit}{statistical unit of measurement and stat_uniting} \item{test_type}{type of the test we would like to apply} + +\item{cpvs}{a vector of cpv on which contracts are filtered} + +\item{...}{other parameters for generate_indicator_schema as country_name} } \value{ indicator schema as from \code{generate_indicator_schema} diff --git a/man/ind_7.Rd b/man/ind_7.Rd index d9e2937..92fd9ce 100644 --- a/man/ind_7.Rd +++ b/man/ind_7.Rd @@ -4,7 +4,15 @@ \alias{ind_7} \title{Compute One-shot opportunistic companies over the crisiss indicator} \usage{ -ind_7(data, final_award_date, emergency_name, stat_unit, years_before) +ind_7( + data, + final_award_date, + emergency_name, + stat_unit, + years_before, + cpvs, + ... +) } \arguments{ \item{data}{mock_data_core exmaple data} @@ -16,6 +24,10 @@ ind_7(data, final_award_date, emergency_name, stat_unit, years_before) \item{stat_unit}{The unique ID Code that identifies the awarded company (ex. VAT or Tax Number)} \item{years_before}{int how many years we have to} + +\item{cpvs}{a vector of cpv on which contracts are filtered} + +\item{...}{other parameters for generate_indicator_schema as country_name} } \value{ indicator schema as from \code{generate_indicator_schema} diff --git a/man/ind_8.Rd b/man/ind_8.Rd index 678b8c0..ce76785 100644 --- a/man/ind_8.Rd +++ b/man/ind_8.Rd @@ -10,7 +10,9 @@ ind_8( stat_unit, variant_date, emergency_name, - months_win = 6 + months_win = 6, + cpvs, + ... ) } \arguments{ @@ -25,6 +27,10 @@ ind_8( \item{emergency_name}{emergency name character string for which you want to evaluate the indicator, e.g. "Coronavirus" "Terremoto Aquila"} \item{months_win}{time window for variation to be happening} + +\item{cpvs}{a vector of cpv on which contracts are filtered} + +\item{...}{other parameters for generate_indicator_schema as country_name} } \value{ indicator schema as from \code{generate_indicator_schema} diff --git a/man/ind_9.Rd b/man/ind_9.Rd index da565ec..7907838 100644 --- a/man/ind_9.Rd +++ b/man/ind_9.Rd @@ -4,7 +4,16 @@ \alias{ind_9} \title{compute Lenghty Contracts indicator} \usage{ -ind_9(data, publication_date, stat_unit, eff_start, eff_end, emergency_name) +ind_9( + data, + publication_date, + stat_unit, + eff_start, + eff_end, + emergency_name, + cpvs, + ... +) } \arguments{ \item{data}{bndcp data} @@ -18,6 +27,10 @@ ind_9(data, publication_date, stat_unit, eff_start, eff_end, emergency_name) \item{eff_end}{Effective end of the execution of the contract} \item{emergency_name}{emergency name character string for which you want to evaluate the indicator, e.g. "Coronavirus" "Terremoto Aquila"} + +\item{cpvs}{a vector of cpv on which contracts are filtered} + +\item{...}{other parameters for generate_indicator_schema as country_name} } \value{ indicator schema as from \code{generate_indicator_schema} @@ -47,7 +60,7 @@ if (interactive()) { publication_date = data_pubblicazione, stat_unit = cf_amministrazione_appaltante, cpv = cod_cpv, - eff_start = data_inizio_effettiva , + eff_start = data_inizio_effettiva, eff_end = data_effettiva_ultimazione, emergency_name = "coronavirus" ) diff --git a/man/test_set_1.Rd b/man/test_set_1.Rd new file mode 100644 index 0000000..cf13216 --- /dev/null +++ b/man/test_set_1.Rd @@ -0,0 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/statistical_tests.R +\name{test_set_1} +\alias{test_set_1} +\title{switch test wrt statistical circumstances set 1 (indicators 1, ...)} +\usage{ +test_set_1(a, b, c, d, test_type) +} +\description{ +switch test wrt statistical circumstances +} +\keyword{internal} diff --git a/man/test_set_2.Rd b/man/test_set_2.Rd new file mode 100644 index 0000000..59ad0cf --- /dev/null +++ b/man/test_set_2.Rd @@ -0,0 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/statistical_tests.R +\name{test_set_2} +\alias{test_set_2} +\title{switch test wrt statistical circumstances set 2 (indicators 2,3,4 ...)} +\usage{ +test_set_2(data, var, group, test_type) +} +\description{ +switch test wrt statistical circumstances +} +\keyword{internal} diff --git a/tests/testthat/test-01-winning-rate-across-crisis.R b/tests/testthat/test-01-winning-rate-across-crisis.R index 404de82..30e9394 100644 --- a/tests/testthat/test-01-winning-rate-across-crisis.R +++ b/tests/testthat/test-01-winning-rate-across-crisis.R @@ -234,3 +234,46 @@ test_that("check if the indicator table, in its column `emergency_name` and `eme ) ) }) + + +## test indciator on other cpvs +## n rows less or equal than the filtered + + +test_that("check if `indicator_value` lays inbetween min/max values accroding to test chosen", { + expect_within_range( + suppressWarnings({ + ind_1( + data = mock_data_core, + publication_date = data_pubblicazione, + stat_unit = cf_amministrazione_appaltante, + emergency_name = "terremoto aquila", + test_type = "fisher" + ) + }), + min = 0, max = 1 + ) +}) + +# expect to have less rows or equal rows when cpvs are filtered +test_that("check if the number of rows when indicator is filtered out by cpv is loweer than the one with more cpvs on it (i.e. the defualt)", { + expect_lte( + suppressWarnings({ + nrow(ind_1( + data = mock_data_core, + publication_date = data_pubblicazione, + stat_unit = cf_amministrazione_appaltante, + emergency_name = "terremoto aquila", + test_type = "fisher", + cpvs = c(33, 34, 38, 39, 41, 44, 65, 85) + )) + }), + expected = nrow(ind_1( + data = mock_data_core, + publication_date = data_pubblicazione, + stat_unit = cf_amministrazione_appaltante, + emergency_name = "terremoto aquila", + test_type = "fisher" + )) + ) +}) diff --git a/tests/testthat/test-02-awd-eco-value-across-crisis.R b/tests/testthat/test-02-awd-eco-value-across-crisis.R index 19d9272..65665f7 100644 --- a/tests/testthat/test-02-awd-eco-value-across-crisis.R +++ b/tests/testthat/test-02-awd-eco-value-across-crisis.R @@ -52,7 +52,7 @@ test_that("check `ind_2()` are 12 columns as according to `generate_indicator_sc contract_value = importo_complessivo_gara, publication_date = data_pubblicazione, stat_unit = cf_amministrazione_appaltante, - test_type = "ks", + test_type = "wilcoxon", emergency_name = "coronavirus" ) }), 12 @@ -75,7 +75,7 @@ test_that("check column names are as according to pre determined schema", { contract_value = importo_complessivo_gara, publication_date = data_pubblicazione, stat_unit = cf_amministrazione_appaltante, - test_type = "ks", + test_type = "wilcoxon", emergency_name = "coronavirus" )) }), col_names, @@ -85,7 +85,7 @@ test_that("check column names are as according to pre determined schema", { -test_that("check if `indicator_value` lays inbetween min/max values accroding to test chosen (Kolmogorv Smirnov)", { +test_that("check if `indicator_value` lays inbetween min/max values accroding to test chosen (Wilcoxon-Mann-Whitney)", { expect_within_range( suppressWarnings({ ind_2( @@ -93,7 +93,7 @@ test_that("check if `indicator_value` lays inbetween min/max values accroding to contract_value = importo_complessivo_gara, publication_date = data_pubblicazione, stat_unit = cf_amministrazione_appaltante, - test_type = "ks", + test_type = "wilcoxon", emergency_name = "coronavirus" ) }), @@ -104,7 +104,7 @@ test_that("check if `indicator_value` lays inbetween min/max values accroding to -test_that("check if `indicator_value` lays inbetween min/max values accroding to test chosen (Wilcoxon-Mann-Whitney)", { +test_that("check if `indicator_value` lays inbetween min/max values accroding to test chosen (Kolmogorv Smirnov)", { expect_within_range( suppressWarnings({ ind_2( @@ -112,7 +112,7 @@ test_that("check if `indicator_value` lays inbetween min/max values accroding to contract_value = importo_complessivo_gara, publication_date = data_pubblicazione, stat_unit = cf_amministrazione_appaltante, - test_type = "wilcoxon", + test_type = "ks", emergency_name = "coronavirus" ) }), @@ -130,7 +130,7 @@ test_that("check if the number of rows is coherent with the aggregation level (` contract_value = importo_complessivo_gara, publication_date = data_pubblicazione, stat_unit = provincia, - test_type = "ks", + test_type = "wilcoxon", emergency_name = "coronavirus" ) }), @@ -147,7 +147,7 @@ test_that("check if the number of rows is coherent with the aggregation level (` contract_value = importo_complessivo_gara, publication_date = data_pubblicazione, stat_unit = cf_amministrazione_appaltante, - test_type = "ks", + test_type = "wilcoxon", emergency_name = "coronavirus" ) }), @@ -165,7 +165,7 @@ test_that("check if `indicator_value` lays inbetween min/max values accroding to contract_value = importo_complessivo_gara, publication_date = data_pubblicazione, stat_unit = cf_amministrazione_appaltante, - test_type = "ks", + test_type = "wilcoxon", emergency_name = "terremoto aquila" ) }), @@ -183,7 +183,7 @@ test_that("check if the number of rows is coherent with the aggregation level (` contract_value = importo_complessivo_gara, publication_date = data_pubblicazione, stat_unit = provincia, - test_type = "ks", + test_type = "wilcoxon", emergency_name = "terremoto aquila" ) }), @@ -201,10 +201,39 @@ test_that("check if `indicator_value` lays inbetween min/max values (different a contract_value = importo_complessivo_gara, publication_date = data_pubblicazione, stat_unit = provincia, - test_type = "ks", + test_type = "wilcoxon", emergency_name = "terremoto aquila" ) }), min = 0, max = 1 ) }) + + + +# expect to have less rows or equal rows when cpvs are filtered +test_that("check if the number of rows when indicator is filtered out by cpv is loweer than the one with more cpvs on it (i.e. the defualt)", { + expect_lte( + suppressWarnings({ + nrow(ind_2( + data = mock_data_core, + contract_value = importo_complessivo_gara, + publication_date = data_pubblicazione, + stat_unit = cf_amministrazione_appaltante, + test_type = "wilcoxon", + emergency_name = "Coronavirus", + cpvs = c(33, 34, 38, 39, 41, 44, 65, 85) + )) + }), + expected = suppressWarnings({ + nrow(ind_2( + data = mock_data_core, + contract_value = importo_complessivo_gara, + publication_date = data_pubblicazione, + stat_unit = cf_amministrazione_appaltante, + test_type = "wilcoxon", + emergency_name = "Coronavirus" + )) + }) + ) +}) diff --git a/tests/testthat/test-03-ec-dev-across-crisis.R b/tests/testthat/test-03-ec-dev-across-crisis.R index fb76ef4..8e1f020 100644 --- a/tests/testthat/test-03-ec-dev-across-crisis.R +++ b/tests/testthat/test-03-ec-dev-across-crisis.R @@ -53,6 +53,7 @@ test_that("check `ind_3()` are 12 columns as according to `generate_indicator_sc award_value = importo_aggiudicazione, sums_paid = importo_lotto, stat_unit = cf_amministrazione_appaltante, + test_type = "wilcoxon", emergency_name = "coronavirus" ) }), 12 @@ -76,6 +77,7 @@ test_that("check column names are as according to pre determined schema", { award_value = importo_aggiudicazione, sums_paid = importo_lotto, stat_unit = cf_amministrazione_appaltante, + test_type = "wilcoxon", emergency_name = "coronavirus" )) }), col_names, @@ -94,6 +96,7 @@ test_that("check if `indicator_value` lays inbetween min/max values accroding to award_value = importo_aggiudicazione, sums_paid = importo_lotto, stat_unit = cf_amministrazione_appaltante, + test_type = "wilcoxon", emergency_name = "coronavirus" ) }), @@ -115,6 +118,7 @@ test_that("check if the number of rows is coherent with the aggregation level (` award_value = importo_aggiudicazione, sums_paid = importo_lotto, stat_unit = provincia, + test_type = "wilcoxon", emergency_name = "coronavirus" ) }), @@ -132,6 +136,7 @@ test_that("check if the number of rows is coherent with the aggregation level (` award_value = importo_aggiudicazione, sums_paid = importo_lotto, stat_unit = cf_amministrazione_appaltante, + test_type = "wilcoxon", emergency_name = "coronavirus" ) }), @@ -150,6 +155,7 @@ test_that("check if `indicator_value` lays inbetween min/max values accroding to award_value = importo_aggiudicazione, sums_paid = importo_lotto, stat_unit = cf_amministrazione_appaltante, + test_type = "wilcoxon", emergency_name = "Terremoto Aquila" ) }), @@ -168,9 +174,46 @@ test_that("check if the number of rows is coherent with the aggregation level (` award_value = importo_aggiudicazione, sums_paid = importo_lotto, stat_unit = provincia, + test_type = "wilcoxon", emergency_name = "coronavirus" ) }), n = 92 # qui diverso perchè c'è filtro su cpv per 33, mi aspetto meno dati ) }) + + + +# expect to have less rows or equal rows when cpvs are filtered +test_that("check if the number of rows when indicator is filtered out by cpv is lower than the one with more cpvs on it (i.e. cpvs provided by subject matter experts)", { + expect_lte( + suppressWarnings({ + nrow(ind_3( + data = mock_data_core, + publication_date = data_pubblicazione, + award_value = importo_aggiudicazione, + sums_paid = importo_lotto, + stat_unit = provincia, + test_type = "wilcoxon", + emergency_name = "coronavirus", + cpvs = c(33, 34, 38, 39, 41, 44, 65, 85) + )) + }), + expected = suppressWarnings({ + nrow(ind_3( + data = mock_data_core, + publication_date = data_pubblicazione, + award_value = importo_aggiudicazione, + sums_paid = importo_lotto, + stat_unit = provincia, + test_type = "wilcoxon", + emergency_name = "coronavirus" + )) + }) + ) +}) + + + + +# TODO suite di test per appunto differenti test_type diff --git a/tests/testthat/test-04-len-dev-across-crisis.R b/tests/testthat/test-04-len-dev-across-crisis.R index 39c5720..3bc3af2 100644 --- a/tests/testthat/test-04-len-dev-across-crisis.R +++ b/tests/testthat/test-04-len-dev-across-crisis.R @@ -148,3 +148,32 @@ test_that("check if `indicator_value` lays inbetween min/max values accroding to min = 0.1, max = 1 ) }) + +# expect to have less rows or equal rows when cpvs are filtered +test_that("check if the number of rows when indicator is filtered out by cpv is loweer than the one with more cpvs on it (i.e. the defualt)", { + expect_lte( + suppressWarnings({ + nrow(ind_4( + data = mock_data_core, + publication_date = data_pubblicazione, + exp_end = data_termine_contrattuale, + eff_end = data_effettiva_ultimazione, + eff_start = data_stipula_contratto, + stat_unit = cf_amministrazione_appaltante, + emergency_name = "coronavirus", + cpvs = c(33, 34, 38, 39, 41, 44, 65, 85) + )) + }), + expected = suppressWarnings({ + nrow(ind_4( + data = mock_data_core, + publication_date = data_pubblicazione, + exp_end = data_termine_contrattuale, + eff_end = data_effettiva_ultimazione, + eff_start = data_stipula_contratto, + stat_unit = cf_amministrazione_appaltante, + emergency_name = "coronavirus" + )) + }) + ) +}) diff --git a/tests/testthat/test-05-win-share-issuer-across-crisis.R b/tests/testthat/test-05-win-share-issuer-across-crisis.R index 9d8b095..c5294f1 100644 --- a/tests/testthat/test-05-win-share-issuer-across-crisis.R +++ b/tests/testthat/test-05-win-share-issuer-across-crisis.R @@ -195,3 +195,29 @@ test_that("check if the indicator table, in its column `emergency_name` and `eme ) ) }) + + +## expect to have less rows or equal rows when cpvs are filtered +test_that("check if the number of rows when indicator is filtered out by cpv is loweer than the one with more cpvs on it (i.e. the defualt)", { + expect_lte( + suppressWarnings({ + nrow(ind_5( + data = mock_data_core, + publication_date = data_pubblicazione, + stat_unit = cf_amministrazione_appaltante, + winners = denominazione, + emergency_name = "Coronavirus", + cpvs = c(33, 34, 38, 39, 41, 44, 65, 85) + )) + }), + expected = suppressWarnings({ + nrow(ind_5( + data = mock_data_core, + publication_date = data_pubblicazione, + stat_unit = cf_amministrazione_appaltante, + winners = denominazione, + emergency_name = "Coronavirus" + )) + }) + ) +}) diff --git a/tests/testthat/test-06-communication-def-across-crisis.R b/tests/testthat/test-06-communication-def-across-crisis.R index 1f528a5..de72daa 100644 --- a/tests/testthat/test-06-communication-def-across-crisis.R +++ b/tests/testthat/test-06-communication-def-across-crisis.R @@ -208,3 +208,32 @@ test_that("check if the indicator table, in its column `emergency_name` and `eme ) ) }) + + + +# expect to have less rows or equal rows when cpvs are filtered +test_that("check if the number of rows when indicator is filtered out by cpv is loweer than the one with more cpvs on it (i.e. the defualt)", { + expect_lte( + suppressWarnings({ + nrow(ind_6( + data = mock_data_core, + publication_date = data_pubblicazione, + emergency_name = "coronavirus", + award_col = id_aggiudicazione, + stat_unit = cf_amministrazione_appaltante, + test_type = "fisher", + cpvs = c(33, 34, 38, 39, 41, 44, 65, 85) + )) + }), + expected = suppressWarnings({ + nrow(ind_6( + data = mock_data_core, + publication_date = data_pubblicazione, + emergency_name = "coronavirus", + award_col = id_aggiudicazione, + stat_unit = cf_amministrazione_appaltante, + test_type = "fisher" + )) + }) + ) +}) diff --git a/tests/testthat/test-07-os-opportunistic-company.R b/tests/testthat/test-07-os-opportunistic-company.R index e83dc6f..f2101bc 100644 --- a/tests/testthat/test-07-os-opportunistic-company.R +++ b/tests/testthat/test-07-os-opportunistic-company.R @@ -134,3 +134,28 @@ test_that("check if `indicator_value` lays inbetween min/max values accroding to min = 0, max = 1 ) }) + +# expect to have less rows or equal rows when cpvs are filtered +test_that("check if the number of rows when indicator is filtered out by cpv is loweer than the one with more cpvs on it (i.e. the defualt)", { + expect_lte( + suppressWarnings({ + nrow(ind_7( + data = mock_data_core, + final_award_date = data_aggiudicazione_definitiva, + stat_unit = codice_fiscale, + emergency_name = "terremoto aquila", + years_before = 1, + cpvs = c(33, 34, 38, 39, 41, 44, 65, 85) + )) + }), + expected = suppressWarnings({ + nrow(ind_7( + data = mock_data_core, + final_award_date = data_aggiudicazione_definitiva, + stat_unit = codice_fiscale, + emergency_name = "terremoto aquila", + years_before = 1 + )) + }) + ) +}) diff --git a/tests/testthat/test-08-pre-exist-contracts-ext-after-emerg-outb.R b/tests/testthat/test-08-pre-exist-contracts-ext-after-emerg-outb.R index 0490311..15ea7c1 100644 --- a/tests/testthat/test-08-pre-exist-contracts-ext-after-emerg-outb.R +++ b/tests/testthat/test-08-pre-exist-contracts-ext-after-emerg-outb.R @@ -160,3 +160,30 @@ test_that("check if the indicator table, in its column `emergency_name` and `eme ) ) }) + + +test_that("check if the number of rows when indicator is filtered out by cpv is loweer than the one with more cpvs on it (i.e. the defualt)", { + expect_lte( + suppressWarnings({ + nrow(ind_8( + data = mock_data_core, + publication_date = data_pubblicazione, + stat_unit = cf_amministrazione_appaltante, + variant_date = data_approvazione_variante, + months_win = 6, + emergency_name = "coronavirus", + cpvs = c(33, 34, 38, 39, 41, 44, 65, 85) + )) + }), + expected = suppressWarnings({ + nrow(ind_8( + data = mock_data_core, + publication_date = data_pubblicazione, + stat_unit = cf_amministrazione_appaltante, + variant_date = data_approvazione_variante, + months_win = 6, + emergency_name = "coronavirus" + )) + }) + ) +}) diff --git a/tests/testthat/test-09-lengthy-contract.R b/tests/testthat/test-09-lengthy-contract.R index 15cd3f9..43b80cd 100644 --- a/tests/testthat/test-09-lengthy-contract.R +++ b/tests/testthat/test-09-lengthy-contract.R @@ -164,3 +164,29 @@ test_that("check if the indicator table, in its column `emergency_name` and `eme ) ) }) + +test_that("check if the number of rows when indicator is filtered out by cpv is loweer than the one with more cpvs on it (i.e. the defualt)", { + expect_lte( + suppressWarnings({ + nrow(ind_9( + data = mock_data_core, + publication_date = data_pubblicazione, + stat_unit = cf_amministrazione_appaltante, + eff_start = data_inizio_effettiva, + eff_end = data_effettiva_ultimazione, + emergency_name = "coronavirus", + cpvs = c(33, 34, 38, 39, 41, 44, 65, 85) + )) + }), + expected = suppressWarnings({ + nrow(ind_9( + data = mock_data_core, + publication_date = data_pubblicazione, + stat_unit = cf_amministrazione_appaltante, + eff_start = data_inizio_effettiva, + eff_end = data_effettiva_ultimazione, + emergency_name = "coronavirus" + )) + }) + ) +}) diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R deleted file mode 100644 index 8b13789..0000000 --- a/tests/testthat/test-utils.R +++ /dev/null @@ -1 +0,0 @@ - diff --git a/vignettes/articles/assets/01-ingestion 4.png b/vignettes/articles/assets/01-ingestion 4.png new file mode 100644 index 0000000..74f5591 Binary files /dev/null and b/vignettes/articles/assets/01-ingestion 4.png differ diff --git a/vignettes/articles/assets/01-ingestion 5.png b/vignettes/articles/assets/01-ingestion 5.png new file mode 100644 index 0000000..74f5591 Binary files /dev/null and b/vignettes/articles/assets/01-ingestion 5.png differ diff --git a/vignettes/articles/assets/01-ingestion 6.png b/vignettes/articles/assets/01-ingestion 6.png new file mode 100644 index 0000000..74f5591 Binary files /dev/null and b/vignettes/articles/assets/01-ingestion 6.png differ diff --git a/vignettes/articles/assets/01-ingestion 7.png b/vignettes/articles/assets/01-ingestion 7.png new file mode 100644 index 0000000..74f5591 Binary files /dev/null and b/vignettes/articles/assets/01-ingestion 7.png differ