From 29b6c1fc184a7a494c0f34fb2a4d4a60c3bc4943 Mon Sep 17 00:00:00 2001 From: Nicolas Casajus Date: Tue, 29 Apr 2025 10:53:37 +0200 Subject: [PATCH 01/10] feat: yaml as argument of yaml_to_df() --- R/td_create_metadata_file.R | 2 +- R/utils_yaml.R | 8 ++------ 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/R/td_create_metadata_file.R b/R/td_create_metadata_file.R index e75f6d0..27d90c8 100644 --- a/R/td_create_metadata_file.R +++ b/R/td_create_metadata_file.R @@ -68,7 +68,7 @@ td_create_metadata_file <- function( cat(metadata, file = filename, append = FALSE) } else { - metadata <- metadata_as_df() + metadata <- yaml_to_df(metadata) metadata[["dataset"]]$"value" <- gsub( "\\.dataset_key", name, diff --git a/R/utils_yaml.R b/R/utils_yaml.R index beed946..5109fa8 100644 --- a/R/utils_yaml.R +++ b/R/utils_yaml.R @@ -54,12 +54,10 @@ read_yaml_template <- function() { #' #' @noRd -metadata_as_df <- function() { - metadata <- read_yaml_template() - +yaml_to_df <- function(metadata) { sheets <- list() - sheets[["status"]] <- data.frame("status" = "draft") + sheets[["status"]] <- data.frame("status" = metadata$"status") sheets[["dataset"]] <- data.frame( "key" = names(unlist(metadata$"dataset")), # to handle sublevel taxonomy @@ -67,8 +65,6 @@ metadata_as_df <- function() { ) rownames(sheets[["dataset"]]) <- NULL - # replace '.' by '_' for spelling harmonization - sheets[["dataset"]]$key <- gsub("\\.", "_", sheets[["dataset"]]$key) sheets[["traits"]] <- as.data.frame(metadata$"traits"[[1]]) sheets[["traits"]] <- rbind(sheets[["traits"]], sheets[["traits"]]) From aeca92ada7152a6e6e3f36b7462703529146a798 Mon Sep 17 00:00:00 2001 From: Nicolas Casajus Date: Tue, 29 Apr 2025 11:27:37 +0200 Subject: [PATCH 02/10] fix: deal w/ categorical traits --- DESCRIPTION | 2 +- R/utils_yaml.R | 51 ++++++++++++++++++++++++++++---------------------- 2 files changed, 30 insertions(+), 23 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index ec11956..1933c81 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -19,7 +19,7 @@ RoxygenNote: 7.3.2 Depends: R (>= 4.1.0) Imports: - glue, + plyr, readxl, utils, writexl, diff --git a/R/utils_yaml.R b/R/utils_yaml.R index 5109fa8..7b63364 100644 --- a/R/utils_yaml.R +++ b/R/utils_yaml.R @@ -66,28 +66,35 @@ yaml_to_df <- function(metadata) { rownames(sheets[["dataset"]]) <- NULL - sheets[["traits"]] <- as.data.frame(metadata$"traits"[[1]]) - sheets[["traits"]] <- rbind(sheets[["traits"]], sheets[["traits"]]) - - trait_q <- as.data.frame(metadata$"traits"[[1]]) - trait_q <- data.frame( - trait_q, - "levels_value" = NA, - "levels_description" = NA - ) - - trait_c <- as.data.frame(metadata$"traits"[[2]]) - trait_c <- trait_c[, -grep("^levels", colnames(trait_c))] - - trait_c <- data.frame( - trait_c, - "levels_value" = ".value", - "levels_description" = ".descr" - ) - - trait_c <- rbind(trait_c, trait_c) - - sheets[["traits"]] <- rbind(trait_q, trait_c) + traits <- lapply(metadata$"traits", function(x) { + traits <- as.data.frame(t(unlist(x))) + if (traits$"type" == "categorical") { + check_key_in_yaml(x, "levels") + invisible( + lapply(x$"levels", function(y) check_key_in_yaml(y, "value")) + ) + invisible( + lapply(x$"levels", function(y) check_key_in_yaml(y, "description")) + ) + + level_val_cols <- grep("^levels.value", colnames(traits)) + level_descr_cols <- grep("^levels.description", colnames(traits)) + + categories <- data.frame( + "name" = traits[["name"]], + "levels_value" = unlist(traits[, level_val_cols]), + "levels_description" = unlist(traits[, level_descr_cols]) + ) + + traits <- traits[, -c(level_val_cols, level_descr_cols)] + + traits <- merge(traits, categories, by = "name") + } + + traits + }) + + sheets[["traits"]] <- do.call(plyr::rbind.fill, traits) sheets } From 164ccadd9e729f60de47c1c052701dceb4b4eb94 Mon Sep 17 00:00:00 2001 From: Nicolas Casajus Date: Tue, 29 Apr 2025 11:42:23 +0200 Subject: [PATCH 03/10] fix: check if keys are presents --- R/utils_yaml.R | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/R/utils_yaml.R b/R/utils_yaml.R index 7b63364..9c32c11 100644 --- a/R/utils_yaml.R +++ b/R/utils_yaml.R @@ -67,12 +67,20 @@ yaml_to_df <- function(metadata) { rownames(sheets[["dataset"]]) <- NULL traits <- lapply(metadata$"traits", function(x) { + check_key_in_yaml(x, "name") + check_key_in_yaml(x, "variable") + check_key_in_yaml(x, "category") + check_key_in_yaml(x, "type") + traits <- as.data.frame(t(unlist(x))) + if (traits$"type" == "categorical") { check_key_in_yaml(x, "levels") + invisible( lapply(x$"levels", function(y) check_key_in_yaml(y, "value")) ) + invisible( lapply(x$"levels", function(y) check_key_in_yaml(y, "description")) ) @@ -96,5 +104,29 @@ yaml_to_df <- function(metadata) { sheets[["traits"]] <- do.call(plyr::rbind.fill, traits) + if (!("units" %in% colnames(sheets[["traits"]]))) { + sheets[["traits"]]$"units" <- NA + } + + if (!("levels_value" %in% colnames(sheets[["traits"]]))) { + sheets[["traits"]]$"levels_value" <- NA + } + + if (!("levels_description" %in% colnames(sheets[["traits"]]))) { + sheets[["traits"]]$"levels_description" <- NA + } + + col_order <- c( + "name", + "variable", + "category", + "type", + "units", + "levels_value", + "levels_description" + ) + + sheets[["traits"]] <- sheets[["traits"]][, col_order] + sheets } From 97f1f7e35420470934064bbf9daf2f2b2f273ff0 Mon Sep 17 00:00:00 2001 From: Nicolas Casajus Date: Tue, 29 Apr 2025 11:44:10 +0200 Subject: [PATCH 04/10] fix: keep dot in column names --- R/utils_yaml.R | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/R/utils_yaml.R b/R/utils_yaml.R index 9c32c11..2f1ed46 100644 --- a/R/utils_yaml.R +++ b/R/utils_yaml.R @@ -90,8 +90,8 @@ yaml_to_df <- function(metadata) { categories <- data.frame( "name" = traits[["name"]], - "levels_value" = unlist(traits[, level_val_cols]), - "levels_description" = unlist(traits[, level_descr_cols]) + "levels.value" = unlist(traits[, level_val_cols]), + "levels.description" = unlist(traits[, level_descr_cols]) ) traits <- traits[, -c(level_val_cols, level_descr_cols)] @@ -108,12 +108,12 @@ yaml_to_df <- function(metadata) { sheets[["traits"]]$"units" <- NA } - if (!("levels_value" %in% colnames(sheets[["traits"]]))) { - sheets[["traits"]]$"levels_value" <- NA + if (!("levels.value" %in% colnames(sheets[["traits"]]))) { + sheets[["traits"]]$"levels.value" <- NA } - if (!("levels_description" %in% colnames(sheets[["traits"]]))) { - sheets[["traits"]]$"levels_description" <- NA + if (!("levels.description" %in% colnames(sheets[["traits"]]))) { + sheets[["traits"]]$"levels.description" <- NA } col_order <- c( @@ -122,8 +122,8 @@ yaml_to_df <- function(metadata) { "category", "type", "units", - "levels_value", - "levels_description" + "levels.value", + "levels.description" ) sheets[["traits"]] <- sheets[["traits"]][, col_order] From fe11d4ca71e9943eacbb7a8aa0b0ae13cb20d98f Mon Sep 17 00:00:00 2001 From: Nicolas Casajus Date: Tue, 29 Apr 2025 11:45:55 +0200 Subject: [PATCH 05/10] fix: check first level keys in yaml --- R/utils_yaml.R | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/R/utils_yaml.R b/R/utils_yaml.R index 2f1ed46..6177b3d 100644 --- a/R/utils_yaml.R +++ b/R/utils_yaml.R @@ -55,6 +55,10 @@ read_yaml_template <- function() { #' @noRd yaml_to_df <- function(metadata) { + check_key_in_yaml(metadata, "status") + check_key_in_yaml(metadata, "dataset") + check_key_in_yaml(metadata, "traits") + sheets <- list() sheets[["status"]] <- data.frame("status" = metadata$"status") From 3666d3a3591db67ab4a956d067f1ef21dbdc9228 Mon Sep 17 00:00:00 2001 From: Nicolas Casajus Date: Tue, 29 Apr 2025 11:46:26 +0200 Subject: [PATCH 06/10] style: remove previous yaml_as_df() function --- R/format_yaml_xlsx.R | 97 -------------------------------------------- 1 file changed, 97 deletions(-) diff --git a/R/format_yaml_xlsx.R b/R/format_yaml_xlsx.R index 0c43961..497c1de 100644 --- a/R/format_yaml_xlsx.R +++ b/R/format_yaml_xlsx.R @@ -1,100 +1,3 @@ -#' Transform a yaml metadata to data.frame -#' -#' @description -#' This function transform a metadata written in yaml to -#' a metadata in data.frame format with three items: -#' `status`, `dataset`, and `traits`. -#' The structure of the metadata must follow the structure in `metadata_as_yaml()` -#' -#' @param x a `character` of length 1. It could be a filename with extension '.yaml' or a full yaml string. -#' -#' @return A list with three data.frames: `status`, `dataset`, and `traits`. -#' -#' @export -#' -#' @examples -#' \dontrun{ -#' metadata_as_yaml() |> yaml_as_df() -#' } -yaml_as_df <- function(x) { - # here 'x' could be a text or a filename - check_character_arg(x) - # might need some more testing - if (substr(x, nchar(x) - 3, nchar(x)) == ".yml") { - metadata <- yaml::read_yaml(file = x, readLines.warn = FALSE) - } else { - metadata <- yaml::read_yaml(text = x) - } - - # load the expected structure of metadata - skeleton <- metadata_as_yaml() |> - yaml::read_yaml(text = _) - - # check the main structure of the metadata - # make sure that status, dataset and traits are in metadata - if (any(!names(skeleton) %in% names(metadata))) { - miss <- paste( - names(skeleton)[!names(skeleton) %in% names(metadata)], - sep = " " - ) - stop(paste( - "Missing level", - miss, - "\n Check the expected structure of the yaml file with 'metadata_as_yaml()'" - )) - } - - sheets <- list() - - # status - if (length(metadata$"status") != 1) { - stop(paste("status should contain a single value")) - } else { - sheets[["status"]] <- data.frame("status" = metadata$"status") - } - - # dataset - expected_dataset <- names(unlist(skeleton$"dataset")) - if (any(!expected_dataset %in% names(unlist(metadata$"dataset")))) { - miss <- expected_dataset[ - !expected_dataset %in% names(unlist(metadata$"dataset")) - ] - warning(paste( - "Missing field in 'dataset':", - paste(miss, collapse = ","), - "\n Check the expected structure of the yaml file with 'metadata_as_yaml()'" - )) - } - - sheets[["dataset"]] <- data.frame( - "key" = names(unlist(metadata$"dataset")), # to handle sublevel taxonomy - "value" = unlist(metadata$"dataset") - ) - rownames(sheets[["dataset"]]) <- NULL - # replace '.' by '_' for spelling harmonization - sheets[["dataset"]]$key <- gsub("\\.", "_", sheets[["dataset"]]$key) - - # traits - expected_traits <- unique(names(unlist(skeleton$"traits"))) - if (any(!expected_traits %in% names(unlist(metadata$"traits")))) { - miss <- expected_traits[ - !expected_traits %in% names(unlist(metadata$"traits")) - ] - warning(paste( - "Missing field in 'traits':", - paste(miss, collapse = ","), - "\n Check the expected structure of the yaml file with 'metadata_as_yaml()'" - )) - } - simple_trait <- lapply(metadata$"traits", FUN = handle_traits_yml) - - sheets[["traits"]] <- data.frame( - do.call(rbind, simple_trait) - ) - - sheets -} - #' Create a xlsx file from a yaml metadata #' #' @description From f1c1edf505d54121f940806eb21abcf9e4dc2267 Mon Sep 17 00:00:00 2001 From: Nicolas Casajus Date: Tue, 29 Apr 2025 12:19:36 +0200 Subject: [PATCH 07/10] chore: disable old functions --- NAMESPACE | 3 - R/format_yaml_xlsx.R | 483 +++++++++++++++++++++---------------------- man/xlsx_to_yaml.Rd | 29 --- man/yaml_as_df.Rd | 25 --- man/yaml_to_xlsx.Rd | 30 --- 5 files changed, 241 insertions(+), 329 deletions(-) delete mode 100644 man/xlsx_to_yaml.Rd delete mode 100644 man/yaml_as_df.Rd delete mode 100644 man/yaml_to_xlsx.Rd diff --git a/NAMESPACE b/NAMESPACE index 8ff846c..a4b5919 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,6 +1,3 @@ # Generated by roxygen2: do not edit by hand export(td_create_metadata_file) -export(xlsx_to_yaml) -export(yaml_as_df) -export(yaml_to_xlsx) diff --git a/R/format_yaml_xlsx.R b/R/format_yaml_xlsx.R index 497c1de..4da5ca4 100644 --- a/R/format_yaml_xlsx.R +++ b/R/format_yaml_xlsx.R @@ -1,242 +1,241 @@ -#' Create a xlsx file from a yaml metadata -#' -#' @description -#' This function transform a metadata saved in yaml format -#' into a metadata saved in a xlsx file. -#' -#' @param name a `character` of length 1. The trait dataset identifier used to -#' create files and folders. Should be short, explicit and without special -#' characters (including accents). -#' -#' @param path a `character` of length 1. The folder name to stored the -#' metadata template file in. Must exist. -#' Default is the current directory. -#' -#' @param out_suffix a `character` of length 1. The suffix to be added to the output file. -#' -#' @param overwrite a `logical` of length 1. If `TRUE` overwrites the metadata -#' template file. -#' Default is `FALSE`. -#' -#' @return No return value. -#' -#' @export -#' -yaml_to_xlsx <- function( - name, - path = ".", - out_suffix = '', - overwrite = FALSE -) { - check_character_arg(name) - check_nonascii_char(name) - check_character_arg(path) - check_path_exists(path) - check_logical_arg(overwrite) - - dir_path <- file.path(path, name) - infile <- file.path(dir_path, paste0(name, "_metadata.yml")) - outfile <- file.path(dir_path, paste0(name, "_metadata", out_suffix, ".xlsx")) - - if (file.exists(outfile) & !overwrite) { - stop( - "The file '", - outfile, - "' already exists.", - "\nUse 'overwrite = TRUE' to replace its content." - ) - } - - if (!file.exists(infile)) { - stop( - "The file '", - infile, - "' can not be found.", - "\nMake sure 'path' and 'name' are spelled correctly." - ) - } - - metadata <- yaml_as_df(infile) - - writexl::write_xlsx(x = metadata, path = outfile) -} - -#' Create a yaml file from a xlsx metadata -#' -#' @description -#' This function transform a metadata saved in yaml format -#' into a metadata saved in a xlsx file. -#' -#' @param name a `character` of length 1. The trait dataset identifier used to -#' create files and folders. Should be short, explicit and without special -#' characters (including accents). -#' -#' @param path a `character` of length 1. The folder name to stored the -#' metadata template file in. Must exist. -#' Default is the current directory. -#' -#' @param out_suffix a `character` of length 1. The suffix to be added to the output file. -#' -#' @param overwrite a `logical` of length 1. If `TRUE` overwrites the metadata -#' template file. Default is `FALSE`. -#' -#' @return No return value. -#' -#' @export -#' -xlsx_to_yaml <- function( - name, - path = ".", - out_suffix = '', - overwrite = FALSE -) { - check_character_arg(name) - check_nonascii_char(name) - check_character_arg(path) - check_path_exists(path) - check_logical_arg(overwrite) - - dir_path <- file.path(path, name) - infile <- file.path(dir_path, paste0(name, "_metadata.xlsx")) - outfile <- file.path(dir_path, paste0(name, "_metadata", out_suffix, ".yml")) - - if (file.exists(outfile) & !overwrite) { - stop( - "The file '", - outfile, - "' already exists.", - "\nUse 'overwrite = TRUE' to replace its content." - ) - } - - if (!file.exists(infile)) { - stop( - "The file '", - infile, - "' can not be found.", - "\nMake sure 'path' and 'name' are spelled correctly." - ) - } - - status <- readxl::read_xlsx(path = infile, sheet = "status") - dataset <- readxl::read_xlsx(path = infile, sheet = "dataset") - traits <- readxl::read_xlsx(path = infile, sheet = "traits") - - # load the expected structure of metadata - skeleton_df <- metadata_as_yaml() |> - yaml_as_df() - - # status - if (length(status) != 1) { - stop(paste("'status' should contain a single value")) - } else { - yml_status <- paste(names(status), as.character(status), sep = ": ") - } - cat(yml_status, sep = "\n", file = outfile) - - # dataset - cat("dataset:", sep = "\n", file = outfile, append = TRUE) - - if (any(names(dataset) != names(skeleton_df$dataset))) { - stop(paste( - "'dataset' should have two columns: ", - paste(names(skeleton_df$dataset), collapse = ", ") - )) - } - if (any(!skeleton_df$dataset$key %in% dataset$key)) { - miss <- skeleton_df$dataset$key[!skeleton_df$dataset$key %in% dataset$key] - warning(paste("Missing field in 'dataset':", paste(miss, collapse = ","))) - } - # make sure col separator is surrounded by ' ' - yml_dataset <- paste0(" ", dataset$key, ": ", dataset$value) - cat(check_yaml(yml_dataset), sep = "\n", file = outfile, append = TRUE) - - # traits - cat("traits:", sep = "\n", file = outfile, append = TRUE) - if (any(!names(traits) %in% names(skeleton_df$traits))) { - stop(paste( - "'dataset' should have seven columns: ", - paste(names(skeleton_df$traits), collapse = ", ") - )) - } - col_levels <- grep("^levels_", names(traits)) - for (trait_i in unique(traits$variable)) { - row_i <- which(traits$variable %in% trait_i) - if (length(row_i) == 1) { - yaml_i <- paste0( - " ", - names(traits)[-col_levels], - ": ", - traits[row_i, -col_levels] - ) - } else { - key_row <- row_i[which.min(apply( - is.na(traits[row_i, -col_levels]), - 1, - sum - ))] - yaml_i <- paste0( - " ", - names(traits)[-col_levels], - ": ", - traits[key_row, -col_levels] - ) - var_level <- gsub("^levels_", " ", names(traits)[col_levels]) - # add '-' at the stat of a new trait - var_level[1] <- gsub("^ ", " - ", var_level[1]) - yaml_level <- apply( - traits[row_i, col_levels], - 1, - function(x) paste(var_level, x, sep = ": ") - ) - } - # add '-' at the stat of a new trait - yaml_i[1] <- gsub("^ ", "- ", yaml_i[1]) - cat(check_yaml(yaml_i), sep = "\n", file = outfile, append = TRUE) - if (length(row_i) > 1) { - cat(" levels:", sep = "\n", file = outfile, append = TRUE) - cat(check_yaml(yaml_level), sep = "\n", file = outfile, append = TRUE) - } - } -} - - -#' Handy function to convert traits in yaml to df -#' -#' @noRd -handle_traits_yml <- function(x) { - if ("levels" %in% names(x)) { - # base trait information - base_i <- x[-grep("^levels", names(x))] - # add levels information - levels_i <- do.call(rbind, x$"levels") - colnames(levels_i) <- paste("levels", colnames(levels_i), sep = "_") - levels_i <- apply(levels_i, 2, unlist) - # merge the two together - out_i <- data.frame( - base_i, - levels_i - ) - } else { - out_i <- data.frame( - x, - "levels_value" = NA, - "levels_description" = NA - ) - } - return(out_i) -} - -#' Function to transform NA and logical values for readibility in yaml format -#' -#' @noRd -check_yaml <- function(x) { - # replace NA - x <- gsub(": NA", ": .na", x) - # replace TRUE and FALSE - x <- gsub(": FALSE", ": no", x) - x <- gsub(": TRUE", ": yes", x) - # yaml doesn't like ending ',' - x <- gsub(",$", "','", x) - return(x) -} +# #' Create a xlsx file from a yaml metadata +# #' +# #' @description +# #' This function transform a metadata saved in yaml format +# #' into a metadata saved in a xlsx file. +# #' +# #' @param name a `character` of length 1. The trait dataset identifier used to +# #' create files and folders. Should be short, explicit and without special +# #' characters (including accents). +# #' +# #' @param path a `character` of length 1. The folder name to stored the +# #' metadata template file in. Must exist. +# #' Default is the current directory. +# #' +# #' @param out_suffix a `character` of length 1. The suffix to be added to the output file. +# #' +# #' @param overwrite a `logical` of length 1. If `TRUE` overwrites the metadata +# #' template file. +# #' Default is `FALSE`. +# #' +# #' @return No return value. +# #' +# #' @export +# #' +# yaml_to_xlsx <- function( +# name, +# path = ".", +# out_suffix = '', +# overwrite = FALSE +# ) { +# check_character_arg(name) +# check_nonascii_char(name) +# check_character_arg(path) +# check_path_exists(path) +# check_logical_arg(overwrite) + +# dir_path <- file.path(path, name) +# infile <- file.path(dir_path, paste0(name, "_metadata.yml")) +# outfile <- file.path(dir_path, paste0(name, "_metadata", out_suffix, ".xlsx")) + +# if (file.exists(outfile) & !overwrite) { +# stop( +# "The file '", +# outfile, +# "' already exists.", +# "\nUse 'overwrite = TRUE' to replace its content." +# ) +# } + +# if (!file.exists(infile)) { +# stop( +# "The file '", +# infile, +# "' can not be found.", +# "\nMake sure 'path' and 'name' are spelled correctly." +# ) +# } + +# metadata <- yaml_as_df(infile) + +# writexl::write_xlsx(x = metadata, path = outfile) +# } + +# #' Create a yaml file from a xlsx metadata +# #' +# #' @description +# #' This function transform a metadata saved in yaml format +# #' into a metadata saved in a xlsx file. +# #' +# #' @param name a `character` of length 1. The trait dataset identifier used to +# #' create files and folders. Should be short, explicit and without special +# #' characters (including accents). +# #' +# #' @param path a `character` of length 1. The folder name to stored the +# #' metadata template file in. Must exist. +# #' Default is the current directory. +# #' +# #' @param out_suffix a `character` of length 1. The suffix to be added to the output file. +# #' +# #' @param overwrite a `logical` of length 1. If `TRUE` overwrites the metadata +# #' template file. Default is `FALSE`. +# #' +# #' @return No return value. +# #' +# #' @export +# #' +# xlsx_to_yaml <- function( +# name, +# path = ".", +# out_suffix = '', +# overwrite = FALSE +# ) { +# check_character_arg(name) +# check_nonascii_char(name) +# check_character_arg(path) +# check_path_exists(path) +# check_logical_arg(overwrite) + +# dir_path <- file.path(path, name) +# infile <- file.path(dir_path, paste0(name, "_metadata.xlsx")) +# outfile <- file.path(dir_path, paste0(name, "_metadata", out_suffix, ".yml")) + +# if (file.exists(outfile) & !overwrite) { +# stop( +# "The file '", +# outfile, +# "' already exists.", +# "\nUse 'overwrite = TRUE' to replace its content." +# ) +# } + +# if (!file.exists(infile)) { +# stop( +# "The file '", +# infile, +# "' can not be found.", +# "\nMake sure 'path' and 'name' are spelled correctly." +# ) +# } + +# status <- readxl::read_xlsx(path = infile, sheet = "status") +# dataset <- readxl::read_xlsx(path = infile, sheet = "dataset") +# traits <- readxl::read_xlsx(path = infile, sheet = "traits") + +# # load the expected structure of metadata +# skeleton_df <- metadata_as_yaml() |> +# yaml_as_df() + +# # status +# if (length(status) != 1) { +# stop(paste("'status' should contain a single value")) +# } else { +# yml_status <- paste(names(status), as.character(status), sep = ": ") +# } +# cat(yml_status, sep = "\n", file = outfile) + +# # dataset +# cat("dataset:", sep = "\n", file = outfile, append = TRUE) + +# if (any(names(dataset) != names(skeleton_df$dataset))) { +# stop(paste( +# "'dataset' should have two columns: ", +# paste(names(skeleton_df$dataset), collapse = ", ") +# )) +# } +# if (any(!skeleton_df$dataset$key %in% dataset$key)) { +# miss <- skeleton_df$dataset$key[!skeleton_df$dataset$key %in% dataset$key] +# warning(paste("Missing field in 'dataset':", paste(miss, collapse = ","))) +# } +# # make sure col separator is surrounded by ' ' +# yml_dataset <- paste0(" ", dataset$key, ": ", dataset$value) +# cat(check_yaml(yml_dataset), sep = "\n", file = outfile, append = TRUE) + +# # traits +# cat("traits:", sep = "\n", file = outfile, append = TRUE) +# if (any(!names(traits) %in% names(skeleton_df$traits))) { +# stop(paste( +# "'dataset' should have seven columns: ", +# paste(names(skeleton_df$traits), collapse = ", ") +# )) +# } +# col_levels <- grep("^levels_", names(traits)) +# for (trait_i in unique(traits$variable)) { +# row_i <- which(traits$variable %in% trait_i) +# if (length(row_i) == 1) { +# yaml_i <- paste0( +# " ", +# names(traits)[-col_levels], +# ": ", +# traits[row_i, -col_levels] +# ) +# } else { +# key_row <- row_i[which.min(apply( +# is.na(traits[row_i, -col_levels]), +# 1, +# sum +# ))] +# yaml_i <- paste0( +# " ", +# names(traits)[-col_levels], +# ": ", +# traits[key_row, -col_levels] +# ) +# var_level <- gsub("^levels_", " ", names(traits)[col_levels]) +# # add '-' at the stat of a new trait +# var_level[1] <- gsub("^ ", " - ", var_level[1]) +# yaml_level <- apply( +# traits[row_i, col_levels], +# 1, +# function(x) paste(var_level, x, sep = ": ") +# ) +# } +# # add '-' at the stat of a new trait +# yaml_i[1] <- gsub("^ ", "- ", yaml_i[1]) +# cat(check_yaml(yaml_i), sep = "\n", file = outfile, append = TRUE) +# if (length(row_i) > 1) { +# cat(" levels:", sep = "\n", file = outfile, append = TRUE) +# cat(check_yaml(yaml_level), sep = "\n", file = outfile, append = TRUE) +# } +# } +# } + +# #' Handy function to convert traits in yaml to df +# #' +# #' @noRd +# handle_traits_yml <- function(x) { +# if ("levels" %in% names(x)) { +# # base trait information +# base_i <- x[-grep("^levels", names(x))] +# # add levels information +# levels_i <- do.call(rbind, x$"levels") +# colnames(levels_i) <- paste("levels", colnames(levels_i), sep = "_") +# levels_i <- apply(levels_i, 2, unlist) +# # merge the two together +# out_i <- data.frame( +# base_i, +# levels_i +# ) +# } else { +# out_i <- data.frame( +# x, +# "levels_value" = NA, +# "levels_description" = NA +# ) +# } +# return(out_i) +# } + +# #' Function to transform NA and logical values for readibility in yaml format +# #' +# #' @noRd +# check_yaml <- function(x) { +# # replace NA +# x <- gsub(": NA", ": .na", x) +# # replace TRUE and FALSE +# x <- gsub(": FALSE", ": no", x) +# x <- gsub(": TRUE", ": yes", x) +# # yaml doesn't like ending ',' +# x <- gsub(",$", "','", x) +# return(x) +# } diff --git a/man/xlsx_to_yaml.Rd b/man/xlsx_to_yaml.Rd deleted file mode 100644 index 726f883..0000000 --- a/man/xlsx_to_yaml.Rd +++ /dev/null @@ -1,29 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/format_yaml_xlsx.R -\name{xlsx_to_yaml} -\alias{xlsx_to_yaml} -\title{Create a yaml file from a xlsx metadata} -\usage{ -xlsx_to_yaml(name, path = ".", out_suffix = "", overwrite = FALSE) -} -\arguments{ -\item{name}{a \code{character} of length 1. The trait dataset identifier used to -create files and folders. Should be short, explicit and without special -characters (including accents).} - -\item{path}{a \code{character} of length 1. The folder name to stored the -metadata template file in. Must exist. -Default is the current directory.} - -\item{out_suffix}{a \code{character} of length 1. The suffix to be added to the output file.} - -\item{overwrite}{a \code{logical} of length 1. If \code{TRUE} overwrites the metadata -template file. Default is \code{FALSE}.} -} -\value{ -No return value. -} -\description{ -This function transform a metadata saved in yaml format -into a metadata saved in a xlsx file. -} diff --git a/man/yaml_as_df.Rd b/man/yaml_as_df.Rd deleted file mode 100644 index 8fc2317..0000000 --- a/man/yaml_as_df.Rd +++ /dev/null @@ -1,25 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/format_yaml_xlsx.R -\name{yaml_as_df} -\alias{yaml_as_df} -\title{Transform a yaml metadata to data.frame} -\usage{ -yaml_as_df(x) -} -\arguments{ -\item{x}{a \code{character} of length 1. It could be a filename with extension '.yaml' or a full yaml string.} -} -\value{ -A list with three data.frames: \code{status}, \code{dataset}, and \code{traits}. -} -\description{ -This function transform a metadata written in yaml to -a metadata in data.frame format with three items: -\code{status}, \code{dataset}, and \code{traits}. -The structure of the metadata must follow the structure in \code{metadata_as_yaml()} -} -\examples{ -\dontrun{ -metadata_as_yaml() |> yaml_as_df() -} -} diff --git a/man/yaml_to_xlsx.Rd b/man/yaml_to_xlsx.Rd deleted file mode 100644 index d08dedf..0000000 --- a/man/yaml_to_xlsx.Rd +++ /dev/null @@ -1,30 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/format_yaml_xlsx.R -\name{yaml_to_xlsx} -\alias{yaml_to_xlsx} -\title{Create a xlsx file from a yaml metadata} -\usage{ -yaml_to_xlsx(name, path = ".", out_suffix = "", overwrite = FALSE) -} -\arguments{ -\item{name}{a \code{character} of length 1. The trait dataset identifier used to -create files and folders. Should be short, explicit and without special -characters (including accents).} - -\item{path}{a \code{character} of length 1. The folder name to stored the -metadata template file in. Must exist. -Default is the current directory.} - -\item{out_suffix}{a \code{character} of length 1. The suffix to be added to the output file.} - -\item{overwrite}{a \code{logical} of length 1. If \code{TRUE} overwrites the metadata -template file. -Default is \code{FALSE}.} -} -\value{ -No return value. -} -\description{ -This function transform a metadata saved in yaml format -into a metadata saved in a xlsx file. -} From b2a4e6f85fe3f813814f66b0e2a9fe8f285166b8 Mon Sep 17 00:00:00 2001 From: Nicolas Casajus Date: Tue, 29 Apr 2025 13:06:04 +0200 Subject: [PATCH 08/10] factor: clean_database_name() --- R/td_create_metadata_file.R | 7 +------ R/utils_yaml.R | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/R/td_create_metadata_file.R b/R/td_create_metadata_file.R index 27d90c8..1b7898b 100644 --- a/R/td_create_metadata_file.R +++ b/R/td_create_metadata_file.R @@ -43,12 +43,7 @@ td_create_metadata_file <- function( check_format_value(format) check_logical_arg(overwrite) - name <- name |> - tolower() |> - trimws() |> - gsub("\\s+", "_", x = _) |> - gsub("[[:punct:]]", "_", x = _) |> - gsub("_+", "_", x = _) + name <- clean_database_name(name) dir_path <- file.path(path, name) file_ext <- ifelse(format == "yaml", ".yml", ".xlsx") diff --git a/R/utils_yaml.R b/R/utils_yaml.R index 6177b3d..62836ae 100644 --- a/R/utils_yaml.R +++ b/R/utils_yaml.R @@ -134,3 +134,17 @@ yaml_to_df <- function(metadata) { sheets } + + +#' Clean database name +#' +#' @noRd + +clean_database_name <- function(name) { + name |> + tolower() |> + trimws() |> + gsub("\\s+", "_", x = _) |> + gsub("[[:punct:]]", "_", x = _) |> + gsub("_+", "_", x = _) +} From 35ccc66c40e06d000c0d242925d66c5ca6a73ed8 Mon Sep 17 00:00:00 2001 From: Nicolas Casajus Date: Tue, 29 Apr 2025 16:57:20 +0200 Subject: [PATCH 09/10] fix: improve name cleaning --- R/utils_yaml.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/R/utils_yaml.R b/R/utils_yaml.R index 62836ae..3a96d76 100644 --- a/R/utils_yaml.R +++ b/R/utils_yaml.R @@ -146,5 +146,7 @@ clean_database_name <- function(name) { trimws() |> gsub("\\s+", "_", x = _) |> gsub("[[:punct:]]", "_", x = _) |> - gsub("_+", "_", x = _) + gsub("_+", "_", x = _) |> + gsub("^_", "", x = _) |> + gsub("_$", "", x = _) } From dd7c3abc6b7d03e9252f71fcaf237662c35e39b6 Mon Sep 17 00:00:00 2001 From: Nicolas Casajus Date: Tue, 29 Apr 2025 16:57:33 +0200 Subject: [PATCH 10/10] test: add unit tests --- tests/testthat/test-clean_database_name.R | 12 ++ tests/testthat/test-yaml_to_df.R | 193 ++++++++++++++++++++++ 2 files changed, 205 insertions(+) create mode 100644 tests/testthat/test-clean_database_name.R create mode 100644 tests/testthat/test-yaml_to_df.R diff --git a/tests/testthat/test-clean_database_name.R b/tests/testthat/test-clean_database_name.R new file mode 100644 index 0000000..e6b459d --- /dev/null +++ b/tests/testthat/test-clean_database_name.R @@ -0,0 +1,12 @@ +## clean_database_name() ---- + +test_that("clean_database_name() succeeds", { + x <- clean_database_name("Middolo 2023") + + expect_true(class(x) == "character") + expect_equal(length(x), 1L) + expect_true(x == "middolo_2023") + + x <- clean_database_name(" Middolo - 2023..") + expect_true(x == "middolo_2023") +}) diff --git a/tests/testthat/test-yaml_to_df.R b/tests/testthat/test-yaml_to_df.R new file mode 100644 index 0000000..9920c28 --- /dev/null +++ b/tests/testthat/test-yaml_to_df.R @@ -0,0 +1,193 @@ +## yaml_to_df() ---- + +test_that("yaml_to_df() fails", { + metadata <- read_yaml_template() + metadata <- metadata[-1] + + expect_error( + yaml_to_df(metadata), + "No key 'status' found in the YAML file", + fixed = TRUE + ) + + metadata <- read_yaml_template() + metadata <- metadata[-2] + + expect_error( + yaml_to_df(metadata), + "No key 'dataset' found in the YAML file", + fixed = TRUE + ) + + metadata <- read_yaml_template() + metadata <- metadata[-3] + + expect_error( + yaml_to_df(metadata), + "No key 'traits' found in the YAML file", + fixed = TRUE + ) + + metadata <- read_yaml_template() + metadata[[3]][[1]] <- metadata[[3]][[1]][-1] + + expect_error( + yaml_to_df(metadata), + "No key 'variable' found in the YAML file", + fixed = TRUE + ) + + metadata <- read_yaml_template() + metadata[[3]][[1]] <- metadata[[3]][[1]][-2] + + expect_error( + yaml_to_df(metadata), + "No key 'name' found in the YAML file", + fixed = TRUE + ) + + metadata <- read_yaml_template() + metadata[[3]][[1]] <- metadata[[3]][[1]][-3] + + expect_error( + yaml_to_df(metadata), + "No key 'category' found in the YAML file", + fixed = TRUE + ) + + metadata <- read_yaml_template() + metadata[[3]][[1]] <- metadata[[3]][[1]][-4] + + expect_error( + yaml_to_df(metadata), + "No key 'type' found in the YAML file", + fixed = TRUE + ) + + metadata <- read_yaml_template() + metadata[[3]][[2]] <- metadata[[3]][[2]][-1] + + expect_error( + yaml_to_df(metadata), + "No key 'variable' found in the YAML file", + fixed = TRUE + ) + + metadata <- read_yaml_template() + metadata[[3]][[2]] <- metadata[[3]][[2]][-2] + + expect_error( + yaml_to_df(metadata), + "No key 'name' found in the YAML file", + fixed = TRUE + ) + + metadata <- read_yaml_template() + metadata[[3]][[2]] <- metadata[[3]][[2]][-3] + + expect_error( + yaml_to_df(metadata), + "No key 'category' found in the YAML file", + fixed = TRUE + ) + + metadata <- read_yaml_template() + metadata[[3]][[2]] <- metadata[[3]][[2]][-4] + + expect_error( + yaml_to_df(metadata), + "No key 'type' found in the YAML file", + fixed = TRUE + ) + + metadata <- read_yaml_template() + metadata[[3]][[2]]$"levels"[[1]] <- metadata[[3]][[2]]$"levels"[[1]][-1] + + expect_error( + yaml_to_df(metadata), + "No key 'value' found in the YAML file", + fixed = TRUE + ) + + metadata <- read_yaml_template() + metadata[[3]][[2]]$"levels"[[1]] <- metadata[[3]][[2]]$"levels"[[1]][-2] + + expect_error( + yaml_to_df(metadata), + "No key 'description' found in the YAML file", + fixed = TRUE + ) + + metadata <- read_yaml_template() + metadata[[3]][[2]]$"levels"[[2]] <- metadata[[3]][[2]]$"levels"[[2]][-1] + + expect_error( + yaml_to_df(metadata), + "No key 'value' found in the YAML file", + fixed = TRUE + ) + + metadata <- read_yaml_template() + metadata[[3]][[2]]$"levels"[[2]] <- metadata[[3]][[2]]$"levels"[[2]][-2] + + expect_error( + yaml_to_df(metadata), + "No key 'description' found in the YAML file", + fixed = TRUE + ) +}) + + +test_that("yaml_to_df() succeeds - Mixed traits", { + metadata <- read_yaml_template() + x <- yaml_to_df(metadata) + + expect_true(is.list(x)) + expect_true(!is.null(names(x))) + expect_true("status" %in% names(x)) + expect_true("dataset" %in% names(x)) + expect_true("traits" %in% names(x)) + + expect_true(is.data.frame(x[["status"]])) + expect_true(ncol(x[["status"]]) == 1L) + expect_true(nrow(x[["status"]]) == 1L) + expect_true(x[["status"]][1, "status"] == "draft") + + expect_true(is.data.frame(x[["dataset"]])) + expect_true(ncol(x[["dataset"]]) == 2L) + expect_true(nrow(x[["dataset"]]) == 23L) + expect_true(x[["dataset"]][20, "key"] == "taxonomy.genus") + + expect_true(is.data.frame(x[["traits"]])) + expect_true(ncol(x[["traits"]]) == 7L) + expect_true(nrow(x[["traits"]]) == 3L) + expect_true(x[["traits"]][2, "variable"] == x[["traits"]][3, "variable"]) +}) + + +test_that("yaml_to_df() succeeds - One quantitative trait", { + metadata <- read_yaml_template() + metadata[["traits"]] <- metadata[["traits"]][-2] + x <- yaml_to_df(metadata) + + expect_true(ncol(x[["traits"]]) == 7L) + expect_true(nrow(x[["traits"]]) == 1L) + expect_true("levels.value" %in% colnames(x[["traits"]])) + expect_true("levels.description" %in% colnames(x[["traits"]])) + expect_true(is.na(x[["traits"]][1, "levels.value"])) + expect_true(is.na(x[["traits"]][1, "levels.description"])) +}) + + +test_that("yaml_to_df() succeeds - One categorical trait", { + metadata <- read_yaml_template() + metadata[["traits"]] <- metadata[["traits"]][-1] + metadata[["traits"]][[1]] <- metadata[["traits"]][[1]][-5] + x <- yaml_to_df(metadata) + + expect_true(ncol(x[["traits"]]) == 7L) + expect_true(nrow(x[["traits"]]) == 2L) + expect_true("units" %in% colnames(x[["traits"]])) + expect_true(is.na(x[["traits"]][1, "units"])) + expect_true(is.na(x[["traits"]][2, "units"])) +})