diff --git a/DESCRIPTION b/DESCRIPTION index 554486b..48d98db 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -17,6 +17,7 @@ Encoding: UTF-8 Roxygen: list(markdown = TRUE) Depends: R (>= 4.1.0) Imports: + cli, curl, dplyr, glue, diff --git a/NAMESPACE b/NAMESPACE index e188b12..b13f9c3 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,6 +2,7 @@ export(add_dataset_samples) export(create_dataset) +export(download_dataset) export(download_project) export(download_sample) export(get_auth) @@ -16,6 +17,7 @@ export(scpca_projects) export(set_dataset_email) export(start_dataset_processing) export(view_terms) +export(wait_and_download_dataset) import(httr2) importFrom(dplyr,.data) importFrom(stats,setNames) diff --git a/R/datasets.R b/R/datasets.R index e41271c..f6134f8 100644 --- a/R/datasets.R +++ b/R/datasets.R @@ -281,6 +281,8 @@ get_dataset_status <- function(dataset, auth_token = Sys.getenv("SCPCA_AUTH_TOKE detail <- get_dataset_detail(dataset, auth_token) if (isTRUE(detail$is_failed)) { "failed" + } else if (isTRUE(detail$is_expired)) { + "expired" } else if (isTRUE(detail$is_succeeded)) { "succeeded" } else if (isTRUE(detail$is_processing) || isTRUE(detail$is_started)) { diff --git a/R/downloads.R b/R/downloads.R index e0d43fe..78214fa 100644 --- a/R/downloads.R +++ b/R/downloads.R @@ -9,7 +9,7 @@ #' @param destination the `destination` argument to validate #' #' @noRd -check_destination_is_auth <- function(destination) { +warn_destination_is_auth <- function(destination) { if (is_uuid(destination)) { warning( "`destination` looks like an authorization token (a UUID), not a directory path.", @@ -83,7 +83,7 @@ download_sample <- function( quiet = FALSE, auth_token = Sys.getenv("SCPCA_AUTH_TOKEN") ) { - check_destination_is_auth(destination) + warn_destination_is_auth(destination) auth_token <- resolve_auth_token(auth_token) stopifnot( "quiet must be a logical value" = is.logical(quiet) && length(quiet) == 1 @@ -156,6 +156,8 @@ download_sample <- function( #' If FALSE, 
existing files will be returned. #' Default is FALSE. #' @param quiet Whether to suppress download progress messages. Default is FALSE. +#' @param unzip Whether to unzip the downloaded file. Default is TRUE. When FALSE, +#' the zip file is saved directly to `destination` and its path is returned. #' @param auth_token An authorization token from [get_auth()]. Defaults to the #' `SCPCA_AUTH_TOKEN` environment variable, which [get_auth()] sets automatically. #' @@ -188,9 +190,10 @@ download_project <- function( overwrite = FALSE, redownload = FALSE, quiet = FALSE, + unzip = TRUE, auth_token = Sys.getenv("SCPCA_AUTH_TOKEN") ) { - check_destination_is_auth(destination) + warn_destination_is_auth(destination) auth_token <- resolve_auth_token(auth_token) stopifnot( "Invalid project_id." = grepl("^SCPCP\\d{6}$", project_id), @@ -285,7 +288,14 @@ download_project <- function( detail <- get_ccdl_dataset_detail(dataset$id, auth_token) download_url <- setNames(detail$download_url, detail$download_filename) - file_paths <- download_and_extract_file(download_url, destination, overwrite, redownload, quiet) + file_paths <- download_and_extract_file( + url = download_url, + parent_dir = destination, + overwrite = overwrite, + redownload = redownload, + quiet = quiet, + unzip = unzip + ) invisible(file_paths) } @@ -297,12 +307,34 @@ download_project <- function( #' @param redownload Whether to re-download if files from the same url already exist #' (if FALSE, existing files will be returned) #' @param quiet Whether to suppress progress messages +#' @param unzip Whether to unzip the downloaded file. Default is TRUE. When FALSE, +#' the zip file is saved directly to `parent_dir` and its path is returned. #' -#' @returns A character vector of extracted file paths +#' @returns A character vector of extracted file paths, or the zip file path when +#' `unzip = FALSE`. 
#' #' @keywords internal -download_and_extract_file <- function(url, parent_dir, overwrite, redownload, quiet) { +download_and_extract_file <- function(url, parent_dir, overwrite, redownload, quiet, unzip = TRUE) { download_filename <- if (!is.null(names(url))) names(url) else parse_download_file(url) + + if (!unzip) { + zip_path <- file.path(parent_dir, download_filename) + if (file.exists(zip_path) && !overwrite) { + message(glue::glue( + "File {zip_path} already exists; skipping download.", + "\nUse 'overwrite = TRUE' to replace the existing file." + )) + return(zip_path) + } + req <- httr2::request(unname(url)) + if (!quiet) { + message(glue::glue("Downloading {download_filename}...")) + req <- httr2::req_progress(req, type = "down") + } + req |> req_perform(path = zip_path) + return(zip_path) + } + destination_dir <- file.path(parent_dir, stringr::str_remove(download_filename, "\\.zip$")) # exit if directory already exists @@ -366,3 +398,231 @@ parse_download_file <- function(scpca_url) { stringr::str_extract("SCPC[^\\s]+\\.zip") |> unname() } + + +#' Download a custom dataset's files from the ScPCA Portal +#' +#' Downloads and extracts the files for a custom dataset that has finished +#' processing. The dataset must have a status of "succeeded"; use +#' [get_dataset_status()] to check before calling this function, or use +#' [wait_and_download_dataset()] to wait for processing to complete and then +#' download in a single call. +#' +#' The downloaded files are saved in a subdirectory of `destination`, named +#' from the dataset's download filename (which includes the dataset ID, format, +#' and date). +#' +#' @param dataset the dataset UUID string, or a list with an `$id` element, +#' such as the return value of [create_dataset()]. +#' @param destination The path to the directory where the unzipped file directory +#' should be saved. Default is "scpca_data". +#' @param overwrite Whether to overwrite files in existing directories if they +#' already exist. 
Note that files in existing directories that do not have the +#' same name as one of the downloaded files will not be deleted. Default is FALSE. +#' @param redownload Whether to re-download if files from the same dataset already +#' exist. If FALSE, existing files will be returned. Default is FALSE. +#' @param quiet Whether to suppress download progress messages. Default is FALSE. +#' @param unzip Whether to unzip the downloaded file. Default is TRUE. When FALSE, +#' the zip file is saved directly to `destination` and its path is returned. +#' @param auth_token an authorization token from [get_auth()]. Defaults to the +#' `SCPCA_AUTH_TOKEN` environment variable, which [get_auth()] sets automatically. +#' +#' @importFrom stats setNames +#' +#' @returns a vector of file paths for the downloaded files (invisibly) +#' +#' @import httr2 +#' @export +#' +#' @examples +#' \dontrun{ +#' # Create a dataset, start processing, then download once complete +#' ds <- create_dataset(samples = c("SCPCS000001", "SCPCS000002")) +#' start_dataset_processing(ds, email = "user@example.com") +#' +#' # Check status then download when ready +#' get_dataset_status(ds) +#' download_dataset(ds, destination = "scpca_data") +#' +#' # Or use wait_and_download_dataset() to do all of this in one call +#' wait_and_download_dataset(ds, email = "user@example.com") +#' } +download_dataset <- function( + dataset, + destination = "scpca_data", + overwrite = FALSE, + redownload = FALSE, + quiet = FALSE, + unzip = TRUE, + auth_token = Sys.getenv("SCPCA_AUTH_TOKEN") +) { + warn_destination_is_auth(destination) + auth_token <- resolve_auth_token(auth_token) + stopifnot( + "unzip must be a logical value" = is.logical(unzip) && length(unzip) == 1, + "overwrite must be a logical value" = is.logical(overwrite) && length(overwrite) == 1, + "redownload must be a logical value" = is.logical(redownload) && length(redownload) == 1, + "quiet must be a logical value" = is.logical(quiet) && length(quiet) 
== 1 + ) + dataset_id <- resolve_dataset_id(dataset) + detail <- get_dataset_detail(dataset_id, auth_token) + + if (isTRUE(detail$is_expired)) { + stop( + glue::glue( + "Dataset `{dataset_id}` has expired and is no longer available for download.", + " Use `wait_and_download_dataset()` to regenerate it." + ), + call. = FALSE + ) + } + + if (!isTRUE(detail$is_succeeded)) { + status <- if (isTRUE(detail$is_failed)) { + "failed" + } else if (isTRUE(detail$is_processing) || isTRUE(detail$is_started)) { + "processing" + } else { + "pending" + } + stop( + glue::glue( + "Dataset `{dataset_id}` is not ready for download (status: {status}).", + " Use `wait_and_download_dataset()` to wait for processing to complete." + ), + call. = FALSE + ) + } + + if (!dir.exists(destination)) { + dir.create(destination, recursive = TRUE) + } + + download_url <- setNames(detail$download_url, detail$download_filename) + + file_paths <- download_and_extract_file( + url = download_url, + parent_dir = destination, + overwrite = overwrite, + redownload = redownload, + quiet = quiet, + unzip = unzip + ) + invisible(file_paths) +} + + +#' @rdname download_dataset +#' @export +#' +#' @param email optional email address for the download notification. Only used +#' when this call starts processing (dataset status is "pending" or "expired"). Passed to [start_dataset_processing()]. +#' @param poll_interval Number of minutes to wait between status checks. +#' Default is 0.5 (30 seconds). +#' @param timeout Maximum number of minutes to wait for processing to complete. +#' Use `Inf` to wait indefinitely. Default is 60 (1 hour). 
+wait_and_download_dataset <- function( + dataset, + destination = "scpca_data", + email = NULL, + overwrite = FALSE, + redownload = FALSE, + poll_interval = 0.5, + timeout = 60, + quiet = FALSE, + unzip = TRUE, + auth_token = Sys.getenv("SCPCA_AUTH_TOKEN") +) { + warn_destination_is_auth(destination) + auth_token <- resolve_auth_token(auth_token) + stopifnot( + "poll_interval must be a single non-negative number of minutes" = is.numeric(poll_interval) && + length(poll_interval) == 1 && + poll_interval >= 0, + "timeout must be a single positive number or Inf" = is.numeric(timeout) && + length(timeout) == 1 && + timeout >= 0, + "quiet must be a logical value" = is.logical(quiet) && length(quiet) == 1 + ) + dataset_id <- resolve_dataset_id(dataset) + + if (get_dataset_status(dataset_id, auth_token = auth_token) %in% c("pending", "expired")) { + start_dataset_processing(dataset_id, email = email, auth_token = auth_token) + } + + start_time <- Sys.time() + status <- get_dataset_status(dataset_id, auth_token = auth_token) + + if (!quiet) { + cli::cli_progress_bar( + format = "{cli::pb_spin} Waiting for dataset {dataset_id} [{status}] {cli::pb_elapsed}", + clear = FALSE + ) + } + + repeat { + if (status == "succeeded") { + break + } + if (status == "failed") { + if (!quiet) { + cli::cli_progress_done() + } + stop(glue::glue("Dataset `{dataset_id}` processing failed."), call. = FALSE) + } + if (status == "expired") { + if (!quiet) { + cli::cli_progress_done() + } + stop( + glue::glue( + "Dataset `{dataset_id}` unexpectedly expired during processing.", + " Please report this as a bug." + ), + call. = FALSE + ) + } + + elapsed <- as.numeric(difftime(Sys.time(), start_time, units = "mins")) + if (is.finite(timeout) && elapsed >= timeout) { + if (!quiet) { + cli::cli_progress_done() + } + stop( + glue::glue( + "Timed out after {round(elapsed, 1)} minutes waiting for dataset `{dataset_id}`.", + " Use `timeout = Inf` to wait indefinitely." + ), + call. 
= FALSE + ) + } + + if (!quiet) { + # keep the progress spinner updating every half second until the next poll + next_loop <- Sys.time() + poll_interval * 60 + while (Sys.time() < next_loop) { + cli::cli_progress_update(force = TRUE) + Sys.sleep(0.5) + } + } else { + Sys.sleep(poll_interval * 60) + } + + status <- get_dataset_status(dataset_id, auth_token = auth_token) + if (!quiet) cli::cli_progress_update(force = TRUE) + } + + if (!quiet) { + cli::cli_progress_done() + } + + download_dataset( + dataset_id, + destination = destination, + unzip = unzip, + overwrite = overwrite, + redownload = redownload, + quiet = quiet, + auth_token = auth_token + ) +} diff --git a/_pkgdown.yml b/_pkgdown.yml index 1e3e583..7040ffa 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -36,3 +36,4 @@ reference: - set_dataset_email - start_dataset_processing - get_dataset_status + - download_dataset diff --git a/man/download_and_extract_file.Rd b/man/download_and_extract_file.Rd index b435d13..33e1903 100644 --- a/man/download_and_extract_file.Rd +++ b/man/download_and_extract_file.Rd @@ -4,7 +4,14 @@ \alias{download_and_extract_file} \title{Download and extract a single file from a URL} \usage{ -download_and_extract_file(url, parent_dir, overwrite, redownload, quiet) +download_and_extract_file( + url, + parent_dir, + overwrite, + redownload, + quiet, + unzip = TRUE +) } \arguments{ \item{url}{The download URL} @@ -17,9 +24,13 @@ download_and_extract_file(url, parent_dir, overwrite, redownload, quiet) (if FALSE, existing files will be returned)} \item{quiet}{Whether to suppress progress messages} + +\item{unzip}{Whether to unzip the downloaded file. Default is TRUE. When FALSE, +the zip file is saved directly to \code{parent_dir} and its path is returned.} } \value{ -A character vector of extracted file paths +A character vector of extracted file paths, or the zip file path when +\code{unzip = FALSE}. 
} \description{ Download and extract a single file from a URL diff --git a/man/download_dataset.Rd b/man/download_dataset.Rd new file mode 100644 index 0000000..372f8b8 --- /dev/null +++ b/man/download_dataset.Rd @@ -0,0 +1,90 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/downloads.R +\name{download_dataset} +\alias{download_dataset} +\alias{wait_and_download_dataset} +\title{Download a custom dataset's files from the ScPCA Portal} +\usage{ +download_dataset( + dataset, + destination = "scpca_data", + overwrite = FALSE, + redownload = FALSE, + quiet = FALSE, + unzip = TRUE, + auth_token = Sys.getenv("SCPCA_AUTH_TOKEN") +) + +wait_and_download_dataset( + dataset, + destination = "scpca_data", + email = NULL, + overwrite = FALSE, + redownload = FALSE, + poll_interval = 0.5, + timeout = 60, + quiet = FALSE, + unzip = TRUE, + auth_token = Sys.getenv("SCPCA_AUTH_TOKEN") +) +} +\arguments{ +\item{dataset}{the dataset UUID string, or a list with an \verb{$id} element, +such as the return value of \code{\link[=create_dataset]{create_dataset()}}.} + +\item{destination}{The path to the directory where the unzipped file directory +should be saved. Default is "scpca_data".} + +\item{overwrite}{Whether to overwrite files in existing directories if they +already exist. Note that files in existing directories that do not have the +same name as one of the downloaded files will not be deleted. Default is FALSE.} + +\item{redownload}{Whether to re-download if files from the same dataset already +exist. If FALSE, existing files will be returned. Default is FALSE.} + +\item{quiet}{Whether to suppress download progress messages. Default is FALSE.} + +\item{unzip}{Whether to unzip the downloaded file. Default is TRUE. When FALSE, +the zip file is saved directly to \code{destination} and its path is returned.} + +\item{auth_token}{an authorization token from \code{\link[=get_auth]{get_auth()}}. 
Defaults to the +\code{SCPCA_AUTH_TOKEN} environment variable, which \code{\link[=get_auth]{get_auth()}} sets automatically.} + +\item{email}{optional email address for the download notification. Only used +when this call starts processing (dataset status is "pending" or "expired"). Passed to \code{\link[=start_dataset_processing]{start_dataset_processing()}}.} + +\item{poll_interval}{Number of minutes to wait between status checks. +Default is 0.5 (30 seconds).} + +\item{timeout}{Maximum number of minutes to wait for processing to complete. +Use \code{Inf} to wait indefinitely. Default is 60 (1 hour).} +} +\value{ +a vector of file paths for the downloaded files (invisibly) +} +\description{ +Downloads and extracts the files for a custom dataset that has finished +processing. The dataset must have a status of "succeeded"; use +\code{\link[=get_dataset_status]{get_dataset_status()}} to check before calling this function, or use +\code{\link[=wait_and_download_dataset]{wait_and_download_dataset()}} to wait for processing to complete and then +download in a single call. +} +\details{ +The downloaded files are saved in a subdirectory of \code{destination}, named +from the dataset's download filename (which includes the dataset ID, format, +and date). 
+} +\examples{ +\dontrun{ +# Create a dataset, start processing, then download once complete +ds <- create_dataset(samples = c("SCPCS000001", "SCPCS000002")) +start_dataset_processing(ds, email = "user@example.com") + +# Check status then download when ready +get_dataset_status(ds) +download_dataset(ds, destination = "scpca_data") + +# Or use wait_and_download_dataset() to do all of this in one call +wait_and_download_dataset(ds, email = "user@example.com") +} +} diff --git a/man/download_project.Rd b/man/download_project.Rd index d9606ab..e8b5a76 100644 --- a/man/download_project.Rd +++ b/man/download_project.Rd @@ -13,6 +13,7 @@ download_project( overwrite = FALSE, redownload = FALSE, quiet = FALSE, + unzip = TRUE, auth_token = Sys.getenv("SCPCA_AUTH_TOKEN") ) } @@ -44,6 +45,9 @@ Default is FALSE.} \item{quiet}{Whether to suppress download progress messages. Default is FALSE.} +\item{unzip}{Whether to unzip the downloaded file. Default is TRUE. When FALSE, +the zip file is saved directly to \code{destination} and its path is returned.} + \item{auth_token}{An authorization token from \code{\link[=get_auth]{get_auth()}}. Defaults to the \code{SCPCA_AUTH_TOKEN} environment variable, which \code{\link[=get_auth]{get_auth()}} sets automatically.} } diff --git a/man/replace_dataset_data.Rd b/man/replace_dataset_data.Rd index 4f7efde..15afdf5 100644 --- a/man/replace_dataset_data.Rd +++ b/man/replace_dataset_data.Rd @@ -31,12 +31,12 @@ the updated dataset detail as a list (invisibly) \description{ Replaces the samples and/or projects in an existing dataset with a new selection, by sending a PUT request with a freshly built \code{data} field. This -is a wholesale replacement: the resulting dataset contains exactly the samples +is a wholesale replacement: the resulting dataset contains only the samples and projects supplied here. 
To incrementally add or remove samples while keeping the rest, use \code{\link[=add_dataset_samples]{add_dataset_samples()}} or \code{\link[=remove_dataset_samples]{remove_dataset_samples()}}. } \details{ -A dataset that has already been started cannot be updated. +A dataset that has already started processing cannot be updated. } \examples{ \dontrun{ diff --git a/tests/testthat/test-datasets.R b/tests/testthat/test-datasets.R index 2c8f91e..7d4dad3 100644 --- a/tests/testthat/test-datasets.R +++ b/tests/testthat/test-datasets.R @@ -481,6 +481,13 @@ test_that("get_dataset_status maps detail status fields to a status string", { # a failed dataset is reported as failed even if is_succeeded is also set detail <- list(is_succeeded = TRUE, is_failed = TRUE) expect_equal(get_dataset_status(id, auth_token = "token"), "failed") + + detail <- list(is_expired = TRUE) + expect_equal(get_dataset_status(id, auth_token = "token"), "expired") + + # expired takes priority over succeeded (expired datasets likely still have is_succeeded = TRUE) + detail <- list(is_succeeded = TRUE, is_expired = TRUE) + expect_equal(get_dataset_status(id, auth_token = "token"), "expired") }) test_that("get_dataset_status passes the resolved auth_token to get_dataset_detail", { diff --git a/tests/testthat/test-downloads.R b/tests/testthat/test-downloads.R index 75b1fa3..fe7faad 100644 --- a/tests/testthat/test-downloads.R +++ b/tests/testthat/test-downloads.R @@ -87,11 +87,11 @@ test_that("download_sample validates input parameters", { test_that("check_destination_is_auth warns when destination looks like an auth token (UUID)", { # auth_token is the last argument, so a positionally-passed token lands in destination expect_warning( - check_destination_is_auth("123e4567-e89b-12d3-a456-426614174000"), + warn_destination_is_auth("123e4567-e89b-12d3-a456-426614174000"), "looks like an authorization token" ) # a normal directory path does not warn - expect_no_warning(check_destination_is_auth("scpca_data")) 
+ expect_no_warning(warn_destination_is_auth("scpca_data")) }) test_that("download_project validates input parameters", { @@ -596,3 +596,264 @@ test_that("download_and_extract_file redownloads when exact directory exists, ov # (overwrite doesn't clear directory to prevent user error if they put additional files there) expect_true(file.exists(file.path(existing_dir, "old.txt"))) }) + +# download_and_extract_file unzip = FALSE tests + +test_that("download_and_extract_file with unzip = FALSE saves the zip and returns its path", { + temp_dir <- file.path(tempdir(), "test_no_unzip") + dir.create(temp_dir, showWarnings = FALSE) + on.exit(unlink(temp_dir, recursive = TRUE), add = TRUE) + + local_mocked_bindings( + req_perform = function(req, path = NULL, ...) { + writeLines("fake zip", path) + invisible(NULL) + } + ) + + result <- download_and_extract_file( + setNames("https://example.com/dataset.zip", "dataset.zip"), + temp_dir, + overwrite = FALSE, + redownload = FALSE, + quiet = TRUE, + unzip = FALSE + ) + + expect_equal(result, file.path(temp_dir, "dataset.zip")) + expect_true(file.exists(result)) + expect_false(dir.exists(file.path(temp_dir, "dataset"))) +}) + +test_that("download_and_extract_file with unzip = FALSE skips when file exists and overwrite = FALSE", { + temp_dir <- file.path(tempdir(), "test_no_unzip_skip") + dir.create(temp_dir, showWarnings = FALSE) + on.exit(unlink(temp_dir, recursive = TRUE), add = TRUE) + + zip_path <- file.path(temp_dir, "dataset.zip") + writeLines("existing", zip_path) + + expect_message( + result <- download_and_extract_file( + setNames("https://example.com/dataset.zip", "dataset.zip"), + temp_dir, + overwrite = FALSE, + redownload = FALSE, + quiet = TRUE, + unzip = FALSE + ), + "already exists" + ) + expect_equal(result, zip_path) + # file should not have been overwritten + expect_equal(readLines(zip_path), "existing") +}) + +# download_dataset tests + +test_that("download_dataset downloads a succeeded dataset", { + 
local_mocked_bindings( + get_dataset_detail = \(dataset, auth_token) { + list( + is_succeeded = TRUE, + download_url = "https://example.com/dataset.zip", + download_filename = "dataset.zip" + ) + }, + download_and_extract_file = \(url, ...) c("scpca_data/dataset/file.rds") + ) + + result <- download_dataset( + "00000000-0000-0000-0000-000000000001", + auth_token = "token" + ) + expect_equal(result, c("scpca_data/dataset/file.rds")) +}) + + +test_that("download_dataset errors when dataset is not succeeded", { + for (detail in list( + list(is_started = FALSE), + list(is_started = TRUE), + list(is_started = TRUE, is_failed = TRUE) + )) { + local_mocked_bindings( + get_dataset_detail = \(dataset, auth_token) detail + ) + expect_error( + download_dataset("00000000-0000-0000-0000-000000000001", auth_token = "token"), + "not ready for download" + ) + } +}) + +test_that("download_dataset error names the current status", { + local_mocked_bindings( + get_dataset_detail = \(dataset, auth_token) list(is_started = TRUE) + ) + expect_error( + download_dataset("00000000-0000-0000-0000-000000000001", auth_token = "token"), + "processing" + ) +}) + +test_that("download_dataset errors when dataset is expired", { + local_mocked_bindings( + get_dataset_detail = \(dataset, auth_token) list(is_succeeded = TRUE, is_expired = TRUE) + ) + expect_error( + download_dataset("00000000-0000-0000-0000-000000000001", auth_token = "token"), + "expired" + ) +}) + +test_that("download_dataset passes unzip = FALSE to download_and_extract_file", { + captured_unzip <- NULL + local_mocked_bindings( + get_dataset_detail = \(dataset, auth_token) { + list( + is_succeeded = TRUE, + download_url = "https://example.com/dataset.zip", + download_filename = "dataset.zip" + ) + }, + download_and_extract_file = \(url, parent_dir, overwrite, redownload, quiet, unzip = TRUE) { + captured_unzip <<- unzip + "scpca_data/dataset.zip" + } + ) + + download_dataset("00000000-0000-0000-0000-000000000001", unzip = FALSE, 
auth_token = "token") + expect_false(captured_unzip) +}) + +test_that("download_dataset errors when auth_token is empty", { + expect_error( + download_dataset("00000000-0000-0000-0000-000000000001", auth_token = ""), + "Authorization token must be provided" + ) +}) + +# wait_and_download_dataset tests + +test_that("wait_and_download_dataset downloads a dataset that is already succeeded", { + local_mocked_bindings( + get_dataset_status = \(dataset, auth_token) "succeeded", + download_dataset = \(dataset, ...) c("scpca_data/dataset/file.rds") + ) + + result <- wait_and_download_dataset( + "00000000-0000-0000-0000-000000000001", + quiet = TRUE, + poll_interval = 0, + auth_token = "token" + ) + expect_equal(result, c("scpca_data/dataset/file.rds")) +}) + +test_that("wait_and_download_dataset polls until succeeded", { + call_count <- 0L + local_mocked_bindings( + get_dataset_status = \(dataset, auth_token) { + call_count <<- call_count + 1L + if (call_count < 3L) "processing" else "succeeded" + }, + download_dataset = \(dataset, ...) 
c("scpca_data/dataset/file.rds") + ) + + result <- wait_and_download_dataset( + "00000000-0000-0000-0000-000000000001", + quiet = TRUE, + poll_interval = 0, + auth_token = "token" + ) + expect_equal(result, c("scpca_data/dataset/file.rds")) + expect_gte(call_count, 3L) +}) + + +test_that("wait_and_download_dataset errors when dataset fails", { + local_mocked_bindings( + get_dataset_status = \(dataset, auth_token) "failed" + ) + + expect_error( + wait_and_download_dataset( + "00000000-0000-0000-0000-000000000001", + quiet = TRUE, + poll_interval = 0, + auth_token = "token" + ), + "processing failed" + ) +}) + +test_that("wait_and_download_dataset restarts processing when dataset is expired", { + started <- FALSE + call_count <- 0L + local_mocked_bindings( + get_dataset_status = \(dataset, auth_token) { + call_count <<- call_count + 1L + if (call_count == 1L) "expired" else "succeeded" + }, + start_dataset_processing = \(dataset, email = NULL, auth_token) { + started <<- TRUE + invisible(list()) + }, + download_dataset = \(dataset, ...) 
c("scpca_data/dataset/file.rds") + ) + + result <- wait_and_download_dataset( + "00000000-0000-0000-0000-000000000001", + quiet = TRUE, + poll_interval = 0, + auth_token = "token" + ) + expect_true(started) + expect_equal(result, c("scpca_data/dataset/file.rds")) +}) + +test_that("wait_and_download_dataset errors with unexpected message if expired during polling", { + call_count <- 0L + local_mocked_bindings( + get_dataset_status = \(dataset, auth_token) { + call_count <<- call_count + 1L + if (call_count == 1L) "processing" else "expired" + }, + start_dataset_processing = \(dataset, email = NULL, auth_token) invisible(list()) + ) + + expect_error( + wait_and_download_dataset( + "00000000-0000-0000-0000-000000000001", + quiet = TRUE, + poll_interval = 0, + auth_token = "token" + ), + "unexpectedly expired" + ) +}) + +test_that("wait_and_download_dataset errors on timeout", { + local_mocked_bindings( + get_dataset_status = \(dataset, auth_token) "processing" + ) + + expect_error( + wait_and_download_dataset( + "00000000-0000-0000-0000-000000000001", + quiet = TRUE, + poll_interval = 0, + timeout = 0, + auth_token = "token" + ), + "Timed out" + ) +}) + +test_that("wait_and_download_dataset errors when auth_token is empty", { + expect_error( + wait_and_download_dataset("00000000-0000-0000-0000-000000000001", auth_token = ""), + "Authorization token must be provided" + ) +})