-
Notifications
You must be signed in to change notification settings - Fork 26
Add PackageCache support for base packages
#1166
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
DavisVaughan
wants to merge
1
commit into
main
Choose a base branch
from
oak/base-package-cache
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,138 @@ | ||
| use std::io::Cursor; | ||
| use std::io::Read; | ||
|
|
||
| use flate2::read::GzDecoder; | ||
| use oak_fs::file_lock::FileLock; | ||
|
|
||
| use crate::download::Outcome; | ||
|
|
||
/// The R base packages: the packages bundled with R whose DESCRIPTION declares
/// `Priority: base`.
pub(crate) const BASE_PACKAGES: &[&str] = &[
    "base",
    "compiler",
    "datasets",
    "graphics",
    "grDevices",
    "grid",
    "methods",
    "parallel",
    "splines",
    "stats",
    "stats4",
    "tcltk",
    "tools",
    "utils",
];
|
|
||
| /// Download the R source tarball for R {version} from CRAN's archive. | ||
| /// | ||
| /// Base R packages (e.g. `base`, `utils`, `stats`) are not distributed at the standard | ||
| /// `src/contrib/` location on CRAN. Instead, we must retrieve them from the base R | ||
| /// sources themselves, which lives at `src/base/R-{major}/R-{version}.tar.gz`. Each | ||
| /// package is located inside that tarball at `src/library/{package}/`. | ||
| /// | ||
| /// Returns `Ok(None)` if the tarball is not on CRAN (e.g. a development R version), which | ||
| /// we treat as "source unavailable" rather than an error. | ||
| pub(crate) fn download(version: &str) -> anyhow::Result<Option<Vec<u8>>> { | ||
| let major = version | ||
| .split('.') | ||
| .next() | ||
| .ok_or_else(|| anyhow::anyhow!("Invalid R version for base source download: {version}"))?; | ||
|
|
||
| let mirrors = ["https://cran.r-project.org", "https://cran.rstudio.com"]; | ||
| let suffix = format!("src/base/R-{major}/R-{version}.tar.gz"); | ||
|
|
||
| match crate::download::download_with_mirrors(&suffix, &mirrors)? { | ||
| Outcome::Success(response) => { | ||
| let mut bytes = Vec::new(); | ||
| response.into_body().into_reader().read_to_end(&mut bytes)?; | ||
| Ok(Some(bytes)) | ||
| }, | ||
| Outcome::NotFound => Ok(None), | ||
| } | ||
| } | ||
|
|
||
| /// Extract a single base package's R files from the R source tarball bytes. | ||
| /// | ||
| /// Writes `R-{version}/src/library/{package}/R/*.R` entries into an `R/` folder inside | ||
| /// the directory `destination_lock` lives in. Files are marked read only to match the | ||
| /// rest of the cache. | ||
| pub(crate) fn extract( | ||
| package: &str, | ||
| version: &str, | ||
| bytes: &[u8], | ||
| destination_lock: &FileLock, | ||
| ) -> anyhow::Result<()> { | ||
| let destination = destination_lock.parent().join("R"); | ||
| std::fs::create_dir(&destination)?; | ||
|
|
||
| let cursor = Cursor::new(bytes); | ||
| let gz = GzDecoder::new(cursor); | ||
| let mut archive = tar::Archive::new(gz); | ||
|
|
||
| let prefix = format!("R-{version}/src/library/{package}/R/"); | ||
|
|
||
| for entry in archive.entries()? { | ||
| let mut entry = entry?; | ||
| let path = entry.path()?; | ||
|
|
||
| let Some(relative) = path.strip_prefix(&prefix).ok() else { | ||
| continue; | ||
| }; | ||
|
|
||
| if relative | ||
| .extension() | ||
| .is_none_or(|ext| ext != "R" && ext != "r") | ||
| { | ||
| continue; | ||
| } | ||
|
|
||
| let absolute = destination.join(relative); | ||
|
|
||
| // Some base packages (e.g. `utils`) have platform-specific subdirs under `R/` | ||
| // like `R/windows/` and `R/unix/` (their `Makefile` handles them at install | ||
| // time). Create parents if one is required so `unpack()` can write nested files. | ||
| if let Some(parent) = relative.parent().filter(|p| !p.as_os_str().is_empty()) { | ||
| std::fs::create_dir_all(destination.join(parent))?; | ||
| } | ||
|
|
||
| entry.unpack(&absolute)?; | ||
| crate::fs::set_readonly(&absolute)?; | ||
| } | ||
|
|
||
| Ok(()) | ||
| } | ||
|
|
||
#[cfg(test)]
mod tests {
    use oak_fs::file_lock::Filesystem;
    use tempfile::TempDir;

    use crate::base::download;
    use crate::base::extract;

    /// End-to-end check: fetch the R 4.5.0 sources and extract the `utils` R files.
    ///
    /// Requires internet access and downloads a large tarball of the R sources
    #[ignore = "Downloads a 40mb tarball"]
    #[test]
    fn test_base_download_and_extract() {
        let tarball = download("4.5.0").unwrap().expect("R 4.5.0 source to exist");

        let tempdir = TempDir::new().unwrap();
        let filesystem = Filesystem::new(tempdir.path().to_path_buf());
        let lock = filesystem.open_rw_exclusive_create(".lock").unwrap();

        extract("utils", "4.5.0", &tarball, &lock).unwrap();

        // Spot check: `utils` has a well-known `help.R` file
        let help = lock.parent().join("R").join("help.R");
        assert!(help.exists());

        let permissions = help.metadata().unwrap().permissions();
        assert!(permissions.readonly());
    }

    /// A version that has no tarball on the mirrors is reported as `None`, not an error.
    /// This calls `download()`, so it needs the mirrors to be reachable.
    #[test]
    fn test_base_download_unknown_version_returns_none() {
        assert!(download("0.0.0").unwrap().is_none());
    }
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,9 @@ | ||
| use sha2::Digest; | ||
| use sha2::Sha256; | ||
|
|
||
| /// Retain 8 ASCII characters for each hash fragment | ||
| pub(crate) fn hash(contents: &str) -> String { | ||
| let mut hash = hex::encode(Sha256::digest(contents)); | ||
| hash.truncate(8); | ||
| hash | ||
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,98 @@ | ||
| use std::fs::read_to_string; | ||
| use std::path::Path; | ||
| use std::path::PathBuf; | ||
|
|
||
| use oak_package::package_description::Description; | ||
|
|
||
| pub(crate) struct InstalledPackage { | ||
| key: String, | ||
| name: String, | ||
| library_path: PathBuf, | ||
| description: Description, | ||
| description_hash: String, | ||
| } | ||
|
|
||
| impl InstalledPackage { | ||
| pub(crate) fn find(package: &str, library_paths: &[PathBuf]) -> anyhow::Result<Option<Self>> { | ||
| let mut library_path = None; | ||
|
|
||
| for library_path_candidate in library_paths { | ||
| if library_path_candidate.join(package).exists() { | ||
| library_path = Some(library_path_candidate); | ||
| break; | ||
| } | ||
| } | ||
|
|
||
| let Some(library_path) = library_path else { | ||
| // Not installed | ||
| return Ok(None); | ||
| }; | ||
|
|
||
| let package_path = library_path.join(package); | ||
|
|
||
| let description_path = package_path.join("DESCRIPTION"); | ||
| let description_contents = read_to_string(&description_path)?; | ||
| let description = Description::parse(&description_contents)?; | ||
|
|
||
| let library_path_hash = crate::hash::hash(library_path.to_string_lossy().as_ref()); | ||
| let description_hash = crate::hash::hash(&description_contents); | ||
|
|
||
| // Flat key unique enough to handle: | ||
| // - The same R package across multiple libpaths | ||
| // - Reinstalling a dev R package without changing the version (0.1.0.9000) | ||
| let key = format!( | ||
| "{name}_{version}_libpath-{library_path_hash}_description-{description_hash}", | ||
| name = package, | ||
| version = &description.version, | ||
| library_path_hash = &library_path_hash, | ||
| description_hash = &description_hash | ||
| ); | ||
|
|
||
| Ok(Some(Self { | ||
| key, | ||
| name: package.to_string(), | ||
| library_path: library_path.clone(), | ||
| description, | ||
| description_hash, | ||
| })) | ||
| } | ||
|
|
||
| pub(crate) fn name(&self) -> &str { | ||
| &self.name | ||
| } | ||
|
|
||
| pub(crate) fn version(&self) -> &str { | ||
| &self.description().version | ||
| } | ||
|
|
||
| pub(crate) fn description(&self) -> &Description { | ||
| &self.description | ||
| } | ||
|
|
||
| // Flat key unique enough to handle: | ||
| // - The same R package across multiple libpaths | ||
| // - Reinstalling a dev R package without changing the version (0.1.0.9000) | ||
| pub(crate) fn key(&self) -> &str { | ||
| &self.key | ||
| } | ||
|
|
||
| pub(crate) fn library_path(&self) -> &Path { | ||
| self.library_path.as_path() | ||
| } | ||
|
|
||
| pub(crate) fn package_path(&self) -> PathBuf { | ||
| self.library_path.join(&self.name) | ||
| } | ||
|
|
||
| pub(crate) fn description_path(&self) -> PathBuf { | ||
| self.package_path().join("DESCRIPTION") | ||
| } | ||
|
|
||
| pub(crate) fn namespace_path(&self) -> PathBuf { | ||
| self.package_path().join("NAMESPACE") | ||
| } | ||
|
|
||
| pub(crate) fn description_hash(&self) -> &str { | ||
| &self.description_hash | ||
| } | ||
| } | ||
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Some details on how to create this struct have been extracted out because for the base package download I have to create one of these for each of the base packages.