diff --git a/crates/pixi/tests/integration_rust/solve_group_tests.rs b/crates/pixi/tests/integration_rust/solve_group_tests.rs index 2e12df054c..4c18de0dec 100644 --- a/crates/pixi/tests/integration_rust/solve_group_tests.rs +++ b/crates/pixi/tests/integration_rust/solve_group_tests.rs @@ -5,7 +5,10 @@ use std::{ sync::Arc, }; -use pypi_mapping::{self, CustomMapping, MappingLocation, MappingSource, PurlSource}; +use pypi_mapping::{ + self, ProjectDefinedMapping, ProjectDefinedMappingLocation, PurlDerivationMode, + PurlDerivationSource, +}; use rattler_conda_types::{PackageName, Platform, RepoDataRecord}; use rattler_lock::DEFAULT_ENVIRONMENT_NAME; use reqwest_middleware::ClientBuilder; @@ -248,7 +251,7 @@ async fn test_purl_are_added_for_pypi() { .qualifiers() .get("source") .unwrap(), - PurlSource::HashMapping.as_str() + PurlDerivationSource::PrefixHashMapping.as_str() ); } }); @@ -285,7 +288,7 @@ async fn test_purl_are_missing_for_non_conda_forge() { channel: Some("dummy-channel".to_owned()), }; - let mapping_client = pypi_mapping::MappingClient::builder( + let mapping_client = pypi_mapping::PurlDerivationClient::builder( client.clone(), project .config() @@ -294,7 +297,11 @@ async fn test_purl_are_missing_for_non_conda_forge() { ) .finish(); mapping_client - .amend_purls(&MappingSource::Prefix, vec![&mut repo_data_record], None) + .amend_purls( + &PurlDerivationMode::Prefix, + vec![&mut repo_data_record], + None, + ) .await .unwrap(); @@ -328,15 +335,15 @@ async fn test_purl_are_generated_using_custom_mapping() { channel: Some("https://conda.anaconda.org/conda-forge/".to_owned()), }; - // We are using custom mapping + // We are using project-defined mapping let compressed_mapping = HashMap::from([("foo-bar-car".to_owned(), Some("my-test-name".to_owned()))]); let source = HashMap::from([( "https://conda.anaconda.org/conda-forge".to_owned(), - MappingLocation::Memory(compressed_mapping), + ProjectDefinedMappingLocation::InMemory(compressed_mapping), )]); - let mapping_client = pypi_mapping::MappingClient::builder( + let mapping_client = pypi_mapping::PurlDerivationClient::builder( client.clone(), project .config() @@ -346,7 +353,7 @@ async fn test_purl_are_generated_using_custom_mapping() { .finish(); mapping_client .amend_purls( - &MappingSource::Custom(Arc::new(CustomMapping::new(source))), + &PurlDerivationMode::ProjectDefined(Arc::new(ProjectDefinedMapping::new(source))), vec![&mut repo_data_record], None, ) @@ -385,7 +392,7 @@ async fn test_compressed_mapping_catch_not_pandoc_not_a_python_package() { let packages = vec![&mut repo_data_record]; - let mapping_client = pypi_mapping::MappingClient::builder( + let mapping_client = pypi_mapping::PurlDerivationClient::builder( client.clone(), project .config() @@ -394,7 +401,7 @@ async fn test_compressed_mapping_catch_not_pandoc_not_a_python_package() { ) .finish(); mapping_client - .amend_purls(&MappingSource::Prefix, packages, None) + .amend_purls(&PurlDerivationMode::Prefix, packages, None) .await .unwrap(); @@ -438,7 +445,7 @@ async fn test_dont_record_not_present_package_as_purl() { channel: Some("https://conda.anaconda.org/conda-forge/".to_owned()), }; - let mapping_client = pypi_mapping::MappingClient::builder( + let mapping_client = pypi_mapping::PurlDerivationClient::builder( client.clone(), project .config() @@ -448,7 +455,7 @@ async fn test_dont_record_not_present_package_as_purl() { .finish(); mapping_client .amend_purls( - project.pypi_name_mapping_source().unwrap(), + project.pypi_name_derivation_mode().unwrap(), vec![&mut repo_data_record, &mut boltons_repo_data_record], None, ) @@ -457,7 +464,7 @@ async fn test_dont_record_not_present_package_as_purl() { mapping_client .amend_purls( - project.pypi_name_mapping_source().unwrap(), + project.pypi_name_derivation_mode().unwrap(), vec![&mut repo_data_record, &mut boltons_repo_data_record], None, ) @@ -487,7 +494,7 @@ async fn test_dont_record_not_present_package_as_purl() { // so we test that we also record source=conda-forge-mapping qualifier assert_eq!( boltons_purl.qualifiers().get("source").unwrap(), - PurlSource::CompressedMapping.as_str() + PurlDerivationSource::PrefixCompressedMapping.as_str() ); } @@ -536,7 +543,7 @@ async fn test_we_record_not_present_package_as_purl_for_custom_mapping() { // `pixi-something-new-for-test` because `pixi-something-new-for-test` is // from conda-forge channel we will anyway record a purl for it // by assumption that it's a pypi package - // also we are using some custom mapping + // also we are using some project-defined mapping // so we will test for other purl qualifier comparing to // `test_dont_record_not_present_package_as_purl` test let foo_bar_package = Package::build("pixi-something-new", "2").finish(); @@ -558,7 +565,7 @@ async fn test_we_record_not_present_package_as_purl_for_custom_mapping() { let mut packages = vec![repo_data_record, boltons_repo_data_record]; - let mapping_client = pypi_mapping::MappingClient::builder( + let mapping_client = pypi_mapping::PurlDerivationClient::builder( client.clone(), project .config() @@ -568,7 +575,7 @@ async fn test_we_record_not_present_package_as_purl_for_custom_mapping() { .finish(); mapping_client .amend_purls( - project.pypi_name_mapping_source().unwrap(), + project.pypi_name_derivation_mode().unwrap(), &mut packages, None, ) @@ -591,18 +598,18 @@ async fn test_we_record_not_present_package_as_purl_for_custom_mapping() { assert_eq!(boltons_first_purl.name(), "boltons"); assert_eq!( boltons_first_purl.qualifiers().get("source").unwrap(), - PurlSource::ProjectDefinedMapping.as_str() + PurlDerivationSource::ProjectDefinedMapping.as_str() ); let package = packages.pop().unwrap(); - // With custom mapping, packages not in the mapping should NOT get purls - // This verifies that custom mapping is exclusive - only packages explicitly + // With project-defined mapping, packages not in the mapping should NOT get purls + // This verifies that project-defined mapping is exclusive - only packages explicitly // mapped should be considered as pypi packages assert!( package.package_record.purls.is_none() || package.package_record.purls.as_ref().unwrap().is_empty(), - "pixi-something-new should not have purls when not in custom mapping" + "pixi-something-new should not have purls when not in project-defined mapping" ); } @@ -637,7 +644,7 @@ async fn test_custom_mapping_channel_with_suffix() { let mut packages = vec![repo_data_record]; - let mapping_client = pypi_mapping::MappingClient::builder( + let mapping_client = pypi_mapping::PurlDerivationClient::builder( client.clone(), project .config() @@ -647,7 +654,7 @@ async fn test_custom_mapping_channel_with_suffix() { .finish(); mapping_client .amend_purls( - project.pypi_name_mapping_source().unwrap(), + project.pypi_name_derivation_mode().unwrap(), &mut packages, None, ) @@ -666,7 +673,7 @@ async fn test_custom_mapping_channel_with_suffix() { .qualifiers() .get("source") .unwrap(), - PurlSource::ProjectDefinedMapping.as_str() + PurlDerivationSource::ProjectDefinedMapping.as_str() ); } @@ -701,7 +708,7 @@ async fn test_repo_data_record_channel_with_suffix() { let mut packages = vec![repo_data_record]; - let mapping_client = pypi_mapping::MappingClient::builder( + let mapping_client = pypi_mapping::PurlDerivationClient::builder( client.clone(), project .config() @@ -711,7 +718,7 @@ async fn test_repo_data_record_channel_with_suffix() { .finish(); mapping_client .amend_purls( - project.pypi_name_mapping_source().unwrap(), + project.pypi_name_derivation_mode().unwrap(), &mut packages, None, ) @@ -729,7 +736,7 @@ async fn test_repo_data_record_channel_with_suffix() { .qualifiers() .get("source") .unwrap(), - PurlSource::ProjectDefinedMapping.as_str() + PurlDerivationSource::ProjectDefinedMapping.as_str() ); } @@ -764,7 +771,7 @@ async fn test_path_channel() { let mut packages = vec![repo_data_record]; - let mapping_client = pypi_mapping::MappingClient::builder( + let mapping_client = pypi_mapping::PurlDerivationClient::builder( client.clone(), project .config() @@ -774,7 +781,7 @@ async fn test_path_channel() { .finish(); mapping_client .amend_purls( - project.pypi_name_mapping_source().unwrap(), + project.pypi_name_derivation_mode().unwrap(), &mut packages, None, ) @@ -793,7 +800,7 @@ async fn test_path_channel() { .qualifiers() .get("source") .unwrap(), - PurlSource::ProjectDefinedMapping.as_str() + PurlDerivationSource::ProjectDefinedMapping.as_str() ); } @@ -849,7 +856,7 @@ async fn test_file_url_as_mapping_location() { let mut packages = vec![repo_data_record]; - let mapping_client = pypi_mapping::MappingClient::builder( + let mapping_client = pypi_mapping::PurlDerivationClient::builder( client.clone(), project .config() @@ -859,7 +866,7 @@ async fn test_file_url_as_mapping_location() { .finish(); mapping_client .amend_purls( - project.pypi_name_mapping_source().unwrap(), + project.pypi_name_derivation_mode().unwrap(), &mut packages, None, ) @@ -878,7 +885,7 @@ async fn test_file_url_as_mapping_location() { .qualifiers() .get("source") .unwrap(), - PurlSource::ProjectDefinedMapping.as_str() + PurlDerivationSource::ProjectDefinedMapping.as_str() ); } @@ -918,7 +925,7 @@ async fn test_disabled_mapping() { let mut packages = vec![boltons_repo_data_record]; - let mapping_client = pypi_mapping::MappingClient::builder( + let mapping_client = pypi_mapping::PurlDerivationClient::builder( blocked_client.into(), project .config() @@ -928,7 +935,7 @@ async fn test_disabled_mapping() { .finish(); mapping_client .amend_purls( - project.pypi_name_mapping_source().unwrap(), + project.pypi_name_derivation_mode().unwrap(), &mut packages, None, ) @@ -976,7 +983,7 @@ async fn test_custom_mapping_ignores_backwards_compatibility() { .into_simple_index() .expect("failed to create local simple index"); - // Create a custom mapping file that only includes specific packages + // Create a project-defined mapping file that only includes specific packages let temp_dir = TempDir::new().unwrap(); let mapping_file = temp_dir.path().join("map.json"); fs_err::write(&mapping_file, r#"{}"#).unwrap(); @@ -1032,8 +1039,8 @@ async fn test_custom_mapping_ignores_backwards_compatibility() { }) .expect("boltons should be present in conda packages"); - // The issue: boltons should NOT have purls when using custom mapping - // because it's not specified in our custom mapping + // The issue: boltons should NOT have purls when using project-defined mapping + // because it's not specified in our project-defined mapping // But due to backwards compatibility logic, it gets purls anyway let purls = match boltons_package { rattler_lock::CondaPackageData::Binary(binary) => &binary.package_record.purls, @@ -1245,12 +1252,12 @@ async fn test_missing_mapping_file_error_includes_path() { let project = pixi.workspace().unwrap(); let client = project.authenticated_client().unwrap(); - // Use a non-existent file path for the custom mapping + // Use a non-existent file path for the project-defined mapping let non_existent_path = Path::new("/this/path/does/not/exist/mapping.json"); let source = HashMap::from([( "https://conda.anaconda.org/conda-forge".to_owned(), - MappingLocation::Path(non_existent_path.to_path_buf()), + ProjectDefinedMappingLocation::Path(non_existent_path.to_path_buf()), )]); let foo_bar_package = Package::build("foo-bar-car", "2").finish(); @@ -1262,7 +1269,7 @@ async fn test_missing_mapping_file_error_includes_path() { channel: Some("https://conda.anaconda.org/conda-forge/".to_owned()), }; - let mapping_client = pypi_mapping::MappingClient::builder( + let mapping_client = pypi_mapping::PurlDerivationClient::builder( client.clone(), project .config() @@ -1272,7 +1279,7 @@ async fn test_missing_mapping_file_error_includes_path() { .finish(); let result = mapping_client .amend_purls( - &MappingSource::Custom(Arc::new(CustomMapping::new(source))), + &PurlDerivationMode::ProjectDefined(Arc::new(ProjectDefinedMapping::new(source))), vec![&mut repo_data_record], None, ) diff --git a/crates/pixi_core/src/lock_file/update.rs b/crates/pixi_core/src/lock_file/update.rs index dd26cf096d..69acf0f845 100644 --- a/crates/pixi_core/src/lock_file/update.rs +++ b/crates/pixi_core/src/lock_file/update.rs @@ -47,7 +47,7 @@ use pixi_uv_conversions::{ ConversionError, to_exclude_newer, to_extra_name, to_marker_environment, to_normalize, to_uv_extra_name, to_uv_normalize, }; -use pypi_mapping::{self, MappingClient}; +use pypi_mapping::{self, PurlDerivationClient}; use pypi_modifiers::pypi_marker_env::determine_marker_environment; use rattler::package_cache::PackageCache; use rattler_conda_types::{Arch, GenericVirtualPackage, PackageName, ParseChannelError, Platform}; @@ -1375,7 +1375,7 @@ pub struct UpdateContext<'p> { package_cache: PackageCache, /// The mapping client to use when fetching pypi mappings. - mapping_client: MappingClient, + mapping_client: PurlDerivationClient, /// A semaphore to limit the number of concurrent pypi solves. /// TODO(tim): we need this semaphore, to limit the number of concurrent /// solves. This is a problem when using source dependencies @@ -1596,7 +1596,7 @@ pub struct UpdateContextBuilder<'p> { package_cache: Option, /// The mapping client to use for fetching pypi mappings. - mapping_client: Option, + mapping_client: Option, /// The io concurrency semaphore to use when updating environments io_concurrency_limit: Option, @@ -1932,7 +1932,7 @@ impl<'p> UpdateContextBuilder<'p> { let cache_path = project .config() .cache_dir_for(pixi_config::CacheKind::PypiMapping)?; - MappingClient::builder(client, cache_path) + PurlDerivationClient::builder(client, cache_path) .with_concurrency_limit(project.concurrent_downloads_semaphore()) .finish() } @@ -2686,7 +2686,7 @@ pub enum TaskResult { async fn spawn_solve_conda_environment_task( group: GroupedEnvironment<'_>, existing_repodata_records: Arc, - mapping_client: MappingClient, + mapping_client: PurlDerivationClient, platform: PixiPlatformName, channel_priority: ChannelPriority, command_dispatcher: CommandDispatcher, @@ -2745,10 +2745,10 @@ async fn spawn_solve_conda_environment_task( // Whether there are pypi dependencies, and we should fetch purls. let has_pypi_dependencies = group.has_pypi_dependencies(); - // Whether we should use custom mapping location + // Whether we should use project-defined mapping locations let pypi_name_mapping_location = group .workspace() - .pypi_name_mapping_source() + .pypi_name_derivation_mode() .map_err(|err| { CommandDispatcherError::Failed(SolveCondaEnvironmentError::PypiMappingFailed( err.into(), diff --git a/crates/pixi_core/src/workspace/mod.rs b/crates/pixi_core/src/workspace/mod.rs index 29363fcda2..ab08564323 100644 --- a/crates/pixi_core/src/workspace/mod.rs +++ b/crates/pixi_core/src/workspace/mod.rs @@ -52,7 +52,9 @@ use pixi_utils::{ reqwest::LazyReqwestClient, variants::{VariantConfig, VariantValue}, }; -use pypi_mapping::{ChannelName, CustomMapping, MappingLocation, MappingSource}; +use pypi_mapping::{ + ChannelName, ProjectDefinedMapping, ProjectDefinedMappingLocation, PurlDerivationMode, +}; use rattler_conda_types::{ Channel, ChannelConfig, GenericVirtualPackage, MatchSpec, PackageName, Platform, Version, }; @@ -173,7 +175,7 @@ pub struct Workspace { env_vars: HashMap, /// The cache that contains mapping - mapping_source: OnceCell, + derivation_mode: OnceCell, /// The global configuration as loaded from the config file(s) config: Config, @@ -386,7 +388,7 @@ impl Workspace { workspace: manifest.workspace, package: manifest.package, env_vars, - mapping_source: Default::default(), + derivation_mode: Default::default(), config, s3_config, repodata_gateway: Default::default(), @@ -906,14 +908,14 @@ impl Workspace { self.pixi_dir().join(consts::ACTIVATION_ENV_CACHE_DIR) } - /// Returns what pypi mapping configuration we should use. - /// It can be a custom one in following format : conda_name: pypi_name - /// Or we can use our self-hosted - pub fn pypi_name_mapping_source(&self) -> miette::Result<&MappingSource> { - fn build_pypi_name_mapping_source( + /// Returns which PyPI purl derivation mode we should use. + /// It can use project-defined mappings in the format `conda_name: pypi_name`, + /// or the self-hosted prefix.dev mappings. + pub fn pypi_name_derivation_mode(&self) -> miette::Result<&PurlDerivationMode> { + fn build_pypi_name_derivation_mode( manifest: &WorkspaceManifest, channel_config: &ChannelConfig, - ) -> miette::Result { + ) -> miette::Result { match manifest.workspace.conda_pypi_map.clone() { Some(map) => { let channel_to_location_map = map @@ -926,7 +928,7 @@ impl Workspace { // User can disable the mapping by providing an empty map if channel_to_location_map.is_empty() { - return Ok(MappingSource::Disabled); + return Ok(PurlDerivationMode::Disabled); } let project_channels: HashSet<_> = manifest @@ -979,7 +981,7 @@ impl Workspace { || mapping_location.starts_with("file://") { match Url::parse(mapping_location) { - Ok(url) => MappingLocation::Url(url), + Ok(url) => ProjectDefinedMappingLocation::Url(url), Err(err) => { return Err(err).into_diagnostic().context(format!( "Could not convert {mapping_location} to URL" @@ -993,7 +995,7 @@ impl Workspace { } else { path }; - MappingLocation::Path(abs_path) + ProjectDefinedMappingLocation::Path(abs_path) }; Ok(( @@ -1001,15 +1003,17 @@ impl Workspace { url_or_path, )) }) - .collect::>>()?; + .collect::>>()?; - Ok(MappingSource::Custom(CustomMapping::new(mapping).into())) + Ok(PurlDerivationMode::ProjectDefined( + ProjectDefinedMapping::new(mapping).into(), + )) } - None => Ok(MappingSource::Prefix), + None => Ok(PurlDerivationMode::Prefix), } } - self.mapping_source.get_or_try_init(|| { - build_pypi_name_mapping_source(&self.workspace.value, &self.channel_config()) + self.derivation_mode.get_or_try_init(|| { + build_pypi_name_derivation_mode(&self.workspace.value, &self.channel_config()) }) } @@ -1588,13 +1592,13 @@ mod tests { "#; let workspace = Workspace::from_str(Path::new("pixi.toml"), file_contents).unwrap(); - let mapping = workspace.pypi_name_mapping_source().unwrap(); + let mapping = workspace.pypi_name_derivation_mode().unwrap(); let channel = Channel::from_str("conda-forge", &workspace.channel_config()).unwrap(); let canonical_name = channel.canonical_name(); let canonical_channel_name = canonical_name.trim_end_matches('/'); - assert_eq!(mapping.custom().unwrap().mapping.get(canonical_channel_name).unwrap(), &MappingLocation::Url(Url::parse("https://github.com/prefix-dev/parselmouth/blob/main/files/compressed_mapping.json").unwrap())); + assert_eq!(mapping.project_defined().unwrap().mapping.get(canonical_channel_name).unwrap(), &ProjectDefinedMappingLocation::Url(Url::parse("https://github.com/prefix-dev/parselmouth/blob/main/files/compressed_mapping.json").unwrap())); // Check url channel as map key let file_contents = r#" @@ -1606,10 +1610,10 @@ mod tests { "#; let workspace = Workspace::from_str(Path::new("pixi.toml"), file_contents).unwrap(); - let mapping = workspace.pypi_name_mapping_source().unwrap(); + let mapping = workspace.pypi_name_derivation_mode().unwrap(); assert_eq!( mapping - .custom() + .project_defined() .unwrap() .mapping .get( @@ -1622,7 +1626,7 @@ mod tests { .trim_end_matches('/') ) .unwrap(), - &MappingLocation::Path( + &ProjectDefinedMappingLocation::Path( workspace .channel_config() .root_dir @@ -1645,7 +1649,7 @@ mod tests { "#; let workspace = Workspace::from_str(Path::new("pixi.toml"), file_contents).unwrap(); - assert!(workspace.pypi_name_mapping_source().is_ok()); + assert!(workspace.pypi_name_derivation_mode().is_ok()); let non_existing_channel = r#" [workspace] @@ -1660,7 +1664,7 @@ mod tests { // so we need to disable colors for snapshot console::set_colors_enabled(false); - insta::assert_snapshot!(workspace.pypi_name_mapping_source().unwrap_err()); + insta::assert_snapshot!(workspace.pypi_name_derivation_mode().unwrap_err()); } #[test] diff --git a/crates/pypi_mapping/src/channel.rs b/crates/pypi_mapping/src/channel.rs new file mode 100644 index 0000000000..2cf94a34f2 --- /dev/null +++ b/crates/pypi_mapping/src/channel.rs @@ -0,0 +1,23 @@ +use std::str::FromStr; + +use rattler_conda_types::RepoDataRecord; +use url::Url; + +/// Returns `true` if the specified record refers to a conda-forge package. +pub fn is_conda_forge_record(record: &RepoDataRecord) -> bool { + record + .channel + .as_ref() + .and_then(|channel| Url::from_str(channel).ok()) + .is_some_and(|u| is_conda_forge_url(&u)) +} + +/// Returns `true` if the specified url refers to a conda-forge channel. +pub fn is_conda_forge_url(url: &Url) -> bool { + url.path().starts_with("/conda-forge") +} + +/// Normalize channel strings so project-defined mappings and repodata records can be compared. +pub(crate) fn normalize_channel(channel: &str) -> &str { + channel.trim_end_matches('/') +} diff --git a/crates/pypi_mapping/src/derivation.rs b/crates/pypi_mapping/src/derivation.rs new file mode 100644 index 0000000000..0bb9a2d11f --- /dev/null +++ b/crates/pypi_mapping/src/derivation.rs @@ -0,0 +1,25 @@ +use rattler_conda_types::PackageUrl; + +/// The result of asking a mapping source to derive purls for a record. +pub(crate) enum DerivationOutcome { + /// This source does not know about the record; another source may be tried. + NotApplicable, + /// This source knows the record maps to no PyPI package. + NoPurls, + /// This source derived one or more purls for the record. + Purls(Vec), +} + +impl DerivationOutcome { + pub(crate) fn is_not_applicable(&self) -> bool { + matches!(self, Self::NotApplicable) + } + + pub(crate) fn into_purls(self) -> Option> { + match self { + Self::NotApplicable => None, + Self::NoPurls => Some(Vec::new()), + Self::Purls(purls) => Some(purls), + } + } +} diff --git a/crates/pypi_mapping/src/derivation_mode.rs b/crates/pypi_mapping/src/derivation_mode.rs new file mode 100644 index 0000000000..f5f49708a1 --- /dev/null +++ b/crates/pypi_mapping/src/derivation_mode.rs @@ -0,0 +1,45 @@ +use std::{collections::HashMap, path::PathBuf, sync::Arc}; + +use url::Url; + +use crate::{CompressedMapping, ProjectDefinedMapping}; + +pub type ChannelName = String; +pub type MappingMap = HashMap; +pub type MappingByChannel = HashMap; + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum ProjectDefinedMappingLocation { + Path(PathBuf), + Url(Url), + InMemory(CompressedMapping), +} + +/// User-selected mapping mode. +/// +/// This controls which resolver family [`crate::PurlDerivationClient`] uses. It is not +/// the same thing as [`crate::PurlDerivationSource`], which identifies the +/// concrete resolver that produced an individual purl. +#[derive(Debug, Clone)] +pub enum PurlDerivationMode { + /// Use only project-defined per-channel mappings. + ProjectDefined(Arc), + /// Use prefix.dev mappings: hash mapping first, then compressed mapping. + Prefix, + /// Disable project-defined and prefix.dev mappings. + /// + /// Note: the current resolver still allows the conda-forge verbatim fallback + /// in this mode. + Disabled, +} + +impl PurlDerivationMode { + /// Return the project-defined mapping + /// for `PurlDerivationMode::ProjectDefined` + pub fn project_defined(&self) -> Option> { + match self { + PurlDerivationMode::ProjectDefined(mapping) => Some(mapping.clone()), + _ => None, + } + } +} diff --git a/crates/pypi_mapping/src/lib.rs b/crates/pypi_mapping/src/lib.rs index 95f4fad373..cf06a3b502 100644 --- a/crates/pypi_mapping/src/lib.rs +++ b/crates/pypi_mapping/src/lib.rs @@ -1,8 +1,22 @@ +//! Derive PyPI package URLs for conda packages. +//! +//! There are two related concepts in this crate: +//! +//! - [`PurlDerivationMode`] is the user-selected mapping mode: project-defined, prefix.dev, or disabled. +//! - [`PurlDerivationSource`] is the concrete resolver/provenance for an individual purl. +//! +//! The concrete derivation sources are: +//! +//! 1. [`PurlDerivationSource::ProjectDefinedMapping`] — user/project-defined per-channel mapping. +//! 2. [`PurlDerivationSource::PrefixHashMapping`] — prefix.dev hash mapping by package SHA256. +//! 3. [`PurlDerivationSource::PrefixCompressedMapping`] — prefix.dev compressed name mapping. +//! 4. [`PurlDerivationSource::CondaForgeVerbatimFallback`] — conda-forge fallback that assumes +//! the conda package name is the PyPI package name. + use std::{ collections::{BTreeSet, HashMap}, path::PathBuf, - str::FromStr, - sync::{Arc, Mutex}, + sync::Arc, time::{Duration, Instant}, }; @@ -17,112 +31,64 @@ use reqwest_retry::{RetryTransientMiddleware, policies::ExponentialBackoff}; use thiserror::Error; use tokio::sync::Semaphore; use tracing::Instrument; -use url::Url; - -mod custom_mapping; -pub mod prefix; +mod channel; +mod derivation; +mod derivation_mode; +mod metrics; +mod purl; mod reporter; +pub mod resolvers; -pub use custom_mapping::CustomMapping; +pub use channel::{is_conda_forge_record, is_conda_forge_url}; +pub use derivation_mode::{ + ChannelName, MappingByChannel, MappingMap, ProjectDefinedMappingLocation, PurlDerivationMode, +}; +pub use metrics::CacheMetrics; +pub use purl::PurlDerivationSource; pub use reporter::Reporter; +pub use resolvers::ProjectDefinedMapping; -use crate::custom_mapping::CustomMappingClient; +use crate::{ + derivation::DerivationOutcome, + resolvers::{CondaForgeVerbatim, ProjectDefinedResolver}, +}; /// A compressed mapping is a mapping of a package name to a potential pypi /// name. pub type CompressedMapping = HashMap>; -pub type ChannelName = String; - -pub type MappingMap = HashMap; -pub type MappingByChannel = HashMap; - -#[derive(Debug, Clone, PartialEq, Eq)] -pub enum MappingLocation { - Path(PathBuf), - Url(Url), - Memory(CompressedMapping), -} - -/// This enum represents the source of mapping -/// it can be user-defined ( custom ) -/// or from prefix.dev ( prefix ) -#[derive(Debug, Clone)] -pub enum MappingSource { - Custom(Arc), - Prefix, - Disabled, -} - -impl MappingSource { - /// Return the custom `MappingMap` - /// for `MappingSource::Custom` - pub fn custom(&self) -> Option> { - match self { - MappingSource::Custom(mapping) => Some(mapping.clone()), - _ => None, - } - } -} - -/// This enum represents the source of mapping -/// it can be user-defined ( custom ) -/// or from prefix.dev ( prefix ) -#[derive(Debug, Clone)] -pub enum PurlSource { - HashMapping, - CompressedMapping, - ProjectDefinedMapping, -} - -impl PurlSource { - pub fn as_str(&self) -> &str { - match self { - PurlSource::HashMapping => "hash-mapping", - PurlSource::CompressedMapping => "compressed-mapping", - PurlSource::ProjectDefinedMapping => "project-defined-mapping", - } - } -} - -/// Returns `true` if the specified record refers to a conda-forge package. -pub fn is_conda_forge_record(record: &RepoDataRecord) -> bool { - record - .channel - .as_ref() - .and_then(|channel| Url::from_str(channel).ok()) - .is_some_and(|u| is_conda_forge_url(&u)) -} - -/// Returns `true` if the specified url refers to a conda-forge channel. -pub fn is_conda_forge_url(url: &Url) -> bool { - url.path().starts_with("/conda-forge") -} - /// The mapping client implements the logic to derive purls for conda packages. -/// Internally it uses a combination of sources and also allows overwriting the -/// sources for particular channels. +/// +/// The resolver order depends on [`PurlDerivationMode`]: +/// +/// - [`PurlDerivationMode::ProjectDefined`]: project-defined per-channel mapping only. +/// - [`PurlDerivationMode::Prefix`]: prefix hash mapping, then prefix compressed mapping, +/// then the conda-forge verbatim fallback. +/// - [`PurlDerivationMode::Disabled`]: no project-defined or prefix mapping. The current behavior +/// still allows the conda-forge verbatim fallback. +/// +/// Concrete purl provenance is represented by [`PurlDerivationSource`]. /// /// For more information see: -/// - [`prefix::CompressedMappingClient`] -/// - [`prefix::HashMappingClient`] -/// - [`CondaForgeVerbatim`] +/// - [`resolvers::PrefixHashResolver`] +/// - [`resolvers::PrefixCompressedResolver`] +/// - [`PurlDerivationSource::CondaForgeVerbatimFallback`] #[derive(Clone)] -pub struct MappingClient { +pub struct PurlDerivationClient { client: LazyClient, - compressed_mapping: prefix::CompressedMappingClient, - hash_mapping: prefix::HashMappingClient, + compressed_mapping: resolvers::PrefixCompressedResolver, + hash_mapping: resolvers::PrefixHashResolver, cache_path: PathBuf, } -pub struct MappingClientBuilder { +pub struct PurlDerivationClientBuilder { client: LazyClient, - compressed_mapping: prefix::CompressedMappingClientBuilder, - hash_mapping: prefix::HashMappingClientBuilder, + compressed_mapping: resolvers::PrefixCompressedResolverBuilder, + hash_mapping: resolvers::PrefixHashResolverBuilder, cache_path: PathBuf, } -impl MappingClientBuilder { +impl PurlDerivationClientBuilder { /// Sets the concurrency limit for the client. This is useful to limit the /// maximum number of concurrent requests. pub fn with_concurrency_limit(self, limit: Arc) -> Self { @@ -144,8 +110,8 @@ impl MappingClientBuilder { } /// Finish the construction of the client and return it. - pub fn finish(self) -> MappingClient { - MappingClient { + pub fn finish(self) -> PurlDerivationClient { + PurlDerivationClient { client: self.client, compressed_mapping: self.compressed_mapping.finish(), hash_mapping: self.hash_mapping.finish(), @@ -172,15 +138,15 @@ impl From for MappingError { } } -impl MappingClient { - /// Construct a new `MappingClientBuilder` with the provided `Client` and +impl PurlDerivationClient { + /// Construct a new `PurlDerivationClientBuilder` with the provided `Client` and /// the resolved on-disk `cache_path` for the conda-pypi mapping cache. /// /// The caller is responsible for resolving `cache_path` (e.g. through /// `pixi_config::Config::cache_dir_for`) so that workspace-level /// `[cache.pypi-mapping]` overrides are respected; this crate stays /// agnostic about which config layer wins. - pub fn builder(client: LazyClient, cache_path: PathBuf) -> MappingClientBuilder { + pub fn builder(client: LazyClient, cache_path: PathBuf) -> PurlDerivationClientBuilder { // Construct a client with a retry policy and local caching let retry_policy = ExponentialBackoff::builder().build_with_max_retries(3); let retry_strategy = RetryTransientMiddleware::new_with_policy(retry_policy); @@ -201,10 +167,12 @@ impl MappingClient { .build() }); - MappingClientBuilder { + PurlDerivationClientBuilder { client: wrapped_client.clone(), - compressed_mapping: prefix::CompressedMappingClient::builder(wrapped_client.clone()), - hash_mapping: prefix::HashMappingClient::builder(wrapped_client), + compressed_mapping: resolvers::PrefixCompressedResolver::builder( + wrapped_client.clone(), + ), + hash_mapping: resolvers::PrefixHashResolver::builder(wrapped_client), cache_path, } } @@ -212,7 +180,7 @@ impl MappingClient { /// Given a set of `RepoDataRecord`s, amend the purls for each record. pub async fn amend_purls( &self, - mapping_source: &MappingSource, + derivation_mode: &PurlDerivationMode, conda_packages: impl IntoIterator, reporter: Option>, ) -> miette::Result<()> { @@ -234,20 +202,23 @@ impl MappingClient { let metrics = CacheMetrics::default(); - // Fetch custom mapped channels if any. - let custom_mappings = if let MappingSource::Custom(mapping_url) = mapping_source { - Some(CustomMappingClient::from( - mapping_url.fetch_custom_mapping(&self.client).await?, - )) - } else { - None - }; + // Fetch project-defined mapped channels if any. + let project_defined_mappings = + if let PurlDerivationMode::ProjectDefined(mapping_url) = derivation_mode { + Some(ProjectDefinedResolver::from( + mapping_url + .fetch_project_defined_mapping(&self.client) + .await?, + )) + } else { + None + }; let mut amend_futures = FuturesUnordered::new(); let total_records = records.len(); for record in records.into_iter() { let reporter = reporter.clone(); - let custom_mappings = &custom_mappings; + let project_defined_mappings = &project_defined_mappings; let cache_metrics = &metrics; let file_name = record.identifier.to_file_name(); let derive_purls_future = async move { @@ -255,16 +226,14 @@ impl MappingClient { reporter.download_started(record, total_records); } - let derived_purls = if matches!(mapping_source, MappingSource::Disabled) { - Ok(None) - } else if let Some(custom_mappings) = custom_mappings - .as_ref() - .filter(|mapping| mapping.is_mapping_for_record(record)) - { - custom_mappings.derive_purls(record, cache_metrics).await - } else { - self.derive_purls_from_clients(record, cache_metrics).await - }; + let derived_purls = self + .derive_purls_for_record( + derivation_mode, + project_defined_mappings.as_ref(), + record, + cache_metrics, + ) + .await; match derived_purls { Ok(derived_purls) => { @@ -290,19 +259,9 @@ impl MappingClient { let mut amended_records = 0; let mut total_records = 0; while let Some(next) = amend_futures.next().await { - let (record, mut derived_purls) = next.into_diagnostic()?; - - // As a last resort use the verbatim conda-forge purls. - // But only if we're not using a custom mapping, since custom mapping - // should be exclusive - only packages explicitly in the mapping get purls. - if derived_purls.is_none() && !matches!(mapping_source, MappingSource::Custom(_)) { - derived_purls = CondaForgeVerbatim - .derive_purls(record, &metrics) - .await - .into_diagnostic()?; - } + let (record, derived_purls) = next.into_diagnostic()?; - if let Some(derived_purls) = derived_purls { + if let Some(derived_purls) = derived_purls.into_purls() { amend_purls(record, derived_purls); amended_records += 1; } @@ -313,10 +272,7 @@ impl MappingClient { drop(amend_futures); let duration = start.elapsed(); - let data = metrics - .data - .into_inner() - .expect("locking shouldnt fail in this case"); + let data = metrics.into_data(); tracing::info!( "Amended {} out of {} records with purls in {:?}. {} cache hits and {} cache misses ({}%).", amended_records, @@ -337,25 +293,58 @@ impl MappingClient { Ok(()) } - async fn derive_purls_from_clients( + async fn derive_purls_for_record( &self, + derivation_mode: &PurlDerivationMode, + project_defined_mappings: Option<&ProjectDefinedResolver>, record: &RepoDataRecord, cache_metrics: &CacheMetrics, - ) -> Result>, MappingError> { + ) -> Result { + let purls = if matches!(derivation_mode, PurlDerivationMode::Disabled) { + DerivationOutcome::NotApplicable + } else if let Some(project_defined_mappings) = + project_defined_mappings.filter(|mapping| mapping.is_mapping_for_record(record)) + { + project_defined_mappings + .derive_project_defined_purls(record, cache_metrics) + .await? + } else { + self.derive_purls_from_prefix(record, cache_metrics).await? + }; + + // As a last resort use the verbatim conda-forge purls. + // But only if we're not using a project-defined mapping, since project-defined mapping + // should be exclusive - only packages explicitly in the mapping get purls. + if purls.is_not_applicable() + && !matches!(derivation_mode, PurlDerivationMode::ProjectDefined(_)) + { + return CondaForgeVerbatim + .derive_conda_forge_verbatim_purls(record, cache_metrics) + .await; + } + + Ok(purls) + } + + async fn derive_purls_from_prefix( + &self, + record: &RepoDataRecord, + cache_metrics: &CacheMetrics, + ) -> Result { // Try to get the purls from the hash mapping. - let mut purls = self + let purls = self .hash_mapping - .derive_purls(record, cache_metrics) + .derive_prefix_hash_purls(record, cache_metrics) .await .map_err(|e| self.with_cache_path_context(e))?; // Otherwise try from the compressed mapping - if purls.is_none() { - purls = self + if purls.is_not_applicable() { + return self .compressed_mapping - .derive_purls(record, cache_metrics) + .derive_prefix_compressed_purls(record, cache_metrics) .await - .map_err(|e| self.with_cache_path_context(e))?; + .map_err(|e| self.with_cache_path_context(e)); } Ok(purls) @@ -392,75 +381,3 @@ fn amend_purls(record: &mut RepoDataRecord, purls: impl IntoIterator Result>, MappingError>; -} - -/// A struct that provides derived package urls for conda-forge records where -/// the name of the package is just assumed to be the pypi name. -/// -/// This is a fallback for when the mapping is not available. -pub struct CondaForgeVerbatim; - -impl DerivePurls for CondaForgeVerbatim { - async fn derive_purls( - &self, - record: &RepoDataRecord, - _cache_metrics: &CacheMetrics, - ) -> Result>, MappingError> { - if !is_conda_forge_record(record) { - return Ok(None); - } - - // Try to convert the name and version into pep440/pep508 compliant versions. - let (Some(name), Some(_version)) = ( - pep508_rs::PackageName::from_str(record.package_record.name.as_source()).ok(), - pep440_rs::Version::from_str(&record.package_record.version.as_str()).ok(), - ) else { - // If we cannot convert the name or version, we cannot build a purl. - return Ok(Some(vec![])); - }; - - // Build the purl - let purl = PackageUrl::builder(String::from("pypi"), name.to_string()); - let built_purl = purl.build().expect("valid pypi package url"); - Ok(Some(vec![built_purl])) - } -} - -#[derive(Default)] -pub struct CacheMetrics { - data: Mutex, -} - -impl CacheMetrics { - pub fn record_request_response(&self, response: &reqwest::Response) { - let cache_header = response.headers().get("x-cache"); - if cache_header.and_then(|h| h.to_str().ok()) == Some("HIT") { - let mut data = self.data.lock().unwrap(); - data.cache_hits += 1; - } else { - let mut data = self.data.lock().unwrap(); - data.cache_misses += 1; - tracing::debug!("Cache miss on '{}' ({})", response.url(), response.status()); - } - } -} - -#[derive(Default)] -struct CacheMetricsData { - cache_hits: usize, - cache_misses: usize, -} diff --git a/crates/pypi_mapping/src/metrics.rs b/crates/pypi_mapping/src/metrics.rs new file mode 100644 index 0000000000..9376d2c7cf --- /dev/null +++ b/crates/pypi_mapping/src/metrics.rs @@ -0,0 +1,32 @@ +use std::sync::Mutex; + +#[derive(Default)] +pub struct CacheMetrics { + data: Mutex, +} + +impl CacheMetrics { + pub fn record_request_response(&self, response: &reqwest::Response) { + let cache_header = response.headers().get("x-cache"); + if cache_header.and_then(|h| h.to_str().ok()) == Some("HIT") { + let mut data = self.data.lock().unwrap(); + data.cache_hits += 1; + } else { + let mut data = self.data.lock().unwrap(); + data.cache_misses += 1; + tracing::debug!("Cache miss on '{}' ({})", response.url(), response.status()); + } + } + + pub(crate) fn into_data(self) -> CacheMetricsData { + self.data + .into_inner() + .expect("locking shouldnt fail in this case") + } +} + +#[derive(Default)] +pub(crate) struct CacheMetricsData { + pub cache_hits: usize, + pub cache_misses: usize, +} diff --git a/crates/pypi_mapping/src/prefix/mod.rs b/crates/pypi_mapping/src/prefix/mod.rs deleted file mode 100644 index e0e6e62e50..0000000000 --- a/crates/pypi_mapping/src/prefix/mod.rs +++ /dev/null @@ -1,7 +0,0 @@ -mod compressed_mapping_client; -mod hash_mapping_client; - -pub use compressed_mapping_client::{CompressedMappingClient, CompressedMappingClientBuilder}; -pub use hash_mapping_client::{ - HashMappingClient, HashMappingClientBuilder, HashMappingClientError, -}; diff --git a/crates/pypi_mapping/src/purl.rs b/crates/pypi_mapping/src/purl.rs new file mode 100644 index 0000000000..837783061d --- /dev/null +++ b/crates/pypi_mapping/src/purl.rs @@ -0,0 +1,56 @@ +use rattler_conda_types::PackageUrl; + +/// Identifies the concrete mechanism that derived a PyPI purl. +/// +/// This is intentionally different from [`crate::PurlDerivationMode`]: +/// `PurlDerivationMode` describes the user-selected mapping mode, while this enum +/// describes the specific resolver that produced a purl. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum PurlDerivationSource { + /// prefix.dev hash mapping, looked up by package SHA256. + PrefixHashMapping, + /// prefix.dev compressed name mapping, looked up by conda package name. + PrefixCompressedMapping, + /// Project/user-defined per-channel mapping. + ProjectDefinedMapping, + /// Last-resort conda-forge fallback that assumes the conda name is the PyPI name. + /// + /// This source is not encoded as a `source` qualifier in generated purls. + CondaForgeVerbatimFallback, +} + +impl PurlDerivationSource { + pub fn as_str(&self) -> &str { + match self { + PurlDerivationSource::PrefixHashMapping => "hash-mapping", + PurlDerivationSource::PrefixCompressedMapping => "compressed-mapping", + PurlDerivationSource::ProjectDefinedMapping => "project-defined-mapping", + PurlDerivationSource::CondaForgeVerbatimFallback => "conda-forge-verbatim-fallback", + } + } + + pub(crate) fn purl_qualifier(self) -> Option<&'static str> { + match self { + PurlDerivationSource::PrefixHashMapping => Some("hash-mapping"), + PurlDerivationSource::PrefixCompressedMapping => Some("compressed-mapping"), + PurlDerivationSource::ProjectDefinedMapping => Some("project-defined-mapping"), + PurlDerivationSource::CondaForgeVerbatimFallback => None, + } + } +} + +/// Builds a PyPI package URL, optionally tagging it with the derivation source. +pub(crate) fn pypi_purl( + name: impl Into, + source: Option, +) -> PackageUrl { + let mut builder = PackageUrl::builder(String::from("pypi"), name.into()); + + if let Some(source) = source.and_then(PurlDerivationSource::purl_qualifier) { + builder = builder + .with_qualifier("source", source) + .expect("valid qualifier"); + } + + builder.build().expect("valid pypi package url") +} diff --git a/crates/pypi_mapping/src/resolvers/conda_forge_verbatim.rs b/crates/pypi_mapping/src/resolvers/conda_forge_verbatim.rs new file mode 100644 index 0000000000..6342d4822d --- /dev/null +++ b/crates/pypi_mapping/src/resolvers/conda_forge_verbatim.rs @@ -0,0 +1,41 @@ +use std::str::FromStr; + +use rattler_conda_types::RepoDataRecord; + +use crate::{ + CacheMetrics, MappingError, derivation::DerivationOutcome, is_conda_forge_record, + purl::pypi_purl, +}; + +/// A resolver for conda-forge records where the conda package name is assumed +/// to be the PyPI name. +/// +/// This is a last-resort fallback for when the prefix.dev mappings do not know +/// about a conda-forge package. +pub(crate) struct CondaForgeVerbatim; + +impl CondaForgeVerbatim { + pub(crate) async fn derive_conda_forge_verbatim_purls( + &self, + record: &RepoDataRecord, + _cache_metrics: &CacheMetrics, + ) -> Result { + if !is_conda_forge_record(record) { + return Ok(DerivationOutcome::NotApplicable); + } + + // Try to convert the name and version into pep440/pep508 compliant versions. + let (Some(name), Some(_version)) = ( + pep508_rs::PackageName::from_str(record.package_record.name.as_source()).ok(), + pep440_rs::Version::from_str(&record.package_record.version.as_str()).ok(), + ) else { + // If we cannot convert the name or version, we cannot build a purl. + return Ok(DerivationOutcome::NoPurls); + }; + + Ok(DerivationOutcome::Purls(vec![pypi_purl( + name.to_string(), + None, + )])) + } +} diff --git a/crates/pypi_mapping/src/resolvers/mod.rs b/crates/pypi_mapping/src/resolvers/mod.rs new file mode 100644 index 0000000000..5ef02bb988 --- /dev/null +++ b/crates/pypi_mapping/src/resolvers/mod.rs @@ -0,0 +1,21 @@ +//! Concrete purl derivation resolvers. +//! +//! Each module corresponds to one [`crate::PurlDerivationSource`] variant: +//! +//! - [`ProjectDefinedMapping`] derives from project/user-defined per-channel mappings. +//! - [`PrefixHashResolver`] derives from prefix.dev hash mappings keyed by package SHA256. +//! - [`PrefixCompressedResolver`] derives from prefix.dev compressed name mappings. +//! - `CondaForgeVerbatim` derives by assuming conda-forge package names are PyPI names. + +mod conda_forge_verbatim; +mod prefix_compressed_resolver; +mod prefix_hash_resolver; +mod project_defined_mapping; + +pub(crate) use conda_forge_verbatim::CondaForgeVerbatim; +pub use prefix_compressed_resolver::{PrefixCompressedResolver, PrefixCompressedResolverBuilder}; +pub use prefix_hash_resolver::{ + PrefixHashResolver, PrefixHashResolverBuilder, PrefixHashResolverError, +}; +pub use project_defined_mapping::ProjectDefinedMapping; +pub(crate) use project_defined_mapping::ProjectDefinedResolver; diff --git a/crates/pypi_mapping/src/prefix/compressed_mapping_client.rs b/crates/pypi_mapping/src/resolvers/prefix_compressed_resolver.rs similarity index 75% rename from crates/pypi_mapping/src/prefix/compressed_mapping_client.rs rename to crates/pypi_mapping/src/resolvers/prefix_compressed_resolver.rs index 16dcda7000..4f87a1dbff 100644 --- a/crates/pypi_mapping/src/prefix/compressed_mapping_client.rs +++ b/crates/pypi_mapping/src/resolvers/prefix_compressed_resolver.rs @@ -1,13 +1,14 @@ use std::sync::Arc; use async_once_cell::OnceCell; -use rattler_conda_types::{PackageUrl, RepoDataRecord}; +use rattler_conda_types::RepoDataRecord; use rattler_networking::LazyClient; use tokio::sync::Semaphore; use url::Url; use crate::{ - CacheMetrics, CompressedMapping, DerivePurls, MappingError, PurlSource, is_conda_forge_record, + CacheMetrics, CompressedMapping, MappingError, PurlDerivationSource, + derivation::DerivationOutcome, is_conda_forge_record, purl::pypi_purl, }; const COMPRESSED_MAPPING: &str = @@ -22,22 +23,22 @@ const COMPRESSED_MAPPING: &str = /// The downside of this client is that it only contains information for /// conda-forge packages. #[derive(Clone)] -pub struct CompressedMappingClient { - inner: Arc, +pub struct PrefixCompressedResolver { + inner: Arc, } -pub struct CompressedMappingClientBuilder { +pub struct PrefixCompressedResolverBuilder { client: LazyClient, limit: Option>, } -struct CompressedMappingClientInner { +struct PrefixCompressedResolverInner { client: LazyClient, mapping: OnceCell, limit: Option>, } -impl CompressedMappingClientBuilder { +impl PrefixCompressedResolverBuilder { /// Sets the concurrency limit for the client. This is useful to limit the /// maximum number of concurrent requests. pub fn with_concurrency_limit(self, limit: Arc) -> Self { @@ -55,9 +56,9 @@ impl CompressedMappingClientBuilder { } /// Finish the construction of the client and return it. - pub fn finish(self) -> CompressedMappingClient { - CompressedMappingClient { - inner: Arc::new(CompressedMappingClientInner { + pub fn finish(self) -> PrefixCompressedResolver { + PrefixCompressedResolver { + inner: Arc::new(PrefixCompressedResolverInner { client: self.client, limit: self.limit, mapping: OnceCell::new(), @@ -66,11 +67,11 @@ impl CompressedMappingClientBuilder { } } -impl CompressedMappingClient { - /// Constructs a new `HashMappingClient` with the provided +impl PrefixCompressedResolver { + /// Constructs a new mapping client with the provided /// `ClientWithMiddleware`. - pub fn builder(client: LazyClient) -> CompressedMappingClientBuilder { - CompressedMappingClientBuilder { + pub fn builder(client: LazyClient) -> PrefixCompressedResolverBuilder { + PrefixCompressedResolverBuilder { client, limit: None, } @@ -115,15 +116,15 @@ impl CompressedMappingClient { } } -impl DerivePurls for CompressedMappingClient { - async fn derive_purls( +impl PrefixCompressedResolver { + pub(crate) async fn derive_prefix_compressed_purls( &self, record: &RepoDataRecord, cache_metrics: &CacheMetrics, - ) -> Result>, MappingError> { + ) -> Result { // If the record does not refer to a conda-forge mapping we can skip it if !is_conda_forge_record(record) { - return Ok(None); + return Ok(DerivationOutcome::NotApplicable); } // Get the mapping from the server @@ -132,20 +133,18 @@ impl DerivePurls for CompressedMappingClient { // Determine the mapping for the record let Some(potential_pypi_name) = mapping.get(record.package_record.name.as_normalized()) else { - return Ok(None); + return Ok(DerivationOutcome::NotApplicable); }; // If the mapping is empty, there are no purls. let Some(pypi_name) = potential_pypi_name else { - return Ok(Some(vec![])); + return Ok(DerivationOutcome::NoPurls); }; // Construct the purl - let purl = PackageUrl::builder(String::from("pypi"), pypi_name) - .with_qualifier("source", PurlSource::CompressedMapping.as_str()) - .expect("valid qualifier"); - let built_purl = purl.build().expect("valid pypi package url"); - - Ok(Some(vec![built_purl])) + Ok(DerivationOutcome::Purls(vec![pypi_purl( + pypi_name, + Some(PurlDerivationSource::PrefixCompressedMapping), + )])) } } diff --git a/crates/pypi_mapping/src/prefix/hash_mapping_client.rs b/crates/pypi_mapping/src/resolvers/prefix_hash_resolver.rs similarity index 83% rename from crates/pypi_mapping/src/prefix/hash_mapping_client.rs rename to crates/pypi_mapping/src/resolvers/prefix_hash_resolver.rs index ee1a46ec98..47565a13bb 100644 --- a/crates/pypi_mapping/src/prefix/hash_mapping_client.rs +++ b/crates/pypi_mapping/src/resolvers/prefix_hash_resolver.rs @@ -4,7 +4,7 @@ use std::{ }; use dashmap::{DashMap, Entry}; -use rattler_conda_types::{PackageUrl, RepoDataRecord}; +use rattler_conda_types::RepoDataRecord; use rattler_digest::Sha256Hash; use rattler_networking::LazyClient; use reqwest::StatusCode; @@ -12,7 +12,10 @@ use serde::{Deserialize, Serialize}; use thiserror::Error; use tokio::sync::{Semaphore, broadcast}; -use crate::{CacheMetrics, DerivePurls, MappingError, PurlSource}; +use crate::{ + CacheMetrics, MappingError, PurlDerivationSource, derivation::DerivationOutcome, + purl::pypi_purl, +}; const STORAGE_URL: &str = "https://conda-mapping.prefix.dev"; const HASH_DIR: &str = "hash-v0"; @@ -28,27 +31,27 @@ pub struct PackagePypiMapping { } #[derive(Debug, Error)] -pub enum HashMappingClientError { +pub enum PrefixHashResolverError { #[error(transparent)] Io(#[from] std::io::Error), #[error(transparent)] Reqwest(#[from] reqwest_middleware::Error), } -impl From for HashMappingClientError { +impl From for PrefixHashResolverError { fn from(err: reqwest::Error) -> Self { - HashMappingClientError::Reqwest(err.into()) + PrefixHashResolverError::Reqwest(err.into()) } } -impl From for MappingError { - fn from(value: HashMappingClientError) -> Self { +impl From for MappingError { + fn from(value: PrefixHashResolverError) -> Self { match value { - HashMappingClientError::Io(err) => MappingError::IoError { + PrefixHashResolverError::Io(err) => MappingError::IoError { source: err, path: std::path::PathBuf::new(), }, - HashMappingClientError::Reqwest(err) => MappingError::Reqwest(err), + PrefixHashResolverError::Reqwest(err) => MappingError::Reqwest(err), } } } @@ -63,11 +66,11 @@ impl From for MappingError { /// This client can be shared between multiple tasks. Individual requests are /// coalesced. The client can cheaply be cloned. #[derive(Clone)] -pub struct HashMappingClient { - inner: Arc, +pub struct PrefixHashResolver { + inner: Arc, } -struct HashMappingClientInner { +struct PrefixHashResolverInner { client: LazyClient, entries: DashMap>>, limit: Option>, @@ -80,13 +83,13 @@ enum PendingOrFetched { Fetched(T), } -/// A builder for a `HashMappingClient`. -pub struct HashMappingClientBuilder { +/// A builder for a `PrefixHashResolver`. +pub struct PrefixHashResolverBuilder { client: LazyClient, limit: Option>, } -impl HashMappingClientBuilder { +impl PrefixHashResolverBuilder { /// Sets the concurrency limit for the client. This is useful to limit the /// maximum number of concurrent requests. pub fn with_concurrency_limit(self, limit: Arc) -> Self { @@ -104,9 +107,9 @@ impl HashMappingClientBuilder { } /// Finish the construction of the client and return it. - pub fn finish(self) -> HashMappingClient { - HashMappingClient { - inner: Arc::new(HashMappingClientInner { + pub fn finish(self) -> PrefixHashResolver { + PrefixHashResolver { + inner: Arc::new(PrefixHashResolverInner { client: self.client, entries: DashMap::new(), limit: self.limit, @@ -115,11 +118,11 @@ impl HashMappingClientBuilder { } } -impl HashMappingClient { - /// Constructs a new `HashMappingClient` with the provided +impl PrefixHashResolver { + /// Constructs a new mapping client with the provided /// `ClientWithMiddleware`. - pub fn builder(client: LazyClient) -> HashMappingClientBuilder { - HashMappingClientBuilder { + pub fn builder(client: LazyClient) -> PrefixHashResolverBuilder { + PrefixHashResolverBuilder { client, limit: None, } @@ -131,19 +134,19 @@ impl HashMappingClient { &self, sha256: Sha256Hash, cache_metrics: &CacheMetrics, - ) -> Result, HashMappingClientError> { + ) -> Result, PrefixHashResolverError> { self.inner.get_mapping(sha256, cache_metrics).await } } -impl HashMappingClientInner { +impl PrefixHashResolverInner { /// Fetches the pypi name mapping and caches it to ensure that any /// subsequent request does not hit the network. pub async fn get_mapping( &self, sha256: Sha256Hash, cache_metrics: &CacheMetrics, - ) -> Result, HashMappingClientError> { + ) -> Result, PrefixHashResolverError> { let sender = match self.entries.entry(sha256) { Entry::Vacant(entry) => { // Construct a sender so other tasks can subscribe @@ -242,7 +245,7 @@ async fn try_fetch_mapping( client: &LazyClient, sha256: &Sha256Hash, cache_metrics: &CacheMetrics, -) -> Result, HashMappingClientError> { +) -> Result, PrefixHashResolverError> { let hash_str = format!("{sha256:x}"); let url = format!("{STORAGE_URL}/{HASH_DIR}/{hash_str}"); @@ -262,36 +265,33 @@ async fn try_fetch_mapping( Ok(Some(package)) } -impl DerivePurls for HashMappingClient { - async fn derive_purls( +impl PrefixHashResolver { + pub(crate) async fn derive_prefix_hash_purls( &self, record: &RepoDataRecord, cache_metrics: &CacheMetrics, - ) -> Result>, MappingError> { + ) -> Result { // Get the hash from the record, if there is no sha we cannot derive purls let Some(sha256) = record.package_record.sha256 else { - return Ok(None); + return Ok(DerivationOutcome::NotApplicable); }; // Fetch the mapping from the server, or return None if it doesn't exist let Some(mapped_package) = self.get_mapping(sha256, cache_metrics).await? else { - return Ok(None); + return Ok(DerivationOutcome::NotApplicable); }; // Get the pypi names from the mapping let Some(mapped_name) = mapped_package.pypi_normalized_names else { // If there are no pypi names, there are no purls - return Ok(Some(vec![])); + return Ok(DerivationOutcome::NoPurls); }; - Ok(Some( + Ok(DerivationOutcome::Purls( mapped_name .into_iter() .map(|pypi_name| { - let purl = PackageUrl::builder(String::from("pypi"), pypi_name) - .with_qualifier("source", PurlSource::HashMapping.as_str()) - .expect("valid qualifier"); - purl.build().expect("valid pypi package url") + pypi_purl(pypi_name, Some(PurlDerivationSource::PrefixHashMapping)) }) .collect(), )) diff --git a/crates/pypi_mapping/src/custom_mapping.rs b/crates/pypi_mapping/src/resolvers/project_defined_mapping.rs similarity index 72% rename from crates/pypi_mapping/src/custom_mapping.rs rename to crates/pypi_mapping/src/resolvers/project_defined_mapping.rs index 0926f2002c..3fb458f150 100644 --- a/crates/pypi_mapping/src/custom_mapping.rs +++ b/crates/pypi_mapping/src/resolvers/project_defined_mapping.rs @@ -1,13 +1,14 @@ use async_once_cell::OnceCell as AsyncCell; use miette::{IntoDiagnostic, WrapErr}; -use rattler_conda_types::{PackageUrl, RepoDataRecord}; +use rattler_conda_types::RepoDataRecord; use rattler_networking::LazyClient; use std::path::Path; use url::Url; use crate::{ - CacheMetrics, CompressedMapping, DerivePurls, MappingByChannel, MappingError, MappingLocation, - MappingMap, PurlSource, + CacheMetrics, CompressedMapping, MappingByChannel, MappingError, MappingMap, + ProjectDefinedMappingLocation, PurlDerivationSource, channel::normalize_channel, + derivation::DerivationOutcome, purl::pypi_purl, }; /// Struct with a mapping of channel names to their respective mapping locations @@ -15,13 +16,13 @@ use crate::{ /// /// This struct caches the mapping internally. #[derive(Debug)] -pub struct CustomMapping { +pub struct ProjectDefinedMapping { pub mapping: MappingMap, mapping_value: AsyncCell, } -impl CustomMapping { - /// Create a new `CustomMapping` with the specified mapping. +impl ProjectDefinedMapping { + /// Create a new `ProjectDefinedMapping` with the specified mapping. pub fn new(mapping: MappingMap) -> Self { Self { mapping, @@ -29,8 +30,8 @@ impl CustomMapping { } } - /// Fetch the custom mapping from the server or load from the local - pub async fn fetch_custom_mapping( + /// Fetch the project-defined mapping from the server or load from the local + pub async fn fetch_project_defined_mapping( &self, client: &LazyClient, ) -> miette::Result { @@ -42,7 +43,7 @@ impl CustomMapping { // Fetch the mapping from the server or from the local match url { - MappingLocation::Url(url) => { + ProjectDefinedMappingLocation::Url(url) => { let mapping_by_name = match url.scheme() { "file" => { let file_path = url.to_file_path().map_err(|_| { @@ -55,12 +56,12 @@ impl CustomMapping { mapping_url_to_name.insert(name.to_string(), mapping_by_name); } - MappingLocation::Path(path) => { + ProjectDefinedMappingLocation::Path(path) => { let mapping_by_name = fetch_mapping_from_path(path)?; mapping_url_to_name.insert(name.to_string(), mapping_by_name); } - MappingLocation::Memory(mapping) => { + ProjectDefinedMappingLocation::InMemory(mapping) => { mapping_url_to_name.insert(name.to_string(), mapping.clone()); } } @@ -117,16 +118,16 @@ fn fetch_mapping_from_path(path: &Path) -> miette::Result { Ok(mapping_by_name) } -/// THis is a client that uses a custom in memory mapping to derive purls. +/// THis is a client that uses a project-defined in-memory mapping to derive purls. #[derive(Default)] -pub(crate) struct CustomMappingClient { +pub(crate) struct ProjectDefinedResolver { mapping: MappingByChannel, } -impl CustomMappingClient { +impl ProjectDefinedResolver { /// Returns the mapping associated with a channel. fn get_channel_mapping(&self, channel: &str) -> Option<&CompressedMapping> { - self.mapping.get(channel.trim_end_matches('/')) + self.mapping.get(normalize_channel(channel)) } /// Returns true if this mapping applies to the given record. @@ -138,44 +139,41 @@ impl CustomMappingClient { } } -impl From for CustomMappingClient { +impl From for ProjectDefinedResolver { fn from(value: MappingByChannel) -> Self { Self { mapping: value } } } -impl DerivePurls for CustomMappingClient { - async fn derive_purls( +impl ProjectDefinedResolver { + pub(crate) async fn derive_project_defined_purls( &self, record: &RepoDataRecord, _cache_metrics: &CacheMetrics, - ) -> Result>, MappingError> { + ) -> Result { let Some(channel) = record.channel.as_ref() else { - return Ok(None); + return Ok(DerivationOutcome::NotApplicable); }; // See if the mapping contains the channel - let Some(custom_mapping) = self.get_channel_mapping(channel) else { - return Ok(None); + let Some(project_defined_mapping) = self.get_channel_mapping(channel) else { + return Ok(DerivationOutcome::NotApplicable); }; // Find the mapping for this particular record - match custom_mapping.get(record.package_record.name.as_normalized()) { + match project_defined_mapping.get(record.package_record.name.as_normalized()) { // The record is in the mapping, and it has a pypi name - Some(Some(mapped_name)) => { - let purl = PackageUrl::builder(String::from("pypi"), mapped_name.to_string()) - .with_qualifier("source", PurlSource::ProjectDefinedMapping.as_str()) - .expect("valid qualifier"); - let built_purl = purl.build().expect("valid pypi package url"); - Ok(Some(vec![built_purl])) - } + Some(Some(mapped_name)) => Ok(DerivationOutcome::Purls(vec![pypi_purl( + mapped_name.to_string(), + Some(PurlDerivationSource::ProjectDefinedMapping), + )])), Some(None) => { // The record is in the mapping, but it has no pypi name - Ok(Some(vec![])) + Ok(DerivationOutcome::NoPurls) } None => { // The record is not in the mapping - Ok(None) + Ok(DerivationOutcome::NotApplicable) } } }