Skip to content
236 changes: 236 additions & 0 deletions src/cargo/core/package.rs
Original file line number Diff line number Diff line change
Expand Up @@ -597,6 +597,97 @@ impl<'cfg> PackageSet<'cfg> {
}
}
}

Ok(())
}

/// Warns when a dependency could be satisfied by more than one registry.
///
/// Every dependency of `root_ids` (as selected by `PackageSet::filter_deps`)
/// is handed to a `DependencyConfusionChecker`, which looks up registry
/// packages that were not pinned to an explicit registry in every *other*
/// registry source known to this package set. For each package that is also
/// defined elsewhere, a warning and a note are printed through the
/// workspace's shell, since the user may be at risk of a dependency
/// confusion attack.
///
/// Warnings are emitted in sorted package order and each other registry is
/// listed at most once per package, so the output is deterministic.
///
/// # Errors
///
/// Returns an error if the package cache lock cannot be acquired, if
/// querying a source fails, or if writing to the shell fails.
pub(crate) fn warn_deps_defined_in_multiple_registries(
    &self,
    ws: &Workspace<'cfg>,
    resolve: &Resolve,
    root_ids: &[PackageId],
    has_dev_units: HasDevUnits,
    requested_kinds: &[CompileKind],
    target_data: &RustcTargetData<'_>,
    force_all_targets: ForceAllTargets,
) -> CargoResult<()> {
    // We need to build the possible sources that could cause confusion:
    // we're only actually interested in registries here, since non-registry
    // sources are more explicitly defined in Cargo.toml. The checker returns
    // `None` when there are too few registries for confusion to be possible.
    let mut check = match warn_multiple::DependencyConfusionChecker::new(
        self.sources().sources().map(|(sid, _source)| sid),
    ) {
        Some(check) => check,
        None => return Ok(()),
    };

    // Enqueue every package+source combination to check. This is
    // essentially the Cartesian product of all package dependencies with all
    // registry sources that are not the source the dependency comes from.
    let deps_of_roots = root_ids.iter().flat_map(|root_id| {
        PackageSet::filter_deps(
            *root_id,
            resolve,
            has_dev_units,
            requested_kinds,
            target_data,
            force_all_targets,
        )
    });
    for (pid, deps) in deps_of_roots {
        check.check_package_deps(pid, deps);
    }

    // The checker hands back a `HashMap`, whose iteration order is
    // unspecified; sort by package so warnings appear in a stable order.
    let mut multiply_defined = check
        .find_packages_defined_in_multiple_registries(
            self.config.acquire_package_cache_lock()?,
            self.sources_mut(),
        )?
        .into_iter()
        .collect::<Vec<_>>();
    multiply_defined.sort_unstable_by_key(|(pid, _)| *pid);

    // Now we have a list of multiply defined packages, we can output that
    // list, and suggest to the user how they can avoid the warning.
    for (pid, others) in multiply_defined {
        let mut other_sources = others
            .into_iter()
            .map(|sid| format!("`{}`", sid.display_registry_name()))
            .collect::<Vec<_>>();

        // Sorting keeps the output stable; deduplicating avoids naming the
        // same registry twice when a package is reachable from several
        // roots and was therefore enqueued more than once.
        other_sources.sort();
        other_sources.dedup();

        if other_sources.is_empty() {
            continue;
        }

        let registry_noun = if other_sources.len() == 1 {
            "registry"
        } else {
            "registries"
        };

        ws.config().shell().warn(&format!(
            "package `{}` from {} is also defined in {} {}",
            pid,
            pid.source_id(),
            registry_noun,
            other_sources.join(", "),
        ))?;

        ws.config().shell().note(&format!(
            r#"you can specify the exact registry to use for the
`{}` dependency in Cargo.toml, eg:

{} = {{ version = "{}", registry = "{}" }}
"#,
            pid,
            pid.name(),
            pid.version(),
            pid.source_id().display_registry_name(),
        ))?;
    }

    Ok(())
}

Expand Down Expand Up @@ -1178,3 +1269,148 @@ mod tls {
})
}
}

mod warn_multiple {
use std::{
cell::RefMut,
collections::{HashMap, HashSet},
task::Poll,
};

use crate::{
core::{Dependency, PackageId, SourceId, SourceMap},
util::config::PackageCacheLock,
CargoResult,
};

/// Checks if packages are defined in more than one registry source.
///
/// Intended flow: build with `new`, enqueue dependencies with
/// `check_package_deps`, then consume the checker with
/// `find_packages_defined_in_multiple_registries` to obtain the result.
pub(super) struct DependencyConfusionChecker {
/// IDs of the sources for which `SourceId::is_registry` returned true
/// when the checker was created.
registry_source_ids: Vec<SourceId>,
/// Queued `(package, source)` pairs: each asks whether `source` (a
/// registry other than the one the package came from) also defines a
/// package with the same name.
pending_checks: Vec<(PackageId, SourceId)>,
}

impl DependencyConfusionChecker {
    /// Instantiates a new checker based on the given sources. Only sources
    /// that are actual registries will be used.
    ///
    /// If there are fewer than two registry sources, this function returns
    /// `None`, and no further action is required to check for dependency
    /// confusion.
    pub(super) fn new<'a>(sources_iter: impl Iterator<Item = &'a SourceId>) -> Option<Self> {
        // We're only interested in registry sources, since other sources are
        // always explicitly used in dependencies.
        let registry_source_ids = sources_iter
            .filter(|sid| sid.is_registry())
            .copied()
            .collect::<Vec<_>>();

        // With zero or one registries there is no other possible source, so
        // no dependency confusion can occur and there is nothing to check.
        if registry_source_ids.len() > 1 {
            Some(Self {
                registry_source_ids,
                pending_checks: Vec::new(),
            })
        } else {
            None
        }
    }

    /// Enqueues a package dependency to be checked for possible dependency
    /// confusion, if required.
    ///
    /// Nothing is enqueued when any of `deps` names an explicit registry, or
    /// when `pid` does not come from a registry source.
    pub(super) fn check_package_deps(&mut self, pid: PackageId, deps: &HashSet<Dependency>) {
        // If an explicit registry was given in a dependency, we don't want
        // to warn, and no further work is required.
        if deps.iter().any(|dep| dep.registry_id().is_some()) {
            return;
        }

        // Dependencies that are coming in from non-registry sources — such
        // as Git repos, directories, and paths — should also be ignored, as
        // the user will have specified the source in their Cargo.toml
        // already.
        let package_sid = pid.source_id();
        if !package_sid.is_registry() {
            return;
        }

        // Add a check for the package to the pending list for all registry
        // sources that are not the actual source the package is coming
        // from.
        for sid in self.registry_source_ids.iter() {
            if sid != &package_sid {
                self.pending_checks.push((pid, *sid));
            }
        }
    }

    /// Returns packages that are available in multiple registry sources.
    ///
    /// The result maps each affected package to the other registry sources
    /// that also contain a package of the same name; every source appears at
    /// most once per package.
    ///
    /// Note that this requires package cache to be locked, as checking if a
    /// source contains a particular package may trigger a pull from that
    /// source.
    ///
    /// # Errors
    ///
    /// Returns an error if blocking on a source fails, or — once all checks
    /// have completed — the first error recorded while querying a source.
    pub(super) fn find_packages_defined_in_multiple_registries(
        mut self,
        _lock: PackageCacheLock<'_>,
        mut sources: RefMut<'_, SourceMap<'_>>,
    ) -> CargoResult<HashMap<PackageId, Vec<SourceId>>> {
        // The same package may have been enqueued several times (for example
        // when it is a dependency of more than one root). Drop the repeats,
        // keeping first-seen order, so each source is queried once per
        // package and is reported only once.
        let mut seen = HashSet::new();
        self.pending_checks.retain(|check| seen.insert(*check));

        // Basically, we want to iterate over the pending checks until we've
        // ascertained what the result of each one is. The result may be a
        // package+source combination that could cause dependency confusion,
        // an error generated when checking if the source contains a
        // package, or nothing if there's no confusion possible.
        let mut results: Vec<CargoResult<(PackageId, SourceId)>> = Vec::new();
        while !self.pending_checks.is_empty() {
            self.pending_checks.retain(|(pid, sid)| {
                let source = match sources.get_mut(*sid) {
                    Some(source) => source,
                    None => {
                        // This shouldn't happen in practice unless the source
                        // map was modified after `Self::new` was invoked, in
                        // which case there are probably bigger problems
                        // anyway.
                        results.push(Err(anyhow::format_err!(
                            "cannot find source from ID {:?}",
                            sid
                        )));
                        return false;
                    }
                };

                match source.contains_package_name(&pid.name()) {
                    Poll::Ready(Ok(true)) => {
                        results.push(Ok((*pid, *sid)));
                        false
                    }
                    Poll::Ready(Ok(false)) => false,
                    Poll::Ready(Err(e)) => {
                        results.push(Err(e));
                        false
                    }
                    // This is the only scenario where we retain the pending
                    // check, so it is retried on the next pass.
                    Poll::Pending => true,
                }
            });

            // Let every source make progress before the next pass.
            for (_sid, source) in sources.sources_mut() {
                source.block_until_ready()?;
            }
        }

        // Now we can turn the raw results into a neat HashMap keyed by each
        // potentially affected package, which can then be used to report
        // back to the user.
        let mut by_package: HashMap<PackageId, Vec<SourceId>> = HashMap::new();
        for result in results {
            let (pid, sid) = result?;
            by_package.entry(pid).or_default().push(sid);
        }

        Ok(by_package)
    }
}
}
17 changes: 17 additions & 0 deletions src/cargo/core/source/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,9 @@ pub trait Source {
false
}

/// Returns whether a package with the given name is defined within the
/// source.
///
/// Only the name is considered, not any particular version.
/// `Poll::Pending` means the answer is not available yet; the
/// dependency-confusion check calls `block_until_ready` and then asks
/// again.
fn contains_package_name(&mut self, name: &str) -> Poll<CargoResult<bool>>;

/// Add a number of crates that should be whitelisted for showing up during
/// queries, even if they are yanked. Currently only applies to registry
/// sources.
Expand Down Expand Up @@ -197,6 +200,11 @@ impl<'a, T: Source + ?Sized + 'a> Source for Box<T> {
(**self).is_replaced()
}

/// Forwards to `Source::contains_package_name`.
fn contains_package_name(&mut self, name: &str) -> Poll<CargoResult<bool>> {
(**self).contains_package_name(name)
}

/// Forwards to `Source::add_to_yanked_whitelist` on the boxed source.
fn add_to_yanked_whitelist(&mut self, pkgs: &[PackageId]) {
(**self).add_to_yanked_whitelist(pkgs);
}
Expand Down Expand Up @@ -268,6 +276,10 @@ impl<'a, T: Source + ?Sized + 'a> Source for &'a mut T {
(**self).is_replaced()
}

/// Forwards to `Source::contains_package_name` on the referenced source.
fn contains_package_name(&mut self, name: &str) -> Poll<CargoResult<bool>> {
(**self).contains_package_name(name)
}

/// Forwards to `Source::add_to_yanked_whitelist` on the referenced source.
fn add_to_yanked_whitelist(&mut self, pkgs: &[PackageId]) {
(**self).add_to_yanked_whitelist(pkgs);
}
Expand Down Expand Up @@ -324,6 +336,11 @@ impl<'src> SourceMap<'src> {
self.map.len()
}

/// Iterates over every `(SourceId, source)` pair in the map without
/// mutating it; like `HashMap::iter`.
pub fn sources<'a>(&'a self) -> impl Iterator<Item = (&'a SourceId, &'a (dyn Source + 'src))> {
    // Each stored value is reborrowed as a plain trait-object reference.
    self.map.iter().map(|(id, source)| (id, &**source))
}

/// Like `HashMap::iter_mut`.
pub fn sources_mut<'a>(
&'a mut self,
Expand Down
10 changes: 10 additions & 0 deletions src/cargo/ops/resolve.rs
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,16 @@ pub fn resolve_ws_with_opts<'cfg>(
force_all_targets,
)?;

pkg_set.warn_deps_defined_in_multiple_registries(
ws,
&resolved_with_overrides,
&member_ids,
has_dev_units,
requested_targets,
target_data,
force_all_targets,
)?;

Ok(WorkspaceResolve {
pkg_set,
workspace_resolve: resolve,
Expand Down
4 changes: 4 additions & 0 deletions src/cargo/sources/directory.rs
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,10 @@ impl<'cfg> Source for DirectorySource<'cfg> {
Ok(())
}

/// Reports whether any package held by this directory source has the given
/// name. The answer is always immediately ready.
fn contains_package_name(&mut self, name: &str) -> Poll<CargoResult<bool>> {
    let found = self.packages.keys().any(|pkg_id| pkg_id.name() == name);
    Poll::Ready(Ok(found))
}

fn download(&mut self, id: PackageId) -> CargoResult<MaybePackage> {
self.packages
.get(&id)
Expand Down
7 changes: 7 additions & 0 deletions src/cargo/sources/git/source.rs
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,13 @@ impl<'cfg> Source for GitSource<'cfg> {
format!("Git repository {}", self.source_id)
}

/// Answers via the underlying path source when one is set; otherwise
/// reports `Poll::Pending` so the caller can try again later.
fn contains_package_name(&mut self, name: &str) -> Poll<CargoResult<bool>> {
    if let Some(path_source) = &mut self.path_source {
        path_source.contains_package_name(name)
    } else {
        Poll::Pending
    }
}

// Intentionally a no-op for this source: the given packages are ignored.
fn add_to_yanked_whitelist(&mut self, _pkgs: &[PackageId]) {}

fn is_yanked(&mut self, _pkg: PackageId) -> Poll<CargoResult<bool>> {
Expand Down
8 changes: 8 additions & 0 deletions src/cargo/sources/path.rs
Original file line number Diff line number Diff line change
Expand Up @@ -561,6 +561,14 @@ impl<'cfg> Source for PathSource<'cfg> {
}
}

/// Reports whether any package in this path source has the given name.
///
/// Any error from `update` is returned to the caller; otherwise the answer
/// is immediately ready.
fn contains_package_name(&mut self, name: &str) -> Poll<CargoResult<bool>> {
    // NOTE(review): `update` presumably populates `self.packages` — confirm.
    self.update()?;
    let found = self.packages.iter().any(|pkg| pkg.name() == name);
    Poll::Ready(Ok(found))
}

// Intentionally a no-op for this source: the given packages are ignored.
fn add_to_yanked_whitelist(&mut self, _pkgs: &[PackageId]) {}

fn is_yanked(&mut self, _pkg: PackageId) -> Poll<CargoResult<bool>> {
Expand Down
12 changes: 11 additions & 1 deletion src/cargo/sources/registry/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ use std::collections::HashSet;
use std::fs::{File, OpenOptions};
use std::io::{self, Write};
use std::path::{Path, PathBuf};
use std::task::Poll;
use std::task::{ready, Poll};

use anyhow::Context as _;
use cargo_util::paths::{self, exclude_from_backups_and_indexing};
Expand Down Expand Up @@ -879,6 +879,16 @@ impl<'cfg> Source for RegistrySource<'cfg> {
self.source_id.display_index()
}

/// Reports whether the registry index yields at least one summary for the
/// given name, accepting any version.
///
/// Returns `Poll::Pending` while the index lookup is not ready, and passes
/// on any error the lookup produces.
fn contains_package_name(&mut self, name: &str) -> Poll<CargoResult<bool>> {
    let lookup = self
        .index
        .summaries(name.into(), &OptVersionReq::Any, &mut *self.ops);
    let mut summaries = ready!(lookup)?;
    Poll::Ready(Ok(summaries.next().is_some()))
}

/// Adds the given package IDs to this source's `yanked_whitelist`.
fn add_to_yanked_whitelist(&mut self, pkgs: &[PackageId]) {
self.yanked_whitelist.extend(pkgs);
}
Expand Down
4 changes: 4 additions & 0 deletions src/cargo/sources/replaced.rs
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,10 @@ impl<'cfg> Source for ReplacedSource<'cfg> {
true
}

/// Delegates to the wrapped `inner` source, passing `name` through
/// unchanged.
fn contains_package_name(&mut self, name: &str) -> Poll<CargoResult<bool>> {
self.inner.contains_package_name(name)
}

fn add_to_yanked_whitelist(&mut self, pkgs: &[PackageId]) {
let pkgs = pkgs
.iter()
Expand Down
Loading