From a52ac91703128f10b95e74f34be6c4442da0cd27 Mon Sep 17 00:00:00 2001 From: Ruben Arts Date: Tue, 4 Jun 2024 17:46:41 +0200 Subject: [PATCH 01/52] wip: allow for repodata to be created from a url based dependency --- crates/rattler_cache/src/package_cache.rs | 15 +++++++ crates/rattler_repodata_gateway/Cargo.toml | 2 + .../src/gateway/error.rs | 4 ++ .../src/gateway/mod.rs | 26 ++++++++++- .../src/gateway/query.rs | 43 ++++++++++++++++++- 5 files changed, 87 insertions(+), 3 deletions(-) diff --git a/crates/rattler_cache/src/package_cache.rs b/crates/rattler_cache/src/package_cache.rs index 4e0270b13..94dada031 100644 --- a/crates/rattler_cache/src/package_cache.rs +++ b/crates/rattler_cache/src/package_cache.rs @@ -91,6 +91,21 @@ impl From<&PackageRecord> for CacheKey { } } +impl From<&Url> for CacheKey { + fn from(url: &Url) -> Self { + if let Some(archive) = ArchiveIdentifier::try_from_url(&url){ + return archive.into() + } + // Not a normal archive, so we use the url as the cache key + CacheKey { + name: url.to_string(), + version: "_".to_string(), + build_string: "_".to_string(), + sha256: None + } + } +} + impl Display for CacheKey { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { write!(f, "{}-{}-{}", &self.name, &self.version, &self.build_string) diff --git a/crates/rattler_repodata_gateway/Cargo.toml b/crates/rattler_repodata_gateway/Cargo.toml index 41debc067..9fe1e0beb 100644 --- a/crates/rattler_repodata_gateway/Cargo.toml +++ b/crates/rattler_repodata_gateway/Cargo.toml @@ -54,6 +54,8 @@ tracing = { workspace = true } url = { workspace = true, features = ["serde"] } zstd = { workspace = true } percent-encoding = { workspace = true } +rattler_cache = { version = "0.1.0", path = "../rattler_cache" } +rattler_package_streaming = { version = "0.21.2", path = "../rattler_package_streaming", features = ["reqwest"] } [target.'cfg(unix)'.dependencies] libc = { workspace = true } diff --git a/crates/rattler_repodata_gateway/src/gateway/error.rs b/crates/rattler_repodata_gateway/src/gateway/error.rs index 15739f21e..b6d28ddfc 100644 --- a/crates/rattler_repodata_gateway/src/gateway/error.rs +++ b/crates/rattler_repodata_gateway/src/gateway/error.rs @@ -1,5 +1,6 @@ use crate::fetch; use crate::fetch::{FetchRepoDataError, RepoDataNotFoundError}; +use crate::gateway::direct_url_gateway::DirectUrlQueryError; use rattler_conda_types::Channel; use rattler_networking::Redact; use reqwest_middleware::Error; @@ -34,6 +35,9 @@ pub enum GatewayError { #[error("the operation was cancelled")] Cancelled, + + #[error("the direct url query failed")] + DirectUrlQueryError(#[from] DirectUrlQueryError), } impl From for GatewayError { diff --git a/crates/rattler_repodata_gateway/src/gateway/mod.rs b/crates/rattler_repodata_gateway/src/gateway/mod.rs index cdb7893ad..f5c233409 100644 --- a/crates/rattler_repodata_gateway/src/gateway/mod.rs +++ b/crates/rattler_repodata_gateway/src/gateway/mod.rs @@ -1,6 +1,7 @@ mod barrier_cell; mod builder; mod channel_config; +mod direct_url_gateway; mod error; mod local_subdir; mod query; @@ -336,9 +337,10 @@ mod test { }; use dashmap::DashSet; - use rattler_conda_types::{Channel, ChannelConfig, PackageName, Platform}; + use rattler_conda_types::{Channel, ChannelConfig, MatchSpec, PackageName, Platform}; use rstest::rstest; use url::Url; + use rattler_conda_types::ParseStrictness::Strict; use crate::{ fetch::CacheAction, @@ -400,6 +402,28 @@ mod test { assert_eq!(total_records, 45060); } + #[tokio::test] + async fn test_direct_url_spec_from_gateway() { + let gateway = Gateway::new(); + + let index = remote_conda_forge().await; + + let records = gateway + .query( + vec![index.channel()], + vec![Platform::Linux64], + vec![MatchSpec::from_str("https://conda.anaconda.org/conda-forge/linux-64/mamba-1.4.7-py310h51d5547_0.conda", Strict).unwrap()].into_iter(), + // vec![MatchSpec::from_str("mamba=1.4.7=py310h51d5547_0", Strict).unwrap()].into_iter(), + ) + .recursive(true) + .await + .unwrap(); + + let total_records: usize = records.iter().map(RepoData::len).sum(); + eprintln!("records: {:?}", records); + assert_eq!(total_records, 4413); + } + #[rstest] #[case::named("non-existing-channel")] #[case::url("https://conda.anaconda.org/does-not-exist")] diff --git a/crates/rattler_repodata_gateway/src/gateway/query.rs b/crates/rattler_repodata_gateway/src/gateway/query.rs index a906b9a01..a9b79b116 100644 --- a/crates/rattler_repodata_gateway/src/gateway/query.rs +++ b/crates/rattler_repodata_gateway/src/gateway/query.rs @@ -8,6 +8,8 @@ use std::{ future::IntoFuture, sync::Arc, }; +use rattler_cache::package_cache::PackageCache; +use crate::gateway::direct_url_gateway::DirectUrlQuery; /// Represents a query to execute with a [`Gateway`]. /// @@ -120,6 +122,7 @@ impl GatewayQuery { // Package names that we have or will issue requests for. let mut seen = HashSet::new(); let mut pending_package_specs = HashMap::new(); + let mut pending_direct_url_specs = vec![]; for spec in self.specs { if let Some(name) = &spec.name { seen.insert(name.clone()); @@ -128,14 +131,50 @@ impl GatewayQuery { .or_insert_with(Vec::new) .push(spec); } + else if spec.url.is_some() { + pending_direct_url_specs.push(spec); + } + } + + // Prefetch the direct url specs + let mut direct_url_records = vec![]; + for spec in pending_direct_url_specs { + if let Some(url) = &spec.url { + // If the spec has a URL, we do not need to fetch it from the repodata. + let package_cache = PackageCache::new(self.gateway.cache.clone()); + + let query = DirectUrlQuery::new(url.clone(), package_cache, self.gateway.client.clone()); + let record = query.execute().await?; + + // Add dependencies of record to pending_package_specs + if self.recursive { + for dependency in &record.package_record.depends { + let dependency_name = PackageName::new_unchecked( + dependency.split_once(' ').unwrap_or((dependency, "")).0, + ); + if seen.insert(dependency_name.clone()) { + pending_package_specs.insert(dependency_name.clone(), vec![dependency_name.into()]); + } + } + } + + direct_url_records.push(record); + } } // A list of futures to fetch the records for the pending package names. The main task // awaits these futures. let mut pending_records = FuturesUnordered::new(); - // The resulting list of repodata records. - let mut result = vec![RepoData::default(); subdirs.len()]; + // The resulting list of repodata records + 1 for the direct_url_repodata. + let mut result = vec![RepoData::default(); subdirs.len() + 1]; + + // Add the direct_url_repodata to the result. + let direct_url_repodata = RepoData { + len: direct_url_records.len(), + shards: vec![Arc::from(direct_url_records)], + }; + result.push(direct_url_repodata); // Loop until all pending package names have been fetched. loop { From 5eb36b32663c03791002cde228099bd4c15588e4 Mon Sep 17 00:00:00 2001 From: Ruben Arts Date: Wed, 5 Jun 2024 15:05:51 +0200 Subject: [PATCH 02/52] fix: local test, ignore lineno in env --- crates/rattler_shell/src/activation.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/crates/rattler_shell/src/activation.rs b/crates/rattler_shell/src/activation.rs index 3e0ff864a..a68b3cc1a 100644 --- a/crates/rattler_shell/src/activation.rs +++ b/crates/rattler_shell/src/activation.rs @@ -735,6 +735,7 @@ mod tests { env_diff.remove("CONDA_PREFIX"); env_diff.remove("Path"); env_diff.remove("PATH"); + env_diff.remove("LINENO"); insta::assert_yaml_snapshot!("after_activation", env_diff); } From 2793fc8c917f909b5772f96bb72ff510298fa2f4 Mon Sep 17 00:00:00 2001 From: Ruben Arts Date: Wed, 5 Jun 2024 15:07:47 +0200 Subject: [PATCH 03/52] feat: implement direct_url_query for fetching package from url and generating virtual repodata from them. --- crates/rattler_cache/src/package_cache.rs | 6 +- .../src/match_spec/parse.rs | 39 +++-- ...spec__parse__tests__parsed matchspecs.snap | 4 + .../rattler_conda_types/src/package_name.rs | 11 +- .../rattler_conda_types/src/repo_data/mod.rs | 18 ++- crates/rattler_conda_types/src/version/mod.rs | 2 +- .../src/version/with_source.rs | 2 +- crates/rattler_index/src/lib.rs | 1 + crates/rattler_lock/src/parse/v3.rs | 1 + .../src/utils/serde/raw_conda_package_data.rs | 1 + .../src/gateway/direct_url_gateway.rs | 147 ++++++++++++++++++ .../src/gateway/mod.rs | 2 +- .../src/gateway/query.rs | 21 +-- 13 files changed, 225 insertions(+), 30 deletions(-) create mode 100644 crates/rattler_repodata_gateway/src/gateway/direct_url_gateway.rs diff --git a/crates/rattler_cache/src/package_cache.rs b/crates/rattler_cache/src/package_cache.rs index 94dada031..4114a6f6c 100644 --- a/crates/rattler_cache/src/package_cache.rs +++ b/crates/rattler_cache/src/package_cache.rs @@ -93,15 +93,15 @@ impl From<&PackageRecord> for CacheKey { impl From<&Url> for CacheKey { fn from(url: &Url) -> Self { - if let Some(archive) = ArchiveIdentifier::try_from_url(&url){ - return archive.into() + if let Some(archive) = ArchiveIdentifier::try_from_url(url) { + return archive.into(); } // Not a normal archive, so we use the url as the cache key CacheKey { name: url.to_string(), version: "_".to_string(), build_string: "_".to_string(), - sha256: None + sha256: None, } } } diff --git a/crates/rattler_conda_types/src/match_spec/parse.rs b/crates/rattler_conda_types/src/match_spec/parse.rs index f473cd2ab..a89d0d5cc 100644 --- a/crates/rattler_conda_types/src/match_spec/parse.rs +++ b/crates/rattler_conda_types/src/match_spec/parse.rs @@ -20,6 +20,7 @@ use super::{ matcher::{StringMatcher, StringMatcherParseError}, MatchSpec, }; +use crate::package::ArchiveIdentifier; use crate::utils::url::parse_scheme; use crate::{ build_spec::{BuildNumberSpec, ParseBuildNumberSpecError}, @@ -388,8 +389,18 @@ fn matchspec_parser( // 2.a Is the spec an url, parse it as an url if parse_scheme(input).is_some() { let url = Url::parse(input)?; + + let archive = ArchiveIdentifier::try_from_url(&url); + let name = archive.and_then(|a| a.try_into().ok()); + + // TODO: This should also work without a proper name from the url filename + if name.is_none() { + return Err(ParseMatchSpecError::MissingPackageName); + } + return Ok(MatchSpec { url: Some(url), + name, ..MatchSpec::default() }); } @@ -398,8 +409,18 @@ fn matchspec_parser( let path = Utf8TypedPath::from(input); let url = file_url::file_path_to_url(path) .map_err(|_error| ParseMatchSpecError::InvalidPackagePathOrUrl)?; + + let archive = ArchiveIdentifier::try_from_url(&url); + let name = archive.and_then(|a| a.try_into().ok()); + + // TODO: This should also work without a proper name from the url filename + if name.is_none() { + return Err(ParseMatchSpecError::MissingPackageName); + } + return Ok(MatchSpec { url: Some(url), + name, ..MatchSpec::default() }); } @@ -870,22 +891,16 @@ mod tests { spec.url, Some(Url::parse("file:/home/user/conda-bld/linux-64/foo-1.0-py27_0.tar.bz2").unwrap()) ); - - let spec = MatchSpec::from_str("C:\\Users\\user\\Downloads\\package", Strict).unwrap(); - assert_eq!( - spec.url, - Some(Url::parse("file://C:/Users/user/Downloads/package").unwrap()) - ); - let spec = MatchSpec::from_str("/home/user/Downloads/package", Strict).unwrap(); - - assert_eq!( - spec.url, - Some(Url::parse("file:/home/user/Downloads/package").unwrap()) - ); } #[test] fn test_non_happy_url_parsing() { + let spec = MatchSpec::from_str("C:\\Users\\user\\Downloads\\package", Strict).unwrap_err(); + assert_matches!(spec, ParseMatchSpecError::MissingPackageName); + + let spec = MatchSpec::from_str("/home/user/Downloads/package", Strict).unwrap_err(); + assert_matches!(spec, ParseMatchSpecError::MissingPackageName); + let err = MatchSpec::from_str("http://username@", Strict).expect_err("Invalid url"); assert_eq!(err.to_string(), "invalid package spec url"); diff --git a/crates/rattler_conda_types/src/match_spec/snapshots/rattler_conda_types__match_spec__parse__tests__parsed matchspecs.snap b/crates/rattler_conda_types/src/match_spec/snapshots/rattler_conda_types__match_spec__parse__tests__parsed matchspecs.snap index 7e96aa583..99b1c11d1 100644 --- a/crates/rattler_conda_types/src/match_spec/snapshots/rattler_conda_types__match_spec__parse__tests__parsed matchspecs.snap +++ b/crates/rattler_conda_types/src/match_spec/snapshots/rattler_conda_types__match_spec__parse__tests__parsed matchspecs.snap @@ -4,8 +4,10 @@ assertion_line: 794 expression: evaluated --- /home/user/conda-bld/linux-64/foo-1.0-py27_0.tar.bz2: + name: foo url: "file:///home/user/conda-bld/linux-64/foo-1.0-py27_0.tar.bz2" "C:\\Users\\user\\conda-bld\\linux-64\\foo-1.0-py27_0.tar.bz2": + name: foo url: "file:///C:/Users/user/conda-bld/linux-64/foo-1.0-py27_0.tar.bz2" blas *.* mkl: name: blas @@ -35,8 +37,10 @@ foo==1.0=py27_0: version: "==1.0" build: py27_0 "https://conda.anaconda.org/conda-forge/linux-64/py-rattler-0.6.1-py39h8169da8_0.conda": + name: py-rattler url: "https://conda.anaconda.org/conda-forge/linux-64/py-rattler-0.6.1-py39h8169da8_0.conda" "https://repo.prefix.dev/ruben-arts/linux-64/boost-cpp-1.78.0-h75c5d50_1.tar.bz2": + name: boost-cpp url: "https://repo.prefix.dev/ruben-arts/linux-64/boost-cpp-1.78.0-h75c5d50_1.tar.bz2" python 3.8.* *_cpython: name: python diff --git a/crates/rattler_conda_types/src/package_name.rs b/crates/rattler_conda_types/src/package_name.rs index c3953a1f1..a43641d0a 100644 --- a/crates/rattler_conda_types/src/package_name.rs +++ b/crates/rattler_conda_types/src/package_name.rs @@ -1,3 +1,4 @@ +use crate::package::ArchiveIdentifier; use crate::utils::serde::DeserializeFromStrUnchecked; use serde::{Deserialize, Deserializer, Serialize, Serializer}; use serde_with::{DeserializeAs, DeserializeFromStr}; @@ -16,7 +17,7 @@ use thiserror::Error; /// This struct explicitly does not implement [`std::fmt::Display`] because its ambiguous if that /// would display the source or the normalized version. Simply call `as_source` or `as_normalized` /// to make the distinction. -#[derive(Debug, Clone, Eq, DeserializeFromStr)] +#[derive(Debug, Default, Clone, Eq, DeserializeFromStr)] pub struct PackageName { normalized: Option, source: String, @@ -62,6 +63,14 @@ impl TryFrom<&String> for PackageName { } } +impl TryFrom for PackageName { + type Error = InvalidPackageNameError; + + fn try_from(value: ArchiveIdentifier) -> Result { + value.name.try_into() + } +} + impl TryFrom for PackageName { type Error = InvalidPackageNameError; diff --git a/crates/rattler_conda_types/src/repo_data/mod.rs b/crates/rattler_conda_types/src/repo_data/mod.rs index b56b9c02a..b93960eba 100644 --- a/crates/rattler_conda_types/src/repo_data/mod.rs +++ b/crates/rattler_conda_types/src/repo_data/mod.rs @@ -80,7 +80,7 @@ pub struct ChannelInfo { #[serde_as] #[skip_serializing_none] #[sorted] -#[derive(Debug, Deserialize, Serialize, Eq, PartialEq, Clone, Hash)] +#[derive(Debug, Default, Deserialize, Serialize, Eq, PartialEq, Clone, Hash)] pub struct PackageRecord { /// Optionally the architecture the package supports pub arch: Option, @@ -103,6 +103,10 @@ pub struct PackageRecord { #[serde(default)] pub depends: Vec, + /// The package's url which isn't part of an real pacakge record but injected by the direct url query. + #[serde(skip)] + pub direct_url: Option, + /// Features are a deprecated way to specify different feature sets for the /// conda solver. This is not supported anymore and should not be used. /// Instead, `mutex` packages should be used to specify @@ -187,7 +191,9 @@ pub struct PackageRecord { impl Display for PackageRecord { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - if self.build.is_empty() { + if let Some(url) = &self.direct_url { + write!(f, "{url}") + } else if self.build.is_empty() { write!(f, "{} {}", self.name.as_normalized(), self.version,) } else { write!( @@ -311,6 +317,7 @@ impl PackageRecord { version: version.into(), purls: None, run_exports: None, + direct_url: None, } } @@ -429,8 +436,15 @@ impl PackageRecord { version: index.version, purls: None, run_exports: None, + direct_url: None, }) } + + /// Add url to the [`PackageRecord`] + pub fn with_package_url(mut self, url: Url) -> Self { + self.direct_url = Some(url); + self + } } fn sort_map_alphabetically( diff --git a/crates/rattler_conda_types/src/version/mod.rs b/crates/rattler_conda_types/src/version/mod.rs index 9c5f167c1..5d55b836c 100644 --- a/crates/rattler_conda_types/src/version/mod.rs +++ b/crates/rattler_conda_types/src/version/mod.rs @@ -133,7 +133,7 @@ pub use with_source::VersionWithSource; /// this problem by appending an underscore to plain version numbers: /// /// 1.0.1_ < 1.0.1a => True # ensure correct ordering for openssl -#[derive(Clone, Eq)] +#[derive(Clone, Eq, Default)] pub struct Version { /// Individual components of the version. /// diff --git a/crates/rattler_conda_types/src/version/with_source.rs b/crates/rattler_conda_types/src/version/with_source.rs index ac83b78ba..945a55b6d 100644 --- a/crates/rattler_conda_types/src/version/with_source.rs +++ b/crates/rattler_conda_types/src/version/with_source.rs @@ -22,7 +22,7 @@ use std::{ /// /// It is also possible to convert directly from a [`Version`] but the [`Display`] implementation /// is then used to generate the string representation. -#[derive(Debug, Clone)] +#[derive(Debug, Default, Clone)] pub struct VersionWithSource { version: Version, source: Option>, diff --git a/crates/rattler_index/src/lib.rs b/crates/rattler_index/src/lib.rs index 95c463de5..0192367d5 100644 --- a/crates/rattler_index/src/lib.rs +++ b/crates/rattler_index/src/lib.rs @@ -49,6 +49,7 @@ fn package_record_from_index_json( legacy_bz2_size: None, purls: None, run_exports: None, + direct_url: None, }; Ok(package_record) diff --git a/crates/rattler_lock/src/parse/v3.rs b/crates/rattler_lock/src/parse/v3.rs index 1d28b0822..f4c43b911 100644 --- a/crates/rattler_lock/src/parse/v3.rs +++ b/crates/rattler_lock/src/parse/v3.rs @@ -173,6 +173,7 @@ pub fn parse_v3_or_lower( version: value.version, purls: value.purls.is_empty().not().then_some(value.purls), run_exports: None, + direct_url: None, }, url: value.url, file_name: None, diff --git a/crates/rattler_lock/src/utils/serde/raw_conda_package_data.rs b/crates/rattler_lock/src/utils/serde/raw_conda_package_data.rs index a87ccab2c..dc011928b 100644 --- a/crates/rattler_lock/src/utils/serde/raw_conda_package_data.rs +++ b/crates/rattler_lock/src/utils/serde/raw_conda_package_data.rs @@ -125,6 +125,7 @@ impl<'a> From> for CondaPackageData { track_features: value.track_features.into_owned(), version: value.version.into_owned(), run_exports: None, + direct_url: None, }, url: value.url.into_owned(), file_name: value.file_name.into_owned(), diff --git a/crates/rattler_repodata_gateway/src/gateway/direct_url_gateway.rs b/crates/rattler_repodata_gateway/src/gateway/direct_url_gateway.rs new file mode 100644 index 000000000..c24e9b510 --- /dev/null +++ b/crates/rattler_repodata_gateway/src/gateway/direct_url_gateway.rs @@ -0,0 +1,147 @@ +use futures::FutureExt; +use rattler_cache::package_cache::{PackageCache, PackageCacheError}; +use rattler_conda_types::package::{IndexJson, PackageFile}; +use rattler_conda_types::{ConvertSubdirError, PackageRecord, RepoDataRecord}; +use std::future::IntoFuture; +use url::Url; + +pub(crate) struct DirectUrlQuery { + /// The url to query + url: Url, + /// The client to use for fetching the package + client: reqwest_middleware::ClientWithMiddleware, + /// The cache to use for storing the package + package_cache: PackageCache, +} + +#[derive(Debug, thiserror::Error)] +pub enum DirectUrlQueryError { + #[error(transparent)] + PackageCache(#[from] PackageCacheError), + #[error(transparent)] + IndexJson(#[from] std::io::Error), + #[error(transparent)] + ConvertSubdir(#[from] ConvertSubdirError), +} + +impl DirectUrlQuery { + pub(crate) fn new( + url: Url, + package_cache: PackageCache, + client: reqwest_middleware::ClientWithMiddleware, + ) -> Self { + Self { + url, + client, + package_cache, + } + } + + /// Execute the Repodata query using the cache as a source for the index.json + pub async fn execute(self) -> Result { + // TODO: Optimize this by only parsing the index json from stream. + // Get package on system + let package_dir = self + .package_cache + .get_or_fetch_from_url( + // Using the url as cache key + &self.url, + self.url.clone(), + self.client.clone(), + // Should we add a reporter? + None, + ) + .await?; + + // Extract package record from index json + let index_json = IndexJson::from_package_directory(package_dir)?; + let package_record = PackageRecord::from_index_json( + index_json, // Size + None, // sha256 + None, // md5 + None, + )? + .with_package_url(self.url.clone()); + + tracing::debug!("Package record build from direct url: {:?}", package_record); + + Ok(RepoDataRecord { + package_record, + // File name is the same as the url. + file_name: self.url.clone().to_string(), + url: self.url.clone(), + // Fake channel as it is unused in this case. + channel: "virtual_direct_url_channel".to_string(), + }) + } +} + +impl IntoFuture for DirectUrlQuery { + type Output = Result; + + type IntoFuture = futures::future::BoxFuture<'static, Self::Output>; + + fn into_future(self) -> Self::IntoFuture { + self.execute().boxed() + } +} + + +#[cfg(test)] +mod test { + use std::env::temp_dir; + use std::path::{Path, PathBuf}; + + /// Returns the path to the test data directory + fn test_data_path() -> PathBuf { + Path::new(env!("CARGO_MANIFEST_DIR")).join("../../test-data") + } + + #[tokio::test] + async fn test_direct_url_query() { + use super::*; + use url::Url; + + let url = Url::parse( + "https://conda.anaconda.org/conda-forge/noarch/boltons-24.0.0-pyhd8ed1ab_0.conda", + ) + .unwrap(); + let package_cache = PackageCache::new(PathBuf::from("/tmp")); + let client = reqwest_middleware::ClientWithMiddleware::from(reqwest::Client::new()); + let query = DirectUrlQuery::new(url.clone(), package_cache, client); + + assert_eq!(query.url.clone(), url); + + let repodata_record = query.await.unwrap(); + + assert_eq!( + repodata_record.package_record.name.as_normalized(), + "boltons" + ); + assert_eq!(repodata_record.package_record.version.as_str(), "24.0.0"); + } + + #[tokio::test] + async fn test_direct_url_path_query() { + use super::*; + use url::Url; + let path = temp_dir().join("not_a_conda_archive_style_name.conda"); + // copy path into fake filename into tmp + std::fs::copy( + test_data_path().join("andes-1.8.3-pyhd8ed1ab_0.conda"), + &path, + ) + .unwrap(); + + let url = Url::from_file_path(path).unwrap(); + let package_cache = PackageCache::new(PathBuf::from("/tmp")); + let client = reqwest_middleware::ClientWithMiddleware::from(reqwest::Client::new()); + let query = DirectUrlQuery::new(url.clone(), package_cache, client); + + assert_eq!(query.url.clone(), url); + + let repodata_record = query.await.unwrap(); + assert_eq!(repodata_record.package_record.name.as_normalized(), "andes"); + assert_eq!(repodata_record.package_record.version.as_str(), "1.8.3"); + } +} diff --git a/crates/rattler_repodata_gateway/src/gateway/mod.rs b/crates/rattler_repodata_gateway/src/gateway/mod.rs index f5c233409..f1c5c160c 100644 --- a/crates/rattler_repodata_gateway/src/gateway/mod.rs +++ b/crates/rattler_repodata_gateway/src/gateway/mod.rs @@ -337,10 +337,10 @@ mod test { }; use dashmap::DashSet; + use rattler_conda_types::ParseStrictness::Strict; use rattler_conda_types::{Channel, ChannelConfig, MatchSpec, PackageName, Platform}; use rstest::rstest; use url::Url; - use rattler_conda_types::ParseStrictness::Strict; use crate::{ fetch::CacheAction, diff --git a/crates/rattler_repodata_gateway/src/gateway/query.rs b/crates/rattler_repodata_gateway/src/gateway/query.rs index a9b79b116..5c8153d9f 100644 --- a/crates/rattler_repodata_gateway/src/gateway/query.rs +++ b/crates/rattler_repodata_gateway/src/gateway/query.rs @@ -1,15 +1,15 @@ use super::{subdir::Subdir, BarrierCell, GatewayError, GatewayInner, RepoData}; +use crate::gateway::direct_url_gateway::DirectUrlQuery; use crate::Reporter; use futures::{select_biased, stream::FuturesUnordered, FutureExt, StreamExt}; use itertools::Itertools; +use rattler_cache::package_cache::PackageCache; use rattler_conda_types::{Channel, MatchSpec, PackageName, Platform}; use std::{ collections::{HashMap, HashSet}, future::IntoFuture, sync::Arc, }; -use rattler_cache::package_cache::PackageCache; -use crate::gateway::direct_url_gateway::DirectUrlQuery; /// Represents a query to execute with a [`Gateway`]. /// @@ -124,16 +124,15 @@ impl GatewayQuery { let mut pending_package_specs = HashMap::new(); let mut pending_direct_url_specs = vec![]; for spec in self.specs { - if let Some(name) = &spec.name { + if spec.url.is_some() { + pending_direct_url_specs.push(spec); + } else if let Some(name) = &spec.name { seen.insert(name.clone()); pending_package_specs .entry(name.clone()) .or_insert_with(Vec::new) .push(spec); } - else if spec.url.is_some() { - pending_direct_url_specs.push(spec); - } } // Prefetch the direct url specs @@ -143,8 +142,10 @@ impl GatewayQuery { // If the spec has a URL, we do not need to fetch it from the repodata. let package_cache = PackageCache::new(self.gateway.cache.clone()); - let query = DirectUrlQuery::new(url.clone(), package_cache, self.gateway.client.clone()); - let record = query.execute().await?; + let query = + DirectUrlQuery::new(url.clone(), package_cache, self.gateway.client.clone()); + + let record = query.await?; // Add dependencies of record to pending_package_specs if self.recursive { @@ -153,7 +154,8 @@ impl GatewayQuery { dependency.split_once(' ').unwrap_or((dependency, "")).0, ); if seen.insert(dependency_name.clone()) { - pending_package_specs.insert(dependency_name.clone(), vec![dependency_name.into()]); + pending_package_specs + .insert(dependency_name.clone(), vec![dependency_name.into()]); } } } @@ -244,6 +246,7 @@ impl GatewayQuery { } } } + dbg!(&seen); Ok(result) } From 5ee5d0fddd7328ca063fbcb4f67ec2935de3743d Mon Sep 17 00:00:00 2001 From: Ruben Arts Date: Wed, 5 Jun 2024 15:08:35 +0200 Subject: [PATCH 04/52] feat: solve for direct url --- .../rattler_conda_types/src/match_spec/mod.rs | 7 ++ crates/rattler_solve/src/resolvo/mod.rs | 16 +++- crates/rattler_solve/tests/backends.rs | 91 ++++++++++++++++++- 3 files changed, 109 insertions(+), 5 deletions(-) diff --git a/crates/rattler_conda_types/src/match_spec/mod.rs b/crates/rattler_conda_types/src/match_spec/mod.rs index 354397c09..848f428f6 100644 --- a/crates/rattler_conda_types/src/match_spec/mod.rs +++ b/crates/rattler_conda_types/src/match_spec/mod.rs @@ -327,6 +327,13 @@ impl NamelessMatchSpec { } } + // Match on the url, only happens with direct url dependencies + if let Some(url_spec) = self.url.as_ref() { + if Some(url_spec) != record.direct_url.as_ref() { + return false; + } + } + true } } diff --git a/crates/rattler_solve/src/resolvo/mod.rs b/crates/rattler_solve/src/resolvo/mod.rs index 5df905b06..f956461b5 100644 --- a/crates/rattler_solve/src/resolvo/mod.rs +++ b/crates/rattler_solve/src/resolvo/mod.rs @@ -561,10 +561,18 @@ impl super::SolverImpl for Solver { .specs .iter() .map(|spec| { - let (name, spec) = spec.clone().into_nameless(); - let name = name.expect("cannot use matchspec without a name"); - let name_id = provider.pool.intern_package_name(name.as_normalized()); - provider.pool.intern_version_set(name_id, spec.into()) + let (name, nameless_spec) = spec.clone().into_nameless(); + let name = if let Some(name) = name { + name.as_normalized().to_string() + } else if let Some(url) = &nameless_spec.url { + url.to_string() + } else { + panic!("Cannot use matchspec without a name") + }; + let name_id = provider.pool.intern_package_name(name.as_str()); + provider + .pool + .intern_version_set(name_id, nameless_spec.into()) }) .collect(); diff --git a/crates/rattler_solve/tests/backends.rs b/crates/rattler_solve/tests/backends.rs index 74bf1073e..8c4a29d5e 100644 --- a/crates/rattler_solve/tests/backends.rs +++ b/crates/rattler_solve/tests/backends.rs @@ -109,6 +109,7 @@ fn installed_package( legacy_bz2_md5: None, purls: None, run_exports: None, + direct_url: None, }, } } @@ -536,7 +537,7 @@ macro_rules! solver_backend_tests { let result = <$T>::default().solve(task); match result { - Err(rattler_solve::SolveError::DuplicateRecords(_)) => {}, + Err(rattler_solve::SolveError::DuplicateRecords(_)) => {} _ => panic!("expected a DuplicateRecord error"), } } @@ -665,6 +666,11 @@ mod resolvo { dummy_channel_json_path, installed_package, solve, solve_real_world, FromStr, GenericVirtualPackage, SimpleSolveTask, SolveError, Version, }; + use rattler_conda_types::{ + MatchSpec, PackageRecord, ParseStrictness, RepoDataRecord, VersionWithSource, + }; + use rattler_solve::{SolveStrategy, SolverImpl, SolverTask}; + use url::Url; solver_backend_tests!(rattler_solve::resolvo::Solver); @@ -785,6 +791,89 @@ mod resolvo { "expected highest compatible version of bors" ); } + + /// Try to solve a package with a direct url, and then try to do it again without having it in the repodata. + #[test] + fn test_solve_on_url() { + let url_str = + "https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2"; + let url = Url::parse(url_str).unwrap(); + + // Create a match spec for a package that is not in the repodata + let specs: Vec<_> = vec![MatchSpec::from_str(url_str, ParseStrictness::Lenient).unwrap()]; + + // Create RepoData with only the package from the url, so the solver can find it + let mut package_record = PackageRecord { + // Only defining the name, version and url is enough for the solver to find the package + direct_url: Some(url.clone()), + name: "_libgcc_mutex".parse().unwrap(), + version: VersionWithSource::from_str("0.1").unwrap(), + // No dependencies! + ..Default::default() + }; + let repo_data: Vec = vec![RepoDataRecord { + package_record: package_record.clone(), + // Mocking the rest of the fields + file_name: url_str.to_string(), + url: url.clone(), + channel: "".to_string(), + }]; + + // Completely clean solver task, except for the specs and RepoData + let task = SolverTask { + locked_packages: vec![], + virtual_packages: vec![], + specs: specs.clone(), + constraints: vec![], + pinned_packages: vec![], + exclude_newer: None, + strategy: SolveStrategy::default(), + ..SolverTask::from_iter([&repo_data]) + }; + + let pkgs = rattler_solve::resolvo::Solver::default() + .solve(task) + .unwrap(); + + assert_eq!(pkgs.len(), 1); + assert_eq!(pkgs[0].package_record.name.as_normalized(), "_libgcc_mutex"); + assert_eq!(pkgs[0].package_record.direct_url, Some(url.clone())); + assert_eq!( + pkgs[0].package_record.version, + Version::from_str("0.1").unwrap(), + "expected lowest version of _libgcc_mutex" + ); + + // ----------------------------------------------------------------------------------------- + // Break the url in the repodata, making it not a direct url record. + package_record.direct_url = None; + + let repo_data: Vec = vec![RepoDataRecord { + package_record, + // Mocking the rest of the fields + file_name: url_str.to_string(), + url: url.clone(), + channel: "".to_string(), + }]; + + // Completely clean solver task, except for the specs and RepoData + let task = SolverTask { + locked_packages: vec![], + virtual_packages: vec![], + specs, + constraints: vec![], + pinned_packages: vec![], + exclude_newer: None, + strategy: SolveStrategy::default(), + ..SolverTask::from_iter([&repo_data]) + }; + + let solve_error = rattler_solve::resolvo::Solver::default() + .solve(task) + .unwrap_err(); + + assert!(matches!(solve_error, SolveError::Unsolvable(_))); + } } #[derive(Default)] From 88cba3af9d3fbbd5cca7ab6ed8dc5f9df5df222d Mon Sep 17 00:00:00 2001 From: Ruben Arts Date: Wed, 5 Jun 2024 15:54:45 +0200 Subject: [PATCH 05/52] fmt --- .../rattler_repodata_gateway/src/gateway/direct_url_gateway.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/crates/rattler_repodata_gateway/src/gateway/direct_url_gateway.rs b/crates/rattler_repodata_gateway/src/gateway/direct_url_gateway.rs index c24e9b510..251d5d040 100644 --- a/crates/rattler_repodata_gateway/src/gateway/direct_url_gateway.rs +++ b/crates/rattler_repodata_gateway/src/gateway/direct_url_gateway.rs @@ -86,7 +86,6 @@ impl IntoFuture for DirectUrlQuery { } } - #[cfg(test)] mod test { use std::env::temp_dir; From 35492b245ec08b38317573c89e0d76e0f2145795 Mon Sep 17 00:00:00 2001 From: Ruben Arts Date: Wed, 5 Jun 2024 16:18:15 +0200 Subject: [PATCH 06/52] fix: test, download example package --- crates/rattler_repodata_gateway/Cargo.toml | 3 ++- .../src/gateway/direct_url_gateway.rs | 26 ++++++++++--------- 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/crates/rattler_repodata_gateway/Cargo.toml b/crates/rattler_repodata_gateway/Cargo.toml index 9fe1e0beb..75f424bc2 100644 --- a/crates/rattler_repodata_gateway/Cargo.toml +++ b/crates/rattler_repodata_gateway/Cargo.toml @@ -55,7 +55,7 @@ url = { workspace = true, features = ["serde"] } zstd = { workspace = true } percent-encoding = { workspace = true } rattler_cache = { version = "0.1.0", path = "../rattler_cache" } -rattler_package_streaming = { version = "0.21.2", path = "../rattler_package_streaming", features = ["reqwest"] } +rattler_package_streaming = { version = "0.21.2", path = "../rattler_package_streaming", default-features = false, features = ["reqwest"] } [target.'cfg(unix)'.dependencies] libc = { workspace = true } @@ -74,6 +74,7 @@ tower-http = { workspace = true, features = ["fs", "compression-gzip", "trace"] tracing-test = { workspace = true } rattler_conda_types = { path = "../rattler_conda_types", version = "0.25.2", default-features = false } fslock = { workspace = true } +tools = { path="../tools" } [features] default = ['native-tls'] diff --git a/crates/rattler_repodata_gateway/src/gateway/direct_url_gateway.rs b/crates/rattler_repodata_gateway/src/gateway/direct_url_gateway.rs index 251d5d040..7b1754b6e 100644 --- a/crates/rattler_repodata_gateway/src/gateway/direct_url_gateway.rs +++ b/crates/rattler_repodata_gateway/src/gateway/direct_url_gateway.rs @@ -89,18 +89,13 @@ impl IntoFuture for DirectUrlQuery { #[cfg(test)] mod test { use std::env::temp_dir; - use std::path::{Path, PathBuf}; - - /// Returns the path to the test data directory - fn test_data_path() -> PathBuf { - Path::new(env!("CARGO_MANIFEST_DIR")).join("../../test-data") - } + use std::path::PathBuf; + use url::Url; + use super::*; + use rattler_cache::package_cache::PackageCache; #[tokio::test] async fn test_direct_url_query() { - use super::*; - use url::Url; - let url = Url::parse( "https://conda.anaconda.org/conda-forge/noarch/boltons-24.0.0-pyhd8ed1ab_0.conda", ) @@ -122,12 +117,19 @@ mod test { #[tokio::test] async fn test_direct_url_path_query() { - use super::*; - use url::Url; + let package_path = tools::download_and_cache_file_async( + "https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-main.tar.bz2" + .parse() + .unwrap(), + "476873c6289c3b7320b3fd6c0b31da67aa557abcf5c85a0583150ad4796cc575", + ) + .await.unwrap(); + let path = temp_dir().join("not_a_conda_archive_style_name.conda"); + // copy path into fake filename into tmp std::fs::copy( - test_data_path().join("andes-1.8.3-pyhd8ed1ab_0.conda"), + package_path, &path, ) .unwrap(); From 6cd59d8d955388d03ed35e644fa4c4801e32f551 Mon Sep 17 00:00:00 2001 From: Ruben Arts Date: Wed, 5 Jun 2024 16:22:00 +0200 Subject: [PATCH 07/52] fmt --- .../src/gateway/direct_url_gateway.rs | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/crates/rattler_repodata_gateway/src/gateway/direct_url_gateway.rs b/crates/rattler_repodata_gateway/src/gateway/direct_url_gateway.rs index 7b1754b6e..d62c7cdcf 100644 --- a/crates/rattler_repodata_gateway/src/gateway/direct_url_gateway.rs +++ b/crates/rattler_repodata_gateway/src/gateway/direct_url_gateway.rs @@ -88,11 +88,11 @@ impl IntoFuture for DirectUrlQuery { #[cfg(test)] mod test { + use super::*; + use rattler_cache::package_cache::PackageCache; use std::env::temp_dir; use std::path::PathBuf; use url::Url; - use super::*; - use rattler_cache::package_cache::PackageCache; #[tokio::test] async fn test_direct_url_query() { @@ -123,16 +123,13 @@ mod test { .unwrap(), "476873c6289c3b7320b3fd6c0b31da67aa557abcf5c85a0583150ad4796cc575", ) - .await.unwrap(); + .await + .unwrap(); let path = temp_dir().join("not_a_conda_archive_style_name.conda"); // copy path into fake filename into tmp - std::fs::copy( - package_path, - &path, - ) - .unwrap(); + std::fs::copy(package_path, &path).unwrap(); let url = Url::from_file_path(path).unwrap(); let package_cache = PackageCache::new(PathBuf::from("/tmp")); From 8b573c67e56574bdcb898e5a3ea382efcb046206 Mon Sep 17 00:00:00 2001 From: Ruben Arts Date: Wed, 5 Jun 2024 16:36:14 +0200 Subject: [PATCH 08/52] fix: test, use same archive type --- .../src/gateway/direct_url_gateway.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/crates/rattler_repodata_gateway/src/gateway/direct_url_gateway.rs b/crates/rattler_repodata_gateway/src/gateway/direct_url_gateway.rs index d62c7cdcf..c7eebb9f7 100644 --- a/crates/rattler_repodata_gateway/src/gateway/direct_url_gateway.rs +++ b/crates/rattler_repodata_gateway/src/gateway/direct_url_gateway.rs @@ -118,15 +118,15 @@ mod test { #[tokio::test] async fn test_direct_url_path_query() { let package_path = tools::download_and_cache_file_async( - "https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-main.tar.bz2" + "https://conda.anaconda.org/conda-forge/win-64/zlib-1.2.8-vc10_0.tar.bz2" .parse() .unwrap(), - "476873c6289c3b7320b3fd6c0b31da67aa557abcf5c85a0583150ad4796cc575", + "ee9172dbe9ebd158e8e68d6d0f7dc2060f0c8230b44d2e9a3595b7cd7336b915", ) .await .unwrap(); - let path = temp_dir().join("not_a_conda_archive_style_name.conda"); + let path = temp_dir().join("not_a_conda_archive_style_name.tar.bz2"); // copy path into fake filename into tmp std::fs::copy(package_path, &path).unwrap(); @@ -139,7 +139,7 @@ mod test { assert_eq!(query.url.clone(), url); let repodata_record = query.await.unwrap(); - assert_eq!(repodata_record.package_record.name.as_normalized(), "andes"); - assert_eq!(repodata_record.package_record.version.as_str(), "1.8.3"); + assert_eq!(repodata_record.package_record.name.as_normalized(), "zlib"); + assert_eq!(repodata_record.package_record.version.as_str(), "1.2.8"); } } From a3da32f6588c7d798b11ba1696fb0d04174995de Mon Sep 17 00:00:00 2001 From: Ruben Arts Date: Wed, 5 Jun 2024 17:39:01 +0200 Subject: [PATCH 09/52] fix: repodata result, tested in py-rattler --- .../src/gateway/query.rs | 15 +++++++------ py-rattler/Cargo.lock | 22 ++++++++++--------- 2 files changed, 20 insertions(+), 17 deletions(-) diff --git a/crates/rattler_repodata_gateway/src/gateway/query.rs b/crates/rattler_repodata_gateway/src/gateway/query.rs index 5c8153d9f..920f46603 100644 --- a/crates/rattler_repodata_gateway/src/gateway/query.rs +++ b/crates/rattler_repodata_gateway/src/gateway/query.rs @@ -163,20 +163,22 @@ impl GatewayQuery { direct_url_records.push(record); } } + // Create a RepoData for the direct_url_records + let direct_url_repodata = RepoData { + len: direct_url_records.len(), + shards: vec![Arc::from(direct_url_records)], + }; // A list of futures to fetch the records for the pending package names. The main task // awaits these futures. let mut pending_records = FuturesUnordered::new(); // The resulting list of repodata records + 1 for the direct_url_repodata. - let mut result = vec![RepoData::default(); subdirs.len() + 1]; + let len = subdirs.len() + (if !direct_url_repodata.is_empty() { 1 } else { 0 }); + let mut result = vec![RepoData::default(); len]; // Add the direct_url_repodata to the result. - let direct_url_repodata = RepoData { - len: direct_url_records.len(), - shards: vec![Arc::from(direct_url_records)], - }; - result.push(direct_url_repodata); + if !direct_url_repodata.is_empty() { result.push(direct_url_repodata) }; // Loop until all pending package names have been fetched. loop { @@ -246,7 +248,6 @@ impl GatewayQuery { } } } - dbg!(&seen); Ok(result) } diff --git a/py-rattler/Cargo.lock b/py-rattler/Cargo.lock index 491324a5a..e8d9d7b17 100644 --- a/py-rattler/Cargo.lock +++ b/py-rattler/Cargo.lock @@ -792,7 +792,7 @@ checksum = "9fc0510504f03c51ada170672ac806f1f105a88aa97a5281117e1ddc3368e51a" [[package]] name = "file_url" -version = "0.1.1" +version = "0.1.2" dependencies = [ "itertools 0.12.1", "percent-encoding", @@ -2519,7 +2519,7 @@ dependencies = [ [[package]] name = "rattler" -version = "0.26.2" +version = "0.26.3" dependencies = [ "anyhow", "bytes", @@ -2584,7 +2584,7 @@ dependencies = [ [[package]] name = "rattler_conda_types" -version = "0.25.1" +version = "0.25.2" dependencies = [ "chrono", "file_url", @@ -2627,7 +2627,7 @@ dependencies = [ [[package]] name = "rattler_index" -version = "0.19.15" +version = "0.19.16" dependencies = [ "fs-err", "rattler_conda_types", @@ -2640,7 +2640,7 @@ dependencies = [ [[package]] name = "rattler_lock" -version = "0.22.10" +version = "0.22.11" dependencies = [ "chrono", "file_url", @@ -2700,7 +2700,7 @@ dependencies = [ [[package]] name = "rattler_package_streaming" -version = "0.21.2" +version = "0.21.3" dependencies = [ "bzip2", "chrono", @@ -2724,7 +2724,7 @@ dependencies = [ [[package]] name = "rattler_repodata_gateway" -version = "0.20.4" +version = "0.20.5" dependencies = [ "anyhow", "async-compression", @@ -2751,9 +2751,11 @@ dependencies = [ "parking_lot", "percent-encoding", "pin-project-lite", + "rattler_cache", "rattler_conda_types", "rattler_digest", "rattler_networking", + "rattler_package_streaming", "reqwest 0.12.4", "reqwest-middleware", "rmp-serde", @@ -2774,7 +2776,7 @@ dependencies = [ [[package]] name = "rattler_shell" -version = "0.20.7" +version = "0.20.8" dependencies = [ "enum_dispatch", "indexmap 2.2.6", @@ -2789,7 +2791,7 @@ dependencies = [ [[package]] name = "rattler_solve" -version = "0.24.0" +version = "0.24.1" dependencies = [ "chrono", "futures", @@ -2805,7 +2807,7 @@ dependencies = [ [[package]] name = "rattler_virtual_packages" -version = "0.19.14" +version = "0.19.15" dependencies = [ "archspec", "libloading", From 1b9d3fc9d44aa1b500f4ad3d1d76e8d6a81a054e Mon Sep 17 00:00:00 2001 From: Ruben Arts Date: Wed, 5 Jun 2024 17:43:29 +0200 Subject: [PATCH 10/52] fix: test, use temp_dir --- .../rattler_repodata_gateway/src/gateway/direct_url_gateway.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/rattler_repodata_gateway/src/gateway/direct_url_gateway.rs b/crates/rattler_repodata_gateway/src/gateway/direct_url_gateway.rs index c7eebb9f7..731e5db5f 100644 --- a/crates/rattler_repodata_gateway/src/gateway/direct_url_gateway.rs +++ b/crates/rattler_repodata_gateway/src/gateway/direct_url_gateway.rs @@ -132,7 +132,7 @@ mod test { std::fs::copy(package_path, &path).unwrap(); let url = Url::from_file_path(path).unwrap(); - let package_cache = PackageCache::new(PathBuf::from("/tmp")); + let package_cache = PackageCache::new(temp_dir()); let client = reqwest_middleware::ClientWithMiddleware::from(reqwest::Client::new()); let query = DirectUrlQuery::new(url.clone(), package_cache, client); From df6b092a964187dc569546c769db4af2a1ad7d6f Mon Sep 17 00:00:00 2001 From: Ruben Arts Date: Wed, 5 Jun 2024 17:44:05 +0200 Subject: [PATCH 11/52] fmt --- crates/rattler_repodata_gateway/src/gateway/query.rs | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/crates/rattler_repodata_gateway/src/gateway/query.rs b/crates/rattler_repodata_gateway/src/gateway/query.rs index 920f46603..0cb1088cb 100644 --- a/crates/rattler_repodata_gateway/src/gateway/query.rs +++ b/crates/rattler_repodata_gateway/src/gateway/query.rs @@ -174,11 +174,18 @@ impl GatewayQuery { let mut pending_records = FuturesUnordered::new(); // The resulting list of repodata records + 1 for the direct_url_repodata. - let len = subdirs.len() + (if !direct_url_repodata.is_empty() { 1 } else { 0 }); + let len = subdirs.len() + + (if !direct_url_repodata.is_empty() { + 1 + } else { + 0 + }); let mut result = vec![RepoData::default(); len]; // Add the direct_url_repodata to the result. - if !direct_url_repodata.is_empty() { result.push(direct_url_repodata) }; + if !direct_url_repodata.is_empty() { + result.push(direct_url_repodata) + }; // Loop until all pending package names have been fetched. loop { From a54111ac2dfb3d4578a4123332bc4082ceb94deb Mon Sep 17 00:00:00 2001 From: Ruben Arts Date: Mon, 10 Jun 2024 10:08:17 +0200 Subject: [PATCH 12/52] fix: build/test --- crates/rattler_cache/src/package_cache.rs | 2 +- .../src/gateway/direct_url_gateway.rs | 19 ++++++++++++------- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/crates/rattler_cache/src/package_cache.rs b/crates/rattler_cache/src/package_cache.rs index 4114a6f6c..73d85600f 100644 --- a/crates/rattler_cache/src/package_cache.rs +++ b/crates/rattler_cache/src/package_cache.rs @@ -292,7 +292,7 @@ impl PackageCache { return Err(err); } - // Determine whether or not to retry based on the retry policy + // Determine whether to retry based on the retry policy let execute_after = match retry_policy.should_retry(request_start, current_try) { RetryDecision::Retry { execute_after } => execute_after, RetryDecision::DoNotRetry => return Err(err), diff --git a/crates/rattler_repodata_gateway/src/gateway/direct_url_gateway.rs b/crates/rattler_repodata_gateway/src/gateway/direct_url_gateway.rs index 731e5db5f..ff66f7b2b 100644 --- a/crates/rattler_repodata_gateway/src/gateway/direct_url_gateway.rs +++ b/crates/rattler_repodata_gateway/src/gateway/direct_url_gateway.rs @@ -1,8 +1,11 @@ +use std::future::IntoFuture; + use futures::FutureExt; use rattler_cache::package_cache::{PackageCache, PackageCacheError}; -use rattler_conda_types::package::{IndexJson, PackageFile}; -use rattler_conda_types::{ConvertSubdirError, PackageRecord, RepoDataRecord}; -use std::future::IntoFuture; +use rattler_conda_types::{ + package::{IndexJson, PackageFile}, + ConvertSubdirError, PackageRecord, RepoDataRecord, +}; use url::Url; pub(crate) struct DirectUrlQuery { @@ -37,7 +40,8 @@ impl DirectUrlQuery { } } - /// Execute the Repodata query using the cache as a source for the index.json + /// Execute the Repodata query using the cache as a source for the + /// index.json pub async fn execute(self) -> Result { // TODO: Optimize this by only parsing the index json from stream. // Get package on system @@ -88,12 +92,13 @@ impl IntoFuture for DirectUrlQuery { #[cfg(test)] mod test { - use super::*; + use std::{env::temp_dir, path::PathBuf}; + use rattler_cache::package_cache::PackageCache; - use std::env::temp_dir; - use std::path::PathBuf; use url::Url; + use super::*; + #[tokio::test] async fn test_direct_url_query() { let url = Url::parse( From 3c74946f1577b1f14893d5ce2389c0d32cfafb0a Mon Sep 17 00:00:00 2001 From: Ruben Arts Date: Mon, 10 Jun 2024 11:35:59 +0200 Subject: [PATCH 13/52] feat: add matcher trait to remove direct url from package record --- crates/rattler_conda_types/src/lib.rs | 2 +- .../rattler_conda_types/src/match_spec/mod.rs | 213 +++++++++++------- .../rattler_conda_types/src/repo_data/mod.rs | 16 +- crates/rattler_index/src/lib.rs | 1 - crates/rattler_lock/src/lib.rs | 1 + crates/rattler_lock/src/parse/v3.rs | 1 - .../src/utils/serde/raw_conda_package_data.rs | 1 - .../src/gateway/direct_url_gateway.rs | 10 +- .../src/gateway/query.rs | 36 ++- crates/rattler_solve/src/resolvo/mod.rs | 6 +- crates/rattler_solve/tests/backends.rs | 13 +- 11 files changed, 163 insertions(+), 137 deletions(-) diff --git a/crates/rattler_conda_types/src/lib.rs b/crates/rattler_conda_types/src/lib.rs index d7f4cf4d0..2b1b0ebf4 100644 --- a/crates/rattler_conda_types/src/lib.rs +++ b/crates/rattler_conda_types/src/lib.rs @@ -32,7 +32,7 @@ pub use explicit_environment_spec::{ pub use generic_virtual_package::GenericVirtualPackage; pub use match_spec::matcher::{StringMatcher, StringMatcherParseError}; pub use match_spec::parse::ParseMatchSpecError; -pub use match_spec::{MatchSpec, NamelessMatchSpec}; +pub use match_spec::{MatchSpec, Matches, NamelessMatchSpec}; pub use no_arch_type::{NoArchKind, NoArchType}; pub use package_name::{InvalidPackageNameError, PackageName}; pub use parse_mode::ParseStrictness; diff --git a/crates/rattler_conda_types/src/match_spec/mod.rs b/crates/rattler_conda_types/src/match_spec/mod.rs index 848f428f6..70f0d6327 100644 --- a/crates/rattler_conda_types/src/match_spec/mod.rs +++ b/crates/rattler_conda_types/src/match_spec/mod.rs @@ -1,4 +1,4 @@ -use crate::{build_spec::BuildNumberSpec, PackageName, PackageRecord, VersionSpec}; +use crate::{build_spec::BuildNumberSpec, PackageName, PackageRecord, RepoDataRecord, VersionSpec}; use rattler_digest::{serde::SerializableHash, Md5Hash, Sha256Hash}; use serde::{Deserialize, Deserializer, Serialize}; use serde_with::{serde_as, skip_serializing_none, DisplayFromStr}; @@ -196,47 +196,6 @@ impl Display for MatchSpec { } impl MatchSpec { - /// Match a [`MatchSpec`] against a [`PackageRecord`] - pub fn matches(&self, record: &PackageRecord) -> bool { - if let Some(name) = self.name.as_ref() { - if name != &record.name { - return false; - } - } - - if let Some(spec) = self.version.as_ref() { - if !spec.matches(&record.version) { - return false; - } - } - - if let Some(build_string) = self.build.as_ref() { - if !build_string.matches(&record.build) { - return false; - } - } - - if let Some(build_number) = self.build_number.as_ref() { - if !build_number.matches(&record.build_number) { - return false; - } - } - - if let Some(md5_spec) = self.md5.as_ref() { - if Some(md5_spec) != record.md5.as_ref() { - return false; - } - } - - if let Some(sha256_spec) = self.sha256.as_ref() { - if Some(sha256_spec) != record.sha256.as_ref() { - return false; - } - } - - true - } - /// Decomposes this instance into a [`NamelessMatchSpec`] and a name. pub fn into_nameless(self) -> (Option, NamelessMatchSpec) { ( @@ -300,44 +259,6 @@ pub struct NamelessMatchSpec { pub url: Option, } -impl NamelessMatchSpec { - /// Match a [`MatchSpec`] against a [`PackageRecord`] - pub fn matches(&self, record: &PackageRecord) -> bool { - if let Some(spec) = self.version.as_ref() { - if !spec.matches(&record.version) { - return false; - } - } - - if let Some(build_string) = self.build.as_ref() { - if !build_string.matches(&record.build) { - return false; - } - } - - if let Some(md5_spec) = self.md5.as_ref() { - if Some(md5_spec) != record.md5.as_ref() { - return false; - } - } - - if let Some(sha256_spec) = self.sha256.as_ref() { - if Some(sha256_spec) != record.sha256.as_ref() { - return false; - } - } - - // Match on the url, only happens with direct url dependencies - if let Some(url_spec) = self.url.as_ref() { - if Some(url_spec) != record.direct_url.as_ref() { - return false; - } - } - - true - } -} - impl Display for NamelessMatchSpec { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { match &self.version { @@ -426,6 +347,101 @@ where } } +/// A trait that defines the behavior of matching a spec against a record. +pub trait Matches { + /// Match a [`MatchSpec`] against a [`T`] + /// Matching it to a record means that the record is valid for the spec. + fn matches(&self, other: &T) -> bool; +} + +impl Matches for NamelessMatchSpec { + /// Match a [`NamelessMatchSpec`] against a [`PackageRecord`] + fn matches(&self, other: &PackageRecord) -> bool { + if let Some(spec) = self.version.as_ref() { + if !spec.matches(&other.version) { + return false; + } + } + + if let Some(build_string) = self.build.as_ref() { + if !build_string.matches(&other.build) { + return false; + } + } + + if let Some(build_number) = self.build_number.as_ref() { + if !build_number.matches(&other.build_number) { + return false; + } + } + + if let Some(md5_spec) = self.md5.as_ref() { + if Some(md5_spec) != other.md5.as_ref() { + return false; + } + } + + if let Some(sha256_spec) = self.sha256.as_ref() { + if Some(sha256_spec) != other.sha256.as_ref() { + return false; + } + } + + true + } +} + +impl Matches for MatchSpec { + /// Match a [`MatchSpec`] against a [`PackageRecord`] + fn matches(&self, other: &PackageRecord) -> bool { + if let Some(name) = self.name.as_ref() { + if name != &other.name { + return false; + } + } + + if !self.clone().into_nameless().1.matches(other) { + return false; + } + + true + } +} + +impl Matches for MatchSpec { + /// Match a [`MatchSpec`] against a [`RepoDataRecord`] + fn matches(&self, other: &RepoDataRecord) -> bool { + if !self.matches(&other.package_record) { + return false; + } + + if let Some(url_spec) = self.url.as_ref() { + if url_spec != &other.url { + return false; + } + } + + true + } +} + +impl Matches for NamelessMatchSpec { + /// Match a [`NamelessMatchSpec`] against a [`RepoDataRecord`] + fn matches(&self, other: &RepoDataRecord) -> bool { + if !self.matches(&other.package_record) { + return false; + } + + if let Some(url_spec) = self.url.as_ref() { + if url_spec != &other.url { + return false; + } + } + + true + } +} + #[cfg(test)] mod tests { use std::str::FromStr; @@ -433,7 +449,8 @@ mod tests { use rattler_digest::{parse_digest_from_hex, Md5, Sha256}; use crate::{ - MatchSpec, NamelessMatchSpec, PackageName, PackageRecord, ParseStrictness::*, Version, + match_spec::Matches, MatchSpec, NamelessMatchSpec, PackageName, PackageRecord, + ParseStrictness::*, RepoDataRecord, Version, }; use insta::assert_snapshot; use std::hash::{Hash, Hasher}; @@ -531,6 +548,40 @@ mod tests { assert!(!spec.matches(&record)); } + #[test] + fn test_layered_matches() { + let repodata_record = RepoDataRecord { + package_record: PackageRecord::new( + PackageName::new_unchecked("mamba"), + Version::from_str("1.0").unwrap(), + String::from(""), + ), + file_name: String::from("mamba-1.0-py37_0"), + url: url::Url::parse("https://mamba.io/mamba-1.0-py37_0.conda").unwrap(), + channel: String::from("mamba"), + }; + let package_record = repodata_record.clone().package_record; + + // Test with basic spec + let match_spec = MatchSpec::from_str("mamba[version==1.0]", Strict).unwrap(); + let nameless_spec = match_spec.clone().into_nameless().1; + + assert!(match_spec.matches(&repodata_record)); + assert!(match_spec.matches(&package_record)); + assert!(nameless_spec.matches(&repodata_record)); + assert!(nameless_spec.matches(&package_record)); + + // Test with url spec + let match_spec = + MatchSpec::from_str("https://mamba.io/mamba-1.0-py37_0.conda", Strict).unwrap(); + let nameless_spec = match_spec.clone().into_nameless().1; + + assert!(match_spec.matches(&repodata_record)); + assert!(match_spec.matches(&package_record)); + assert!(nameless_spec.matches(&repodata_record)); + assert!(nameless_spec.matches(&package_record)); + } + #[test] fn test_serialize_matchspec() { let specs = ["mamba 1.0 py37_0", diff --git a/crates/rattler_conda_types/src/repo_data/mod.rs b/crates/rattler_conda_types/src/repo_data/mod.rs index b93960eba..2a8df7735 100644 --- a/crates/rattler_conda_types/src/repo_data/mod.rs +++ b/crates/rattler_conda_types/src/repo_data/mod.rs @@ -103,10 +103,6 @@ pub struct PackageRecord { #[serde(default)] pub depends: Vec, - /// The package's url which isn't part of an real pacakge record but injected by the direct url query. - #[serde(skip)] - pub direct_url: Option, - /// Features are a deprecated way to specify different feature sets for the /// conda solver. This is not supported anymore and should not be used. /// Instead, `mutex` packages should be used to specify @@ -191,9 +187,7 @@ pub struct PackageRecord { impl Display for PackageRecord { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - if let Some(url) = &self.direct_url { - write!(f, "{url}") - } else if self.build.is_empty() { + if self.build.is_empty() { write!(f, "{} {}", self.name.as_normalized(), self.version,) } else { write!( @@ -317,7 +311,6 @@ impl PackageRecord { version: version.into(), purls: None, run_exports: None, - direct_url: None, } } @@ -436,15 +429,8 @@ impl PackageRecord { version: index.version, purls: None, run_exports: None, - direct_url: None, }) } - - /// Add url to the [`PackageRecord`] - pub fn with_package_url(mut self, url: Url) -> Self { - self.direct_url = Some(url); - self - } } fn sort_map_alphabetically( diff --git a/crates/rattler_index/src/lib.rs b/crates/rattler_index/src/lib.rs index 0192367d5..95c463de5 100644 --- a/crates/rattler_index/src/lib.rs +++ b/crates/rattler_index/src/lib.rs @@ -49,7 +49,6 @@ fn package_record_from_index_json( legacy_bz2_size: None, purls: None, run_exports: None, - direct_url: None, }; Ok(package_record) diff --git a/crates/rattler_lock/src/lib.rs b/crates/rattler_lock/src/lib.rs index 0fbcdadaa..1fc9170d3 100644 --- a/crates/rattler_lock/src/lib.rs +++ b/crates/rattler_lock/src/lib.rs @@ -109,6 +109,7 @@ pub use hash::PackageHashes; pub use parse::ParseCondaLockError; pub use pypi::{PypiPackageData, PypiPackageEnvironmentData, PypiSourceTreeHashable}; pub use pypi_indexes::{FindLinksUrlOrPath, PypiIndexes}; +pub use rattler_conda_types::Matches; pub use url_or_path::UrlOrPath; /// The name of the default environment in a [`LockFile`]. This is the diff --git a/crates/rattler_lock/src/parse/v3.rs b/crates/rattler_lock/src/parse/v3.rs index f4c43b911..1d28b0822 100644 --- a/crates/rattler_lock/src/parse/v3.rs +++ b/crates/rattler_lock/src/parse/v3.rs @@ -173,7 +173,6 @@ pub fn parse_v3_or_lower( version: value.version, purls: value.purls.is_empty().not().then_some(value.purls), run_exports: None, - direct_url: None, }, url: value.url, file_name: None, diff --git a/crates/rattler_lock/src/utils/serde/raw_conda_package_data.rs b/crates/rattler_lock/src/utils/serde/raw_conda_package_data.rs index dc011928b..a87ccab2c 100644 --- a/crates/rattler_lock/src/utils/serde/raw_conda_package_data.rs +++ b/crates/rattler_lock/src/utils/serde/raw_conda_package_data.rs @@ -125,7 +125,6 @@ impl<'a> From> for CondaPackageData { track_features: value.track_features.into_owned(), version: value.version.into_owned(), run_exports: None, - direct_url: None, }, url: value.url.into_owned(), file_name: value.file_name.into_owned(), diff --git a/crates/rattler_repodata_gateway/src/gateway/direct_url_gateway.rs b/crates/rattler_repodata_gateway/src/gateway/direct_url_gateway.rs index ff66f7b2b..f1fc1574a 100644 --- a/crates/rattler_repodata_gateway/src/gateway/direct_url_gateway.rs +++ b/crates/rattler_repodata_gateway/src/gateway/direct_url_gateway.rs @@ -60,12 +60,10 @@ impl DirectUrlQuery { // Extract package record from index json let index_json = IndexJson::from_package_directory(package_dir)?; let package_record = PackageRecord::from_index_json( - index_json, // Size - None, // sha256 - None, // md5 - None, - )? - .with_package_url(self.url.clone()); + index_json, None, // Size + None, // sha256 + None, // md5 + )?; tracing::debug!("Package record build from direct url: {:?}", package_record); diff --git a/crates/rattler_repodata_gateway/src/gateway/query.rs b/crates/rattler_repodata_gateway/src/gateway/query.rs index 0cb1088cb..2fc57af23 100644 --- a/crates/rattler_repodata_gateway/src/gateway/query.rs +++ b/crates/rattler_repodata_gateway/src/gateway/query.rs @@ -1,16 +1,17 @@ -use super::{subdir::Subdir, BarrierCell, GatewayError, GatewayInner, RepoData}; -use crate::gateway::direct_url_gateway::DirectUrlQuery; -use crate::Reporter; -use futures::{select_biased, stream::FuturesUnordered, FutureExt, StreamExt}; -use itertools::Itertools; -use rattler_cache::package_cache::PackageCache; -use rattler_conda_types::{Channel, MatchSpec, PackageName, Platform}; use std::{ collections::{HashMap, HashSet}, future::IntoFuture, sync::Arc, }; +use futures::{select_biased, stream::FuturesUnordered, FutureExt, StreamExt}; +use itertools::Itertools; +use rattler_cache::package_cache::PackageCache; +use rattler_conda_types::{Channel, MatchSpec, Matches, PackageName, Platform}; + +use super::{subdir::Subdir, BarrierCell, GatewayError, GatewayInner, RepoData}; +use crate::{gateway::direct_url_gateway::DirectUrlQuery, Reporter}; + /// Represents a query to execute with a [`Gateway`]. /// /// When executed the query will asynchronously load the repodata from all @@ -94,8 +95,8 @@ impl GatewayQuery { .cartesian_product(self.platforms.into_iter()) .collect_vec(); - // Create barrier cells for each subdirectory. This can be used to wait until the subdir - // becomes available. + // Create barrier cells for each subdirectory. This can be used to wait until + // the subdir becomes available. let mut subdirs = Vec::with_capacity(channels_and_platforms.len()); let mut pending_subdirs = FuturesUnordered::new(); for (subdir_idx, (channel, platform)) in channels_and_platforms.into_iter().enumerate() { @@ -169,28 +170,23 @@ impl GatewayQuery { shards: vec![Arc::from(direct_url_records)], }; - // A list of futures to fetch the records for the pending package names. The main task - // awaits these futures. + // A list of futures to fetch the records for the pending package names. The + // main task awaits these futures. let mut pending_records = FuturesUnordered::new(); // The resulting list of repodata records + 1 for the direct_url_repodata. - let len = subdirs.len() - + (if !direct_url_repodata.is_empty() { - 1 - } else { - 0 - }); + let len = subdirs.len() + usize::from(!direct_url_repodata.is_empty()); let mut result = vec![RepoData::default(); len]; // Add the direct_url_repodata to the result. if !direct_url_repodata.is_empty() { - result.push(direct_url_repodata) + result.push(direct_url_repodata); }; // Loop until all pending package names have been fetched. loop { - // Iterate over all pending package names and create futures to fetch them from all - // subdirs. + // Iterate over all pending package names and create futures to fetch them from + // all subdirs. for (package_name, specs) in pending_package_specs.drain() { for (subdir_idx, subdir) in subdirs.iter().cloned() { let specs = specs.clone(); diff --git a/crates/rattler_solve/src/resolvo/mod.rs b/crates/rattler_solve/src/resolvo/mod.rs index f956461b5..25775a16f 100644 --- a/crates/rattler_solve/src/resolvo/mod.rs +++ b/crates/rattler_solve/src/resolvo/mod.rs @@ -13,8 +13,8 @@ use std::{ use chrono::{DateTime, Utc}; use itertools::Itertools; use rattler_conda_types::{ - package::ArchiveType, GenericVirtualPackage, MatchSpec, NamelessMatchSpec, PackageName, - PackageRecord, ParseMatchSpecError, ParseStrictness, RepoDataRecord, + package::ArchiveType, GenericVirtualPackage, MatchSpec, Matches, NamelessMatchSpec, + PackageName, PackageRecord, ParseMatchSpecError, ParseStrictness, RepoDataRecord, }; use resolvo::{ Candidates, Dependencies, DependencyProvider, KnownDependencies, NameId, Pool, SolvableDisplay, @@ -83,7 +83,7 @@ impl<'a> VersionSet for SolverMatchSpec<'a> { fn contains(&self, v: &Self::V) -> bool { match v { - SolverPackageRecord::Record(rec) => self.inner.matches(&rec.package_record), + SolverPackageRecord::Record(rec) => self.inner.matches(*rec), SolverPackageRecord::VirtualPackage(GenericVirtualPackage { version, build_string, diff --git a/crates/rattler_solve/tests/backends.rs b/crates/rattler_solve/tests/backends.rs index 8c4a29d5e..68754cf1f 100644 --- a/crates/rattler_solve/tests/backends.rs +++ b/crates/rattler_solve/tests/backends.rs @@ -109,7 +109,6 @@ fn installed_package( legacy_bz2_md5: None, purls: None, run_exports: None, - direct_url: None, }, } } @@ -803,9 +802,9 @@ mod resolvo { let specs: Vec<_> = vec![MatchSpec::from_str(url_str, ParseStrictness::Lenient).unwrap()]; // Create RepoData with only the package from the url, so the solver can find it - let mut package_record = PackageRecord { - // Only defining the name, version and url is enough for the solver to find the package - direct_url: Some(url.clone()), + let package_record = PackageRecord { + // // Only defining the name, version and url is enough for the solver to find the package + // direct_url: Some(url.clone()), name: "_libgcc_mutex".parse().unwrap(), version: VersionWithSource::from_str("0.1").unwrap(), // No dependencies! @@ -837,7 +836,7 @@ mod resolvo { assert_eq!(pkgs.len(), 1); assert_eq!(pkgs[0].package_record.name.as_normalized(), "_libgcc_mutex"); - assert_eq!(pkgs[0].package_record.direct_url, Some(url.clone())); + assert_eq!(pkgs[0].url, url.clone()); assert_eq!( pkgs[0].package_record.version, Version::from_str("0.1").unwrap(), @@ -846,13 +845,11 @@ mod resolvo { // ----------------------------------------------------------------------------------------- // Break the url in the repodata, making it not a direct url record. - package_record.direct_url = None; let repo_data: Vec = vec![RepoDataRecord { package_record, - // Mocking the rest of the fields file_name: url_str.to_string(), - url: url.clone(), + url: Url::from_str("https://false.dont").unwrap(), channel: "".to_string(), }]; From f05d3eaf2bcdaeadd54d34cfe31371f51927f688 Mon Sep 17 00:00:00 2001 From: Ruben Arts Date: Mon, 10 Jun 2024 13:08:08 +0200 Subject: [PATCH 14/52] fix: remove wrong defaults --- crates/rattler_conda_types/src/package_name.rs | 2 +- crates/rattler_conda_types/src/repo_data/mod.rs | 2 +- crates/rattler_conda_types/src/version/mod.rs | 2 +- crates/rattler_conda_types/src/version/with_source.rs | 2 +- crates/rattler_solve/tests/backends.rs | 11 +++++------ 5 files changed, 9 insertions(+), 10 deletions(-) diff --git a/crates/rattler_conda_types/src/package_name.rs b/crates/rattler_conda_types/src/package_name.rs index a43641d0a..1383ace27 100644 --- a/crates/rattler_conda_types/src/package_name.rs +++ b/crates/rattler_conda_types/src/package_name.rs @@ -17,7 +17,7 @@ use thiserror::Error; /// This struct explicitly does not implement [`std::fmt::Display`] because its ambiguous if that /// would display the source or the normalized version. Simply call `as_source` or `as_normalized` /// to make the distinction. -#[derive(Debug, Default, Clone, Eq, DeserializeFromStr)] +#[derive(Debug, Clone, Eq, DeserializeFromStr)] pub struct PackageName { normalized: Option, source: String, diff --git a/crates/rattler_conda_types/src/repo_data/mod.rs b/crates/rattler_conda_types/src/repo_data/mod.rs index 2a8df7735..b56b9c02a 100644 --- a/crates/rattler_conda_types/src/repo_data/mod.rs +++ b/crates/rattler_conda_types/src/repo_data/mod.rs @@ -80,7 +80,7 @@ pub struct ChannelInfo { #[serde_as] #[skip_serializing_none] #[sorted] -#[derive(Debug, Default, Deserialize, Serialize, Eq, PartialEq, Clone, Hash)] +#[derive(Debug, Deserialize, Serialize, Eq, PartialEq, Clone, Hash)] pub struct PackageRecord { /// Optionally the architecture the package supports pub arch: Option, diff --git a/crates/rattler_conda_types/src/version/mod.rs b/crates/rattler_conda_types/src/version/mod.rs index 5d55b836c..9c5f167c1 100644 --- a/crates/rattler_conda_types/src/version/mod.rs +++ b/crates/rattler_conda_types/src/version/mod.rs @@ -133,7 +133,7 @@ pub use with_source::VersionWithSource; /// this problem by appending an underscore to plain version numbers: /// /// 1.0.1_ < 1.0.1a => True # ensure correct ordering for openssl -#[derive(Clone, Eq, Default)] +#[derive(Clone, Eq)] pub struct Version { /// Individual components of the version. /// diff --git a/crates/rattler_conda_types/src/version/with_source.rs b/crates/rattler_conda_types/src/version/with_source.rs index 945a55b6d..ac83b78ba 100644 --- a/crates/rattler_conda_types/src/version/with_source.rs +++ b/crates/rattler_conda_types/src/version/with_source.rs @@ -22,7 +22,7 @@ use std::{ /// /// It is also possible to convert directly from a [`Version`] but the [`Display`] implementation /// is then used to generate the string representation. -#[derive(Debug, Default, Clone)] +#[derive(Debug, Clone)] pub struct VersionWithSource { version: Version, source: Option>, diff --git a/crates/rattler_solve/tests/backends.rs b/crates/rattler_solve/tests/backends.rs index 68754cf1f..45db81d34 100644 --- a/crates/rattler_solve/tests/backends.rs +++ b/crates/rattler_solve/tests/backends.rs @@ -802,14 +802,13 @@ mod resolvo { let specs: Vec<_> = vec![MatchSpec::from_str(url_str, ParseStrictness::Lenient).unwrap()]; // Create RepoData with only the package from the url, so the solver can find it - let package_record = PackageRecord { + let package_record = PackageRecord::new( // // Only defining the name, version and url is enough for the solver to find the package // direct_url: Some(url.clone()), - name: "_libgcc_mutex".parse().unwrap(), - version: VersionWithSource::from_str("0.1").unwrap(), - // No dependencies! - ..Default::default() - }; + "_libgcc_mutex".parse().unwrap(), + VersionWithSource::from_str("0.1").unwrap(), + "0".to_string(), + ); let repo_data: Vec = vec![RepoDataRecord { package_record: package_record.clone(), // Mocking the rest of the fields From 26dfc1c2d5fce101a8fcf8247064891ac51c7d07 Mon Sep 17 00:00:00 2001 From: Ruben Arts Date: Mon, 10 Jun 2024 15:48:57 +0200 Subject: [PATCH 15/52] feat: move the direct url query into a future which is called by the default loop --- .../src/gateway/direct_url_gateway.rs | 50 ++++++++-- .../src/gateway/query.rs | 94 ++++++++----------- 2 files changed, 83 insertions(+), 61 deletions(-) diff --git a/crates/rattler_repodata_gateway/src/gateway/direct_url_gateway.rs b/crates/rattler_repodata_gateway/src/gateway/direct_url_gateway.rs index f1fc1574a..61f6b429b 100644 --- a/crates/rattler_repodata_gateway/src/gateway/direct_url_gateway.rs +++ b/crates/rattler_repodata_gateway/src/gateway/direct_url_gateway.rs @@ -1,4 +1,5 @@ use std::future::IntoFuture; +use std::sync::Arc; use futures::FutureExt; use rattler_cache::package_cache::{PackageCache, PackageCacheError}; @@ -42,7 +43,7 @@ impl DirectUrlQuery { /// Execute the Repodata query using the cache as a source for the /// index.json - pub async fn execute(self) -> Result { + pub async fn execute(self) -> Result, DirectUrlQueryError> { // TODO: Optimize this by only parsing the index json from stream. // Get package on system let package_dir = self @@ -67,19 +68,19 @@ impl DirectUrlQuery { tracing::debug!("Package record build from direct url: {:?}", package_record); - Ok(RepoDataRecord { + Ok(Arc::new([RepoDataRecord { package_record, // File name is the same as the url. file_name: self.url.clone().to_string(), url: self.url.clone(), // Fake channel as it is unused in this case. channel: "virtual_direct_url_channel".to_string(), - }) + }])) } } impl IntoFuture for DirectUrlQuery { - type Output = Result; + type Output = Result, DirectUrlQueryError>; type IntoFuture = futures::future::BoxFuture<'static, Self::Output>; @@ -112,10 +113,25 @@ mod test { let repodata_record = query.await.unwrap(); assert_eq!( - repodata_record.package_record.name.as_normalized(), + repodata_record + .as_ref() + .first() + .unwrap() + .package_record + .name + .as_normalized(), "boltons" ); - assert_eq!(repodata_record.package_record.version.as_str(), "24.0.0"); + assert_eq!( + repodata_record + .as_ref() + .first() + .unwrap() + .package_record + .version + .as_str(), + "24.0.0" + ); } #[tokio::test] @@ -142,7 +158,25 @@ mod test { assert_eq!(query.url.clone(), url); let repodata_record = query.await.unwrap(); - assert_eq!(repodata_record.package_record.name.as_normalized(), "zlib"); - assert_eq!(repodata_record.package_record.version.as_str(), "1.2.8"); + assert_eq!( + repodata_record + .as_ref() + .first() + .unwrap() + .package_record + .name + .as_normalized(), + "zlib" + ); + assert_eq!( + repodata_record + .as_ref() + .first() + .unwrap() + .package_record + .version + .as_str(), + "1.2.8" + ); } } diff --git a/crates/rattler_repodata_gateway/src/gateway/query.rs b/crates/rattler_repodata_gateway/src/gateway/query.rs index 2fc57af23..191c78f57 100644 --- a/crates/rattler_repodata_gateway/src/gateway/query.rs +++ b/crates/rattler_repodata_gateway/src/gateway/query.rs @@ -123,10 +123,10 @@ impl GatewayQuery { // Package names that we have or will issue requests for. let mut seen = HashSet::new(); let mut pending_package_specs = HashMap::new(); - let mut pending_direct_url_specs = vec![]; + let mut direct_url_specs = vec![]; for spec in self.specs { if spec.url.is_some() { - pending_direct_url_specs.push(spec); + direct_url_specs.push(spec); } else if let Some(name) = &spec.name { seen.insert(name.clone()); pending_package_specs @@ -136,52 +136,36 @@ impl GatewayQuery { } } - // Prefetch the direct url specs - let mut direct_url_records = vec![]; - for spec in pending_direct_url_specs { - if let Some(url) = &spec.url { - // If the spec has a URL, we do not need to fetch it from the repodata. - let package_cache = PackageCache::new(self.gateway.cache.clone()); - - let query = - DirectUrlQuery::new(url.clone(), package_cache, self.gateway.client.clone()); - - let record = query.await?; - - // Add dependencies of record to pending_package_specs - if self.recursive { - for dependency in &record.package_record.depends { - let dependency_name = PackageName::new_unchecked( - dependency.split_once(' ').unwrap_or((dependency, "")).0, - ); - if seen.insert(dependency_name.clone()) { - pending_package_specs - .insert(dependency_name.clone(), vec![dependency_name.into()]); - } - } - } - - direct_url_records.push(record); - } - } - // Create a RepoData for the direct_url_records - let direct_url_repodata = RepoData { - len: direct_url_records.len(), - shards: vec![Arc::from(direct_url_records)], - }; + // The resulting list of repodata records + 1 for the direct_url_repodata. + let len = subdirs.len() + usize::from(!&direct_url_specs.is_empty()); + let mut result = vec![RepoData::default(); len]; // A list of futures to fetch the records for the pending package names. The // main task awaits these futures. let mut pending_records = FuturesUnordered::new(); - // The resulting list of repodata records + 1 for the direct_url_repodata. - let len = subdirs.len() + usize::from(!direct_url_repodata.is_empty()); - let mut result = vec![RepoData::default(); len]; - - // Add the direct_url_repodata to the result. - if !direct_url_repodata.is_empty() { - result.push(direct_url_repodata); - }; + // Push the direct url queries to the pending_records. + for spec in direct_url_specs { + let gateway = self.gateway.clone(); + pending_records.push( + async move { + let package_cache = PackageCache::new(gateway.cache.clone()); + let query = DirectUrlQuery::new( + spec.clone() + .url + .expect("direct url spec should always have an url"), + package_cache, + gateway.client.clone(), + ); + query + .execute() + .await + .map_err(GatewayError::DirectUrlQueryError) + .map(|records| (0, vec![spec], records)) + } + .boxed(), + ); + } // Loop until all pending package names have been fetched. loop { @@ -192,17 +176,20 @@ impl GatewayQuery { let specs = specs.clone(); let package_name = package_name.clone(); let reporter = self.reporter.clone(); - pending_records.push(async move { - let barrier_cell = subdir.clone(); - let subdir = barrier_cell.wait().await; - match subdir.as_ref() { - Subdir::Found(subdir) => subdir - .get_or_fetch_package_records(&package_name, reporter) - .await - .map(|records| (subdir_idx, specs, records)), - Subdir::NotFound => Ok((subdir_idx, specs, Arc::from(vec![]))), + pending_records.push( + async move { + let barrier_cell = subdir.clone(); + let subdir = barrier_cell.wait().await; + match subdir.as_ref() { + Subdir::Found(subdir) => subdir + .get_or_fetch_package_records(&package_name, reporter) + .await + .map(|records| (subdir_idx, specs, records)), + Subdir::NotFound => Ok((subdir_idx, specs, Arc::from(vec![]))), + } } - }); + .boxed(), + ); } } @@ -226,6 +213,7 @@ impl GatewayQuery { continue; } for dependency in &record.package_record.depends { + // Use only the name for transitive dependencies. let dependency_name = PackageName::new_unchecked( dependency.split_once(' ').unwrap_or((dependency, "")).0, ); From 1c41a89eab8b0ab679238dadbae332e864d1a7f3 Mon Sep 17 00:00:00 2001 From: Ruben Arts Date: Mon, 10 Jun 2024 20:31:40 +0200 Subject: [PATCH 16/52] fix: tests --- .../rattler_conda_types/src/match_spec/mod.rs | 8 +- .../src/gateway/mod.rs | 99 +++++++++++++++++-- .../src/gateway/query.rs | 13 ++- 3 files changed, 104 insertions(+), 16 deletions(-) diff --git a/crates/rattler_conda_types/src/match_spec/mod.rs b/crates/rattler_conda_types/src/match_spec/mod.rs index 70f0d6327..39744e4db 100644 --- a/crates/rattler_conda_types/src/match_spec/mod.rs +++ b/crates/rattler_conda_types/src/match_spec/mod.rs @@ -411,16 +411,16 @@ impl Matches for MatchSpec { impl Matches for MatchSpec { /// Match a [`MatchSpec`] against a [`RepoDataRecord`] fn matches(&self, other: &RepoDataRecord) -> bool { - if !self.matches(&other.package_record) { - return false; - } - if let Some(url_spec) = self.url.as_ref() { if url_spec != &other.url { return false; } } + if !self.matches(&other.package_record) { + return false; + } + true } } diff --git a/crates/rattler_repodata_gateway/src/gateway/mod.rs b/crates/rattler_repodata_gateway/src/gateway/mod.rs index f1c5c160c..7f37e4ad0 100644 --- a/crates/rattler_repodata_gateway/src/gateway/mod.rs +++ b/crates/rattler_repodata_gateway/src/gateway/mod.rs @@ -337,8 +337,10 @@ mod test { }; use dashmap::DashSet; - use rattler_conda_types::ParseStrictness::Strict; - use rattler_conda_types::{Channel, ChannelConfig, MatchSpec, PackageName, Platform}; + use rattler_conda_types::{ + Channel, ChannelConfig, MatchSpec, PackageName, ParseStrictness::Strict, Platform, + RepoDataRecord, + }; use rstest::rstest; use url::Url; @@ -406,22 +408,103 @@ mod test { async fn test_direct_url_spec_from_gateway() { let gateway = Gateway::new(); - let index = remote_conda_forge().await; + let index = local_conda_forge().await; let records = gateway .query( - vec![index.channel()], + vec![index.clone()], vec![Platform::Linux64], - vec![MatchSpec::from_str("https://conda.anaconda.org/conda-forge/linux-64/mamba-1.4.7-py310h51d5547_0.conda", Strict).unwrap()].into_iter(), - // vec![MatchSpec::from_str("mamba=1.4.7=py310h51d5547_0", Strict).unwrap()].into_iter(), + vec![MatchSpec::from_str("https://conda.anaconda.org/conda-forge/linux-64/openssl-3.0.4-h166bdaf_2.tar.bz2", Strict).unwrap()].into_iter(), + + ) + .recursive(true) + .await + .unwrap(); + + let non_openssl_direct_records = records + .iter() + .flat_map(RepoData::iter) + .filter(|record| record.package_record.name.as_normalized() != "openssl") + .collect::>() + .len(); + + let records = gateway + .query( + vec![index], + vec![Platform::Linux64], + vec![MatchSpec::from_str("openssl 3.0.4 h166bdaf_2", Strict).unwrap()].into_iter(), + ) + .recursive(true) + .await + .unwrap(); + + let non_openssl_total_records = records + .iter() + .flat_map(RepoData::iter) + .filter(|record| record.package_record.name.as_normalized() != "openssl") + .collect::>() + .len(); + + // The total records without the matchspec should be the same. + assert_eq!(non_openssl_total_records, non_openssl_direct_records); + } + + // Make sure that the direct url version of openssl is used instead of the one from the normal channel. + #[tokio::test] + async fn test_select_forced_url_instead_of_deps() { + let gateway = Gateway::new(); + + let index = local_conda_forge().await; + + let records = gateway + .query( + vec![index.clone()], + vec![Platform::Linux64], + vec![ + MatchSpec::from_str("mamba 0.9.2 py39h951de11_0", Strict).unwrap(), + MatchSpec::from_str("https://conda.anaconda.org/conda-forge/linux-64/openssl-3.0.4-h166bdaf_2.tar.bz2", Strict).unwrap() + ].into_iter(), + ) + .recursive(true) + .await + .unwrap(); + + let total_records_single_openssl: usize = records.iter().map(RepoData::len).sum(); + assert_eq!(total_records_single_openssl, 4644); + + // There should be only one record for the openssl package. + let openssl_records: Vec<&RepoDataRecord> = records + .iter() + .flat_map(RepoData::iter) + .filter(|record| record.package_record.name.as_normalized() == "openssl") + .collect(); + assert_eq!(openssl_records.len(), 1); + + let gateway = Gateway::new(); + let records = gateway + .query( + vec![index.clone()], + vec![Platform::Linux64], + vec![MatchSpec::from_str("mamba 0.9.2 py39h951de11_0", Strict).unwrap()] + .into_iter(), ) .recursive(true) .await .unwrap(); let total_records: usize = records.iter().map(RepoData::len).sum(); - eprintln!("records: {:?}", records); - assert_eq!(total_records, 4413); + + // The total number of records should be greater than the number of records + // fetched when selecting the openssl with a direct url. + assert!(total_records > total_records_single_openssl); + assert_eq!(total_records, 4692); + + let openssl_records: Vec<&RepoDataRecord> = records + .iter() + .flat_map(RepoData::iter) + .filter(|record| record.package_record.name.as_normalized() == "openssl") + .collect(); + assert!(openssl_records.len() > 1); } #[rstest] diff --git a/crates/rattler_repodata_gateway/src/gateway/query.rs b/crates/rattler_repodata_gateway/src/gateway/query.rs index 191c78f57..e47825f76 100644 --- a/crates/rattler_repodata_gateway/src/gateway/query.rs +++ b/crates/rattler_repodata_gateway/src/gateway/query.rs @@ -126,7 +126,12 @@ impl GatewayQuery { let mut direct_url_specs = vec![]; for spec in self.specs { if spec.url.is_some() { - direct_url_specs.push(spec); + let name = spec + .name + .clone() + .expect("direct url spec should always have a name"); + seen.insert(name.clone()); + direct_url_specs.push(spec.clone()); } else if let Some(name) = &spec.name { seen.insert(name.clone()); pending_package_specs @@ -137,7 +142,7 @@ impl GatewayQuery { } // The resulting list of repodata records + 1 for the direct_url_repodata. - let len = subdirs.len() + usize::from(!&direct_url_specs.is_empty()); + let len = subdirs.len(); let mut result = vec![RepoData::default(); len]; // A list of futures to fetch the records for the pending package names. The @@ -145,7 +150,7 @@ impl GatewayQuery { let mut pending_records = FuturesUnordered::new(); // Push the direct url queries to the pending_records. - for spec in direct_url_specs { + for spec in direct_url_specs.clone() { let gateway = self.gateway.clone(); pending_records.push( async move { @@ -208,7 +213,7 @@ impl GatewayQuery { // Extract the dependencies from the records and recursively add them to the // list of package names that we need to fetch. for record in records.iter() { - if !request_specs.iter().any(|spec| spec.matches(&record.package_record)) { + if !request_specs.iter().any(|spec| spec.matches(record)) { // Do not recurse into records that do not match to root spec. continue; } From 4a7f67dc60e0cc99c126b33b8ffe1b084e55a85c Mon Sep 17 00:00:00 2001 From: Ruben Arts Date: Mon, 10 Jun 2024 20:35:19 +0200 Subject: [PATCH 17/52] fix: speed back up matches --- .../rattler_conda_types/src/match_spec/mod.rs | 38 ++++++++++++++++--- 1 file changed, 32 insertions(+), 6 deletions(-) diff --git a/crates/rattler_conda_types/src/match_spec/mod.rs b/crates/rattler_conda_types/src/match_spec/mod.rs index 39744e4db..dce2abdb6 100644 --- a/crates/rattler_conda_types/src/match_spec/mod.rs +++ b/crates/rattler_conda_types/src/match_spec/mod.rs @@ -400,8 +400,34 @@ impl Matches for MatchSpec { } } - if !self.clone().into_nameless().1.matches(other) { - return false; + if let Some(spec) = self.version.as_ref() { + if !spec.matches(&other.version) { + return false; + } + } + + if let Some(build_string) = self.build.as_ref() { + if !build_string.matches(&other.build) { + return false; + } + } + + if let Some(build_number) = self.build_number.as_ref() { + if !build_number.matches(&other.build_number) { + return false; + } + } + + if let Some(md5_spec) = self.md5.as_ref() { + if Some(md5_spec) != other.md5.as_ref() { + return false; + } + } + + if let Some(sha256_spec) = self.sha256.as_ref() { + if Some(sha256_spec) != other.sha256.as_ref() { + return false; + } } true @@ -428,16 +454,16 @@ impl Matches for MatchSpec { impl Matches for NamelessMatchSpec { /// Match a [`NamelessMatchSpec`] against a [`RepoDataRecord`] fn matches(&self, other: &RepoDataRecord) -> bool { - if !self.matches(&other.package_record) { - return false; - } - if let Some(url_spec) = self.url.as_ref() { if url_spec != &other.url { return false; } } + if !self.matches(&other.package_record) { + return false; + } + true } } From cb45f152c01b165642009032603218160416fc2d Mon Sep 17 00:00:00 2001 From: Ruben Arts Date: Mon, 10 Jun 2024 20:52:47 +0200 Subject: [PATCH 18/52] fix: module rename to direct url query --- .../src/gateway/{direct_url_gateway.rs => direct_url_query.rs} | 0 crates/rattler_repodata_gateway/src/gateway/error.rs | 2 +- crates/rattler_repodata_gateway/src/gateway/mod.rs | 2 +- crates/rattler_repodata_gateway/src/gateway/query.rs | 3 +-- 4 files changed, 3 insertions(+), 4 deletions(-) rename crates/rattler_repodata_gateway/src/gateway/{direct_url_gateway.rs => direct_url_query.rs} (100%) diff --git a/crates/rattler_repodata_gateway/src/gateway/direct_url_gateway.rs b/crates/rattler_repodata_gateway/src/gateway/direct_url_query.rs similarity index 100% rename from crates/rattler_repodata_gateway/src/gateway/direct_url_gateway.rs rename to crates/rattler_repodata_gateway/src/gateway/direct_url_query.rs diff --git a/crates/rattler_repodata_gateway/src/gateway/error.rs b/crates/rattler_repodata_gateway/src/gateway/error.rs index b6d28ddfc..ad82b8d0c 100644 --- a/crates/rattler_repodata_gateway/src/gateway/error.rs +++ b/crates/rattler_repodata_gateway/src/gateway/error.rs @@ -1,6 +1,6 @@ use crate::fetch; use crate::fetch::{FetchRepoDataError, RepoDataNotFoundError}; -use crate::gateway::direct_url_gateway::DirectUrlQueryError; +use crate::gateway::direct_url_query::DirectUrlQueryError; use rattler_conda_types::Channel; use rattler_networking::Redact; use reqwest_middleware::Error; diff --git a/crates/rattler_repodata_gateway/src/gateway/mod.rs b/crates/rattler_repodata_gateway/src/gateway/mod.rs index 7f37e4ad0..b0858cfb8 100644 --- a/crates/rattler_repodata_gateway/src/gateway/mod.rs +++ b/crates/rattler_repodata_gateway/src/gateway/mod.rs @@ -1,7 +1,7 @@ mod barrier_cell; mod builder; mod channel_config; -mod direct_url_gateway; +mod direct_url_query; mod error; mod local_subdir; mod query; diff --git a/crates/rattler_repodata_gateway/src/gateway/query.rs b/crates/rattler_repodata_gateway/src/gateway/query.rs index e47825f76..60ad00160 100644 --- a/crates/rattler_repodata_gateway/src/gateway/query.rs +++ b/crates/rattler_repodata_gateway/src/gateway/query.rs @@ -10,7 +10,7 @@ use rattler_cache::package_cache::PackageCache; use rattler_conda_types::{Channel, MatchSpec, Matches, PackageName, Platform}; use super::{subdir::Subdir, BarrierCell, GatewayError, GatewayInner, RepoData}; -use crate::{gateway::direct_url_gateway::DirectUrlQuery, Reporter}; +use crate::{gateway::direct_url_query::DirectUrlQuery, Reporter}; /// Represents a query to execute with a [`Gateway`]. /// @@ -141,7 +141,6 @@ impl GatewayQuery { } } - // The resulting list of repodata records + 1 for the direct_url_repodata. let len = subdirs.len(); let mut result = vec![RepoData::default(); len]; From 2749332cecccc8a569c23003c206fae2eda34e9f Mon Sep 17 00:00:00 2001 From: Ruben Arts Date: Mon, 10 Jun 2024 20:59:16 +0200 Subject: [PATCH 19/52] fix: improve direct url error --- .../rattler_repodata_gateway/src/gateway/error.rs | 4 ++-- .../rattler_repodata_gateway/src/gateway/query.rs | 15 +++++++-------- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/crates/rattler_repodata_gateway/src/gateway/error.rs b/crates/rattler_repodata_gateway/src/gateway/error.rs index ad82b8d0c..d27253faa 100644 --- a/crates/rattler_repodata_gateway/src/gateway/error.rs +++ b/crates/rattler_repodata_gateway/src/gateway/error.rs @@ -36,8 +36,8 @@ pub enum GatewayError { #[error("the operation was cancelled")] Cancelled, - #[error("the direct url query failed")] - DirectUrlQueryError(#[from] DirectUrlQueryError), + #[error("the direct url query failed for {0}")] + DirectUrlQueryError(String, #[source] DirectUrlQueryError), } impl From for GatewayError { diff --git a/crates/rattler_repodata_gateway/src/gateway/query.rs b/crates/rattler_repodata_gateway/src/gateway/query.rs index 60ad00160..cc61d219c 100644 --- a/crates/rattler_repodata_gateway/src/gateway/query.rs +++ b/crates/rattler_repodata_gateway/src/gateway/query.rs @@ -153,18 +153,17 @@ impl GatewayQuery { let gateway = self.gateway.clone(); pending_records.push( async move { + let url = spec + .clone() + .url + .expect("direct url spec should always have an url"); let package_cache = PackageCache::new(gateway.cache.clone()); - let query = DirectUrlQuery::new( - spec.clone() - .url - .expect("direct url spec should always have an url"), - package_cache, - gateway.client.clone(), - ); + let query = + DirectUrlQuery::new(url.clone(), package_cache, gateway.client.clone()); query .execute() .await - .map_err(GatewayError::DirectUrlQueryError) + .map_err(|e| GatewayError::DirectUrlQueryError(url.to_string(), e)) .map(|records| (0, vec![spec], records)) } .boxed(), From 7e9369693a0c2be2243e7441257b3a6138627b62 Mon Sep 17 00:00:00 2001 From: Ruben Arts Date: Mon, 10 Jun 2024 21:14:08 +0200 Subject: [PATCH 20/52] fix: share package cache in the inner gateway --- crates/rattler_repodata_gateway/src/gateway/builder.rs | 4 ++++ crates/rattler_repodata_gateway/src/gateway/mod.rs | 4 ++++ crates/rattler_repodata_gateway/src/gateway/query.rs | 4 +--- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/crates/rattler_repodata_gateway/src/gateway/builder.rs b/crates/rattler_repodata_gateway/src/gateway/builder.rs index 5211d49fd..194062dd7 100644 --- a/crates/rattler_repodata_gateway/src/gateway/builder.rs +++ b/crates/rattler_repodata_gateway/src/gateway/builder.rs @@ -5,6 +5,7 @@ use reqwest::Client; use reqwest_middleware::ClientWithMiddleware; use std::path::PathBuf; use std::sync::Arc; +use rattler_cache::package_cache::PackageCache; /// A builder for constructing a [`Gateway`]. #[derive(Default)] @@ -85,6 +86,8 @@ impl GatewayBuilder { .join("rattler/cache") }); + let package_cache = PackageCache::new(cache.clone()); + let max_concurrent_requests = self.max_concurrent_requests.unwrap_or(100); Gateway { inner: Arc::new(GatewayInner { @@ -92,6 +95,7 @@ impl GatewayBuilder { client, channel_config: self.channel_config, cache, + package_cache, concurrent_requests_semaphore: Arc::new(tokio::sync::Semaphore::new( max_concurrent_requests, )), diff --git a/crates/rattler_repodata_gateway/src/gateway/mod.rs b/crates/rattler_repodata_gateway/src/gateway/mod.rs index b0858cfb8..2cd695aa9 100644 --- a/crates/rattler_repodata_gateway/src/gateway/mod.rs +++ b/crates/rattler_repodata_gateway/src/gateway/mod.rs @@ -30,6 +30,7 @@ use reqwest_middleware::ClientWithMiddleware; use subdir::{Subdir, SubdirData}; use tokio::sync::broadcast; use tracing::instrument; +use rattler_cache::package_cache::PackageCache; use crate::{fetch::FetchRepoDataError, gateway::error::SubdirNotFoundError, Reporter}; @@ -140,6 +141,9 @@ struct GatewayInner { /// The directory to store any cache cache: PathBuf, + /// The package cache, stored to reuse memory cache + package_cache: PackageCache, + /// A semaphore to limit the number of concurrent requests. concurrent_requests_semaphore: Arc, } diff --git a/crates/rattler_repodata_gateway/src/gateway/query.rs b/crates/rattler_repodata_gateway/src/gateway/query.rs index cc61d219c..36be826d3 100644 --- a/crates/rattler_repodata_gateway/src/gateway/query.rs +++ b/crates/rattler_repodata_gateway/src/gateway/query.rs @@ -6,7 +6,6 @@ use std::{ use futures::{select_biased, stream::FuturesUnordered, FutureExt, StreamExt}; use itertools::Itertools; -use rattler_cache::package_cache::PackageCache; use rattler_conda_types::{Channel, MatchSpec, Matches, PackageName, Platform}; use super::{subdir::Subdir, BarrierCell, GatewayError, GatewayInner, RepoData}; @@ -157,9 +156,8 @@ impl GatewayQuery { .clone() .url .expect("direct url spec should always have an url"); - let package_cache = PackageCache::new(gateway.cache.clone()); let query = - DirectUrlQuery::new(url.clone(), package_cache, gateway.client.clone()); + DirectUrlQuery::new(url.clone(), gateway.package_cache.clone(), gateway.client.clone()); query .execute() .await From 55f9e1e1ab954183aa4c34bbe1101d9802c9d4d6 Mon Sep 17 00:00:00 2001 From: Ruben Arts Date: Mon, 10 Jun 2024 21:14:18 +0200 Subject: [PATCH 21/52] fix: py-rattler --- py-rattler/src/match_spec.rs | 5 +++-- py-rattler/src/nameless_match_spec.rs | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/py-rattler/src/match_spec.rs b/py-rattler/src/match_spec.rs index 0964dd96a..6854b4fa5 100644 --- a/py-rattler/src/match_spec.rs +++ b/py-rattler/src/match_spec.rs @@ -1,7 +1,8 @@ -use pyo3::{pyclass, pymethods, types::PyBytes, PyResult, Python}; -use rattler_conda_types::{Channel, MatchSpec, PackageName, ParseStrictness}; use std::sync::Arc; +use pyo3::{pyclass, pymethods, types::PyBytes, PyResult, Python}; +use rattler_conda_types::{Channel, MatchSpec, Matches, PackageName, ParseStrictness}; + use crate::{ channel::PyChannel, error::PyRattlerError, nameless_match_spec::PyNamelessMatchSpec, package_name::PyPackageName, record::PyRecord, diff --git a/py-rattler/src/nameless_match_spec.rs b/py-rattler/src/nameless_match_spec.rs index b472a8719..aae788d63 100644 --- a/py-rattler/src/nameless_match_spec.rs +++ b/py-rattler/src/nameless_match_spec.rs @@ -1,7 +1,8 @@ -use pyo3::{pyclass, pymethods, types::PyBytes, PyResult, Python}; -use rattler_conda_types::{Channel, MatchSpec, NamelessMatchSpec, ParseStrictness}; use std::sync::Arc; +use pyo3::{pyclass, pymethods, types::PyBytes, PyResult, Python}; +use rattler_conda_types::{Channel, MatchSpec, Matches, NamelessMatchSpec, ParseStrictness}; + use crate::{channel::PyChannel, error::PyRattlerError, match_spec::PyMatchSpec, record::PyRecord}; #[pyclass] From 1e5e5c4216deead93892efff42764af136373c96 Mon Sep 17 00:00:00 2001 From: Ruben Arts Date: Wed, 12 Jun 2024 12:09:11 +0200 Subject: [PATCH 22/52] misc: add package cache to gateway builder --- crates/rattler-bin/src/commands/create.rs | 2 ++ .../src/gateway/builder.rs | 19 +++++++++++++++++-- .../src/gateway/mod.rs | 17 ++++++++++++----- .../src/gateway/query.rs | 7 +++++-- 4 files changed, 36 insertions(+), 9 deletions(-) diff --git a/crates/rattler-bin/src/commands/create.rs b/crates/rattler-bin/src/commands/create.rs index 9ed3b2ee4..4f1e5bb1d 100644 --- a/crates/rattler-bin/src/commands/create.rs +++ b/crates/rattler-bin/src/commands/create.rs @@ -4,6 +4,7 @@ use clap::ValueEnum; use indicatif::{ProgressBar, ProgressStyle}; use itertools::Itertools; use rattler::install::{IndicatifReporter, Installer}; +use rattler::package_cache::PackageCache; use rattler::{ default_cache_dir, install::{Transaction, TransactionOperation}, @@ -144,6 +145,7 @@ pub async fn create(opt: Opt) -> anyhow::Result<()> { // Get the package names from the matchspecs so we can only load the package records that we need. let gateway = Gateway::builder() .with_cache_dir(cache_dir.join("repodata")) + .with_package_cache(PackageCache::new(cache_dir)) .with_client(download_client.clone()) .finish(); diff --git a/crates/rattler_repodata_gateway/src/gateway/builder.rs b/crates/rattler_repodata_gateway/src/gateway/builder.rs index 194062dd7..13f279727 100644 --- a/crates/rattler_repodata_gateway/src/gateway/builder.rs +++ b/crates/rattler_repodata_gateway/src/gateway/builder.rs @@ -1,11 +1,11 @@ use crate::gateway::GatewayInner; use crate::{ChannelConfig, Gateway}; use dashmap::DashMap; +use rattler_cache::package_cache::PackageCache; use reqwest::Client; use reqwest_middleware::ClientWithMiddleware; use std::path::PathBuf; use std::sync::Arc; -use rattler_cache::package_cache::PackageCache; /// A builder for constructing a [`Gateway`]. #[derive(Default)] @@ -13,6 +13,7 @@ pub struct GatewayBuilder { channel_config: ChannelConfig, client: Option, cache: Option, + package_cache: Option, max_concurrent_requests: Option, } @@ -55,12 +56,24 @@ impl GatewayBuilder { self } + /// Add package cache to the builder to store packages. + pub fn with_package_cache(mut self, package_cache: PackageCache) -> Self { + self.set_package_cache(package_cache); + self + } + /// Set the directory to use for caching repodata. pub fn set_cache_dir(&mut self, cache: impl Into) -> &mut Self { self.cache = Some(cache.into()); self } + /// Set the directory to use for caching packages. + pub fn set_package_cache(&mut self, package_cache: PackageCache) -> &mut Self { + self.package_cache = Some(package_cache); + self + } + /// Sets the maximum number of concurrent HTTP requests to make. #[must_use] pub fn with_max_concurrent_requests(mut self, max_concurrent_requests: usize) -> Self { @@ -86,7 +99,9 @@ impl GatewayBuilder { .join("rattler/cache") }); - let package_cache = PackageCache::new(cache.clone()); + let package_cache = self + .package_cache + .unwrap_or(PackageCache::new(cache.clone())); let max_concurrent_requests = self.max_concurrent_requests.unwrap_or(100); Gateway { diff --git a/crates/rattler_repodata_gateway/src/gateway/mod.rs b/crates/rattler_repodata_gateway/src/gateway/mod.rs index 2cd695aa9..34036a03f 100644 --- a/crates/rattler_repodata_gateway/src/gateway/mod.rs +++ b/crates/rattler_repodata_gateway/src/gateway/mod.rs @@ -24,13 +24,13 @@ pub use error::GatewayError; use file_url::url_to_path; use local_subdir::LocalSubdirClient; pub use query::GatewayQuery; +use rattler_cache::package_cache::PackageCache; use rattler_conda_types::{Channel, MatchSpec, Platform}; pub use repo_data::RepoData; use reqwest_middleware::ClientWithMiddleware; use subdir::{Subdir, SubdirData}; use tokio::sync::broadcast; use tracing::instrument; -use rattler_cache::package_cache::PackageCache; use crate::{fetch::FetchRepoDataError, gateway::error::SubdirNotFoundError, Reporter}; @@ -341,6 +341,8 @@ mod test { }; use dashmap::DashSet; + use rattler_cache::default_cache_dir; + use rattler_cache::package_cache::PackageCache; use rattler_conda_types::{ Channel, ChannelConfig, MatchSpec, PackageName, ParseStrictness::Strict, Platform, RepoDataRecord, @@ -410,7 +412,10 @@ mod test { #[tokio::test] async fn test_direct_url_spec_from_gateway() { - let gateway = Gateway::new(); + let gateway = Gateway::builder() + .with_package_cache(PackageCache::new(default_cache_dir().unwrap().join("pkgs"))) + .with_cache_dir(default_cache_dir().unwrap().join("repodata")) + .finish(); let index = local_conda_forge().await; @@ -419,7 +424,6 @@ mod test { vec![index.clone()], vec![Platform::Linux64], vec![MatchSpec::from_str("https://conda.anaconda.org/conda-forge/linux-64/openssl-3.0.4-h166bdaf_2.tar.bz2", Strict).unwrap()].into_iter(), - ) .recursive(true) .await @@ -456,7 +460,10 @@ mod test { // Make sure that the direct url version of openssl is used instead of the one from the normal channel. #[tokio::test] async fn test_select_forced_url_instead_of_deps() { - let gateway = Gateway::new(); + let gateway = Gateway::builder() + .with_package_cache(PackageCache::new(default_cache_dir().unwrap().join("pkgs"))) + .with_cache_dir(default_cache_dir().unwrap().join("repodata")) + .finish(); let index = local_conda_forge().await; @@ -466,7 +473,7 @@ mod test { vec![Platform::Linux64], vec![ MatchSpec::from_str("mamba 0.9.2 py39h951de11_0", Strict).unwrap(), - MatchSpec::from_str("https://conda.anaconda.org/conda-forge/linux-64/openssl-3.0.4-h166bdaf_2.tar.bz2", Strict).unwrap() + MatchSpec::from_str("https://conda.anaconda.org/conda-forge/linux-64/openssl-3.0.4-h166bdaf_2.tar.bz2", Strict).unwrap(), ].into_iter(), ) .recursive(true) diff --git a/crates/rattler_repodata_gateway/src/gateway/query.rs b/crates/rattler_repodata_gateway/src/gateway/query.rs index 36be826d3..a3af2d37b 100644 --- a/crates/rattler_repodata_gateway/src/gateway/query.rs +++ b/crates/rattler_repodata_gateway/src/gateway/query.rs @@ -156,8 +156,11 @@ impl GatewayQuery { .clone() .url .expect("direct url spec should always have an url"); - let query = - DirectUrlQuery::new(url.clone(), gateway.package_cache.clone(), gateway.client.clone()); + let query = DirectUrlQuery::new( + url.clone(), + gateway.package_cache.clone(), + gateway.client.clone(), + ); query .execute() .await From dbc4838958119f34e010bbc833af37563dee79ed Mon Sep 17 00:00:00 2001 From: Ruben Arts Date: Wed, 12 Jun 2024 12:37:43 +0200 Subject: [PATCH 23/52] misc: add error for missing name in matchspec --- .../src/gateway/error.rs | 5 +++- .../src/gateway/mod.rs | 27 +++++++++++++++++++ .../src/gateway/query.rs | 4 +-- 3 files changed, 33 insertions(+), 3 deletions(-) diff --git a/crates/rattler_repodata_gateway/src/gateway/error.rs b/crates/rattler_repodata_gateway/src/gateway/error.rs index d27253faa..adae10443 100644 --- a/crates/rattler_repodata_gateway/src/gateway/error.rs +++ b/crates/rattler_repodata_gateway/src/gateway/error.rs @@ -1,7 +1,7 @@ use crate::fetch; use crate::fetch::{FetchRepoDataError, RepoDataNotFoundError}; use crate::gateway::direct_url_query::DirectUrlQueryError; -use rattler_conda_types::Channel; +use rattler_conda_types::{Channel, MatchSpec}; use rattler_networking::Redact; use reqwest_middleware::Error; use simple_spawn_blocking::Cancelled; @@ -38,6 +38,9 @@ pub enum GatewayError { #[error("the direct url query failed for {0}")] DirectUrlQueryError(String, #[source] DirectUrlQueryError), + + #[error("the match spec: '{0}' doesn't contain a name")] + MatchSpecNoName(MatchSpec), } impl From for GatewayError { diff --git a/crates/rattler_repodata_gateway/src/gateway/mod.rs b/crates/rattler_repodata_gateway/src/gateway/mod.rs index 34036a03f..5d5b105de 100644 --- a/crates/rattler_repodata_gateway/src/gateway/mod.rs +++ b/crates/rattler_repodata_gateway/src/gateway/mod.rs @@ -333,6 +333,7 @@ enum PendingOrFetched { #[cfg(test)] mod test { + use assert_matches::assert_matches; use std::{ path::{Path, PathBuf}, str::FromStr, @@ -518,6 +519,32 @@ mod test { assert!(openssl_records.len() > 1); } + #[tokio::test] + async fn test_nameless_matchspec_error() { + let gateway = Gateway::new(); + + let index = local_conda_forge().await; + + let mut matchspec = MatchSpec::from_str( + "https://conda.anaconda.org/conda-forge/linux-64/openssl-3.0.4-h166bdaf_2.tar.bz2", + Strict, + ) + .unwrap(); + matchspec.name = None; + + let gateway_error = gateway + .query( + vec![index.clone()], + vec![Platform::Linux64], + vec![matchspec].into_iter(), + ) + .recursive(true) + .await + .unwrap_err(); + + assert_matches!(gateway_error, GatewayError::MatchSpecNoName(_)) + } + #[rstest] #[case::named("non-existing-channel")] #[case::url("https://conda.anaconda.org/does-not-exist")] diff --git a/crates/rattler_repodata_gateway/src/gateway/query.rs b/crates/rattler_repodata_gateway/src/gateway/query.rs index a3af2d37b..2001bea94 100644 --- a/crates/rattler_repodata_gateway/src/gateway/query.rs +++ b/crates/rattler_repodata_gateway/src/gateway/query.rs @@ -128,7 +128,7 @@ impl GatewayQuery { let name = spec .name .clone() - .expect("direct url spec should always have a name"); + .ok_or(GatewayError::MatchSpecNoName(spec.clone()))?; seen.insert(name.clone()); direct_url_specs.push(spec.clone()); } else if let Some(name) = &spec.name { @@ -155,7 +155,7 @@ impl GatewayQuery { let url = spec .clone() .url - .expect("direct url spec should always have an url"); + .ok_or(GatewayError::MatchSpecNoName(spec.clone()))?; let query = DirectUrlQuery::new( url.clone(), gateway.package_cache.clone(), From d520aa0e6fca780e20151f854c2dccc74e488ddb Mon Sep 17 00:00:00 2001 From: Ruben Arts Date: Wed, 12 Jun 2024 13:40:50 +0200 Subject: [PATCH 24/52] misc: check package name compatibility between package from url and the resulting record. Looks as follows: > mv ~/_libgcc_mutex-0.1.0-conda.tar.bz2 > cargo run -- ~/mamba-1.2.3-bla.tar.bz2 Error: failed to load repodata Caused by: the package from url '_libgcc_mutex', doesn't have the same name as the match spec filename intents 'mamba' --- .../src/gateway/error.rs | 3 +++ .../src/gateway/query.rs | 21 ++++++++++++++++--- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/crates/rattler_repodata_gateway/src/gateway/error.rs b/crates/rattler_repodata_gateway/src/gateway/error.rs index adae10443..c46bcdcb0 100644 --- a/crates/rattler_repodata_gateway/src/gateway/error.rs +++ b/crates/rattler_repodata_gateway/src/gateway/error.rs @@ -41,6 +41,9 @@ pub enum GatewayError { #[error("the match spec: '{0}' doesn't contain a name")] MatchSpecNoName(MatchSpec), + + #[error("the package from url '{0}', doesn't have the same name as the match spec filename intents '{1}'")] + NotMatchingNameUrl(String, String), } impl From for GatewayError { diff --git a/crates/rattler_repodata_gateway/src/gateway/query.rs b/crates/rattler_repodata_gateway/src/gateway/query.rs index 2001bea94..4630ba54e 100644 --- a/crates/rattler_repodata_gateway/src/gateway/query.rs +++ b/crates/rattler_repodata_gateway/src/gateway/query.rs @@ -9,6 +9,7 @@ use itertools::Itertools; use rattler_conda_types::{Channel, MatchSpec, Matches, PackageName, Platform}; use super::{subdir::Subdir, BarrierCell, GatewayError, GatewayInner, RepoData}; +use crate::GatewayError::MatchSpecNoName; use crate::{gateway::direct_url_query::DirectUrlQuery, Reporter}; /// Represents a query to execute with a [`Gateway`]. @@ -161,11 +162,25 @@ impl GatewayQuery { gateway.package_cache.clone(), gateway.client.clone(), ); - query + + let record = query .execute() .await - .map_err(|e| GatewayError::DirectUrlQueryError(url.to_string(), e)) - .map(|records| (0, vec![spec], records)) + .map_err(|e| GatewayError::DirectUrlQueryError(url.to_string(), e))?; + + // Check if record actually has the same name + if let Some(record) = record.first() { + dbg!(&record); + let spec_name = spec.clone().name.ok_or(MatchSpecNoName(spec.clone()))?; + if record.package_record.name != spec_name { + // Using as_source to get the closest to the retrieved input. + return Err(GatewayError::NotMatchingNameUrl( + record.package_record.name.as_source().to_string(), + spec_name.as_source().to_string(), + )); + } + } + Ok((0, vec![spec], record)) } .boxed(), ); From f34afe5e33c28ec0d88843a8a05f754c67e02883 Mon Sep 17 00:00:00 2001 From: Ruben Arts Date: Wed, 12 Jun 2024 13:59:56 +0200 Subject: [PATCH 25/52] fix: remove dbg! --- crates/rattler_repodata_gateway/src/gateway/query.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/crates/rattler_repodata_gateway/src/gateway/query.rs b/crates/rattler_repodata_gateway/src/gateway/query.rs index 4630ba54e..55a157d01 100644 --- a/crates/rattler_repodata_gateway/src/gateway/query.rs +++ b/crates/rattler_repodata_gateway/src/gateway/query.rs @@ -170,7 +170,6 @@ impl GatewayQuery { // Check if record actually has the same name if let Some(record) = record.first() { - dbg!(&record); let spec_name = spec.clone().name.ok_or(MatchSpecNoName(spec.clone()))?; if record.package_record.name != spec_name { // Using as_source to get the closest to the retrieved input. From 98cc73fd2fe379f2a2953393cedb64b11b43ca5d Mon Sep 17 00:00:00 2001 From: Ruben Arts Date: Wed, 12 Jun 2024 14:00:26 +0200 Subject: [PATCH 26/52] fix: remove unsafe check from root req --- crates/rattler_solve/src/resolvo/mod.rs | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/crates/rattler_solve/src/resolvo/mod.rs b/crates/rattler_solve/src/resolvo/mod.rs index 2ef7132b6..54f43899c 100644 --- a/crates/rattler_solve/src/resolvo/mod.rs +++ b/crates/rattler_solve/src/resolvo/mod.rs @@ -580,14 +580,8 @@ impl super::SolverImpl for Solver { .iter() .map(|spec| { let (name, nameless_spec) = spec.clone().into_nameless(); - let name = if let Some(name) = name { - name.as_normalized().to_string() - } else if let Some(url) = &nameless_spec.url { - url.to_string() - } else { - panic!("Cannot use matchspec without a name") - }; - let name_id = provider.pool.intern_package_name(name.as_str()); + let name = name.expect("cannot use matchspec without a name"); + let name_id = provider.pool.intern_package_name(name.as_normalized()); provider .pool .intern_version_set(name_id, nameless_spec.into()) From 179b126998481088dd565f4a0759ad6920ea659e Mon Sep 17 00:00:00 2001 From: Ruben Arts Date: Wed, 12 Jun 2024 14:00:50 +0200 Subject: [PATCH 27/52] misc: print with direct url in create command --- crates/rattler-bin/src/commands/create.rs | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/crates/rattler-bin/src/commands/create.rs b/crates/rattler-bin/src/commands/create.rs index 4f1e5bb1d..3692a7cee 100644 --- a/crates/rattler-bin/src/commands/create.rs +++ b/crates/rattler-bin/src/commands/create.rs @@ -160,8 +160,8 @@ pub async fn create(opt: Opt) -> anyhow::Result<()> { ) .recursive(true), ) - .await - .context("failed to load repodata")?; + .await + .context("failed to load repodata")?; // Determine the number of recors let total_records: usize = repo_data.iter().map(RepoData::len).sum(); @@ -206,7 +206,7 @@ pub async fn create(opt: Opt) -> anyhow::Result<()> { "Virtual packages:\n{}\n", virtual_packages .iter() - .format_with("\n", |i, f| f(&format_args!(" - {i}",)),) + .format_with("\n", |i, f| f(&format_args!(" - {i}", ))) ); // Now that we parsed and downloaded all information, construct the packaging problem that we @@ -285,11 +285,18 @@ pub async fn create(opt: Opt) -> anyhow::Result<()> { /// Prints the operations of the transaction to the console. fn print_transaction(transaction: &Transaction) { let format_record = |r: &RepoDataRecord| { + let direct_url_print = if r.clone().channel.contains("virtual_direct_url_channel") { + r.url.as_str() + } else { + "" + }; + format!( - "{} {} {}", + "{} {} {} {}", r.package_record.name.as_normalized(), r.package_record.version, - r.package_record.build + r.package_record.build, + direct_url_print, ) }; @@ -336,7 +343,7 @@ fn wrap_in_progress T>(msg: impl Into>, func } /// Displays a spinner with the given message while running the specified function to completion. -async fn wrap_in_async_progress>( +async fn wrap_in_async_progress>( msg: impl Into>, fut: F, ) -> T { From c1266936f12563d65dcef5e6b0eefd543284eb24 Mon Sep 17 00:00:00 2001 From: Ruben Arts Date: Wed, 12 Jun 2024 14:17:02 +0200 Subject: [PATCH 28/52] fix: add direct url specific subdir --- .../src/gateway/query.rs | 47 ++++++++++--------- 1 file changed, 24 insertions(+), 23 deletions(-) diff --git a/crates/rattler_repodata_gateway/src/gateway/query.rs b/crates/rattler_repodata_gateway/src/gateway/query.rs index 55a157d01..8c42eff56 100644 --- a/crates/rattler_repodata_gateway/src/gateway/query.rs +++ b/crates/rattler_repodata_gateway/src/gateway/query.rs @@ -95,9 +95,30 @@ impl GatewayQuery { .cartesian_product(self.platforms.into_iter()) .collect_vec(); + // Package names that we have or will issue requests for. + let mut seen = HashSet::new(); + let mut pending_package_specs = HashMap::new(); + let mut direct_url_specs = vec![]; + for spec in self.specs { + if spec.url.is_some() { + let name = spec + .name + .clone() + .ok_or(GatewayError::MatchSpecNoName(spec.clone()))?; + seen.insert(name.clone()); + direct_url_specs.push(spec.clone()); + } else if let Some(name) = &spec.name { + seen.insert(name.clone()); + pending_package_specs + .entry(name.clone()) + .or_insert_with(Vec::new) + .push(spec); + } + } + // Create barrier cells for each subdirectory. This can be used to wait until // the subdir becomes available. - let mut subdirs = Vec::with_capacity(channels_and_platforms.len()); + let mut subdirs = Vec::with_capacity(channels_and_platforms.len() + direct_url_specs.is_empty() as usize); let mut pending_subdirs = FuturesUnordered::new(); for (subdir_idx, (channel, platform)) in channels_and_platforms.into_iter().enumerate() { // Create a barrier so work that need this subdir can await it. @@ -120,27 +141,6 @@ impl GatewayQuery { }); } - // Package names that we have or will issue requests for. - let mut seen = HashSet::new(); - let mut pending_package_specs = HashMap::new(); - let mut direct_url_specs = vec![]; - for spec in self.specs { - if spec.url.is_some() { - let name = spec - .name - .clone() - .ok_or(GatewayError::MatchSpecNoName(spec.clone()))?; - seen.insert(name.clone()); - direct_url_specs.push(spec.clone()); - } else if let Some(name) = &spec.name { - seen.insert(name.clone()); - pending_package_specs - .entry(name.clone()) - .or_insert_with(Vec::new) - .push(spec); - } - } - let len = subdirs.len(); let mut result = vec![RepoData::default(); len]; @@ -179,7 +179,8 @@ impl GatewayQuery { )); } } - Ok((0, vec![spec], record)) + // Push the direct url record into the last subdir + Ok((len - 1, vec![spec], record)) } .boxed(), ); From 885ea5bc9adcd2fb67513c474f8b9940549fe91f Mon Sep 17 00:00:00 2001 From: Ruben Arts Date: Thu, 13 Jun 2024 08:55:42 +0200 Subject: [PATCH 29/52] fmt --- crates/rattler-bin/src/commands/create.rs | 8 ++++---- crates/rattler_repodata_gateway/src/gateway/query.rs | 3 ++- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/crates/rattler-bin/src/commands/create.rs b/crates/rattler-bin/src/commands/create.rs index 3692a7cee..68abf615e 100644 --- a/crates/rattler-bin/src/commands/create.rs +++ b/crates/rattler-bin/src/commands/create.rs @@ -160,8 +160,8 @@ pub async fn create(opt: Opt) -> anyhow::Result<()> { ) .recursive(true), ) - .await - .context("failed to load repodata")?; + .await + .context("failed to load repodata")?; // Determine the number of recors let total_records: usize = repo_data.iter().map(RepoData::len).sum(); @@ -206,7 +206,7 @@ pub async fn create(opt: Opt) -> anyhow::Result<()> { "Virtual packages:\n{}\n", virtual_packages .iter() - .format_with("\n", |i, f| f(&format_args!(" - {i}", ))) + .format_with("\n", |i, f| f(&format_args!(" - {i}",))) ); // Now that we parsed and downloaded all information, construct the packaging problem that we @@ -343,7 +343,7 @@ fn wrap_in_progress T>(msg: impl Into>, func } /// Displays a spinner with the given message while running the specified function to completion. -async fn wrap_in_async_progress>( +async fn wrap_in_async_progress>( msg: impl Into>, fut: F, ) -> T { diff --git a/crates/rattler_repodata_gateway/src/gateway/query.rs b/crates/rattler_repodata_gateway/src/gateway/query.rs index 8c42eff56..8ae0e5eff 100644 --- a/crates/rattler_repodata_gateway/src/gateway/query.rs +++ b/crates/rattler_repodata_gateway/src/gateway/query.rs @@ -118,7 +118,8 @@ impl GatewayQuery { // Create barrier cells for each subdirectory. This can be used to wait until // the subdir becomes available. - let mut subdirs = Vec::with_capacity(channels_and_platforms.len() + direct_url_specs.is_empty() as usize); + let mut subdirs = + Vec::with_capacity(channels_and_platforms.len() + direct_url_specs.is_empty() as usize); let mut pending_subdirs = FuturesUnordered::new(); for (subdir_idx, (channel, platform)) in channels_and_platforms.into_iter().enumerate() { // Create a barrier so work that need this subdir can await it. From 27bf7e25b84a70a2f479f7705ee8523b6d9eaabf Mon Sep 17 00:00:00 2001 From: Ruben Arts Date: Thu, 13 Jun 2024 10:11:45 +0200 Subject: [PATCH 30/52] fix: use correct dir for cache and make a const for pkgs and repodata for easy reuse --- crates/rattler-bin/Cargo.toml | 1 + crates/rattler-bin/src/commands/create.rs | 4 ++-- crates/rattler/src/install/installer/mod.rs | 2 +- crates/rattler_cache/src/consts.rs | 4 ++++ crates/rattler_cache/src/lib.rs | 4 ++++ crates/rattler_repodata_gateway/src/gateway/builder.rs | 2 +- crates/rattler_repodata_gateway/src/gateway/mod.rs | 8 ++++---- tools/create-resolvo-snapshot/src/main.rs | 2 +- 8 files changed, 18 insertions(+), 9 deletions(-) create mode 100644 crates/rattler_cache/src/consts.rs diff --git a/crates/rattler-bin/Cargo.toml b/crates/rattler-bin/Cargo.toml index c019cbbf2..7fbea0af3 100644 --- a/crates/rattler-bin/Cargo.toml +++ b/crates/rattler-bin/Cargo.toml @@ -34,6 +34,7 @@ rattler_networking = { path="../rattler_networking", version = "0.20.8", default rattler_repodata_gateway = { path="../rattler_repodata_gateway", version = "0.20.5", default-features = false, features = ["gateway"] } rattler_solve = { path="../rattler_solve", version = "0.24.2", default-features = false, features = ["resolvo", "libsolv_c"] } rattler_virtual_packages = { path="../rattler_virtual_packages", version = "0.19.15", default-features = false } +rattler_cache = { path="../rattler_cache", version = "0.1.0", default-features = false } reqwest = { workspace = true } reqwest-middleware = { workspace = true } tokio = { workspace = true, features = ["rt-multi-thread", "macros"] } diff --git a/crates/rattler-bin/src/commands/create.rs b/crates/rattler-bin/src/commands/create.rs index 68abf615e..00af89efd 100644 --- a/crates/rattler-bin/src/commands/create.rs +++ b/crates/rattler-bin/src/commands/create.rs @@ -144,8 +144,8 @@ pub async fn create(opt: Opt) -> anyhow::Result<()> { // Get the package names from the matchspecs so we can only load the package records that we need. let gateway = Gateway::builder() - .with_cache_dir(cache_dir.join("repodata")) - .with_package_cache(PackageCache::new(cache_dir)) + .with_cache_dir(cache_dir.join(rattler_cache::REPODATA_CACHE_DIR)) + .with_package_cache(PackageCache::new(cache_dir.join(rattler_cache::PACKAGE_CACHE_DIR))) .with_client(download_client.clone()) .finish(); diff --git a/crates/rattler/src/install/installer/mod.rs b/crates/rattler/src/install/installer/mod.rs index 106f3f693..5e894c693 100644 --- a/crates/rattler/src/install/installer/mod.rs +++ b/crates/rattler/src/install/installer/mod.rs @@ -272,7 +272,7 @@ impl Installer { PackageCache::new( default_cache_dir() .expect("failed to determine default cache directory") - .join("pkgs"), + .join(rattler_cache::PACKAGE_CACHE_DIR), ) }); diff --git a/crates/rattler_cache/src/consts.rs b/crates/rattler_cache/src/consts.rs new file mode 100644 index 000000000..0d5028959 --- /dev/null +++ b/crates/rattler_cache/src/consts.rs @@ -0,0 +1,4 @@ +/// The location in the main cache folder where the conda package cache is stored. +pub const PACKAGE_CACHE_DIR: &str = "pkgs"; +/// The location in the main cache folder where the repodata cache is stored. +pub const REPODATA_CACHE_DIR: &str = "repodata"; \ No newline at end of file diff --git a/crates/rattler_cache/src/lib.rs b/crates/rattler_cache/src/lib.rs index 6ad72fbb1..a6995f46a 100644 --- a/crates/rattler_cache/src/lib.rs +++ b/crates/rattler_cache/src/lib.rs @@ -3,6 +3,10 @@ use std::path::PathBuf; pub mod package_cache; pub mod validation; + +mod consts; +pub use consts::{PACKAGE_CACHE_DIR, REPODATA_CACHE_DIR}; + /// Returns the default cache directory used by rattler. pub fn default_cache_dir() -> anyhow::Result { Ok(dirs::cache_dir() diff --git a/crates/rattler_repodata_gateway/src/gateway/builder.rs b/crates/rattler_repodata_gateway/src/gateway/builder.rs index 13f279727..b9a555195 100644 --- a/crates/rattler_repodata_gateway/src/gateway/builder.rs +++ b/crates/rattler_repodata_gateway/src/gateway/builder.rs @@ -101,7 +101,7 @@ impl GatewayBuilder { let package_cache = self .package_cache - .unwrap_or(PackageCache::new(cache.clone())); + .unwrap_or(PackageCache::new(cache.join(rattler_cache::PACKAGE_CACHE_DIR))); let max_concurrent_requests = self.max_concurrent_requests.unwrap_or(100); Gateway { diff --git a/crates/rattler_repodata_gateway/src/gateway/mod.rs b/crates/rattler_repodata_gateway/src/gateway/mod.rs index 5d5b105de..ff0aec5d1 100644 --- a/crates/rattler_repodata_gateway/src/gateway/mod.rs +++ b/crates/rattler_repodata_gateway/src/gateway/mod.rs @@ -414,8 +414,8 @@ mod test { #[tokio::test] async fn test_direct_url_spec_from_gateway() { let gateway = Gateway::builder() - .with_package_cache(PackageCache::new(default_cache_dir().unwrap().join("pkgs"))) - .with_cache_dir(default_cache_dir().unwrap().join("repodata")) + .with_package_cache(PackageCache::new(default_cache_dir().unwrap().join(rattler_cache::PACKAGE_CACHE_DIR))) + .with_cache_dir(default_cache_dir().unwrap().join(rattler_cache::REPODATA_CACHE_DIR)) .finish(); let index = local_conda_forge().await; @@ -462,8 +462,8 @@ mod test { #[tokio::test] async fn test_select_forced_url_instead_of_deps() { let gateway = Gateway::builder() - .with_package_cache(PackageCache::new(default_cache_dir().unwrap().join("pkgs"))) - .with_cache_dir(default_cache_dir().unwrap().join("repodata")) + .with_package_cache(PackageCache::new(default_cache_dir().unwrap().join(rattler_cache::PACKAGE_CACHE_DIR))) + .with_cache_dir(default_cache_dir().unwrap().join(rattler_cache::REPODATA_CACHE_DIR)) .finish(); let index = local_conda_forge().await; diff --git a/tools/create-resolvo-snapshot/src/main.rs b/tools/create-resolvo-snapshot/src/main.rs index 96b3d4d43..657221f4d 100644 --- a/tools/create-resolvo-snapshot/src/main.rs +++ b/tools/create-resolvo-snapshot/src/main.rs @@ -47,7 +47,7 @@ async fn main() { let repodata = rattler_repodata_gateway::fetch::fetch_repo_data( channel.platform_url(subdir), client.clone().into(), - rattler_cache::default_cache_dir().unwrap().join("repodata"), + rattler_cache::default_cache_dir().unwrap().join(rattler_cache::REPODATA_CACHE_DIR), FetchRepoDataOptions::default(), None, ) From 9f0cd3d9fcc2a58c86ab8b5f21b7b9d171c2cf59 Mon Sep 17 00:00:00 2001 From: Ruben Arts Date: Thu, 13 Jun 2024 10:11:59 +0200 Subject: [PATCH 31/52] fix: merge conflict --- crates/rattler_solve/src/resolvo/mod.rs | 25 ------------------------- 1 file changed, 25 deletions(-) diff --git a/crates/rattler_solve/src/resolvo/mod.rs b/crates/rattler_solve/src/resolvo/mod.rs index 3f14827d7..55639f0c4 100644 --- a/crates/rattler_solve/src/resolvo/mod.rs +++ b/crates/rattler_solve/src/resolvo/mod.rs @@ -79,31 +79,6 @@ impl<'a> Deref for SolverMatchSpec<'a> { impl<'a> VersionSet for SolverMatchSpec<'a> { type V = SolverPackageRecord<'a>; - - fn contains(&self, v: &Self::V) -> bool { - match v { - SolverPackageRecord::Record(rec) => self.inner.matches(*rec), - SolverPackageRecord::VirtualPackage(GenericVirtualPackage { - version, - build_string, - .. - }) => { - if let Some(spec) = self.inner.version.as_ref() { - if !spec.matches(version) { - return false; - } - } - - if let Some(build_match) = self.inner.build.as_ref() { - if !build_match.matches(build_string) { - return false; - } - } - - true - } - } - } } /// Wrapper around [`PackageRecord`] so that we can use it in resolvo pool From 304a838685583f8eeaba9c94ce5e94f07ca92c48 Mon Sep 17 00:00:00 2001 From: Ruben Arts Date: Thu, 13 Jun 2024 10:12:18 +0200 Subject: [PATCH 32/52] fmt --- crates/rattler-bin/src/commands/create.rs | 4 +++- crates/rattler_cache/src/consts.rs | 2 +- .../src/gateway/builder.rs | 6 ++--- .../src/gateway/mod.rs | 24 +++++++++++++++---- tools/create-resolvo-snapshot/src/main.rs | 4 +++- 5 files changed, 30 insertions(+), 10 deletions(-) diff --git a/crates/rattler-bin/src/commands/create.rs b/crates/rattler-bin/src/commands/create.rs index 00af89efd..fd46d97c6 100644 --- a/crates/rattler-bin/src/commands/create.rs +++ b/crates/rattler-bin/src/commands/create.rs @@ -145,7 +145,9 @@ pub async fn create(opt: Opt) -> anyhow::Result<()> { // Get the package names from the matchspecs so we can only load the package records that we need. let gateway = Gateway::builder() .with_cache_dir(cache_dir.join(rattler_cache::REPODATA_CACHE_DIR)) - .with_package_cache(PackageCache::new(cache_dir.join(rattler_cache::PACKAGE_CACHE_DIR))) + .with_package_cache(PackageCache::new( + cache_dir.join(rattler_cache::PACKAGE_CACHE_DIR), + )) .with_client(download_client.clone()) .finish(); diff --git a/crates/rattler_cache/src/consts.rs b/crates/rattler_cache/src/consts.rs index 0d5028959..d635d262e 100644 --- a/crates/rattler_cache/src/consts.rs +++ b/crates/rattler_cache/src/consts.rs @@ -1,4 +1,4 @@ /// The location in the main cache folder where the conda package cache is stored. pub const PACKAGE_CACHE_DIR: &str = "pkgs"; /// The location in the main cache folder where the repodata cache is stored. -pub const REPODATA_CACHE_DIR: &str = "repodata"; \ No newline at end of file +pub const REPODATA_CACHE_DIR: &str = "repodata"; diff --git a/crates/rattler_repodata_gateway/src/gateway/builder.rs b/crates/rattler_repodata_gateway/src/gateway/builder.rs index b9a555195..c2c572450 100644 --- a/crates/rattler_repodata_gateway/src/gateway/builder.rs +++ b/crates/rattler_repodata_gateway/src/gateway/builder.rs @@ -99,9 +99,9 @@ impl GatewayBuilder { .join("rattler/cache") }); - let package_cache = self - .package_cache - .unwrap_or(PackageCache::new(cache.join(rattler_cache::PACKAGE_CACHE_DIR))); + let package_cache = self.package_cache.unwrap_or(PackageCache::new( + cache.join(rattler_cache::PACKAGE_CACHE_DIR), + )); let max_concurrent_requests = self.max_concurrent_requests.unwrap_or(100); Gateway { diff --git a/crates/rattler_repodata_gateway/src/gateway/mod.rs b/crates/rattler_repodata_gateway/src/gateway/mod.rs index ff0aec5d1..49dd60f5c 100644 --- a/crates/rattler_repodata_gateway/src/gateway/mod.rs +++ b/crates/rattler_repodata_gateway/src/gateway/mod.rs @@ -414,8 +414,16 @@ mod test { #[tokio::test] async fn test_direct_url_spec_from_gateway() { let gateway = Gateway::builder() - .with_package_cache(PackageCache::new(default_cache_dir().unwrap().join(rattler_cache::PACKAGE_CACHE_DIR))) - .with_cache_dir(default_cache_dir().unwrap().join(rattler_cache::REPODATA_CACHE_DIR)) + .with_package_cache(PackageCache::new( + default_cache_dir() + .unwrap() + .join(rattler_cache::PACKAGE_CACHE_DIR), + )) + .with_cache_dir( + default_cache_dir() + .unwrap() + .join(rattler_cache::REPODATA_CACHE_DIR), + ) .finish(); let index = local_conda_forge().await; @@ -462,8 +470,16 @@ mod test { #[tokio::test] async fn test_select_forced_url_instead_of_deps() { let gateway = Gateway::builder() - .with_package_cache(PackageCache::new(default_cache_dir().unwrap().join(rattler_cache::PACKAGE_CACHE_DIR))) - .with_cache_dir(default_cache_dir().unwrap().join(rattler_cache::REPODATA_CACHE_DIR)) + .with_package_cache(PackageCache::new( + default_cache_dir() + .unwrap() + .join(rattler_cache::PACKAGE_CACHE_DIR), + )) + .with_cache_dir( + default_cache_dir() + .unwrap() + .join(rattler_cache::REPODATA_CACHE_DIR), + ) .finish(); let index = local_conda_forge().await; diff --git a/tools/create-resolvo-snapshot/src/main.rs b/tools/create-resolvo-snapshot/src/main.rs index 657221f4d..1fb3cebd0 100644 --- a/tools/create-resolvo-snapshot/src/main.rs +++ b/tools/create-resolvo-snapshot/src/main.rs @@ -47,7 +47,9 @@ async fn main() { let repodata = rattler_repodata_gateway::fetch::fetch_repo_data( channel.platform_url(subdir), client.clone().into(), - rattler_cache::default_cache_dir().unwrap().join(rattler_cache::REPODATA_CACHE_DIR), + rattler_cache::default_cache_dir() + .unwrap() + .join(rattler_cache::REPODATA_CACHE_DIR), FetchRepoDataOptions::default(), None, ) From d03a1d313b4eead64e317c6fe08f415693ce649c Mon Sep 17 00:00:00 2001 From: Ruben Arts Date: Thu, 13 Jun 2024 10:14:35 +0200 Subject: [PATCH 33/52] clippy --- crates/rattler_repodata_gateway/src/gateway/query.rs | 2 +- .../src/gateway/sharded_subdir/mod.rs | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/crates/rattler_repodata_gateway/src/gateway/query.rs b/crates/rattler_repodata_gateway/src/gateway/query.rs index 8ae0e5eff..c8b749d53 100644 --- a/crates/rattler_repodata_gateway/src/gateway/query.rs +++ b/crates/rattler_repodata_gateway/src/gateway/query.rs @@ -119,7 +119,7 @@ impl GatewayQuery { // Create barrier cells for each subdirectory. This can be used to wait until // the subdir becomes available. let mut subdirs = - Vec::with_capacity(channels_and_platforms.len() + direct_url_specs.is_empty() as usize); + Vec::with_capacity(channels_and_platforms.len() + usize::from(direct_url_specs.is_empty())); let mut pending_subdirs = FuturesUnordered::new(); for (subdir_idx, (channel, platform)) in channels_and_platforms.into_iter().enumerate() { // Create a barrier so work that need this subdir can await it. diff --git a/crates/rattler_repodata_gateway/src/gateway/sharded_subdir/mod.rs b/crates/rattler_repodata_gateway/src/gateway/sharded_subdir/mod.rs index 88d730016..865aa5be2 100644 --- a/crates/rattler_repodata_gateway/src/gateway/sharded_subdir/mod.rs +++ b/crates/rattler_repodata_gateway/src/gateway/sharded_subdir/mod.rs @@ -74,7 +74,7 @@ impl ShardedSubdir { let shards_base_url = Url::options() .base_url(Some(&index_base_url)) .parse(&sharded_repodata.info.shards_base_url) - .map_err(|_| { + .map_err(|_e| { GatewayError::Generic(format!( "shard index contains invalid `shards_base_url`: {}", &sharded_repodata.info.shards_base_url @@ -83,7 +83,7 @@ impl ShardedSubdir { let package_base_url = Url::options() .base_url(Some(&index_base_url)) .parse(&sharded_repodata.info.base_url) - .map_err(|_| { + .map_err(|_e| { GatewayError::Generic(format!( "shard index contains invalid `base_url`: {}", &sharded_repodata.info.base_url From c868b5a9cf346016d7de0d6393f77047d68a59fc Mon Sep 17 00:00:00 2001 From: Ruben Arts Date: Thu, 13 Jun 2024 10:15:17 +0200 Subject: [PATCH 34/52] Update crates/rattler_repodata_gateway/src/gateway/error.rs Co-authored-by: Bas Zalmstra --- crates/rattler_repodata_gateway/src/gateway/error.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/rattler_repodata_gateway/src/gateway/error.rs b/crates/rattler_repodata_gateway/src/gateway/error.rs index c46bcdcb0..b13e01960 100644 --- a/crates/rattler_repodata_gateway/src/gateway/error.rs +++ b/crates/rattler_repodata_gateway/src/gateway/error.rs @@ -39,8 +39,8 @@ pub enum GatewayError { #[error("the direct url query failed for {0}")] DirectUrlQueryError(String, #[source] DirectUrlQueryError), - #[error("the match spec: '{0}' doesn't contain a name")] - MatchSpecNoName(MatchSpec), + #[error("the match spec '{0}' does not specify a name")] + MatchSpecWithoutName(MatchSpec), #[error("the package from url '{0}', doesn't have the same name as the match spec filename intents '{1}'")] NotMatchingNameUrl(String, String), From cce373a63375e1942170a7ab61b587ad546cdf0c Mon Sep 17 00:00:00 2001 From: Ruben Arts Date: Thu, 13 Jun 2024 10:15:27 +0200 Subject: [PATCH 35/52] Update crates/rattler_repodata_gateway/src/gateway/query.rs Co-authored-by: Bas Zalmstra --- crates/rattler_repodata_gateway/src/gateway/query.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/rattler_repodata_gateway/src/gateway/query.rs b/crates/rattler_repodata_gateway/src/gateway/query.rs index c8b749d53..88a056346 100644 --- a/crates/rattler_repodata_gateway/src/gateway/query.rs +++ b/crates/rattler_repodata_gateway/src/gateway/query.rs @@ -106,7 +106,7 @@ impl GatewayQuery { .clone() .ok_or(GatewayError::MatchSpecNoName(spec.clone()))?; seen.insert(name.clone()); - direct_url_specs.push(spec.clone()); + direct_url_specs.push(spec); } else if let Some(name) = &spec.name { seen.insert(name.clone()); pending_package_specs From 87c8b5ad34337b3d60051f05ac31d0b2cc9e27b8 Mon Sep 17 00:00:00 2001 From: Ruben Arts Date: Thu, 13 Jun 2024 10:24:33 +0200 Subject: [PATCH 36/52] fix: Make the direct url the first subdir --- .../src/gateway/query.rs | 32 ++++++++++--------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/crates/rattler_repodata_gateway/src/gateway/query.rs b/crates/rattler_repodata_gateway/src/gateway/query.rs index 88a056346..66db33f5a 100644 --- a/crates/rattler_repodata_gateway/src/gateway/query.rs +++ b/crates/rattler_repodata_gateway/src/gateway/query.rs @@ -104,7 +104,7 @@ impl GatewayQuery { let name = spec .name .clone() - .ok_or(GatewayError::MatchSpecNoName(spec.clone()))?; + .ok_or(GatewayError::MatchSpecWithoutName(spec.clone()))?; seen.insert(name.clone()); direct_url_specs.push(spec); } else if let Some(name) = &spec.name { @@ -116,10 +116,12 @@ impl GatewayQuery { } } - // Create barrier cells for each subdirectory. This can be used to wait until - // the subdir becomes available. - let mut subdirs = - Vec::with_capacity(channels_and_platforms.len() + usize::from(direct_url_specs.is_empty())); + // Prepare subdir vec, with direct url offset prepended. + let direct_url_offset = if direct_url_specs.is_empty() { 0 } else { 1 }; + let mut subdirs = Vec::with_capacity(channels_and_platforms.len() + direct_url_offset); + + // Create barrier cells for each subdirectory. + // This can be used to wait until the subdir becomes available. let mut pending_subdirs = FuturesUnordered::new(); for (subdir_idx, (channel, platform)) in channels_and_platforms.into_iter().enumerate() { // Create a barrier so work that need this subdir can await it. @@ -145,8 +147,8 @@ impl GatewayQuery { let len = subdirs.len(); let mut result = vec![RepoData::default(); len]; - // A list of futures to fetch the records for the pending package names. The - // main task awaits these futures. + // A list of futures to fetch the records for the pending package names. + // The main task awaits these futures. let mut pending_records = FuturesUnordered::new(); // Push the direct url queries to the pending_records. @@ -157,7 +159,7 @@ impl GatewayQuery { let url = spec .clone() .url - .ok_or(GatewayError::MatchSpecNoName(spec.clone()))?; + .ok_or(GatewayError::MatchSpecWithoutName(spec.clone()))?; let query = DirectUrlQuery::new( url.clone(), gateway.package_cache.clone(), @@ -171,7 +173,7 @@ impl GatewayQuery { // Check if record actually has the same name if let Some(record) = record.first() { - let spec_name = spec.clone().name.ok_or(MatchSpecNoName(spec.clone()))?; + let spec_name = spec.clone().name.ok_or(GatewayError::MatchSpecWithoutName(spec.clone()))?; if record.package_record.name != spec_name { // Using as_source to get the closest to the retrieved input. return Err(GatewayError::NotMatchingNameUrl( @@ -180,10 +182,10 @@ impl GatewayQuery { )); } } - // Push the direct url record into the last subdir - Ok((len - 1, vec![spec], record)) + // Push the direct url in the first subdir for channel priority logic. + Ok((0, vec![spec], record)) } - .boxed(), + .boxed(), ); } @@ -204,11 +206,11 @@ impl GatewayQuery { Subdir::Found(subdir) => subdir .get_or_fetch_package_records(&package_name, reporter) .await - .map(|records| (subdir_idx, specs, records)), - Subdir::NotFound => Ok((subdir_idx, specs, Arc::from(vec![]))), + .map(|records| (subdir_idx + direct_url_offset, specs, records)), + Subdir::NotFound => Ok((subdir_idx + direct_url_offset, specs, Arc::from(vec![]))), } } - .boxed(), + .boxed(), ); } } From c296eda375b2da7e1e26931ea41556104232325a Mon Sep 17 00:00:00 2001 From: Ruben Arts Date: Thu, 13 Jun 2024 10:24:45 +0200 Subject: [PATCH 37/52] fix: Make the direct url the first subdir --- crates/rattler_repodata_gateway/src/gateway/query.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/crates/rattler_repodata_gateway/src/gateway/query.rs b/crates/rattler_repodata_gateway/src/gateway/query.rs index 66db33f5a..36bab6adc 100644 --- a/crates/rattler_repodata_gateway/src/gateway/query.rs +++ b/crates/rattler_repodata_gateway/src/gateway/query.rs @@ -9,7 +9,6 @@ use itertools::Itertools; use rattler_conda_types::{Channel, MatchSpec, Matches, PackageName, Platform}; use super::{subdir::Subdir, BarrierCell, GatewayError, GatewayInner, RepoData}; -use crate::GatewayError::MatchSpecNoName; use crate::{gateway::direct_url_query::DirectUrlQuery, Reporter}; /// Represents a query to execute with a [`Gateway`]. From 600ed2277ff7380e138570be154ec11c76ab44a6 Mon Sep 17 00:00:00 2001 From: Ruben Arts Date: Thu, 13 Jun 2024 10:34:15 +0200 Subject: [PATCH 38/52] fix: reduce amount of name checking by providing url in direct_url_specs --- crates/rattler_repodata_gateway/src/gateway/mod.rs | 2 +- .../rattler_repodata_gateway/src/gateway/query.rs | 13 +++++-------- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/crates/rattler_repodata_gateway/src/gateway/mod.rs b/crates/rattler_repodata_gateway/src/gateway/mod.rs index 49dd60f5c..594f17698 100644 --- a/crates/rattler_repodata_gateway/src/gateway/mod.rs +++ b/crates/rattler_repodata_gateway/src/gateway/mod.rs @@ -558,7 +558,7 @@ mod test { .await .unwrap_err(); - assert_matches!(gateway_error, GatewayError::MatchSpecNoName(_)) + assert_matches!(gateway_error, GatewayError::MatchSpecWithoutName(_)) } #[rstest] diff --git a/crates/rattler_repodata_gateway/src/gateway/query.rs b/crates/rattler_repodata_gateway/src/gateway/query.rs index 36bab6adc..20a24c9c6 100644 --- a/crates/rattler_repodata_gateway/src/gateway/query.rs +++ b/crates/rattler_repodata_gateway/src/gateway/query.rs @@ -99,13 +99,13 @@ impl GatewayQuery { let mut pending_package_specs = HashMap::new(); let mut direct_url_specs = vec![]; for spec in self.specs { - if spec.url.is_some() { + if let Some(url) = spec.url.clone() { let name = spec .name .clone() .ok_or(GatewayError::MatchSpecWithoutName(spec.clone()))?; seen.insert(name.clone()); - direct_url_specs.push(spec); + direct_url_specs.push((spec.clone(), url)); } else if let Some(name) = &spec.name { seen.insert(name.clone()); pending_package_specs @@ -155,10 +155,7 @@ impl GatewayQuery { let gateway = self.gateway.clone(); pending_records.push( async move { - let url = spec - .clone() - .url - .ok_or(GatewayError::MatchSpecWithoutName(spec.clone()))?; + let url = spec.1; let query = DirectUrlQuery::new( url.clone(), gateway.package_cache.clone(), @@ -172,7 +169,7 @@ impl GatewayQuery { // Check if record actually has the same name if let Some(record) = record.first() { - let spec_name = spec.clone().name.ok_or(GatewayError::MatchSpecWithoutName(spec.clone()))?; + let spec_name = spec.0.clone().name.ok_or(GatewayError::MatchSpecWithoutName(spec.0.clone()))?; if record.package_record.name != spec_name { // Using as_source to get the closest to the retrieved input. return Err(GatewayError::NotMatchingNameUrl( @@ -182,7 +179,7 @@ impl GatewayQuery { } } // Push the direct url in the first subdir for channel priority logic. - Ok((0, vec![spec], record)) + Ok((0, vec![spec.0], record)) } .boxed(), ); From 7a19f32f12f6f50bfcf94476ef5907f0141ce73a Mon Sep 17 00:00:00 2001 From: Ruben Arts Date: Thu, 13 Jun 2024 10:34:22 +0200 Subject: [PATCH 39/52] fix tests --- ...s__match_spec__parse__tests__test_from_string_Strict.snap | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/crates/rattler_conda_types/src/match_spec/snapshots/rattler_conda_types__match_spec__parse__tests__test_from_string_Strict.snap b/crates/rattler_conda_types/src/match_spec/snapshots/rattler_conda_types__match_spec__parse__tests__test_from_string_Strict.snap index 64ff8a6ac..7f806dd0c 100644 --- a/crates/rattler_conda_types/src/match_spec/snapshots/rattler_conda_types__match_spec__parse__tests__test_from_string_Strict.snap +++ b/crates/rattler_conda_types/src/match_spec/snapshots/rattler_conda_types__match_spec__parse__tests__test_from_string_Strict.snap @@ -1,11 +1,12 @@ --- source: crates/rattler_conda_types/src/match_spec/parse.rs -assertion_line: 904 expression: evaluated --- /home/user/conda-bld/linux-64/foo-1.0-py27_0.tar.bz2: + name: foo url: "file:///home/user/conda-bld/linux-64/foo-1.0-py27_0.tar.bz2" "C:\\Users\\user\\conda-bld\\linux-64\\foo-1.0-py27_0.tar.bz2": + name: foo url: "file:///C:/Users/user/conda-bld/linux-64/foo-1.0-py27_0.tar.bz2" blas *.* mkl: name: blas @@ -31,8 +32,10 @@ foo=1.0=py27_0: foo==1.0=py27_0: error: "'foo==1.0=py27_0' is not a valid package name. Package names can only contain 0-9, a-z, A-Z, -, _, or ." "https://conda.anaconda.org/conda-forge/linux-64/py-rattler-0.6.1-py39h8169da8_0.conda": + name: py-rattler url: "https://conda.anaconda.org/conda-forge/linux-64/py-rattler-0.6.1-py39h8169da8_0.conda" "https://repo.prefix.dev/ruben-arts/linux-64/boost-cpp-1.78.0-h75c5d50_1.tar.bz2": + name: boost-cpp url: "https://repo.prefix.dev/ruben-arts/linux-64/boost-cpp-1.78.0-h75c5d50_1.tar.bz2" python 3.8.* *_cpython: name: python From 97d3cbddadfde102c655f8e18a506c15992118b1 Mon Sep 17 00:00:00 2001 From: Ruben Arts Date: Thu, 13 Jun 2024 10:36:59 +0200 Subject: [PATCH 40/52] fmt --- .../src/gateway/query.rs | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/crates/rattler_repodata_gateway/src/gateway/query.rs b/crates/rattler_repodata_gateway/src/gateway/query.rs index 20a24c9c6..807403de8 100644 --- a/crates/rattler_repodata_gateway/src/gateway/query.rs +++ b/crates/rattler_repodata_gateway/src/gateway/query.rs @@ -169,7 +169,11 @@ impl GatewayQuery { // Check if record actually has the same name if let Some(record) = record.first() { - let spec_name = spec.0.clone().name.ok_or(GatewayError::MatchSpecWithoutName(spec.0.clone()))?; + let spec_name = spec + .0 + .clone() + .name + .ok_or(GatewayError::MatchSpecWithoutName(spec.0.clone()))?; if record.package_record.name != spec_name { // Using as_source to get the closest to the retrieved input. return Err(GatewayError::NotMatchingNameUrl( @@ -181,7 +185,7 @@ impl GatewayQuery { // Push the direct url in the first subdir for channel priority logic. Ok((0, vec![spec.0], record)) } - .boxed(), + .boxed(), ); } @@ -202,11 +206,15 @@ impl GatewayQuery { Subdir::Found(subdir) => subdir .get_or_fetch_package_records(&package_name, reporter) .await - .map(|records| (subdir_idx + direct_url_offset, specs, records)), - Subdir::NotFound => Ok((subdir_idx + direct_url_offset, specs, Arc::from(vec![]))), + .map(|records| { + (subdir_idx + direct_url_offset, specs, records) + }), + Subdir::NotFound => { + Ok((subdir_idx + direct_url_offset, specs, Arc::from(vec![]))) + } } } - .boxed(), + .boxed(), ); } } From cb7a0cc6e72ca4fcc1cb9070b1241e7ef5fe6170 Mon Sep 17 00:00:00 2001 From: Ruben Arts Date: Thu, 13 Jun 2024 10:56:22 +0200 Subject: [PATCH 41/52] fix: subdirs only used for channels but in the result reserve a spot for the direct urls --- .../src/gateway/query.rs | 23 ++++++++----------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/crates/rattler_repodata_gateway/src/gateway/query.rs b/crates/rattler_repodata_gateway/src/gateway/query.rs index 807403de8..21880b828 100644 --- a/crates/rattler_repodata_gateway/src/gateway/query.rs +++ b/crates/rattler_repodata_gateway/src/gateway/query.rs @@ -115,17 +115,18 @@ impl GatewayQuery { } } - // Prepare subdir vec, with direct url offset prepended. + // Result offset for direct url queries. let direct_url_offset = if direct_url_specs.is_empty() { 0 } else { 1 }; - let mut subdirs = Vec::with_capacity(channels_and_platforms.len() + direct_url_offset); // Create barrier cells for each subdirectory. // This can be used to wait until the subdir becomes available. + let mut subdirs = Vec::with_capacity(channels_and_platforms.len()); let mut pending_subdirs = FuturesUnordered::new(); for (subdir_idx, (channel, platform)) in channels_and_platforms.into_iter().enumerate() { // Create a barrier so work that need this subdir can await it. let barrier = Arc::new(BarrierCell::new()); - subdirs.push((subdir_idx, barrier.clone())); + // Set the subdir to prepend the direct url queries in the result. + subdirs.push((subdir_idx + direct_url_offset, barrier.clone())); let inner = self.gateway.clone(); let reporter = self.reporter.clone(); @@ -143,9 +144,6 @@ impl GatewayQuery { }); } - let len = subdirs.len(); - let mut result = vec![RepoData::default(); len]; - // A list of futures to fetch the records for the pending package names. // The main task awaits these futures. let mut pending_records = FuturesUnordered::new(); @@ -182,13 +180,16 @@ impl GatewayQuery { )); } } - // Push the direct url in the first subdir for channel priority logic. + // Push the direct url in the first subdir result for channel priority logic. Ok((0, vec![spec.0], record)) } .boxed(), ); } + let len = subdirs.len() + direct_url_offset; + let mut result = vec![RepoData::default(); len]; + // Loop until all pending package names have been fetched. loop { // Iterate over all pending package names and create futures to fetch them from @@ -206,12 +207,8 @@ impl GatewayQuery { Subdir::Found(subdir) => subdir .get_or_fetch_package_records(&package_name, reporter) .await - .map(|records| { - (subdir_idx + direct_url_offset, specs, records) - }), - Subdir::NotFound => { - Ok((subdir_idx + direct_url_offset, specs, Arc::from(vec![]))) - } + .map(|records| (subdir_idx, specs, records)), + Subdir::NotFound => Ok((subdir_idx, specs, Arc::from(vec![]))), } } .boxed(), From d2404dea81a95c90fe184f3f329b6339d7f769ac Mon Sep 17 00:00:00 2001 From: Ruben Arts Date: Thu, 13 Jun 2024 10:57:45 +0200 Subject: [PATCH 42/52] fix: docstring --- crates/rattler_repodata_gateway/src/gateway/query.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/rattler_repodata_gateway/src/gateway/query.rs b/crates/rattler_repodata_gateway/src/gateway/query.rs index 21880b828..9519e21b5 100644 --- a/crates/rattler_repodata_gateway/src/gateway/query.rs +++ b/crates/rattler_repodata_gateway/src/gateway/query.rs @@ -94,7 +94,7 @@ impl GatewayQuery { .cartesian_product(self.platforms.into_iter()) .collect_vec(); - // Package names that we have or will issue requests for. + // Collect all the specs that have a direct url and the ones that have a name. let mut seen = HashSet::new(); let mut pending_package_specs = HashMap::new(); let mut direct_url_specs = vec![]; From 0f09dd8dd6e108db544b55174e689a6c6a0577f8 Mon Sep 17 00:00:00 2001 From: Ruben Arts Date: Thu, 13 Jun 2024 11:12:36 +0200 Subject: [PATCH 43/52] fix: make channel nameless for direct urls --- crates/rattler-bin/src/commands/create.rs | 2 +- crates/rattler_repodata_gateway/src/gateway/direct_url_query.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/rattler-bin/src/commands/create.rs b/crates/rattler-bin/src/commands/create.rs index fd46d97c6..5b3dcf305 100644 --- a/crates/rattler-bin/src/commands/create.rs +++ b/crates/rattler-bin/src/commands/create.rs @@ -287,7 +287,7 @@ pub async fn create(opt: Opt) -> anyhow::Result<()> { /// Prints the operations of the transaction to the console. fn print_transaction(transaction: &Transaction) { let format_record = |r: &RepoDataRecord| { - let direct_url_print = if r.clone().channel.contains("virtual_direct_url_channel") { + let direct_url_print = if r.clone().channel.is_empty() { r.url.as_str() } else { "" diff --git a/crates/rattler_repodata_gateway/src/gateway/direct_url_query.rs b/crates/rattler_repodata_gateway/src/gateway/direct_url_query.rs index 61f6b429b..9f01f8ab0 100644 --- a/crates/rattler_repodata_gateway/src/gateway/direct_url_query.rs +++ b/crates/rattler_repodata_gateway/src/gateway/direct_url_query.rs @@ -74,7 +74,7 @@ impl DirectUrlQuery { file_name: self.url.clone().to_string(), url: self.url.clone(), // Fake channel as it is unused in this case. - channel: "virtual_direct_url_channel".to_string(), + channel: "".to_string(), }])) } } From bee860c6769f42a78612cdc68139ce6c195ac60e Mon Sep 17 00:00:00 2001 From: Ruben Arts Date: Thu, 13 Jun 2024 11:16:42 +0200 Subject: [PATCH 44/52] fix intra-docs test --- crates/rattler_conda_types/src/match_spec/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/rattler_conda_types/src/match_spec/mod.rs b/crates/rattler_conda_types/src/match_spec/mod.rs index f41e06e9c..5ccc9c573 100644 --- a/crates/rattler_conda_types/src/match_spec/mod.rs +++ b/crates/rattler_conda_types/src/match_spec/mod.rs @@ -349,7 +349,7 @@ where /// A trait that defines the behavior of matching a spec against a record. pub trait Matches { - /// Match a [`MatchSpec`] against a [`T`] + /// Match a [`MatchSpec`] against a record. /// Matching it to a record means that the record is valid for the spec. fn matches(&self, other: &T) -> bool; } From b7eeeded79a7e16d7b9bb49b428644d0e9796876 Mon Sep 17 00:00:00 2001 From: Ruben Arts Date: Thu, 13 Jun 2024 11:28:41 +0200 Subject: [PATCH 45/52] fix: match on record not package record --- crates/rattler_solve/src/resolvo/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/rattler_solve/src/resolvo/mod.rs b/crates/rattler_solve/src/resolvo/mod.rs index 55639f0c4..1e272d953 100644 --- a/crates/rattler_solve/src/resolvo/mod.rs +++ b/crates/rattler_solve/src/resolvo/mod.rs @@ -541,7 +541,7 @@ impl<'a> DependencyProvider for CondaDependencyProvider<'a> { let record = &self.pool.resolve_solvable(*c).record; match record { SolverPackageRecord::Record(rec) => { - spec.matches(&rec.package_record) != inverse + spec.matches(*rec) != inverse } SolverPackageRecord::VirtualPackage(GenericVirtualPackage { version, From 7ea414a9b3ecf609d1c7027ad99a9e99c50e2328 Mon Sep 17 00:00:00 2001 From: Ruben Arts Date: Thu, 13 Jun 2024 11:41:14 +0200 Subject: [PATCH 46/52] fix: improve error name and only unpack spec once. --- .../src/gateway/error.rs | 2 +- .../src/gateway/query.rs | 18 ++++++------------ 2 files changed, 7 insertions(+), 13 deletions(-) diff --git a/crates/rattler_repodata_gateway/src/gateway/error.rs b/crates/rattler_repodata_gateway/src/gateway/error.rs index b13e01960..1b7ecf52e 100644 --- a/crates/rattler_repodata_gateway/src/gateway/error.rs +++ b/crates/rattler_repodata_gateway/src/gateway/error.rs @@ -43,7 +43,7 @@ pub enum GatewayError { MatchSpecWithoutName(MatchSpec), #[error("the package from url '{0}', doesn't have the same name as the match spec filename intents '{1}'")] - NotMatchingNameUrl(String, String), + UrlRecordNameMismatch(String, String), } impl From for GatewayError { diff --git a/crates/rattler_repodata_gateway/src/gateway/query.rs b/crates/rattler_repodata_gateway/src/gateway/query.rs index 9519e21b5..ccba0fb0a 100644 --- a/crates/rattler_repodata_gateway/src/gateway/query.rs +++ b/crates/rattler_repodata_gateway/src/gateway/query.rs @@ -105,7 +105,7 @@ impl GatewayQuery { .clone() .ok_or(GatewayError::MatchSpecWithoutName(spec.clone()))?; seen.insert(name.clone()); - direct_url_specs.push((spec.clone(), url)); + direct_url_specs.push((spec.clone(), url, name)); } else if let Some(name) = &spec.name { seen.insert(name.clone()); pending_package_specs @@ -149,11 +149,10 @@ impl GatewayQuery { let mut pending_records = FuturesUnordered::new(); // Push the direct url queries to the pending_records. - for spec in direct_url_specs.clone() { + for (spec, url, name) in direct_url_specs { let gateway = self.gateway.clone(); pending_records.push( async move { - let url = spec.1; let query = DirectUrlQuery::new( url.clone(), gateway.package_cache.clone(), @@ -167,21 +166,16 @@ impl GatewayQuery { // Check if record actually has the same name if let Some(record) = record.first() { - let spec_name = spec - .0 - .clone() - .name - .ok_or(GatewayError::MatchSpecWithoutName(spec.0.clone()))?; - if record.package_record.name != spec_name { + if record.package_record.name != name { // Using as_source to get the closest to the retrieved input. - return Err(GatewayError::NotMatchingNameUrl( + return Err(GatewayError::UrlRecordNameMismatch( record.package_record.name.as_source().to_string(), - spec_name.as_source().to_string(), + name.as_source().to_string(), )); } } // Push the direct url in the first subdir result for channel priority logic. - Ok((0, vec![spec.0], record)) + Ok((0, vec![spec], record)) } .boxed(), ); From 44c28640e5e2193fe0321bee15eccc13735c734f Mon Sep 17 00:00:00 2001 From: Ruben Arts Date: Thu, 13 Jun 2024 11:46:25 +0200 Subject: [PATCH 47/52] fix: Add new subdir with offset --- crates/rattler_repodata_gateway/src/gateway/query.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/crates/rattler_repodata_gateway/src/gateway/query.rs b/crates/rattler_repodata_gateway/src/gateway/query.rs index ccba0fb0a..6a42456f2 100644 --- a/crates/rattler_repodata_gateway/src/gateway/query.rs +++ b/crates/rattler_repodata_gateway/src/gateway/query.rs @@ -202,7 +202,7 @@ impl GatewayQuery { .get_or_fetch_package_records(&package_name, reporter) .await .map(|records| (subdir_idx, specs, records)), - Subdir::NotFound => Ok((subdir_idx, specs, Arc::from(vec![]))), + Subdir::NotFound => Ok((subdir_idx + direct_url_offset, specs, Arc::from(vec![]))), } } .boxed(), @@ -219,7 +219,7 @@ impl GatewayQuery { // Handle any records that were fetched records = pending_records.select_next_some() => { - let (subdir_idx, request_specs, records) = records?; + let (result_idx, request_specs, records) = records?; if self.recursive { // Extract the dependencies from the records and recursively add them to the @@ -243,7 +243,7 @@ impl GatewayQuery { // Add the records to the result if records.len() > 0 { - let result = &mut result[subdir_idx]; + let result = &mut result[result_idx]; result.len += records.len(); result.shards.push(records); } From 268df39d94d52974be214b1c98a6a5428df50cda Mon Sep 17 00:00:00 2001 From: Ruben Arts Date: Thu, 13 Jun 2024 11:48:00 +0200 Subject: [PATCH 48/52] fmt --- crates/rattler_repodata_gateway/src/gateway/query.rs | 4 +++- crates/rattler_solve/src/resolvo/mod.rs | 4 +--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/crates/rattler_repodata_gateway/src/gateway/query.rs b/crates/rattler_repodata_gateway/src/gateway/query.rs index 6a42456f2..b7c3e8251 100644 --- a/crates/rattler_repodata_gateway/src/gateway/query.rs +++ b/crates/rattler_repodata_gateway/src/gateway/query.rs @@ -202,7 +202,9 @@ impl GatewayQuery { .get_or_fetch_package_records(&package_name, reporter) .await .map(|records| (subdir_idx, specs, records)), - Subdir::NotFound => Ok((subdir_idx + direct_url_offset, specs, Arc::from(vec![]))), + Subdir::NotFound => { + Ok((subdir_idx + direct_url_offset, specs, Arc::from(vec![]))) + } } } .boxed(), diff --git a/crates/rattler_solve/src/resolvo/mod.rs b/crates/rattler_solve/src/resolvo/mod.rs index 1e272d953..0678bf3f7 100644 --- a/crates/rattler_solve/src/resolvo/mod.rs +++ b/crates/rattler_solve/src/resolvo/mod.rs @@ -540,9 +540,7 @@ impl<'a> DependencyProvider for CondaDependencyProvider<'a> { .filter(|c| { let record = &self.pool.resolve_solvable(*c).record; match record { - SolverPackageRecord::Record(rec) => { - spec.matches(*rec) != inverse - } + SolverPackageRecord::Record(rec) => spec.matches(*rec) != inverse, SolverPackageRecord::VirtualPackage(GenericVirtualPackage { version, build_string, From 4cc3c508fd47f3c7b34bd67be83571e3f1cab898 Mon Sep 17 00:00:00 2001 From: Ruben Arts Date: Thu, 13 Jun 2024 13:58:31 +0200 Subject: [PATCH 49/52] test: subdir asignment for direct url --- crates/rattler_repodata_gateway/src/gateway/mod.rs | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/crates/rattler_repodata_gateway/src/gateway/mod.rs b/crates/rattler_repodata_gateway/src/gateway/mod.rs index 594f17698..0e420aa6b 100644 --- a/crates/rattler_repodata_gateway/src/gateway/mod.rs +++ b/crates/rattler_repodata_gateway/src/gateway/mod.rs @@ -484,13 +484,14 @@ mod test { let index = local_conda_forge().await; + let openssl_url = "https://conda.anaconda.org/conda-forge/linux-64/openssl-3.0.4-h166bdaf_2.tar.bz2"; let records = gateway .query( vec![index.clone()], vec![Platform::Linux64], vec![ MatchSpec::from_str("mamba 0.9.2 py39h951de11_0", Strict).unwrap(), - MatchSpec::from_str("https://conda.anaconda.org/conda-forge/linux-64/openssl-3.0.4-h166bdaf_2.tar.bz2", Strict).unwrap(), + MatchSpec::from_str(openssl_url, Strict).unwrap(), ].into_iter(), ) .recursive(true) @@ -508,6 +509,15 @@ mod test { .collect(); assert_eq!(openssl_records.len(), 1); + // Test if the first repodata subdir contains only the direct url package. + let first_subdir = records.iter().next().unwrap(); + assert_eq!(first_subdir.len, 1); + let openssl_record = first_subdir.iter().find(|record| record.package_record.name.as_normalized() == "openssl").unwrap(); + assert_eq!(openssl_record.url.as_str(), openssl_url); + + // ------------------------------------------------------------ + // Now we query for the openssl package without the direct url. + // ------------------------------------------------------------ let gateway = Gateway::new(); let records = gateway .query( From e2b284829f19a9d69635c701950783acb3787c08 Mon Sep 17 00:00:00 2001 From: Ruben Arts Date: Thu, 13 Jun 2024 13:58:44 +0200 Subject: [PATCH 50/52] fmt --- crates/rattler_repodata_gateway/src/gateway/mod.rs | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/crates/rattler_repodata_gateway/src/gateway/mod.rs b/crates/rattler_repodata_gateway/src/gateway/mod.rs index 0e420aa6b..d3df398ab 100644 --- a/crates/rattler_repodata_gateway/src/gateway/mod.rs +++ b/crates/rattler_repodata_gateway/src/gateway/mod.rs @@ -484,7 +484,8 @@ mod test { let index = local_conda_forge().await; - let openssl_url = "https://conda.anaconda.org/conda-forge/linux-64/openssl-3.0.4-h166bdaf_2.tar.bz2"; + let openssl_url = + "https://conda.anaconda.org/conda-forge/linux-64/openssl-3.0.4-h166bdaf_2.tar.bz2"; let records = gateway .query( vec![index.clone()], @@ -492,7 +493,8 @@ mod test { vec![ MatchSpec::from_str("mamba 0.9.2 py39h951de11_0", Strict).unwrap(), MatchSpec::from_str(openssl_url, Strict).unwrap(), - ].into_iter(), + ] + .into_iter(), ) .recursive(true) .await @@ -512,7 +514,10 @@ mod test { // Test if the first repodata subdir contains only the direct url package. let first_subdir = records.iter().next().unwrap(); assert_eq!(first_subdir.len, 1); - let openssl_record = first_subdir.iter().find(|record| record.package_record.name.as_normalized() == "openssl").unwrap(); + let openssl_record = first_subdir + .iter() + .find(|record| record.package_record.name.as_normalized() == "openssl") + .unwrap(); assert_eq!(openssl_record.url.as_str(), openssl_url); // ------------------------------------------------------------ From e74d4dc84bd55e3cabe7bea0d449493586a8640e Mon Sep 17 00:00:00 2001 From: Bas Zalmstra Date: Thu, 13 Jun 2024 16:00:22 +0200 Subject: [PATCH 51/52] fix: windows issues --- crates/rattler_cache/src/package_cache.rs | 29 ++++++------ .../src/gateway/direct_url_query.rs | 46 ++++++++++++------- .../src/gateway/query.rs | 1 + 3 files changed, 44 insertions(+), 32 deletions(-) diff --git a/crates/rattler_cache/src/package_cache.rs b/crates/rattler_cache/src/package_cache.rs index 73d85600f..091ce4d28 100644 --- a/crates/rattler_cache/src/package_cache.rs +++ b/crates/rattler_cache/src/package_cache.rs @@ -62,6 +62,20 @@ pub struct CacheKey { sha256: Option, } +impl CacheKey { + /// Adds a sha256 hash of the archive. + pub fn with_sha256(mut self, sha256: Sha256Hash) -> Self { + self.sha256 = Some(sha256); + self + } + + /// Potentially adds a sha256 hash of the archive. + pub fn with_opt_sha256(mut self, sha256: Option) -> Self { + self.sha256 = sha256; + self + } +} + impl CacheKey { /// Return the sha256 hash of the package if it is known. pub fn sha256(&self) -> Option { @@ -91,21 +105,6 @@ impl From<&PackageRecord> for CacheKey { } } -impl From<&Url> for CacheKey { - fn from(url: &Url) -> Self { - if let Some(archive) = ArchiveIdentifier::try_from_url(url) { - return archive.into(); - } - // Not a normal archive, so we use the url as the cache key - CacheKey { - name: url.to_string(), - version: "_".to_string(), - build_string: "_".to_string(), - sha256: None, - } - } -} - impl Display for CacheKey { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { write!(f, "{}-{}-{}", &self.name, &self.version, &self.build_string) diff --git a/crates/rattler_repodata_gateway/src/gateway/direct_url_query.rs b/crates/rattler_repodata_gateway/src/gateway/direct_url_query.rs index 9f01f8ab0..a41266f82 100644 --- a/crates/rattler_repodata_gateway/src/gateway/direct_url_query.rs +++ b/crates/rattler_repodata_gateway/src/gateway/direct_url_query.rs @@ -1,17 +1,19 @@ -use std::future::IntoFuture; -use std::sync::Arc; +use std::{future::IntoFuture, sync::Arc}; use futures::FutureExt; -use rattler_cache::package_cache::{PackageCache, PackageCacheError}; +use rattler_cache::package_cache::{CacheKey, PackageCache, PackageCacheError}; use rattler_conda_types::{ - package::{IndexJson, PackageFile}, + package::{ArchiveIdentifier, IndexJson, PackageFile}, ConvertSubdirError, PackageRecord, RepoDataRecord, }; +use rattler_digest::Sha256Hash; use url::Url; pub(crate) struct DirectUrlQuery { /// The url to query url: Url, + /// Optional Sha256 of the file + sha256: Option, /// The client to use for fetching the package client: reqwest_middleware::ClientWithMiddleware, /// The cache to use for storing the package @@ -26,6 +28,8 @@ pub enum DirectUrlQueryError { IndexJson(#[from] std::io::Error), #[error(transparent)] ConvertSubdir(#[from] ConvertSubdirError), + #[error("could not determine archive identifier from url filename '{0}'")] + InvalidFilename(String), } impl DirectUrlQuery { @@ -33,24 +37,36 @@ impl DirectUrlQuery { url: Url, package_cache: PackageCache, client: reqwest_middleware::ClientWithMiddleware, + sha256: Option, ) -> Self { Self { url, client, package_cache, + sha256, } } /// Execute the Repodata query using the cache as a source for the /// index.json pub async fn execute(self) -> Result, DirectUrlQueryError> { + // Convert the url to an archive identifier. + let Some(archive_identifier) = ArchiveIdentifier::try_from_url(&self.url) else { + let filename = self.url.path_segments().and_then(Iterator::last); + return Err(DirectUrlQueryError::InvalidFilename( + filename.unwrap_or("").to_string(), + )); + }; + + // Construct a cache key + let cache_key = CacheKey::from(archive_identifier).with_opt_sha256(self.sha256); + // TODO: Optimize this by only parsing the index json from stream. // Get package on system let package_dir = self .package_cache .get_or_fetch_from_url( - // Using the url as cache key - &self.url, + cache_key, self.url.clone(), self.client.clone(), // Should we add a reporter? @@ -61,9 +77,10 @@ impl DirectUrlQuery { // Extract package record from index json let index_json = IndexJson::from_package_directory(package_dir)?; let package_record = PackageRecord::from_index_json( - index_json, None, // Size - None, // sha256 - None, // md5 + index_json, + None, // Size + self.sha256, // sha256 + None, // md5 )?; tracing::debug!("Package record build from direct url: {:?}", package_record); @@ -106,7 +123,7 @@ mod test { .unwrap(); let package_cache = PackageCache::new(PathBuf::from("/tmp")); let client = reqwest_middleware::ClientWithMiddleware::from(reqwest::Client::new()); - let query = DirectUrlQuery::new(url.clone(), package_cache, client); + let query = DirectUrlQuery::new(url.clone(), package_cache, client, None); assert_eq!(query.url.clone(), url); @@ -145,15 +162,10 @@ mod test { .await .unwrap(); - let path = temp_dir().join("not_a_conda_archive_style_name.tar.bz2"); - - // copy path into fake filename into tmp - std::fs::copy(package_path, &path).unwrap(); - - let url = Url::from_file_path(path).unwrap(); + let url = Url::from_file_path(package_path).unwrap(); let package_cache = PackageCache::new(temp_dir()); let client = reqwest_middleware::ClientWithMiddleware::from(reqwest::Client::new()); - let query = DirectUrlQuery::new(url.clone(), package_cache, client); + let query = DirectUrlQuery::new(url.clone(), package_cache, client, None); assert_eq!(query.url.clone(), url); diff --git a/crates/rattler_repodata_gateway/src/gateway/query.rs b/crates/rattler_repodata_gateway/src/gateway/query.rs index b7c3e8251..2fbd7aec0 100644 --- a/crates/rattler_repodata_gateway/src/gateway/query.rs +++ b/crates/rattler_repodata_gateway/src/gateway/query.rs @@ -157,6 +157,7 @@ impl GatewayQuery { url.clone(), gateway.package_cache.clone(), gateway.client.clone(), + spec.sha256.clone(), ); let record = query From 8ab4d68eb13adda5edfe3468b5b8cb0146e6cd2d Mon Sep 17 00:00:00 2001 From: Bas Zalmstra Date: Thu, 13 Jun 2024 16:02:07 +0200 Subject: [PATCH 52/52] fix: clippy --- .../rattler_repodata_gateway/src/gateway/direct_url_query.rs | 2 +- crates/rattler_repodata_gateway/src/gateway/query.rs | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/crates/rattler_repodata_gateway/src/gateway/direct_url_query.rs b/crates/rattler_repodata_gateway/src/gateway/direct_url_query.rs index a41266f82..6ecbf3083 100644 --- a/crates/rattler_repodata_gateway/src/gateway/direct_url_query.rs +++ b/crates/rattler_repodata_gateway/src/gateway/direct_url_query.rs @@ -41,9 +41,9 @@ impl DirectUrlQuery { ) -> Self { Self { url, + sha256, client, package_cache, - sha256, } } diff --git a/crates/rattler_repodata_gateway/src/gateway/query.rs b/crates/rattler_repodata_gateway/src/gateway/query.rs index 2fbd7aec0..2a0847ee4 100644 --- a/crates/rattler_repodata_gateway/src/gateway/query.rs +++ b/crates/rattler_repodata_gateway/src/gateway/query.rs @@ -116,7 +116,7 @@ impl GatewayQuery { } // Result offset for direct url queries. - let direct_url_offset = if direct_url_specs.is_empty() { 0 } else { 1 }; + let direct_url_offset = usize::from(!direct_url_specs.is_empty()); // Create barrier cells for each subdirectory. // This can be used to wait until the subdir becomes available. @@ -157,7 +157,7 @@ impl GatewayQuery { url.clone(), gateway.package_cache.clone(), gateway.client.clone(), - spec.sha256.clone(), + spec.sha256, ); let record = query