diff --git a/crates/rattler_s3/src/lib.rs b/crates/rattler_s3/src/lib.rs
index 7fa4321a7..f0138c09a 100644
--- a/crates/rattler_s3/src/lib.rs
+++ b/crates/rattler_s3/src/lib.rs
@@ -23,7 +23,7 @@ pub enum S3AddressingStyle {
 /// Rattler based crates always either use S3 credentials specified by the user
 /// through CLI arguments combined with credentials coming from `rattler auth`,
 /// or everything is loaded through the AWS SDK.
-#[derive(Debug, Clone)]
+#[derive(Debug, Clone, PartialEq)]
 #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
 pub struct S3Credentials {
     /// The endpoint URL of the S3 backend
diff --git a/crates/rattler_upload/src/lib.rs b/crates/rattler_upload/src/lib.rs
index 41adaf37f..51ea3976c 100644
--- a/crates/rattler_upload/src/lib.rs
+++ b/crates/rattler_upload/src/lib.rs
@@ -1,13 +1,26 @@
 pub mod upload;
 pub(crate) mod utils;
+use crate::upload::opt::{AnacondaOpts, ArtifactoryOpts, CondaForgeOpts, PrefixOpts};
+use crate::utils::server_util::{
+    check_server_type, extract_anaconda_info, extract_artifactory_info, extract_conda_forge_info,
+    extract_prefix_info, extract_quetz_info, SimpleServerType,
+};
+use crate::utils::tool_configuration;
 use miette::IntoDiagnostic;
 use rattler_conda_types::package::ArchiveType;
 use upload::opt::{
-    AnacondaData, ArtifactoryData, CondaForgeData, PrefixData, QuetzData, ServerType, UploadOpts,
+    AnacondaData, ArtifactoryData, CondaForgeData, PrefixData, QuetzData, QuetzOpts, ServerType,
+    UploadOpts,
 };
-use crate::utils::tool_configuration;
+#[cfg(feature = "s3")]
+use crate::upload::opt::{S3Data, S3Opts};
+#[cfg(feature = "s3")]
+use crate::utils::server_util::extract_s3_info;
+#[cfg(feature = "s3")]
+use rattler_s3::clap::{S3AddressingStyleOpts, S3CredentialsOpts};
+
 /// Upload package to different channels
 pub async fn upload_from_args(args: UploadOpts) -> miette::Result<()> {
     // Validate package files are provided
@@ -29,8 +42,111 @@ pub async fn upload_from_args(args: UploadOpts) -> miette::Result<()> {
     let store = tool_configuration::get_auth_store(args.common.auth_file, args.auth_store)
         .into_diagnostic()?;
+    // Check server type from host (if provided)
+    let detected_type: SimpleServerType = match &args.host {
+        Some(host_url) => check_server_type(host_url),
+        None => SimpleServerType::Unknown,
+    };
+
+    // Use detected type if available, otherwise fall back to provided server_type
+    let server_type = match detected_type {
+        SimpleServerType::Unknown => {
+            // If detection failed, use provided subcommand server_type or return error
+            match args.server_type {
+                Some(server_type) => server_type,
+                None => {
+                    return Err(std::io::Error::new(
+                        std::io::ErrorKind::InvalidInput,
+                        "Cannot determine server type from host and no server type provided",
+                    ))
+                    .into_diagnostic()
+                }
+            }
+        }
+        SimpleServerType::Quetz => {
+            let host_url = args.host.as_ref().unwrap();
+            let (base_url, channel) =
+                extract_quetz_info(host_url).expect("Failed to parse Quetz URL");
+            ServerType::Quetz(QuetzOpts {
+                url: base_url,
+                channels: channel,
+                api_key: None,
+            })
+        }
+        SimpleServerType::Artifactory => {
+            let host_url = args.host.as_ref().unwrap();
+            let (base_url, channel) =
+                extract_artifactory_info(host_url).expect("Failed to parse Artifactory URL");
+            ServerType::Artifactory(ArtifactoryOpts {
+                url: base_url,
+                channels: channel,
+                username: None,
+                password: None,
+                token: None,
+            })
+        }
+        SimpleServerType::Prefix => {
+            let host_url = args.host.as_ref().unwrap();
+            let (base_url, channel) =
+                extract_prefix_info(host_url).expect("Failed to parse Prefix URL");
+            ServerType::Prefix(PrefixOpts {
+                url: base_url,
+                channel,
+                api_key: None,
+                attestation: None,
+                skip_existing: false,
+            })
+        }
+        SimpleServerType::Anaconda => {
+            let host_url = args.host.as_ref().unwrap();
+            let (base_url, channel) =
+                extract_anaconda_info(host_url).expect("Failed to parse Anaconda URL");
+            ServerType::Anaconda(AnacondaOpts {
+                url: Some(base_url),
+                channels: Some(channel),
+                api_key: None,
+                owner: "".to_string(),
+                force: false,
+            })
+        }
+        #[cfg(feature = "s3")]
+        SimpleServerType::S3 => {
+            let host_url = args.host.as_ref().unwrap();
+            let (endpoint_url, channel, region) =
+                extract_s3_info(host_url).expect("Failed to parse S3 URL");
+            ServerType::S3(S3Opts {
+                channel,
+                s3_credentials: S3CredentialsOpts {
+                    endpoint_url: Some(endpoint_url),
+                    region: Some(region),
+                    access_key_id: None,
+                    secret_access_key: None,
+                    session_token: None,
+                    addressing_style: S3AddressingStyleOpts::VirtualHost,
+                    force_path_style: None,
+                },
+                credentials: None,
+                force: false,
+            })
+        }
+        SimpleServerType::CondaForge => {
+            let host_url = args.host.as_ref().unwrap();
+            let (base_url, channel) =
+                extract_conda_forge_info(host_url).expect("Failed to parse Conda Forge URL");
+            ServerType::CondaForge(CondaForgeOpts {
+                anaconda_url: Some(base_url),
+                staging_channel: Some(channel),
+                staging_token: "".to_string(),
+                feedstock: "".to_string(),
+                feedstock_token: "".to_string(),
+                validation_endpoint: None,
+                provider: None,
+                dry_run: false,
+            })
+        }
+    };
     // Upload handler based on server type
-    match args.server_type {
+    match server_type {
         ServerType::Quetz(quetz_opts) => {
             let quetz_data = QuetzData::from(quetz_opts);
             upload::upload_package_to_quetz(&store, &args.package_files, quetz_data).await
@@ -51,12 +167,13 @@ pub async fn upload_from_args(args: UploadOpts) -> miette::Result<()> {
         }
         #[cfg(feature = "s3")]
         ServerType::S3(s3_opts) => {
+            let s3_data = S3Data::from(s3_opts);
             upload::upload_package_to_s3(
                 &store,
-                s3_opts.channel,
-                s3_opts.credentials.into(),
+                s3_data.channel,
+                s3_data.credentials,
                 &args.package_files,
-                s3_opts.force,
+                s3_data.force,
             )
             .await
         }
diff --git a/crates/rattler_upload/src/upload/opt.rs b/crates/rattler_upload/src/upload/opt.rs
index e170a579b..de6a4805a 100644
--- a/crates/rattler_upload/src/upload/opt.rs
+++ b/crates/rattler_upload/src/upload/opt.rs
@@ -1,19 +1,22 @@
 //! Command-line options.

-use std::{collections::HashMap, path::PathBuf, str::FromStr};
-
 use clap::{arg, Parser};
-use rattler_conda_types::{
-    utils::url_with_trailing_slash::UrlWithTrailingSlash, NamedChannelOrUrl, Platform,
-};
+use rattler_conda_types::utils::url_with_trailing_slash::UrlWithTrailingSlash;
+use rattler_conda_types::{NamedChannelOrUrl, Platform};
+use rattler_networking::mirror_middleware;
+use rattler_networking::AuthenticationStorage;
 #[cfg(feature = "s3")]
-use rattler_networking::s3_middleware;
-use rattler_networking::{mirror_middleware, AuthenticationStorage};
+use rattler_s3::clap::S3CredentialsOpts;
+#[cfg(feature = "s3")]
+use rattler_s3::{S3AddressingStyle, S3Credentials};
 use rattler_solve::ChannelPriority;
+use std::{collections::HashMap, path::PathBuf, str::FromStr};
 use tracing::warn;
 use url::Url;

-/// The configuration type for rattler-build - just extends rattler / pixi
-/// config and can load the same TOML files.
+#[cfg(feature = "s3")]
+use rattler_networking::s3_middleware;
+
+/// The configuration type for rattler-build - just extends rattler / pixi config and can load the same TOML files.
 pub type Config = rattler_config::config::ConfigBase<()>;
 /// Container for `rattler_solver::ChannelPriority` so that it can be parsed
@@ -99,8 +102,7 @@ impl CommonData {
         allow_insecure_host: Option<Vec<String>>,
     ) -> Self {
         // mirror config
-        // todo: this is a duplicate in pixi and pixi-pack: do it like in
-        // `compute_s3_config`
+        // todo: this is a duplicate in pixi and pixi-pack: do it like in `compute_s3_config`
         let mut mirror_config = HashMap::new();

         tracing::debug!("Using mirrors: {:?}", config.mirrors);
@@ -158,13 +160,16 @@ impl CommonData {
 /// Upload options.
 #[derive(Parser, Debug)]
 pub struct UploadOpts {
+    /// The host + channel (optional if the server type is provided)
+    pub host: Option<Url>,
+
     /// The package file to upload
     #[arg(global = true, required = false)]
     pub package_files: Vec<PathBuf>,

-    /// The server type
+    /// The server type (optional if host is provided)
     #[clap(subcommand)]
-    pub server_type: ServerType,
+    pub server_type: Option<ServerType>,

     /// Common options.
     #[clap(flatten)]
@@ -426,19 +431,102 @@ fn parse_s3_url(value: &str) -> Result<Url, String> {
 #[cfg(feature = "s3")]
 #[derive(Clone, Debug, PartialEq, Parser)]
 pub struct S3Opts {
-    /// The channel URL in the S3 bucket to upload the package to, e.g.,
-    /// `s3://my-bucket/my-channel`
+    /// The channel URL in the S3 bucket to upload the package to, e.g., `s3://my-bucket/my-channel`
     #[arg(short, long, env = "S3_CHANNEL", value_parser = parse_s3_url)]
     pub channel: Url,

+    /// S3 credentials
     #[clap(flatten)]
-    pub credentials: rattler_s3::clap::S3CredentialsOpts,
+    pub s3_credentials: S3CredentialsOpts,

-    /// Replace files if it already exists.
-    #[arg(long)]
+    /// S3 credentials (set programmatically, not via CLI)
+    #[clap(skip)]
+    pub credentials: Option<S3Credentials>,
+
+    /// Replace files on conflict
+    #[arg(long, short, env = "ANACONDA_FORCE")]
+    pub force: bool,
+}
+
+#[cfg(feature = "s3")]
+#[derive(Debug)]
+#[allow(missing_docs)]
+pub struct S3Data {
+    pub channel: Url,
+    pub endpoint_url: Url,
+    pub region: Option<String>,
+    pub force_path_style: bool,
+    pub credentials: Option<S3Credentials>,
     pub force: bool,
 }

+#[cfg(feature = "s3")]
+impl From<S3Opts> for S3Data {
+    fn from(value: S3Opts) -> Self {
+        let addressing_style = value.s3_credentials.addressing_style.into();
+        let force_path_style = matches!(addressing_style, S3AddressingStyle::Path);
+
+        let credentials: Option<S3Credentials> =
+            if let (Some(access_key_id), Some(secret_access_key)) = (
+                value.s3_credentials.access_key_id.clone(),
+                value.s3_credentials.secret_access_key.clone(),
+            ) {
+                Some(S3Credentials {
+                    endpoint_url: value
+                        .s3_credentials
+                        .endpoint_url
+                        .clone()
+                        .expect("endpoint_url is required"),
+                    region: value
+                        .s3_credentials
+                        .region
+                        .clone()
+                        .expect("region is required"),
+                    addressing_style,
+                    access_key_id: Some(access_key_id),
+                    secret_access_key: Some(secret_access_key),
+                    session_token: value.s3_credentials.session_token.clone(),
+                })
+            } else {
+                value.credentials
+            };
+
+        Self {
+            channel: value.channel,
+            endpoint_url: value
+                .s3_credentials
+                .endpoint_url
+                .expect("endpoint_url is required"),
+            region: value.s3_credentials.region,
+            force_path_style,
+            credentials,
+            force: value.force,
+        }
+    }
+}
+
+#[cfg(feature = "s3")]
+impl S3Data {
+    /// Create a new instance of `S3Data`
+    pub fn new(
+        channel: Url,
+        endpoint_url: Url,
+        region: Option<String>,
+        force_path_style: bool,
+        credentials: Option<S3Credentials>,
+        force: bool,
+    ) -> Self {
+        Self {
+            channel,
+            endpoint_url,
+            region,
+            force_path_style,
+            credentials,
+            force,
+        }
+    }
+}
+
 #[derive(Debug)]
 #[allow(missing_docs)]
 pub struct AnacondaData {
@@ -607,8 +695,7 @@ pub struct DebugOpts {
     #[clap(flatten)]
     pub common: CommonOpts,

-    /// Name of the specific output to debug (only required when a recipe has
-    /// multiple outputs)
+    /// Name of the specific output to debug (only required when a recipe has multiple outputs)
     #[arg(long, help = "Name of the specific output to debug")]
     pub output_name: Option<String>,
 }
@@ -635,8 +722,8 @@ pub struct DebugData {
 }

 impl DebugData {
-    /// Generate a new `TestData` struct from `TestOpts` and an optional pixi
-    /// config. `TestOpts` have higher priority than the pixi config.
+    /// Generate a new `TestData` struct from `TestOpts` and an optional pixi config.
+    /// `TestOpts` have higher priority than the pixi config.
     pub fn from_opts_and_config(opts: DebugOpts, config: Option<Config>) -> Self {
         Self {
             recipe_path: opts.recipe,
diff --git a/crates/rattler_upload/src/upload/prefix.rs b/crates/rattler_upload/src/upload/prefix.rs
index 780a0021c..8c1e68e0f 100644
--- a/crates/rattler_upload/src/upload/prefix.rs
+++ b/crates/rattler_upload/src/upload/prefix.rs
@@ -1,5 +1,6 @@
 use fs_err::tokio as tokio_fs;
 use futures::TryStreamExt as _;
+use indicatif::{style::TemplateError, HumanBytes, ProgressState};
 use miette::IntoDiagnostic as _;
 use rattler_networking::{Authentication, AuthenticationStorage};
 use reqwest::{
@@ -19,9 +20,10 @@ use super::opt::{
     // ← Import from sibling module
     PrefixData,
 };
+use std::fmt::Write;
 use crate::upload::{
-    default_bytes_style, get_client_with_retry, get_default_client,
+    get_client_with_retry, get_default_client,
     trusted_publishing::{check_trusted_publishing, TrustedPublishResult},
 };
@@ -77,6 +79,25 @@ async fn create_upload_form(
     Ok(form)
 }

+/// Returns the style to use for a progressbar that is currently in progress.
+fn default_bytes_style() -> Result<indicatif::ProgressStyle, TemplateError> {
+    Ok(indicatif::ProgressStyle::default_bar()
+        .template("{spinner:.green} {prefix:20!} [{elapsed_precise}] [{bar:40!.bright.yellow/dim.white}] {bytes:>8} @ {smoothed_bytes_per_sec:8}")?
+        .progress_chars("━━╾─")
+        .with_key(
+            "smoothed_bytes_per_sec",
+            |s: &ProgressState, w: &mut dyn Write| match (s.pos(), s.elapsed().as_millis()) {
+                (pos, elapsed_ms) if elapsed_ms > 0 => {
+                    // TODO: log with tracing?
+                    _ = write!(w, "{}/s", HumanBytes((pos as f64 * 1000_f64 / elapsed_ms as f64) as u64));
+                }
+                _ => {
+                    _ = write!(w, "-");
+                },
+            },
+        ))
+}
+
 /// Uploads package files to a prefix.dev server.
 pub async fn upload_package_to_prefix(
     storage: &AuthenticationStorage,
diff --git a/crates/rattler_upload/src/upload/s3.rs b/crates/rattler_upload/src/upload/s3.rs
index b1477fb2c..8512a0f94 100644
--- a/crates/rattler_upload/src/upload/s3.rs
+++ b/crates/rattler_upload/src/upload/s3.rs
@@ -2,12 +2,17 @@
 use std::path::PathBuf;
 use miette::IntoDiagnostic;
 use opendal::{services::S3Config, Configurator, ErrorKind, Operator};
+use rattler_digest::{HashingReader, Md5, Sha256};
 use rattler_networking::AuthenticationStorage;
 use rattler_s3::{ResolvedS3Credentials, S3Credentials};
+use tokio::io::{AsyncReadExt, AsyncSeekExt};
+use tokio_util::bytes::BytesMut;
 use url::Url;

 use crate::upload::package::ExtractedPackage;

+const DESIRED_CHUNK_SIZE: usize = 1024 * 1024 * 10;
+
 /// Uploads a package to a channel in an S3 bucket.
 #[allow(clippy::too_many_arguments)]
 pub async fn upload_package_to_s3(
@@ -56,26 +61,90 @@ pub async fn upload_package_to_s3(
         .filename()
         .ok_or_else(|| miette::miette!("Failed to get filename"))?;
     let key = format!("{subdir}/{filename}");
-    let body = fs_err::tokio::read(package_file).await.into_diagnostic()?;
-    match op
-        .write_with(&key, body)
+
+    // Compute the hash of the package by streaming its content.
+    let file = tokio::io::BufReader::new(
+        fs_err::tokio::File::open(package_file)
+            .await
+            .into_diagnostic()?,
+    );
+    let sha256_reader = HashingReader::<_, Sha256>::new(file);
+    let mut md5_reader = HashingReader::<_, Md5>::new(sha256_reader);
+    let size = tokio::io::copy(&mut md5_reader, &mut tokio::io::sink())
+        .await
+        .into_diagnostic()?;
+    let (sha256_reader, md5hash) = md5_reader.finalize();
+    let (mut file, sha256hash) = sha256_reader.finalize();
+
+    // Rewind the file to the beginning.
+    file.rewind().await.into_diagnostic()?;
+
+    // Construct a writer for the package.
+    let mut writer = match op
+        .writer_with(&key)
         .content_disposition(&format!("attachment; filename={filename}"))
         .if_not_exists(!force)
+        .user_metadata([
+            (String::from("package-sha256"), format!("{sha256hash:x}")),
+            (String::from("package-md5"), format!("{md5hash:x}")),
+        ])
         .await
     {
         Err(e) if e.kind() == ErrorKind::ConditionNotMatch => {
-            tracing::info!(
-                "Skipped package s3://{bucket}{}/{key}, the package already exists. Use --force to overwrite.",
+            miette::bail!(
+                "Package s3://{bucket}{}/{key} already exists. Use --force to overwrite.",
                 channel.path().to_string()
             );
         }
-        Ok(_metadata) => {
+        Ok(writer) => writer,
+        Err(e) => {
+            return Err(e).into_diagnostic();
+        }
+    };
+
+    // Write the contents to the writer. We do this in a more complex way than just
+    // using `io::copy` because some underlying storage providers expect to receive
+    // the data in specifically sized chunks. The code below guarantees chunks of
+    // equal size except for maybe the last chunk.
+    let mut remaining_size = size as usize;
+    loop {
+        // Allocate memory for this chunk
+        let chunk_size = remaining_size.min(DESIRED_CHUNK_SIZE);
+        let mut chunk = BytesMut::with_capacity(chunk_size);
+        // SAFE: because we do not care about the bytes that are currently in the buffer
+        unsafe { chunk.set_len(chunk_size) };
+
+        // Fill the chunk with data. This reads exactly the number of bytes we want. No
+        // more, no less.
+        let bytes_read = file.read_exact(&mut chunk[..]).await.into_diagnostic()?;
+        debug_assert_eq!(bytes_read, chunk.len());
+
+        // Write the bytes directly to storage
+        writer.write(chunk.freeze()).await.into_diagnostic()?;
+
+        // Update the number of remaining bytes
+        remaining_size = remaining_size.saturating_sub(bytes_read);
+        if remaining_size == 0 {
+            break;
+        }
+    }
+
+    match writer.close().await {
+        Err(e) if e.kind() == ErrorKind::ConditionNotMatch => {
+            miette::bail!(
+                "Package s3://{bucket}{}/{key} already exists. Use --force to overwrite.",
+                channel.path().to_string()
+            );
+        }
+        Ok(_) => {
             tracing::info!(
                 "Uploaded package to s3://{bucket}{}/{key}",
                 channel.path().to_string()
             );
         }
-        Err(e) => return Err(e).into_diagnostic(),
+        Err(e) => {
+            return Err(e).into_diagnostic();
+        }
     }
 }
diff --git a/crates/rattler_upload/src/utils/mod.rs b/crates/rattler_upload/src/utils/mod.rs
index df2b22c56..a7ecff599 100644
--- a/crates/rattler_upload/src/utils/mod.rs
+++ b/crates/rattler_upload/src/utils/mod.rs
@@ -1,3 +1,4 @@
 pub mod console_utils;
 pub mod consts;
+pub mod server_util;
 pub mod tool_configuration;
diff --git a/crates/rattler_upload/src/utils/server_util.rs b/crates/rattler_upload/src/utils/server_util.rs
new file mode 100644
index 000000000..ecb9b4f63
--- /dev/null
+++ b/crates/rattler_upload/src/utils/server_util.rs
@@ -0,0 +1,228 @@
+use url::Url;
+
+/// Simplified server type without options
+#[derive(Debug, Clone, PartialEq)]
+pub enum SimpleServerType {
+    Quetz,
+    Artifactory,
+    Prefix,
+    Anaconda,
+    #[cfg(feature = "s3")]
+    S3,
+    CondaForge,
+    Unknown,
+}
+
+/// Determine server type from host URL
+///
+/// # Arguments
+/// * `host_url` - The host URL to analyze
+///
+/// # Returns
+/// * `SimpleServerType` - The detected server type or Unknown
+pub fn check_server_type(host_url: &Url) -> SimpleServerType {
+    let host = match host_url.host_str() {
+        Some(host) => host,
+        None => return SimpleServerType::Unknown,
+    };
+
+    // 1. Check Prefix.dev
+    if host.contains("prefix.dev") {
+        return SimpleServerType::Prefix;
+    }
+
+    // 2. Check Anaconda.org
+    if host.contains("anaconda.org") {
+        return SimpleServerType::Anaconda;
+    }
+
+    // 3. Check Conda-forge (GitHub)
+    if host.contains("conda-forge") {
+        return SimpleServerType::CondaForge;
+    }
+
+    // 4. Check S3
+    #[cfg(feature = "s3")]
+    if host_url.scheme() == "s3" || (host.contains("s3") && host.contains("amazonaws.com")) {
+        return SimpleServerType::S3;
+    }
+
+    // 5. Check Artifactory
+    if host_url.path().contains("/artifactory/") {
+        return SimpleServerType::Artifactory;
+    }
+
+    // 6. Check Quetz
+    if host_url.path().contains("/api/channels/") {
+        return SimpleServerType::Quetz;
+    }
+
+    // 7. Unknown server type
+    SimpleServerType::Unknown
+}
+
+// Extract Quetz base_url and channel from the host
+pub fn extract_quetz_info(url: &Url) -> Result<(Url, String), Box<dyn std::error::Error>> {
+    // Extract base URL (scheme + host)
+    let mut base_url = url.clone();
+    base_url.set_path("");
+    base_url.set_query(None);
+
+    // Parse path to find channel in /api/channels/CHANNEL pattern
+    let path_segments: Vec<&str> = url
+        .path_segments()
+        .ok_or("Cannot extract path segments")?
+        .collect();
+
+    // Look for /api/channels/CHANNEL pattern
+    if let Some(api_pos) = path_segments.iter().position(|&s| s == "api") {
+        if path_segments.get(api_pos + 1) == Some(&"channels") {
+            if let Some(&channel) = path_segments.get(api_pos + 2) {
+                return Ok((base_url, channel.to_string()));
+            }
+        }
+    }
+
+    // Default to "main" if no channel found
+    Ok((base_url, "main".to_string()))
+}
+
+// Extract Artifactory base_url and channel from host
+pub fn extract_artifactory_info(url: &Url) -> Result<(Url, String), Box<dyn std::error::Error>> {
+    let path_segments: Vec<&str> = url
+        .path_segments()
+        .ok_or("Cannot extract path segments")?
+        .collect();
+
+    // Look for /artifactory/CHANNEL pattern
+    if let Some(artifactory_pos) = path_segments.iter().position(|&s| s == "artifactory") {
+        if let Some(&channel) = path_segments.get(artifactory_pos + 1) {
+            let mut base_url = url.clone();
+            base_url.set_path("");
+            return Ok((base_url, channel.to_string()));
+        }
+    }
+
+    Err("Invalid Artifactory URL format".into())
+}
+
+// Extract Prefix base_url and channel from host
+pub fn extract_prefix_info(url: &Url) -> Result<(Url, String), Box<dyn std::error::Error>> {
+    let path_segments: Vec<&str> = url
+        .path_segments()
+        .unwrap_or_else(|| "".split('/'))
+        .filter(|s| !s.is_empty())
+        .collect();
+
+    let mut base_url = url.clone();
+    base_url.set_path("");
+
+    // Look for API upload pattern: /api/v1/upload/CHANNEL
+    if path_segments.len() >= 4
+        && path_segments[0] == "api"
+        && path_segments[1] == "v1"
+        && path_segments[2] == "upload"
+    {
+        return Ok((base_url, path_segments[3].to_string()));
+    }
+
+    // Look for direct channel pattern: /CHANNEL
+    if let Some(&channel) = path_segments.first() {
+        return Ok((base_url, channel.to_string()));
+    }
+
+    // Default to conda-forge
+    Ok((base_url, "conda-forge".to_string()))
+}
+
+// Extract Anaconda base_url and channel from host
+pub fn extract_anaconda_info(url: &Url) -> Result<(Url, Vec<String>), Box<dyn std::error::Error>> {
+    let mut base_url = url.clone();
+    base_url.set_path("");
+
+    let path_segments: Vec<&str> = url
+        .path_segments()
+        .unwrap_or_else(|| "".split('/'))
+        .filter(|s| !s.is_empty())
+        .collect();
+
+    // Extract channel from first path segment, default to "main"
+    let channel = if let Some(&first_segment) = path_segments.first() {
+        vec![first_segment.to_string()]
+    } else {
+        vec!["main".to_string()]
+    };
+
+    Ok((base_url, channel))
+}
+
+#[cfg(feature = "s3")]
+// Extract S3 base_url and channel from host
+pub fn extract_s3_info(url: &Url) -> Result<(Url, Url, String), Box<dyn std::error::Error>> {
+    if url.scheme() == "s3" {
+        // S3 URI format: s3://bucket-name/channel-name
+        let host = url.host_str().ok_or("No host in S3 URL")?;
+        let path_segments: Vec<&str> = url
+            .path_segments()
+            .unwrap_or_else(|| "".split('/'))
+            .filter(|s| !s.is_empty())
+            .collect();
+
+        let channel_name = path_segments
+            .first()
+            .map_or_else(|| "main".to_string(), ToString::to_string);
+
+        let base_url = Url::parse("https://s3.amazonaws.com")?;
+        let channel = Url::parse(&format!("s3://{host}/{channel_name}"))?;
+        let region = "eu-central-1".to_string();
+
+        return Ok((base_url, channel, region));
+    } else if url.scheme().starts_with("http") && url.host_str().unwrap_or("").contains("s3") {
+        // HTTP(S) format: https://bucket.s3.region.amazonaws.com/channel
+        let host = url.host_str().ok_or("No host in URL")?;
+        let host_parts: Vec<&str> = host.split('.').collect();
+
+        if host_parts.len() >= 4 && host_parts[1] == "s3" && host_parts.last() == Some(&"com") {
+            let bucket = host_parts[0];
+            let region = if host_parts.len() > 4 {
+                host_parts[2].to_string()
+            } else {
+                "eu-central-1".to_string()
+            };
+
+            let path_segments: Vec<&str> = url
+                .path_segments()
+                .unwrap_or_else(|| "".split('/'))
+                .filter(|s| !s.is_empty())
+                .collect();
+
+            let base_url = url.clone();
+            let channel_name = path_segments
+                .first()
+                .map_or_else(|| "main".to_string(), ToString::to_string);
+            let channel = Url::parse(&format!("s3://{bucket}/{channel_name}"))?;
+
+            return Ok((base_url, channel, region));
+        }
+    }
+
+    Err("Invalid S3 URL format".into())
+}
+
+// Extract Conda Forge base_url and channel from host
+pub fn extract_conda_forge_info(url: &Url) -> Result<(Url, String), Box<dyn std::error::Error>> {
+    let mut base_url = url.clone();
+    base_url.set_path("");
+
+    // Extract channel from path - first path segment or "main" as default
+    let channel = url
+        .path_segments()
+        .and_then(|mut segments| segments.next())
+        .filter(|s| !s.is_empty())
+        .unwrap_or("main")
+        .to_string();
+
+    Ok((base_url, channel))
+}