From 2ec1b9cb71a8d0da30d2bd9c96a5a37048af8543 Mon Sep 17 00:00:00 2001 From: Andrea Reale Date: Thu, 25 Sep 2025 12:01:34 +0200 Subject: [PATCH] support https urls for Azure Blob Storage `object_store` supports several ways of encoding the address of Azure Blob Storage URLs, as specified [here](https://github.com/apache/arrow-rs-object-store/blob/b82979d44a916cf1615e719c8e80800766fd6efe/src/azure/builder.rs#L531). Before this change, the lance wrapper only supported the `az` scheme. However, the other schemes are significant too because they allow encoding extra information (e.g., the Azure Storage account name) or accessing different store types (e.g., Files Blob Store). This change maps `https` object URIs to Azure, so that the underlying `object_store` support can be effectively used. Signed-off-by: Andrea Reale --- rust/lance-io/src/object_store/providers.rs | 6 ++++- .../src/object_store/providers/azure.rs | 25 +++++++++++++++++++ 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/rust/lance-io/src/object_store/providers.rs b/rust/lance-io/src/object_store/providers.rs index 3ac6be93d6f..a8d4d328b5d 100644 --- a/rust/lance-io/src/object_store/providers.rs +++ b/rust/lance-io/src/object_store/providers.rs @@ -235,7 +235,11 @@ impl Default for ObjectStoreRegistry { providers.insert("s3+ddb".into(), aws); } #[cfg(feature = "azure")] - providers.insert("az".into(), Arc::new(azure::AzureBlobStoreProvider)); + { + let azure = Arc::new(azure::AzureBlobStoreProvider); + providers.insert("az".into(), azure.clone()); + providers.insert("https".into(), azure); + } #[cfg(feature = "gcp")] providers.insert("gs".into(), Arc::new(gcp::GcsStoreProvider)); #[cfg(feature = "oss")] diff --git a/rust/lance-io/src/object_store/providers/azure.rs b/rust/lance-io/src/object_store/providers/azure.rs index b79ca8498d0..8c5ed1c2ac6 100644 --- a/rust/lance-io/src/object_store/providers/azure.rs +++ b/rust/lance-io/src/object_store/providers/azure.rs @@ -10,6 +10,7 @@ use 
snafu::location; use object_store::{ azure::{AzureConfigKey, MicrosoftAzureBuilder}, + path::Path, RetryConfig, }; use url::Url; @@ -87,6 +88,20 @@ impl AzureBlobStoreProvider { #[async_trait::async_trait] impl ObjectStoreProvider for AzureBlobStoreProvider { + fn extract_path(&self, url: &Url) -> Result<Path> { + // Azure https paths in ObjectStore encode the container name as the first path segment. + // The actual object path starts from the second segment. + if url.scheme() == "https" { + url.path_segments() + .map(|s| Path::from_iter(s.skip(1))) + .ok_or_else(|| { + Error::invalid_input(format!("Invalid Azure URL: {url}"), location!()) + }) + } else { + Ok(Path::from(url.path())) + } + } + async fn new_store(&self, base_path: Url, params: &ObjectStoreParams) -> Result { let block_size = params.block_size.unwrap_or(DEFAULT_CLOUD_BLOCK_SIZE); let mut storage_options = @@ -164,6 +179,16 @@ mod tests { assert_eq!(path, expected_path); } + #[test] + fn test_azure_store_https_path() { + let provider = AzureBlobStoreProvider; + + let url = Url::parse("https://account.blob.core.windows.net/bucket/path/to/file").unwrap(); + let path = provider.extract_path(&url).expect("Failed to extract path"); + let expected_path = object_store::path::Path::from("path/to/file"); + assert_eq!(path, expected_path); + } + #[tokio::test] async fn test_use_opendal_flag() { let provider = AzureBlobStoreProvider;