Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions src/application/common/monitor_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from src.application.common import logger
from src.application.contracts import IMonitoringStorageService, IAzureCostService
from src.application.dtos import CostConfiguration
from src.domain.enums import BlobOperationType, StopReason
from src.domain.enums import BlobOperationType, DatasetSize, StopReason
from src.infra.infrastructure import Containers


Expand Down Expand Up @@ -118,7 +118,9 @@ def _save_run_cost_analytics(
monitoring_storage_service: IMonitoringStorageService = Provide[
Containers.monitoring_storage_service
],
dataset_size_value: str = Provide[Containers.config.dataset_size],
) -> None:
dataset_size = DatasetSize(dataset_size_value)
benchmark_run = _get_benchmark_run()
if cost_configuration.include_aci:
aci_cost = azure_cost_service.compute_aci_cost(query_id, start_time, end_time)
Expand All @@ -138,7 +140,9 @@ def _save_run_cost_analytics(
)
if cost_configuration.include_blob_storage and is_blob_params_present:
blob_cost = azure_cost_service.compute_blob_storage_cost(
start_time, end_time, bytes_ingress, bytes_egress, operation_type
start_time, end_time, bytes_ingress, bytes_egress, operation_type,
dataset_size=dataset_size,
is_cross_region=cost_configuration.is_cross_region_blob,
)
logger.debug("Computed Blob Storage cost: %s", blob_cost.to_dict())
monitoring_storage_service.write_cost_analytics_to_blob_storage(
Expand Down
6 changes: 5 additions & 1 deletion src/application/contracts/azure_cost_service_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from abc import abstractmethod, ABC

from src.application.dtos import Cost
from src.domain.enums import BlobOperationType
from src.domain.enums import BlobOperationType, DatasetSize


class IAzureCostService(ABC):
Expand Down Expand Up @@ -35,6 +35,8 @@ def compute_blob_storage_cost(
bytes_ingress: float,
bytes_egress: float,
operation_type: BlobOperationType,
dataset_size: DatasetSize = DatasetSize.SMALL,
is_cross_region: bool = False,
) -> Cost:
"""
Computes the blob storage cost for the benchmark window. The cost includes prorated storage
Expand All @@ -45,6 +47,8 @@ def compute_blob_storage_cost(
:param bytes_ingress: Bytes uploaded to blob storage during the benchmark.
:param bytes_egress: Bytes downloaded from blob storage during the benchmark.
:param operation_type: Whether the benchmark performs READ or WRITE operations against blob storage.
:param dataset_size: Dataset size tier used to count blobs for operation cost estimation.
:param is_cross_region: When True, uses cross-region egress pricing instead of intra-region.
:return: Cost DTO with compute, storage, network, operations, and total cost. Compute cost is 0
for blob storage.
:rtype: Cost
Expand Down
6 changes: 4 additions & 2 deletions src/application/contracts/azure_metric_service_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from azure.monitor.querymetrics import MetricAggregationType, MetricsQueryResult

from src.application.dtos import AciUsage, BlobStorageUsage, DatabaseUsage, DatabricksUsage
from src.domain.enums import AzureMetricNamespace, AzureResourceMetrics, BlobOperationType
from src.domain.enums import AzureMetricNamespace, AzureResourceMetrics, BlobOperationType, DatasetSize


class IAzureMetricService(ABC):
Expand Down Expand Up @@ -66,7 +66,8 @@ def get_blob_storage_usage(
end_time: datetime.datetime,
bytes_ingress: float,
bytes_egress: float,
operation_type: BlobOperationType
operation_type: BlobOperationType,
dataset_size: DatasetSize = DatasetSize.SMALL,
) -> BlobStorageUsage:
"""
Returns the blob storage usage for the benchmark window. The transaction counts are derived from
Expand All @@ -78,6 +79,7 @@ def get_blob_storage_usage(
:param bytes_ingress: Bytes uploaded to blob storage during the benchmark.
:param bytes_egress: Bytes downloaded from blob storage during the benchmark.
:param operation_type: Whether the benchmark performs READ or WRITE operations against blob storage.
:param dataset_size: Dataset size tier used to resolve the blob path for counting blobs.
:return: BlobStorageUsage DTO with transaction counts, network bytes, and storage size in bytes.
:rtype: BlobStorageUsage
"""
Expand Down
1 change: 1 addition & 0 deletions src/application/dtos/blob_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ class BlobStoragePricing:
storage_gb_per_month: float
ingress_per_gb: float = 0.0
egress_per_gb: float = 0.0
cross_region_egress_per_gb: float = 0.0

def to_dict(self) -> dict[str, float]:
return asdict(self)
Expand Down
1 change: 1 addition & 0 deletions src/application/dtos/cost.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,4 @@ class CostConfiguration:
include_postgres: bool = False
include_databricks: bool = False
num_workers: int = 0
is_cross_region_blob: bool = False
17 changes: 2 additions & 15 deletions src/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,19 +31,6 @@ class Config:
f"https://{AZURE_RESOURCE_LOCATION}.metrics.monitor.azure.com"
)

AZURE_ACI_VCPU_PRICE_PER_SECOND: float = 0.0002
AZURE_ACI_MEMORY_GB_PRICE_PER_SECOND: float = 0.0002

AZURE_BLOB_READ_OPERATION_COST: float = 0
AZURE_BLOB_WRITE_OPERATION_COST: float = 0
AZURE_BLOB_LIST_OPERATION_COST: float = 0
AZURE_BLOB_STORAGE_GB_PER_MONTH_COST: float = 0
AZURE_BLOB_INGRESS_PER_GB_COST: float = 0
AZURE_BLOB_EGRESS_PER_GB_COST: float = 0

AZURE_DATABASE_COMPUTE_PRICE_PER_SECOND: float = 0
AZURE_DATABASE_STORAGE_GB_PER_MONTH_COST: float = 0

# POSTGRESQL
POSTGRES_USERNAME: str = os.getenv("POSTGRES_USERNAME")
POSTGRES_PASSWORD: str = os.getenv("POSTGRES_PASSWORD")
Expand Down Expand Up @@ -138,9 +125,9 @@ class Config:
)
DATABRICKS_POLL_INTERVAL_SECONDS: int = 30
DATABRICKS_HTTP_TIMEOUT_SECONDS: int = 30
DATABRICKS_DRIVER_MEMORY: str = "14g"
DATABRICKS_DRIVER_MEMORY: str = "9g"
DATABRICKS_DRIVER_MEMORY_OVERHEAD: str = "1g"
DATABRICKS_DRIVER_MAX_RESULT_SIZE: str = "8g"
DATABRICKS_DRIVER_MAX_RESULT_SIZE: str = "4g"
DATABRICKS_SEDONA_MAVEN_COORDINATES: str = (
"org.apache.sedona:sedona-spark-shaded-3.5_2.12:1.7.1"
)
Expand Down
16 changes: 12 additions & 4 deletions src/infra/infrastructure/services/azure_cost_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from src.application.contracts import IAzureCostService, IAzureMetricService, IAzurePricingService
from src.application.dtos import Cost
from src.domain.enums import BlobOperationType
from src.domain.enums import BlobOperationType, DatasetSize


class AzureCostService(IAzureCostService):
Expand Down Expand Up @@ -52,13 +52,16 @@ def compute_blob_storage_cost(
bytes_ingress: float,
bytes_egress: float,
operation_type: BlobOperationType,
dataset_size: DatasetSize = DatasetSize.SMALL,
is_cross_region: bool = False,
) -> Cost:
usage = self.__azure_metric_service.get_blob_storage_usage(
start_time=start_time,
end_time=end_time,
bytes_ingress=bytes_ingress,
bytes_egress=bytes_egress,
operation_type=operation_type,
dataset_size=dataset_size,
)
pricing = self.__azure_pricing_service.get_blob_storage_pricing()

Expand All @@ -73,11 +76,15 @@ def compute_blob_storage_cost(
+ usage.list_transactions * pricing.list_operation_cost
)

egress_rate = (
pricing.cross_region_egress_per_gb if is_cross_region
else pricing.egress_per_gb
)
ingress_gb = usage.bytes_ingress / (1024 ** 3)
egress_gb = usage.bytes_egress / (1024 ** 3)
network_cost = (
ingress_gb * pricing.ingress_per_gb
+ egress_gb * pricing.egress_per_gb
+ egress_gb * egress_rate
)

total = storage_cost + operations_cost + network_cost
Expand Down Expand Up @@ -106,8 +113,9 @@ def compute_databricks_cost(
pricing = self.__azure_pricing_service.get_databricks_pricing()

duration_hours = usage.duration_seconds / 3600
dbu_cost = usage.num_workers * pricing.dbu_per_node_per_hour * pricing.dbu_price_per_hour * duration_hours
vm_cost = usage.num_workers * pricing.vm_cost_per_node_per_hour * duration_hours
total_nodes = usage.num_workers + 1 # workers + driver (same VM type)
dbu_cost = total_nodes * pricing.dbu_per_node_per_hour * pricing.dbu_price_per_hour * duration_hours
vm_cost = total_nodes * pricing.vm_cost_per_node_per_hour * duration_hours
compute_cost = dbu_cost + vm_cost

egress_gb = usage.bytes_egress / (1024 ** 3)
Expand Down
3 changes: 2 additions & 1 deletion src/infra/infrastructure/services/azure_metric_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,13 +110,14 @@ def get_blob_storage_usage(
bytes_ingress: float,
bytes_egress: float,
operation_type: BlobOperationType,
dataset_size: DatasetSize = DatasetSize.SMALL,
) -> BlobStorageUsage:
path = self.__file_path_service.create_dataset_blob_path(
release=Config.BENCHMARK_DOPPA_DATA_RELEASE,
theme=Theme.BUILDINGS,
region="*",
file_name="*.parquet",
dataset_size=DatasetSize.SMALL,
dataset_size=dataset_size,
)

blob_count, storage_size = self.__blob_storage_service.get_blob_summary(
Expand Down
16 changes: 12 additions & 4 deletions src/infra/infrastructure/services/azure_pricing_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,20 @@ class AzurePricingService(IAzurePricingService):
Hard-coded Azure Norway East pricing (USD) as of 2026.
All rates are per-second or per-GB unless noted.

All resources (ACI, Blob Storage, PostgreSQL) are deployed in the same
region (Norway East) and under the same tenant/subscription. Intra-region,
intra-tenant data transfers are free, so all network egress rates are $0.00.
ACI, Blob Storage, and PostgreSQL are deployed in Norway East under the
same tenant/subscription. Intra-region, intra-tenant data transfers are
free, so their network egress rates are $0.00.

Databricks is deployed in Sweden Central (Norway East does not offer
Databricks). When Databricks executors read GeoParquet from Blob Storage
in Norway East, the transfer is inter-region egress billed on the blob
storage side at the intra-Europe cross-region rate (~$0.02/GB).
See: https://azure.microsoft.com/pricing/details/bandwidth/
Section "Intra-continental data transfer" → Zone 1 ↔ Zone 1.

If resources are later moved to different regions, update the egress rates
to the applicable Azure Bandwidth tier. Norway East is in Zone 1.
Zone 1 internet egress (Premium Global Network): $0.087/GB (first 10 TB/month).
See: https://azure.microsoft.com/pricing/details/bandwidth/

Sources:
ACI compute: https://azure.microsoft.com/pricing/details/container-instances/
Expand Down Expand Up @@ -54,6 +60,7 @@ class AzurePricingService(IAzurePricingService):
__BLOB_LIST_PER_10K: float = 0.065 # $0.065 per 10 000 list ops
__BLOB_INGRESS_PER_GB: float = 0.0 # Free inbound (always free on Azure)
__BLOB_EGRESS_PER_GB: float = 0.0 # Free — intra-region, same tenant
__BLOB_CROSS_REGION_EGRESS_PER_GB: float = 0.02 # Intra-Europe cross-region (Zone 1 ↔ Zone 1)

# ------------------------------------------------------------------
# Azure Databricks — Sweden Central, Standard tier, Jobs Compute
Expand Down Expand Up @@ -104,6 +111,7 @@ def get_blob_storage_pricing(self) -> BlobStoragePricing:
storage_gb_per_month=self.__BLOB_STORAGE_GB_PER_MONTH,
ingress_per_gb=self.__BLOB_INGRESS_PER_GB,
egress_per_gb=self.__BLOB_EGRESS_PER_GB,
cross_region_egress_per_gb=self.__BLOB_CROSS_REGION_EGRESS_PER_GB,
)

def get_databricks_pricing(self) -> DatabricksPricing:
Expand Down
6 changes: 5 additions & 1 deletion src/presentation/entrypoints/_databricks_benchmark_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,11 @@ def _build_benchmark_fn(
query_id=query_id,
benchmark_iteration=BenchmarkIteration.NATIONAL_SCALE_SPATIAL_JOIN,
cost_configuration=CostConfiguration(
include_aci=True, include_databricks=True, num_workers=num_workers
include_aci=True,
include_databricks=True,
include_blob_storage=True,
num_workers=num_workers,
is_cross_region_blob=True,
),
skip_warmup=False,
elapsed_from_result=True,
Expand Down
Loading