From 6146ee844e2fe5ccd9665b705459a7cf0ba77b41 Mon Sep 17 00:00:00 2001 From: Manohar Reddy Date: Thu, 2 Apr 2026 18:26:44 +0530 Subject: [PATCH 1/4] setup IOPS using annotations --- .../organization/project/branch/__init__.py | 19 ++- src/deployment/__init__.py | 116 ++++++------------ src/deployment/kubernetes/pvc.py | 6 +- src/deployment/kubernetes/volume_clone.py | 2 - 4 files changed, 47 insertions(+), 96 deletions(-) diff --git a/src/api/organization/project/branch/__init__.py b/src/api/organization/project/branch/__init__.py index f53c5c8d7..4f39cd121 100644 --- a/src/api/organization/project/branch/__init__.py +++ b/src/api/organization/project/branch/__init__.py @@ -25,6 +25,7 @@ from ....._util.crypto import encrypt_with_passphrase, generate_keys from .....database import AsyncSessionLocal, SessionDep from .....deployment import ( + SIMPLYBLOCK_CSI_STORAGE_CLASS, DeploymentParameters, ResizeParameters, branch_api_domain, @@ -33,11 +34,11 @@ branch_service_name, delete_deployment, deploy_branch_environment, - ensure_branch_storage_class, get_autoscaler_vm_identity, kube_service, resolve_branch_database_volume_size, update_branch_database_password, + update_branch_volume_iops, ) from .....deployment._util import deployment_namespace from .....deployment.health import ( @@ -765,15 +766,13 @@ async def _clone_branch_environment_task( initial_password: str | None, ) -> None: await _persist_branch_status(branch_id, BranchServiceStatus.CREATING) - storage_class_name: str | None = None if copy_data: try: - storage_class_name = await ensure_branch_storage_class(branch_id, iops=parameters.iops) await clone_branch_database_volume( source_branch_id=source_branch_id, target_branch_id=branch_id, snapshot_class=_VOLUME_SNAPSHOT_CLASS, - storage_class_name=storage_class_name, + storage_class_name=SIMPLYBLOCK_CSI_STORAGE_CLASS, snapshot_timeout_seconds=_SNAPSHOT_TIMEOUT_SECONDS, snapshot_poll_interval_seconds=_SNAPSHOT_POLL_INTERVAL_SECONDS, pvc_timeout_seconds=_PVC_CLONE_TIMEOUT_SECONDS, @@ -781,6 +780,7 @@ async def _clone_branch_environment_task( database_size=parameters.database_size, pitr_enabled=pitr_enabled, ) + await update_branch_volume_iops(branch_id, parameters.iops) except VelaError: await _persist_branch_status(branch_id, BranchServiceStatus.ERROR) await _cleanup_failed_branch_deployment(branch_id) @@ -842,9 +842,7 @@ async def _restore_branch_environment_task( initial_password: str | None, ) -> None: await _persist_branch_status(branch_id, BranchServiceStatus.CREATING) - storage_class_name: str | None = None try: - storage_class_name = await ensure_branch_storage_class(branch_id, iops=parameters.iops) await restore_branch_database_volume_from_snapshot( source_branch_id=source_branch_id, target_branch_id=branch_id, @@ -852,7 +850,7 @@ async def _restore_branch_environment_task( snapshot_name=snapshot_name, snapshot_content_name=snapshot_content_name, snapshot_class=_VOLUME_SNAPSHOT_CLASS, - storage_class_name=storage_class_name, + storage_class_name=SIMPLYBLOCK_CSI_STORAGE_CLASS, snapshot_timeout_seconds=_SNAPSHOT_TIMEOUT_SECONDS, snapshot_poll_interval_seconds=_SNAPSHOT_POLL_INTERVAL_SECONDS, pvc_timeout_seconds=_PVC_TIMEOUT_SECONDS, @@ -866,12 +864,13 @@ async def _restore_branch_environment_task( snapshot_namespace=snapshot_namespace, snapshot_name=wal_snapshot_name, snapshot_class=_VOLUME_SNAPSHOT_CLASS, - storage_class_name=storage_class_name, + storage_class_name=SIMPLYBLOCK_CSI_STORAGE_CLASS, snapshot_timeout_seconds=_SNAPSHOT_TIMEOUT_SECONDS, snapshot_poll_interval_seconds=_SNAPSHOT_POLL_INTERVAL_SECONDS, pvc_timeout_seconds=_PVC_TIMEOUT_SECONDS, pvc_poll_interval_seconds=_PVC_POLL_INTERVAL_SECONDS, ) + await update_branch_volume_iops(branch_id, parameters.iops) except VelaError: await _persist_branch_status(branch_id, BranchServiceStatus.ERROR) await _cleanup_failed_branch_deployment(branch_id) @@ -937,7 +936,6 @@ async def _restore_branch_environment_in_place_task( return try: - storage_class_name = await ensure_branch_storage_class(branch_id, iops=parameters.iops) await restore_branch_database_volume_from_snapshot( source_branch_id=source_branch_id, target_branch_id=branch_id, @@ -945,13 +943,14 @@ async def _restore_branch_environment_in_place_task( snapshot_name=snapshot_name, snapshot_content_name=snapshot_content_name, snapshot_class=_VOLUME_SNAPSHOT_CLASS, - storage_class_name=storage_class_name, + storage_class_name=SIMPLYBLOCK_CSI_STORAGE_CLASS, snapshot_timeout_seconds=_SNAPSHOT_TIMEOUT_SECONDS, snapshot_poll_interval_seconds=_SNAPSHOT_POLL_INTERVAL_SECONDS, pvc_timeout_seconds=_PVC_TIMEOUT_SECONDS, pvc_poll_interval_seconds=_PVC_POLL_INTERVAL_SECONDS, database_size=parameters.database_size, ) + await update_branch_volume_iops(branch_id, parameters.iops) except VelaError: await _persist_branch_status(branch_id, BranchServiceStatus.ERROR) logging.exception( diff --git a/src/deployment/__init__.py b/src/deployment/__init__.py index c445371cd..83f5ab1c2 100644 --- a/src/deployment/__init__.py +++ b/src/deployment/__init__.py @@ -74,6 +74,7 @@ SIMPLYBLOCK_CSI_CONFIGMAP = "simplyblock-csi-cm" SIMPLYBLOCK_CSI_SECRET = "simplyblock-csi-secret" SIMPLYBLOCK_CSI_STORAGE_CLASS = "simplyblock-csi-sc" +SIMPLYBLOCK_QOS_RW_IOPS_ANNOTATION = "simplybk/qos-rw-iops" STORAGE_PVC_SUFFIX = "-storage-pvc" DATABASE_PVC_SUFFIX = "-db-pvc" AUTOSCALER_PVC_SUFFIX = "-block-data" @@ -94,8 +95,8 @@ DATABASE_DNS_RECORD_TYPE: Literal["CNAME"] = "CNAME" -def branch_storage_class_name(branch_id: Identifier) -> str: - return f"sc-{str(branch_id).lower()}" +def simplyblock_iops_annotations(iops: int) -> dict[str, str]: + return {SIMPLYBLOCK_QOS_RW_IOPS_ANNOTATION: str(iops)} def deployment_branch(namespace: str) -> ULID: @@ -238,52 +239,6 @@ async def _initialize_autoscaler_overlay_endpoints(namespace: str) -> None: await _ensure_autoscaler_overlay_endpoint_slices(namespace, overlay_ip) -def _build_storage_class_manifest(*, storage_class_name: str, iops: int, base_storage_class: Any) -> dict[str, Any]: - provisioner = getattr(base_storage_class, "provisioner", None) - if not provisioner: - raise VelaKubernetesError("Base storage class missing provisioner") - - base_parameters = dict(getattr(base_storage_class, "parameters", {}) or {}) - cluster_id = base_parameters.get("cluster_id") - if not cluster_id: - raise VelaKubernetesError("Base storage class missing required parameter 'cluster_id'") - - parameters = {key: str(value) for key, value in base_parameters.items()} - parameters.update( - { - "qos_rw_iops": str(iops), - "qos_rw_mbytes": "0", - "qos_r_mbytes": "0", - "qos_w_mbytes": "0", - } - ) - - allow_volume_expansion = getattr(base_storage_class, "allow_volume_expansion", None) - volume_binding_mode = getattr(base_storage_class, "volume_binding_mode", None) - reclaim_policy = getattr(base_storage_class, "reclaim_policy", None) - mount_options = getattr(base_storage_class, "mount_options", None) - - manifest: dict[str, Any] = { - "apiVersion": "storage.k8s.io/v1", - "kind": "StorageClass", - "metadata": { - "name": storage_class_name, - }, - "provisioner": provisioner, - "parameters": parameters, - } - if reclaim_policy is not None: - manifest["reclaimPolicy"] = reclaim_policy - if volume_binding_mode is not None: - manifest["volumeBindingMode"] = volume_binding_mode - if allow_volume_expansion is not None: - manifest["allowVolumeExpansion"] = bool(allow_volume_expansion) - if mount_options: - manifest["mountOptions"] = list(mount_options) - - return manifest - - async def load_simplyblock_credentials() -> tuple[str, UUID, str, str]: simplyblock_namespace = get_settings().simplyblock_csi_namespace try: @@ -354,34 +309,36 @@ async def resolve_branch_database_volume_size(branch_id: Identifier) -> int: async def update_branch_volume_iops(branch_id: Identifier, iops: int) -> None: namespace = deployment_namespace(branch_id) + _, autoscaler_vm_name = get_autoscaler_vm_identity(branch_id) + pvc_names = ( + f"{autoscaler_vm_name}{AUTOSCALER_PVC_SUFFIX}", + f"{autoscaler_vm_name}{AUTOSCALER_WAL_PVC_SUFFIX}", + f"{autoscaler_vm_name}{STORAGE_PVC_SUFFIX}", + ) + + async def _resolve_existing_volume(pvc_name: str) -> UUID | None: + try: + volume, _ = await _resolve_volume_identifiers(namespace, pvc_name) + except (VelaDeploymentError, VelaKubernetesError) as exc: + if "not found" in str(exc).lower(): + return None + raise + return volume + + resolved_volumes = await asyncio.gather(*(_resolve_existing_volume(pvc_name) for pvc_name in pvc_names)) + volumes = tuple(dict.fromkeys(volume for volume in resolved_volumes if volume is not None)) + if not volumes: + raise VelaDeploymentError(f"Failed to resolve any Simplyblock volumes for branch {branch_id}") - volume, _ = await resolve_autoscaler_volume_identifiers(namespace) try: async with create_simplyblock_api() as sb_api: - await sb_api.update_volume(volume=volume, payload={"max_rw_iops": iops}) + await asyncio.gather( + *(sb_api.update_volume(volume=volume, payload={"max_rw_iops": iops}) for volume in volumes) + ) except VelaSimplyblockAPIError as exc: raise VelaDeploymentError("Failed to update volume") from exc - logger.info("Updated Simplyblock volume %s IOPS to %s", volume, iops) - - -async def ensure_branch_storage_class(branch_id: Identifier, *, iops: int) -> str: - storage_class_name = branch_storage_class_name(branch_id) - try: - await kube_service.get_storage_class(storage_class_name) - logger.info("StorageClass %s already exists; reusing", storage_class_name) - return storage_class_name - except VelaKubernetesError: - pass - - base_storage_class = await kube_service.get_storage_class(SIMPLYBLOCK_CSI_STORAGE_CLASS) - storage_class_manifest = _build_storage_class_manifest( - storage_class_name=storage_class_name, - iops=iops, - base_storage_class=base_storage_class, - ) - await kube_service.apply_storage_class(storage_class_manifest) - return storage_class_name + logger.info("Updated Simplyblock volumes %s IOPS to %s", list(volumes), iops) def _load_chart_values(chart_root: Any) -> dict[str, Any]: @@ -405,7 +362,10 @@ async def _create_fresh_pvcs( def _pvc(name: str, size: str) -> kubernetes_client.V1PersistentVolumeClaim: return kubernetes_client.V1PersistentVolumeClaim( - metadata=kubernetes_client.V1ObjectMeta(name=name), + metadata=kubernetes_client.V1ObjectMeta( + name=name, + annotations=simplyblock_iops_annotations(parameters.iops), + ), spec=kubernetes_client.V1PersistentVolumeClaimSpec( access_modes=access_modes, storage_class_name=storage_class_name, @@ -475,6 +435,7 @@ def _configure_vela_values( else: storage_persistence.pop("size", None) storage_persistence["storageClassName"] = storage_class_name + storage_persistence["annotations"] = simplyblock_iops_annotations(parameters.iops) storage_spec["enabled"] = enable_file_storage wal_archive_spec = values_content.pop("walArchive", None) @@ -486,11 +447,13 @@ def _configure_vela_values( wal_persistence["claimName"] = wal_persistence.get("claimName") or ( f"{_autoscaler_vm_name()}{AUTOSCALER_WAL_PVC_SUFFIX}" ) + wal_persistence["annotations"] = simplyblock_iops_annotations(parameters.iops) wal_persistence.setdefault("accessModes", ["ReadWriteMany"]) db_persistence = db_spec.setdefault("persistence", {}) db_persistence["size"] = str(parameters.database_size) db_persistence["storageClassName"] = storage_class_name + db_persistence["annotations"] = simplyblock_iops_annotations(parameters.iops) autoscaler_spec = values_content.setdefault("autoscalerVm", {}) autoscaler_spec["enabled"] = True @@ -509,6 +472,7 @@ def _configure_vela_values( autoscaler_persistence["claimName"] = f"{_autoscaler_vm_name()}{AUTOSCALER_PVC_SUFFIX}" autoscaler_persistence["size"] = str(parameters.database_size) autoscaler_persistence["storageClassName"] = storage_class_name + autoscaler_persistence["annotations"] = simplyblock_iops_annotations(parameters.iops) autoscaler_persistence.setdefault("accessModes", ["ReadWriteMany"]) autoscaler_tls = autoscaler_spec.setdefault("tls", {}) @@ -548,7 +512,7 @@ async def create_vela_config( postgresql_resource = resources.files(__package__).joinpath("postgresql.conf") values_content = _load_chart_values(chart) - storage_class_name = await ensure_branch_storage_class(branch_id, iops=parameters.iops) + storage_class_name = SIMPLYBLOCK_CSI_STORAGE_CLASS if not use_existing_db_pvc: await _create_fresh_pvcs(namespace, storage_class_name, parameters, pitr_enabled=pitr_enabled) @@ -639,7 +603,6 @@ async def _delete_autoscaler_vm(namespace: str) -> None: async def delete_deployment(branch_id: Identifier) -> None: namespace, _ = get_autoscaler_vm_identity(branch_id) - storage_class_name = branch_storage_class_name(branch_id) await cleanup_branch_dns(branch_id) await _delete_autoscaler_vm(namespace) try: @@ -653,13 +616,6 @@ async def delete_deployment(branch_id: Identifier) -> None: await delete_vela_grafana_obj(branch_id) except VelaGrafanaError: logger.info("Grafana dashboard for branch %s not found", branch_id) - try: - await kube_service.delete_storage_class(storage_class_name) - except ApiException as exc: - if exc.status == 404: - logger.info("StorageClass %s not found", storage_class_name) - else: - raise def get_autoscaler_vm_identity(branch_id: Identifier) -> tuple[str, str]: diff --git a/src/deployment/kubernetes/pvc.py b/src/deployment/kubernetes/pvc.py index f43ff36f7..2437aa0f2 100644 --- a/src/deployment/kubernetes/pvc.py +++ b/src/deployment/kubernetes/pvc.py @@ -3,9 +3,8 @@ from kubernetes_asyncio import client as kubernetes_client from kubernetes_asyncio.client.exceptions import ApiException -from ..._util import Identifier from ...exceptions import VelaKubernetesError -from .. import branch_storage_class_name, kube_service +from .. import SIMPLYBLOCK_CSI_STORAGE_CLASS, kube_service from ._util import core_v1_client from ._wait import wait_for_condition @@ -76,7 +75,6 @@ async def wait_for_pvc_bound( def build_pvc_manifest_from_existing( pvc: Any, *, - branch_id: Identifier, volume_snapshot_name: str, ) -> kubernetes_client.V1PersistentVolumeClaim: """Build a PVC manifest equivalent to the input but sourced from the given snapshot.""" @@ -101,7 +99,7 @@ def build_pvc_manifest_from_existing( access_modes = list(getattr(spec, "access_modes", None) or []) storage_class_name = getattr(spec, "storage_class_name", None) or getattr(spec, "storageClassName", None) if not storage_class_name: - storage_class_name = branch_storage_class_name(branch_id) + storage_class_name = SIMPLYBLOCK_CSI_STORAGE_CLASS resources = getattr(spec, "resources", None) requests = getattr(resources, "requests", {}) if resources else {} diff --git a/src/deployment/kubernetes/volume_clone.py b/src/deployment/kubernetes/volume_clone.py index a1be97494..c96cc118a 100644 --- a/src/deployment/kubernetes/volume_clone.py +++ b/src/deployment/kubernetes/volume_clone.py @@ -272,7 +272,6 @@ async def _create_target_pvc(self) -> None: source_pvc = await kube_service.get_persistent_volume_claim(self.ids.source_namespace, pvc_name) new_manifest = build_pvc_manifest_from_existing( source_pvc, - branch_id=self.target_branch_id, volume_snapshot_name=snapshot_name, ) new_manifest.spec.resources.requests["storage"] = str(self.target_database_size) @@ -442,7 +441,6 @@ async def _create_target_pvc(self) -> None: source_pvc = await kube_service.get_persistent_volume_claim(self.ids.source_namespace, self.ids.pvc) new_manifest = build_pvc_manifest_from_existing( source_pvc, - branch_id=self.target_branch_id, volume_snapshot_name=self.ids.target_snapshot, ) new_manifest.spec.resources.requests["storage"] = str(self.target_database_size) From fd3a8387068540951219ea5187fae449b0c78977 Mon Sep 17 00:00:00 2001 From: Manohar Reddy Date: Thu, 2 Apr 2026 20:22:31 +0530 Subject: [PATCH 2/4] remove redunant storageclass name parameter --- .../organization/project/branch/__init__.py | 5 ----- src/deployment/__init__.py | 17 ++++++----------- src/deployment/kubernetes/volume_clone.py | 18 +++++------------- 3 files changed, 11 insertions(+), 29 deletions(-) diff --git a/src/api/organization/project/branch/__init__.py b/src/api/organization/project/branch/__init__.py index 4f39cd121..af5fdc0a3 100644 --- a/src/api/organization/project/branch/__init__.py +++ b/src/api/organization/project/branch/__init__.py @@ -25,7 +25,6 @@ from ....._util.crypto import encrypt_with_passphrase, generate_keys from .....database import AsyncSessionLocal, SessionDep from .....deployment import ( - SIMPLYBLOCK_CSI_STORAGE_CLASS, DeploymentParameters, ResizeParameters, branch_api_domain, @@ -772,7 +771,6 @@ async def _clone_branch_environment_task( source_branch_id=source_branch_id, target_branch_id=branch_id, snapshot_class=_VOLUME_SNAPSHOT_CLASS, - storage_class_name=SIMPLYBLOCK_CSI_STORAGE_CLASS, snapshot_timeout_seconds=_SNAPSHOT_TIMEOUT_SECONDS, snapshot_poll_interval_seconds=_SNAPSHOT_POLL_INTERVAL_SECONDS, pvc_timeout_seconds=_PVC_CLONE_TIMEOUT_SECONDS, @@ -850,7 +848,6 @@ async def _restore_branch_environment_task( snapshot_name=snapshot_name, snapshot_content_name=snapshot_content_name, snapshot_class=_VOLUME_SNAPSHOT_CLASS, - storage_class_name=SIMPLYBLOCK_CSI_STORAGE_CLASS, snapshot_timeout_seconds=_SNAPSHOT_TIMEOUT_SECONDS, snapshot_poll_interval_seconds=_SNAPSHOT_POLL_INTERVAL_SECONDS, pvc_timeout_seconds=_PVC_TIMEOUT_SECONDS, @@ -864,7 +861,6 @@ async def _restore_branch_environment_task( snapshot_namespace=snapshot_namespace, snapshot_name=wal_snapshot_name, snapshot_class=_VOLUME_SNAPSHOT_CLASS, - storage_class_name=SIMPLYBLOCK_CSI_STORAGE_CLASS, snapshot_timeout_seconds=_SNAPSHOT_TIMEOUT_SECONDS, snapshot_poll_interval_seconds=_SNAPSHOT_POLL_INTERVAL_SECONDS, pvc_timeout_seconds=_PVC_TIMEOUT_SECONDS, @@ -943,7 +939,6 @@ async def _restore_branch_environment_in_place_task( snapshot_name=snapshot_name, snapshot_content_name=snapshot_content_name, snapshot_class=_VOLUME_SNAPSHOT_CLASS, - storage_class_name=SIMPLYBLOCK_CSI_STORAGE_CLASS, snapshot_timeout_seconds=_SNAPSHOT_TIMEOUT_SECONDS, snapshot_poll_interval_seconds=_SNAPSHOT_POLL_INTERVAL_SECONDS, pvc_timeout_seconds=_PVC_TIMEOUT_SECONDS, diff --git a/src/deployment/__init__.py b/src/deployment/__init__.py index 83f5ab1c2..9f306d157 100644 --- a/src/deployment/__init__.py +++ b/src/deployment/__init__.py @@ -350,7 +350,6 @@ def _load_chart_values(chart_root: Any) -> dict[str, Any]: async def _create_fresh_pvcs( namespace: str, - storage_class_name: str, parameters: DeploymentParameters, *, pitr_enabled: bool, @@ -368,7 +367,7 @@ def _pvc(name: str, size: str) -> kubernetes_client.V1PersistentVolumeClaim: ), spec=kubernetes_client.V1PersistentVolumeClaimSpec( access_modes=access_modes, - storage_class_name=storage_class_name, + storage_class_name=SIMPLYBLOCK_CSI_STORAGE_CLASS, volume_mode="Block", resources=kubernetes_client.V1VolumeResourceRequirements(requests={"storage": size}), ), @@ -389,7 +388,6 @@ def _configure_vela_values( jwt_secret: str, database_admin_password: str, pgbouncer_admin_password: str, - storage_class_name: str, pgbouncer_config: Mapping[str, int] | None, enable_file_storage: bool, pitr_enabled: bool, @@ -434,7 +432,7 @@ def _configure_vela_values( storage_persistence["size"] = str(parameters.storage_size) else: storage_persistence.pop("size", None) - storage_persistence["storageClassName"] = storage_class_name + storage_persistence["storageClassName"] = SIMPLYBLOCK_CSI_STORAGE_CLASS storage_persistence["annotations"] = simplyblock_iops_annotations(parameters.iops) storage_spec["enabled"] = enable_file_storage @@ -443,7 +441,7 @@ def _configure_vela_values( pg_wal_spec["enabled"] = pitr_enabled wal_persistence = pg_wal_spec.setdefault("persistence", {}) wal_persistence["size"] = PITR_WAL_PVC_SIZE - wal_persistence["storageClassName"] = storage_class_name + wal_persistence["storageClassName"] = SIMPLYBLOCK_CSI_STORAGE_CLASS wal_persistence["claimName"] = wal_persistence.get("claimName") or ( f"{_autoscaler_vm_name()}{AUTOSCALER_WAL_PVC_SUFFIX}" ) @@ -452,7 +450,7 @@ def _configure_vela_values( db_persistence = db_spec.setdefault("persistence", {}) db_persistence["size"] = str(parameters.database_size) - db_persistence["storageClassName"] = storage_class_name + db_persistence["storageClassName"] = SIMPLYBLOCK_CSI_STORAGE_CLASS db_persistence["annotations"] = simplyblock_iops_annotations(parameters.iops) autoscaler_spec = values_content.setdefault("autoscalerVm", {}) @@ -471,7 +469,7 @@ def _configure_vela_values( autoscaler_persistence = autoscaler_spec.setdefault("persistence", {}) autoscaler_persistence["claimName"] = f"{_autoscaler_vm_name()}{AUTOSCALER_PVC_SUFFIX}" autoscaler_persistence["size"] = str(parameters.database_size) - autoscaler_persistence["storageClassName"] = storage_class_name + autoscaler_persistence["storageClassName"] = SIMPLYBLOCK_CSI_STORAGE_CLASS autoscaler_persistence["annotations"] = simplyblock_iops_annotations(parameters.iops) autoscaler_persistence.setdefault("accessModes", ["ReadWriteMany"]) @@ -512,10 +510,8 @@ async def create_vela_config( postgresql_resource = resources.files(__package__).joinpath("postgresql.conf") values_content = _load_chart_values(chart) - storage_class_name = SIMPLYBLOCK_CSI_STORAGE_CLASS - if not use_existing_db_pvc: - await _create_fresh_pvcs(namespace, storage_class_name, parameters, pitr_enabled=pitr_enabled) + await _create_fresh_pvcs(namespace, parameters, pitr_enabled=pitr_enabled) values_content = _configure_vela_values( values_content, @@ -523,7 +519,6 @@ async def create_vela_config( jwt_secret=jwt_secret, database_admin_password=database_admin_password, pgbouncer_admin_password=pgbouncer_admin_password, - storage_class_name=storage_class_name, pgbouncer_config=pgbouncer_config, enable_file_storage=parameters.enable_file_storage, pitr_enabled=pitr_enabled, diff --git a/src/deployment/kubernetes/volume_clone.py b/src/deployment/kubernetes/volume_clone.py index c96cc118a..07ae4ecc4 100644 --- a/src/deployment/kubernetes/volume_clone.py +++ b/src/deployment/kubernetes/volume_clone.py @@ -14,6 +14,7 @@ AUTOSCALER_PVC_SUFFIX, AUTOSCALER_WAL_PVC_SUFFIX, PITR_WAL_PVC_SIZE, + SIMPLYBLOCK_CSI_STORAGE_CLASS, get_autoscaler_vm_identity, kube_service, ) @@ -114,7 +115,6 @@ class _VolumeCloneOperation: source_branch_id: Identifier target_branch_id: Identifier snapshot_class: str - storage_class_name: str target_database_size: int timeouts: CloneTimeouts volume_label: str = "data" @@ -275,9 +275,9 @@ async def _create_target_pvc(self) -> None: volume_snapshot_name=snapshot_name, ) new_manifest.spec.resources.requests["storage"] = str(self.target_database_size) - new_manifest.spec.storage_class_name = self.storage_class_name + new_manifest.spec.storage_class_name = SIMPLYBLOCK_CSI_STORAGE_CLASS if hasattr(new_manifest.spec, "storageClassName"): - new_manifest.spec.storageClassName = self.storage_class_name + new_manifest.spec.storageClassName = SIMPLYBLOCK_CSI_STORAGE_CLASS annotations = dict(getattr(new_manifest.metadata, "annotations", {}) or {}) annotations["meta.helm.sh/release-name"] = get_settings().deployment_release_name annotations["meta.helm.sh/release-namespace"] = namespace @@ -320,7 +320,6 @@ class _SnapshotRestoreOperation: snapshot_name: str snapshot_content_name: str | None snapshot_class: str - storage_class_name: str target_database_size: int timeouts: CloneTimeouts pvc_suffix: str = AUTOSCALER_PVC_SUFFIX @@ -444,9 +443,9 @@ async def _create_target_pvc(self) -> None: volume_snapshot_name=self.ids.target_snapshot, ) new_manifest.spec.resources.requests["storage"] = str(self.target_database_size) - new_manifest.spec.storage_class_name = self.storage_class_name + new_manifest.spec.storage_class_name = SIMPLYBLOCK_CSI_STORAGE_CLASS if hasattr(new_manifest.spec, "storageClassName"): - new_manifest.spec.storageClassName = self.storage_class_name + new_manifest.spec.storageClassName = SIMPLYBLOCK_CSI_STORAGE_CLASS annotations = dict(getattr(new_manifest.metadata, "annotations", {}) or {}) annotations["meta.helm.sh/release-name"] = get_settings().deployment_release_name annotations["meta.helm.sh/release-namespace"] = self.ids.target_namespace @@ -487,7 +486,6 @@ async def clone_branch_database_volume( source_branch_id: Identifier, target_branch_id: Identifier, snapshot_class: str, - storage_class_name: str, snapshot_timeout_seconds: float, snapshot_poll_interval_seconds: float, pvc_timeout_seconds: float, @@ -510,7 +508,6 @@ async def clone_branch_database_volume( source_branch_id=source_branch_id, target_branch_id=target_branch_id, snapshot_class=snapshot_class, - storage_class_name=storage_class_name, target_database_size=database_size, timeouts=timeouts, volume_label="pgdata", @@ -523,7 +520,6 @@ async def clone_branch_database_volume( source_branch_id=source_branch_id, target_branch_id=target_branch_id, snapshot_class=snapshot_class, - storage_class_name=storage_class_name, target_database_size=_PITR_WAL_PVC_SIZE_BYTES, timeouts=timeouts, volume_label="wal", @@ -540,7 +536,6 @@ async def restore_branch_database_volume_from_snapshot( snapshot_name: str, snapshot_content_name: str | None, snapshot_class: str, - storage_class_name: str, database_size: int, snapshot_timeout_seconds: float, snapshot_poll_interval_seconds: float, @@ -557,7 +552,6 @@ async def restore_branch_database_volume_from_snapshot( snapshot_name=snapshot_name, snapshot_content_name=snapshot_content_name, snapshot_class=snapshot_class, - storage_class_name=storage_class_name, target_database_size=database_size, timeouts=CloneTimeouts( snapshot_ready=snapshot_timeout_seconds, @@ -576,7 +570,6 @@ async def restore_branch_wal_volume_from_snapshot( snapshot_namespace: str, snapshot_name: str, snapshot_class: str, - storage_class_name: str, snapshot_timeout_seconds: float, snapshot_poll_interval_seconds: float, pvc_timeout_seconds: float, @@ -593,7 +586,6 @@ async def restore_branch_wal_volume_from_snapshot( snapshot_name=snapshot_name, snapshot_content_name=None, snapshot_class=snapshot_class, - storage_class_name=storage_class_name, target_database_size=_PITR_WAL_PVC_SIZE_BYTES, pvc_suffix=AUTOSCALER_WAL_PVC_SUFFIX, timeouts=CloneTimeouts( From bb832c222f1681286ef6549180d084b87ae71888 Mon Sep 17 00:00:00 2001 From: Manohar Reddy Date: Wed, 8 Apr 2026 16:47:42 +0530 Subject: [PATCH 3/4] remove un-used functions --- src/deployment/kubernetes/__init__.py | 92 --------------------------- src/deployment/kubernetes/pvc.py | 4 +- 2 files changed, 1 insertion(+), 95 deletions(-) diff --git a/src/deployment/kubernetes/__init__.py b/src/deployment/kubernetes/__init__.py index ca96b6c23..b60cd742e 100644 --- a/src/deployment/kubernetes/__init__.py +++ b/src/deployment/kubernetes/__init__.py @@ -47,25 +47,6 @@ async def ensure_namespace(self, namespace: str, *, labels: Mapping[str, str] | if exc.status != 409: raise - async def check_namespace_status(self, namespace: str) -> dict[str, str]: - """ - Check if all pods in the namespace are running. - - Raises - - KeyError if namespace is missing - - urllib3.exceptions.HTTPError on failed access to the kubernetes API - - kubernetes_asyncio.client.rest.ApiException on API failure - """ - - async with core_v1_client() as core_v1: - namespaces = await core_v1.list_namespace() - - if namespace not in {ns.metadata.name for ns in namespaces.items}: - raise KeyError(f"Namespace {namespace} not found") - - pods = await core_v1.list_namespaced_pod(namespace) - return {pod.metadata.name: pod.status.phase for pod in pods.items} - async def apply_http_routes(self, namespace: str, routes: list[dict[str, Any]]) -> None: async with custom_api_client() as custom: for route in routes: @@ -131,38 +112,6 @@ async def apply_kong_plugin(self, namespace: str, plugin: dict[str, Any]) -> Non else: raise - async def apply_kong_consumer(self, namespace: str, consumer: dict[str, Any]) -> None: - group, version = consumer["apiVersion"].split("/") - plural = "kongconsumers" - - async with custom_api_client() as custom: - try: - await custom.create_namespaced_custom_object( - group=group, - version=version, - namespace=namespace, - plural=plural, - body=consumer, - ) - logger.info("Created KongConsumer %s in %s", consumer["metadata"]["name"], namespace) - except client.exceptions.ApiException as exc: - if exc.status == 409: - logger.info( - "KongConsumer %s already exists in %s; replacing", - consumer["metadata"]["name"], - namespace, - ) - await custom.replace_namespaced_custom_object( - group=group, - version=version, - namespace=namespace, - plural=plural, - name=consumer["metadata"]["name"], - body=consumer, - ) - else: - raise - async def ensure_endpoint_slice( self, namespace: str, @@ -221,36 +170,6 @@ async def ensure_endpoint_slice( f"Failed to create EndpointSlice {slice_name} for {service_name} in {namespace}: {exc.reason}" ) from exc - async def apply_secret(self, namespace: str, secret: dict[str, Any]) -> None: - name = secret["metadata"]["name"] - async with core_v1_client() as core_v1: - try: - await core_v1.create_namespaced_secret(namespace=namespace, body=secret) - logger.info("Created Secret %s in %s", name, namespace) - except client.exceptions.ApiException as exc: - if exc.status == 409: - logger.info("Secret %s already exists in %s; replacing", name, namespace) - await core_v1.replace_namespaced_secret( - name=name, - namespace=namespace, - body=secret, - ) - else: - raise - - async def apply_storage_class(self, manifest: dict[str, Any]) -> None: - name = manifest["metadata"]["name"] - async with storage_v1_client() as storage_v1: - try: - await storage_v1.create_storage_class(body=manifest) - logger.info("Created StorageClass %s", name) - except client.exceptions.ApiException as exc: - if exc.status == 409: - logger.info("StorageClass %s already exists; replacing", name) - await storage_v1.replace_storage_class(name=name, body=manifest) - else: - raise - async def get_storage_class(self, name: str) -> Any: async with storage_v1_client() as storage_v1: try: @@ -260,17 +179,6 @@ async def get_storage_class(self, name: str) -> Any: raise VelaKubernetesError(f"StorageClass {name!r} not found") from exc raise - async def delete_storage_class(self, name: str) -> None: - async with storage_v1_client() as storage_v1: - try: - await storage_v1.delete_storage_class(name) - logger.info("Deleted StorageClass %s", name) - except client.exceptions.ApiException as exc: - if exc.status == 404: - logger.info("StorageClass %s not found; skipping delete", name) - return - raise - async def get_service(self, namespace: str, name: str) -> Any: async with core_v1_client() as core_v1: try: diff --git a/src/deployment/kubernetes/pvc.py b/src/deployment/kubernetes/pvc.py index 2437aa0f2..2525743be 100644 --- a/src/deployment/kubernetes/pvc.py +++ b/src/deployment/kubernetes/pvc.py @@ -97,9 +97,7 @@ def build_pvc_manifest_from_existing( annotations.pop(noisy, None) access_modes = list(getattr(spec, "access_modes", None) or []) - storage_class_name = getattr(spec, "storage_class_name", None) or getattr(spec, "storageClassName", None) - if not storage_class_name: - storage_class_name = SIMPLYBLOCK_CSI_STORAGE_CLASS + storage_class_name = getattr(spec, "storage_class_name", None) or SIMPLYBLOCK_CSI_STORAGE_CLASS resources = getattr(spec, "resources", None) requests = getattr(resources, "requests", {}) if resources else {} From 011833c4b3a8972af7cc62ae647927735394afb1 Mon Sep 17 00:00:00 2001 From: Manohar Reddy Date: Wed, 8 Apr 2026 16:59:11 +0530 Subject: [PATCH 4/4] overwrite IOPS annotations during branch clone and restore --- src/api/organization/project/branch/__init__.py | 8 ++++---- src/deployment/kubernetes/volume_clone.py | 16 ++++++++++++---- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/src/api/organization/project/branch/__init__.py b/src/api/organization/project/branch/__init__.py index af5fdc0a3..f9d48471f 100644 --- a/src/api/organization/project/branch/__init__.py +++ b/src/api/organization/project/branch/__init__.py @@ -37,7 +37,6 @@ kube_service, resolve_branch_database_volume_size, update_branch_database_password, - update_branch_volume_iops, ) from .....deployment._util import deployment_namespace from .....deployment.health import ( @@ -776,9 +775,9 @@ async def _clone_branch_environment_task( pvc_timeout_seconds=_PVC_CLONE_TIMEOUT_SECONDS, pvc_poll_interval_seconds=_PVC_POLL_INTERVAL_SECONDS, database_size=parameters.database_size, + iops=parameters.iops, pitr_enabled=pitr_enabled, ) - await update_branch_volume_iops(branch_id, parameters.iops) except VelaError: await _persist_branch_status(branch_id, BranchServiceStatus.ERROR) await _cleanup_failed_branch_deployment(branch_id) @@ -853,6 +852,7 @@ async def _restore_branch_environment_task( pvc_timeout_seconds=_PVC_TIMEOUT_SECONDS, pvc_poll_interval_seconds=_PVC_POLL_INTERVAL_SECONDS, database_size=restore_database_size, + iops=parameters.iops, ) if wal_snapshot_name is not None: await restore_branch_wal_volume_from_snapshot( @@ -865,8 +865,8 @@ async def _restore_branch_environment_task( snapshot_poll_interval_seconds=_SNAPSHOT_POLL_INTERVAL_SECONDS, pvc_timeout_seconds=_PVC_TIMEOUT_SECONDS, pvc_poll_interval_seconds=_PVC_POLL_INTERVAL_SECONDS, + iops=parameters.iops, ) - await update_branch_volume_iops(branch_id, parameters.iops) except VelaError: await _persist_branch_status(branch_id, BranchServiceStatus.ERROR) await _cleanup_failed_branch_deployment(branch_id) @@ -944,8 +944,8 @@ async def _restore_branch_environment_in_place_task( pvc_timeout_seconds=_PVC_TIMEOUT_SECONDS, pvc_poll_interval_seconds=_PVC_POLL_INTERVAL_SECONDS, database_size=parameters.database_size, + iops=parameters.iops, ) - await update_branch_volume_iops(branch_id, parameters.iops) except VelaError: await _persist_branch_status(branch_id, BranchServiceStatus.ERROR) logging.exception( diff --git a/src/deployment/kubernetes/volume_clone.py b/src/deployment/kubernetes/volume_clone.py index 07ae4ecc4..261e67a3f 100644 --- a/src/deployment/kubernetes/volume_clone.py +++ b/src/deployment/kubernetes/volume_clone.py @@ -15,6 +15,7 @@ AUTOSCALER_WAL_PVC_SUFFIX, PITR_WAL_PVC_SIZE, SIMPLYBLOCK_CSI_STORAGE_CLASS, + SIMPLYBLOCK_QOS_RW_IOPS_ANNOTATION, get_autoscaler_vm_identity, kube_service, ) @@ -117,6 +118,7 @@ class _VolumeCloneOperation: snapshot_class: str target_database_size: int timeouts: CloneTimeouts + iops: int volume_label: str = "data" pvc_suffix: str = AUTOSCALER_PVC_SUFFIX ids: CloneIdentifiers = field(init=False) @@ -276,11 +278,10 @@ async def _create_target_pvc(self) -> None: ) new_manifest.spec.resources.requests["storage"] = str(self.target_database_size) new_manifest.spec.storage_class_name = SIMPLYBLOCK_CSI_STORAGE_CLASS - if hasattr(new_manifest.spec, "storageClassName"): - new_manifest.spec.storageClassName = SIMPLYBLOCK_CSI_STORAGE_CLASS annotations = dict(getattr(new_manifest.metadata, "annotations", {}) or {}) annotations["meta.helm.sh/release-name"] = get_settings().deployment_release_name annotations["meta.helm.sh/release-namespace"] = namespace + annotations[SIMPLYBLOCK_QOS_RW_IOPS_ANNOTATION] = str(self.iops) new_manifest.metadata.annotations = annotations await delete_pvc(namespace, pvc_name) @@ -321,6 +322,7 @@ class _SnapshotRestoreOperation: snapshot_content_name: str | None snapshot_class: str target_database_size: int + iops: int timeouts: CloneTimeouts pvc_suffix: str = AUTOSCALER_PVC_SUFFIX ids: CloneIdentifiers = field(init=False) @@ -444,11 +446,10 @@ async def _create_target_pvc(self) -> None: ) new_manifest.spec.resources.requests["storage"] = str(self.target_database_size) new_manifest.spec.storage_class_name = SIMPLYBLOCK_CSI_STORAGE_CLASS - if hasattr(new_manifest.spec, "storageClassName"): - new_manifest.spec.storageClassName = SIMPLYBLOCK_CSI_STORAGE_CLASS annotations = dict(getattr(new_manifest.metadata, "annotations", {}) or {}) annotations["meta.helm.sh/release-name"] = get_settings().deployment_release_name annotations["meta.helm.sh/release-namespace"] = self.ids.target_namespace + annotations[SIMPLYBLOCK_QOS_RW_IOPS_ANNOTATION] = str(self.iops) new_manifest.metadata.annotations = annotations await delete_pvc(self.ids.target_namespace, self.ids.pvc) @@ -491,6 +492,7 @@ async def clone_branch_database_volume( pvc_timeout_seconds: float, pvc_poll_interval_seconds: float, database_size: int, + iops: int, pitr_enabled: bool = False, ) -> None: """ @@ -510,6 +512,7 @@ async def clone_branch_database_volume( snapshot_class=snapshot_class, target_database_size=database_size, timeouts=timeouts, + iops=iops, volume_label="pgdata", pvc_suffix=AUTOSCALER_PVC_SUFFIX, ) @@ -522,6 +525,7 @@ async def clone_branch_database_volume( snapshot_class=snapshot_class, target_database_size=_PITR_WAL_PVC_SIZE_BYTES, timeouts=timeouts, + iops=iops, volume_label="wal", pvc_suffix=AUTOSCALER_WAL_PVC_SUFFIX, ) @@ -537,6 +541,7 @@ async def restore_branch_database_volume_from_snapshot( snapshot_content_name: str | None, snapshot_class: str, database_size: int, + iops: int, snapshot_timeout_seconds: float, snapshot_poll_interval_seconds: float, pvc_timeout_seconds: float, @@ -553,6 +558,7 @@ async def restore_branch_database_volume_from_snapshot( snapshot_content_name=snapshot_content_name, snapshot_class=snapshot_class, target_database_size=database_size, + iops=iops, timeouts=CloneTimeouts( snapshot_ready=snapshot_timeout_seconds, snapshot_poll=snapshot_poll_interval_seconds, @@ -570,6 +576,7 @@ async def restore_branch_wal_volume_from_snapshot( snapshot_namespace: str, snapshot_name: str, snapshot_class: str, + iops: int, snapshot_timeout_seconds: float, snapshot_poll_interval_seconds: float, pvc_timeout_seconds: float, @@ -587,6 +594,7 @@ async def restore_branch_wal_volume_from_snapshot( snapshot_content_name=None, snapshot_class=snapshot_class, target_database_size=_PITR_WAL_PVC_SIZE_BYTES, + iops=iops, pvc_suffix=AUTOSCALER_WAL_PVC_SUFFIX, timeouts=CloneTimeouts( snapshot_ready=snapshot_timeout_seconds,