From 67f19e3291d4d1f67b35b478d784aee7a2986412 Mon Sep 17 00:00:00 2001 From: Manohar Reddy Date: Fri, 20 Mar 2026 10:47:51 +0530 Subject: [PATCH 01/25] tests: add tests for cpu,memory,volume resize --- tests/test_branches.py | 57 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/tests/test_branches.py b/tests/test_branches.py index a4562210..00f59732 100644 --- a/tests/test_branches.py +++ b/tests/test_branches.py @@ -165,6 +165,63 @@ def test_branch_resize(client, org, project, branch_id): ) +def test_branch_resize_cpu(client, org, project, branch_id): + r = client.post( + f"organizations/{org}/projects/{project}/branches/{branch_id}/resize", + json={"milli_vcpu": 1000}, + ) + assert r.status_code == 202 + wait_for_status( + client, + f"organizations/{org}/projects/{project}/branches/{branch_id}/", + "ACTIVE_HEALTHY", + BRANCH_TIMEOUT_SEC, + ) + r = client.get(f"organizations/{org}/projects/{project}/branches/{branch_id}/") + assert r.status_code == 200 + assert r.json()["max_resources"]["milli_vcpu"] == 1000 + + +def test_branch_resize_memory(client, org, project, branch_id): + # 2 GiB expressed in bytes (must be a multiple of 256 MiB) + two_gib = 2 * 1024 * 1024 * 1024 + r = client.post( + f"organizations/{org}/projects/{project}/branches/{branch_id}/resize", + json={"memory_bytes": two_gib}, + ) + assert r.status_code == 202 + wait_for_status( + client, + f"organizations/{org}/projects/{project}/branches/{branch_id}/", + "ACTIVE_HEALTHY", + BRANCH_TIMEOUT_SEC, + ) + r = client.get(f"organizations/{org}/projects/{project}/branches/{branch_id}/") + assert r.status_code == 200 + assert r.json()["max_resources"]["ram_bytes"] == two_gib + + +def test_branch_resize_database_size(client, org, project, branch_id): + # 6 GB expressed in bytes (must be a multiple of 1 GB) + six_gb = 6 * 1_000_000_000 + # due to issues related to round up + seven_gb = 7 * 1_000_000_000 + r = client.post( + 
f"organizations/{org}/projects/{project}/branches/{branch_id}/resize", + json={"database_size": six_gb}, + ) + assert r.status_code == 202 + wait_for_status( + client, + f"organizations/{org}/projects/{project}/branches/{branch_id}/", + "ACTIVE_HEALTHY", + BRANCH_TIMEOUT_SEC, + ) + r = client.get(f"organizations/{org}/projects/{project}/branches/{branch_id}/") + assert r.status_code == 200 + assert r.json()["max_resources"]["nvme_bytes"] == seven_gb + + def test_branch_password_reset(client, org, project, branch_id): r = client.post( f"organizations/{org}/projects/{project}/branches/{branch_id}/reset-password", From 8a3d16b89fe119930e8d95e92b1f100fb7cffa0a Mon Sep 17 00:00:00 2001 From: Manohar Reddy Date: Fri, 20 Mar 2026 16:11:56 +0530 Subject: [PATCH 02/25] add tests for iops and storage resize --- tests/test_branches.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/tests/test_branches.py b/tests/test_branches.py index 00f59732..0d7b858d 100644 --- a/tests/test_branches.py +++ b/tests/test_branches.py @@ -163,6 +163,9 @@ def test_branch_resize(client, org, project, branch_id): "ACTIVE_HEALTHY", BRANCH_TIMEOUT_SEC, ) + r = client.get(f"organizations/{org}/projects/{project}/branches/{branch_id}/") + assert r.status_code == 200 + assert r.json()["max_resources"]["iops"] == 2000 def test_branch_resize_cpu(client, org, project, branch_id): @@ -222,6 +225,27 @@ def test_branch_resize_database_size(client, org, project, branch_id): assert r.json()["max_resources"]["nvme_bytes"] == seven_gb +def test_branch_resize_storage_size(client, org, project, branch_id): + # 2 GB expressed in bytes (must be a multiple of 1 GB) + two_gb = 2 * 1_000_000_000 + # due to GiB rounding by the storage backend: 2 GB -> 2 GiB -> rounds up to 3 GB + three_gb = 3 * 1_000_000_000 + r = client.post( + f"organizations/{org}/projects/{project}/branches/{branch_id}/resize", + json={"storage_size": two_gb}, + ) + assert r.status_code == 202 + wait_for_status( + 
client, + f"organizations/{org}/projects/{project}/branches/{branch_id}/", + "ACTIVE_HEALTHY", + BRANCH_TIMEOUT_SEC, + ) + r = client.get(f"organizations/{org}/projects/{project}/branches/{branch_id}/") + assert r.status_code == 200 + assert r.json()["max_resources"]["storage_bytes"] == three_gb + + def test_branch_password_reset(client, org, project, branch_id): r = client.post( f"organizations/{org}/projects/{project}/branches/{branch_id}/reset-password", From 63f4a953b8417a8c91f007452735c9498da1d7bf Mon Sep 17 00:00:00 2001 From: Max Schettler Date: Thu, 19 Mar 2026 18:10:16 +0100 Subject: [PATCH 03/25] Add endpoint for accessing the aggregate per-branch limits This is required to check on branch creation/modification within which bounds the operation may be conducted. --- src/api/_util/resourcelimit.py | 15 +++++++++++++++ src/api/organization/project/resources.py | 18 +++++++++++++++++- 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/src/api/_util/resourcelimit.py b/src/api/_util/resourcelimit.py index d8f65005..a14a7af0 100644 --- a/src/api/_util/resourcelimit.py +++ b/src/api/_util/resourcelimit.py @@ -716,3 +716,18 @@ async def project_available(session: AsyncSession, project: Project) -> Resource (await project_limits(project)).total - await project_allocations(session, project), await organization_available(session, await project.awaitable_attrs.organization), ) + + +async def project_branch_maxima(session: AsyncSession, project: Project) -> Resources: + """Minimum per-branch limit across the hierarchy (project > organization > system). + + Returns None for any field where no per-branch limit has been configured at any level. 
+ """ + organization = await project.awaitable_attrs.organization + return Resources.min( + Resources.min( + (await project_limits(project)).per_branch, + (await organization_limits(organization)).per_branch, + ), + (await system_limits(session)).per_branch, + ) diff --git a/src/api/organization/project/resources.py b/src/api/organization/project/resources.py index 19d657f6..a23b0014 100644 --- a/src/api/organization/project/resources.py +++ b/src/api/organization/project/resources.py @@ -2,7 +2,14 @@ from ....models.resources import EntityType from ..._util import Forbidden, NotFound, Unauthenticated -from ..._util.resourcelimit import Limits, Resources, project_allocations, project_available, project_limits +from ..._util.resourcelimit import ( + Limits, + Resources, + project_allocations, + project_available, + project_branch_maxima, + project_limits, +) from ...dependencies import ProjectDep, SessionDep api = APIRouter() @@ -49,3 +56,12 @@ async def allocations(session: SessionDep, project: ProjectDep) -> Resources: ) async def available(session: SessionDep, project: ProjectDep) -> Resources: return await project_available(session, project) + + +@api.get( + "/branch-maxima/", + name="organizations:projects:resources:branch-maxima", + responses={401: Unauthenticated, 403: Forbidden, 404: NotFound}, +) +async def branch_maxima(session: SessionDep, project: ProjectDep) -> Resources: + return await project_branch_maxima(session, project) From 85b5d8c229987897a539c2b558a46d7b41d36843 Mon Sep 17 00:00:00 2001 From: Manohar Reddy Date: Thu, 19 Mar 2026 09:48:04 +0530 Subject: [PATCH 04/25] fix api tests --- tests/conftest.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/tests/conftest.py b/tests/conftest.py index 07dfd80f..280b9174 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -72,6 +72,28 @@ def _id(location: str) -> ULID: return ULID.from_str(location.rstrip("/").rsplit("/", 1)[-1]) +_TEST_ORG_NAMES: frozenset[str] = 
frozenset( + { + "test-org-lifecycle", + "test-org-lifecycle-upd", + "test-org-projects", + "test-org-resources", + "test-org-branches", + } +) + + +@pytest.fixture(scope="session", autouse=True) +def _cleanup_leftover_test_resources(client): + """Delete any orgs left behind by a previously interrupted test run.""" + r = client.get("organizations/") + if r.status_code == 200: + for org in r.json(): + if org.get("name") in _TEST_ORG_NAMES: + client.delete(f"organizations/{org['id']}/") + yield + + @pytest.fixture(scope="session") def client(): base = VELA_API_URL.rstrip("/") + "/" From 5f8f77791caeec81bb105898263e622ffc45e1a9 Mon Sep 17 00:00:00 2001 From: Manohar Reddy Date: Fri, 20 Mar 2026 16:17:24 +0530 Subject: [PATCH 05/25] added a module-scoped autouse fixture --- tests/conftest.py | 22 ---------------------- tests/test_organizations.py | 9 +++++++++ tests/test_projects.py | 7 +++++++ 3 files changed, 16 insertions(+), 22 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 280b9174..07dfd80f 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -72,28 +72,6 @@ def _id(location: str) -> ULID: return ULID.from_str(location.rstrip("/").rsplit("/", 1)[-1]) -_TEST_ORG_NAMES: frozenset[str] = frozenset( - { - "test-org-lifecycle", - "test-org-lifecycle-upd", - "test-org-projects", - "test-org-resources", - "test-org-branches", - } -) - - -@pytest.fixture(scope="session", autouse=True) -def _cleanup_leftover_test_resources(client): - """Delete any orgs left behind by a previously interrupted test run.""" - r = client.get("organizations/") - if r.status_code == 200: - for org in r.json(): - if org.get("name") in _TEST_ORG_NAMES: - client.delete(f"organizations/{org['id']}/") - yield - - @pytest.fixture(scope="session") def client(): base = VELA_API_URL.rstrip("/") + "/" diff --git a/tests/test_organizations.py b/tests/test_organizations.py index c60a45c0..81bff8d5 100644 --- a/tests/test_organizations.py +++ b/tests/test_organizations.py @@ -1,3 
+1,4 @@ +import pytest from conftest import _id _ORG_NAME = "test-org-lifecycle" @@ -5,6 +6,14 @@ _state: dict = {} +@pytest.fixture(scope="module", autouse=True) +def _org_cleanup(client): + """Safety net: delete the org if test_org_delete did not run (e.g. early failure).""" + yield + if "org_id" in _state: + client.delete(f"organizations/{_state['org_id']}/") # no-op if already deleted + + def test_org_list_empty(client): r = client.get("organizations/") assert r.status_code == 200 diff --git a/tests/test_projects.py b/tests/test_projects.py index 2da5ab1f..0d957aec 100644 --- a/tests/test_projects.py +++ b/tests/test_projects.py @@ -65,3 +65,10 @@ def test_project_list_contains(client, org): def test_project_not_found(client, org): r = client.get(f"organizations/{org}/projects/00000000000000000000000000/") assert r.status_code == 404 + + +def test_project_delete(client, org): + r = client.delete(f"organizations/{org}/projects/{_state['project_id']}/") + assert r.status_code == 204 + r = client.get(f"organizations/{org}/projects/{_state['project_id']}/") + assert r.status_code == 404 From 576048011b7e5a04423b31b96d353b0579d12a84 Mon Sep 17 00:00:00 2001 From: Max Schettler Date: Fri, 20 Mar 2026 14:00:48 +0100 Subject: [PATCH 06/25] Fix system limits The recent changes on the resource limit implementation exposed an issue with the implementation of system limits. They shouldn't be a third, global scope of resources, but rather a default for organization limits on creation. This change addresses this, by introducing a separate, appropriately named table for handling these defaults. The system-level endpoints are removed as they are not needed anymore, and a migration ensures existing system limits are converted to the correct values. 
--- src/api/_util/resourcelimit.py | 85 +++++-------------- src/api/organization/__init__.py | 14 +-- src/api/organization/project/resources.py | 4 +- src/api/resources.py | 33 ------- src/api/system.py | 9 +- .../e8f3a2d51c9b_fix_system_limits.py | 72 ++++++++++++++++ src/models/resources.py | 8 +- tests/test_resources.py | 37 +++----- 8 files changed, 128 insertions(+), 134 deletions(-) create mode 100644 src/models/migrations/versions/e8f3a2d51c9b_fix_system_limits.py diff --git a/src/api/_util/resourcelimit.py b/src/api/_util/resourcelimit.py index a14a7af0..d58af461 100644 --- a/src/api/_util/resourcelimit.py +++ b/src/api/_util/resourcelimit.py @@ -20,6 +20,7 @@ BranchAllocationPublic, BranchProvisioning, EntityType, + OrganizationLimitDefault, ProvisioningLog, ResourceLimit, ResourceLimitsPublic, @@ -134,26 +135,6 @@ async def clone_branch_provisioning(session: SessionDep, source: Branch, target: await session.refresh(target) -async def initialize_organization_resource_limits(session: SessionDep, organization: Organization): - result = await session.execute(select(ResourceLimit).where(ResourceLimit.entity_type == EntityType.system)) - system_limits = result.scalars().all() - - with session.no_autoflush: - for system_limit in system_limits: - await session.merge( - ResourceLimit( - entity_type=EntityType.org, - org_id=organization.id, - project_id=None, - resource=system_limit.resource, - max_total=system_limit.max_total, - max_per_branch=system_limit.max_per_branch, - ) - ) - await session.commit() - await session.refresh(organization) - - def dict_to_resource_limits(value: Mapping[ResourceType, int | None]) -> ResourceLimitsPublic: return ResourceLimitsPublic( milli_vcpu=value.get(ResourceType.milli_vcpu), @@ -320,7 +301,6 @@ async def get_remaining_project_resources( *, exclude_branch_ids: Sequence[Identifier] | None = None, ) -> ResourceLimitsPublic: - system_limits = await get_system_resource_limits(session) organization_limits = await 
get_organization_resource_limits(session, organization_id) project_limits = await get_project_resource_limits(session, project_id) @@ -337,16 +317,13 @@ async def get_remaining_project_resources( effective_limits: dict[ResourceType, int] = {} for resource_type in ResourceType: - system_limit = system_limits.get(resource_type) organization_limit = organization_limits.get(resource_type) project_limit = project_limits.get(resource_type) per_branch_limit = ( project_limit.max_per_branch - if project_limit and project_limit.max_per_branch is not None + if project_limit else organization_limit.max_per_branch - if organization_limit and organization_limit.max_per_branch is not None - else system_limit.max_per_branch - if system_limit and system_limit.max_per_branch is not None + if organization_limit else None ) @@ -374,17 +351,6 @@ async def get_remaining_project_resources( return dict_to_resource_limits(effective_limits) -async def get_system_resource_limits(session: SessionDep) -> dict[ResourceType, ResourceLimit]: - result = await session.execute( - select(ResourceLimit).where( - ResourceLimit.entity_type == EntityType.system, - ResourceLimit.org_id.is_(None), # type: ignore[union-attr] - ResourceLimit.project_id.is_(None), # type: ignore[union-attr] - ) - ) - return _map_resource_limits(list(result.scalars().all())) - - async def get_organization_resource_limits( session: SessionDep, organization_id: Identifier ) -> dict[ResourceType, ResourceLimit]: @@ -638,6 +604,18 @@ def from_database(cls, limits: Sequence[ResourceLimit]) -> Self: per_branch=Resources.from_database(limits, "max_per_branch"), ) + @classmethod + def from_defaults(cls, defaults: Sequence["OrganizationLimitDefault"]) -> Self: + return cls( + total=Resources.from_database(defaults, "max_total"), + per_branch=Resources.from_database(defaults, "max_per_branch"), + ) + + @classmethod + async def organization_defaults(cls, session: AsyncSession) -> Self: + result = await 
session.execute(select(OrganizationLimitDefault)) + return cls.from_defaults(list(result.scalars().all())) + def to_database(self, entity_type: EntityType) -> list[ResourceLimit]: return [ ResourceLimit( @@ -651,14 +629,6 @@ def to_database(self, entity_type: EntityType) -> list[ResourceLimit]: ] -async def system_limits(session: AsyncSession) -> Limits: - statement = select(ResourceLimit).where( - col(ResourceLimit.org_id).is_(None), col(ResourceLimit.project_id).is_(None) - ) - entries = (await session.exec(statement)).all() - return Limits.from_database(entries) - - async def organization_limits(organization: Organization) -> Limits: return Limits.from_database(await organization.awaitable_attrs.limits) @@ -683,11 +653,6 @@ def _allocations(): ) -async def system_allocations(session: AsyncSession) -> Resources: - allocations = (await session.exec(_allocations())).one() - return Resources(**(allocations._asdict())) - - async def organization_allocations(session: AsyncSession, organization: Organization) -> Resources: statement = _allocations().join(Project).where(Project.organization_id == organization.id) allocations = (await session.exec(statement)).one() @@ -700,15 +665,8 @@ async def project_allocations(session: AsyncSession, project: Project) -> Resour return Resources(**(allocations._asdict())) -async def system_available(session: AsyncSession) -> Resources: - return (await system_limits(session)).total - await system_allocations(session) - - async def organization_available(session: AsyncSession, organization: Organization) -> Resources: - return Resources.min( - (await organization_limits(organization)).total - await organization_allocations(session, organization), - await system_available(session), - ) + return (await organization_limits(organization)).total - await organization_allocations(session, organization) async def project_available(session: AsyncSession, project: Project) -> Resources: @@ -718,16 +676,13 @@ async def project_available(session: 
AsyncSession, project: Project) -> Resource ) -async def project_branch_maxima(session: AsyncSession, project: Project) -> Resources: - """Minimum per-branch limit across the hierarchy (project > organization > system). +async def project_branch_maxima(project: Project) -> Resources: + """Minimum per-branch limit across the hierarchy (project > organization). Returns None for any field where no per-branch limit has been configured at any level. """ organization = await project.awaitable_attrs.organization return Resources.min( - Resources.min( - (await project_limits(project)).per_branch, - (await organization_limits(organization)).per_branch, - ), - (await system_limits(session)).per_branch, + (await project_limits(project)).per_branch, + (await organization_limits(organization)).per_branch, ) diff --git a/src/api/organization/__init__.py b/src/api/organization/__init__.py index de0de2d4..29c4a74c 100644 --- a/src/api/organization/__init__.py +++ b/src/api/organization/__init__.py @@ -13,9 +13,9 @@ from ...deployment import delete_deployment from ...models.audit import OrganizationAuditLog from ...models.organization import Organization, OrganizationCreate, OrganizationUpdate -from ...models.resources import ResourceTypePublic, ResourceUsageMinute +from ...models.resources import EntityType, ResourceTypePublic, ResourceUsageMinute from .._util import Conflict, Forbidden, NotFound, Unauthenticated, url_path_for -from .._util.resourcelimit import initialize_organization_resource_limits +from .._util.resourcelimit import Limits from .._util.role import create_organization_admin_role from ..auth import authenticated_user from ..dependencies import AuthUserDep, OrganizationDep, SessionDep @@ -87,7 +87,12 @@ async def create( user: AuthUserDep, response: Literal["empty", "full"] = "empty", ) -> JSONResponse: - entity = Organization(**parameters.model_dump(), users=[user]) + default_limits = await Limits.organization_defaults(session) + entity = Organization( + 
**parameters.model_dump(), + users=[user], + limits=default_limits.to_database(EntityType.org), + ) session.add(entity) try: await session.commit() @@ -108,9 +113,6 @@ async def create( await session.commit() await session.refresh(entity) - # Set up initial organization resource limits - await initialize_organization_resource_limits(session, entity) - entity_url = url_path_for(request, "organizations:detail", organization_id=entity.id) return JSONResponse( content=entity.model_dump() if response == "full" else None, diff --git a/src/api/organization/project/resources.py b/src/api/organization/project/resources.py index a23b0014..6dc201b6 100644 --- a/src/api/organization/project/resources.py +++ b/src/api/organization/project/resources.py @@ -63,5 +63,5 @@ async def available(session: SessionDep, project: ProjectDep) -> Resources: name="organizations:projects:resources:branch-maxima", responses={401: Unauthenticated, 403: Forbidden, 404: NotFound}, ) -async def branch_maxima(session: SessionDep, project: ProjectDep) -> Resources: - return await project_branch_maxima(session, project) +async def branch_maxima(project: ProjectDep) -> Resources: + return await project_branch_maxima(project) diff --git a/src/api/resources.py b/src/api/resources.py index 12c5b1d4..9d2c7383 100644 --- a/src/api/resources.py +++ b/src/api/resources.py @@ -40,10 +40,7 @@ ResourceLimitsPublic, ResourceUsageMinute, ) -from ._util import Unauthenticated from ._util.resourcelimit import ( - Limits, - Resources, check_resource_limits, create_or_update_branch_provisioning, dict_to_resource_limits, @@ -55,9 +52,6 @@ get_organization_resource_usage, get_project_resource_usage, make_usage_cycle, - system_allocations, - system_available, - system_limits, ) from .auth import authenticated_user from .db import SessionDep @@ -494,30 +488,3 @@ async def monitor_resources(): await asyncio.sleep((interval - elapsed).total_seconds()) else: logger.warning("Resource monitor execution exeeded desired 
interval") - - -@api.get( - "/limits/", - name="resources:limits", - responses={401: Unauthenticated}, -) -async def limits(session: SessionDep) -> Limits: - return await system_limits(session) - - -@api.get( - "/allocations/", - name="resources:allocations", - responses={401: Unauthenticated}, -) -async def allocated(session: SessionDep) -> Resources: - return await system_allocations(session) - - -@api.get( - "/available/", - name="resources:available", - responses={401: Unauthenticated}, -) -async def available(session: SessionDep) -> Resources: - return await system_available(session) diff --git a/src/api/system.py b/src/api/system.py index 23ce59c2..5256aa99 100644 --- a/src/api/system.py +++ b/src/api/system.py @@ -21,9 +21,8 @@ VCPU_MILLIS_MIN, VCPU_MILLIS_STEP, ) -from ..models.resources import ResourceLimitDefinitionPublic, ResourceType +from ..models.resources import OrganizationLimitDefault, ResourceLimitDefinitionPublic, ResourceType from ..models.role import AccessRight -from ._util.resourcelimit import get_system_resource_limits from .auth import authenticated_user from .db import SessionDep @@ -68,10 +67,12 @@ async def list_available_permissions( async def list_resource_limit_definitions( session: SessionDep, ) -> list[ResourceLimitDefinitionPublic]: - system_limits = await get_system_resource_limits(session) + defaults_result = await session.execute(select(OrganizationLimitDefault)) + defaults = {d.resource: d for d in defaults_result.scalars().all()} def _get_limit(resource_type: ResourceType, default: int) -> int: - return system_limits[resource_type].max_total if system_limits[resource_type] else default + d = defaults.get(resource_type) + return d.max_total if d else default max_vcpu_millis = _get_limit(ResourceType.milli_vcpu, VCPU_MILLIS_MAX) max_ram_bytes = _get_limit(ResourceType.ram, MEMORY_MAX) diff --git a/src/models/migrations/versions/e8f3a2d51c9b_fix_system_limits.py b/src/models/migrations/versions/e8f3a2d51c9b_fix_system_limits.py 
new file mode 100644 index 00000000..214940d1 --- /dev/null +++ b/src/models/migrations/versions/e8f3a2d51c9b_fix_system_limits.py @@ -0,0 +1,72 @@ +"""fix-system-limits + +Revision ID: e8f3a2d51c9b +Revises: 9bebcc605033 +Create Date: 2026-03-20 00:00:00.000000 + +""" +from typing import Sequence, Union + +import sqlalchemy as sa +import sqlmodel +import sqlmodel.sql +from alembic import op +from ulid import ULID + +# revision identifiers, used by Alembic. +revision: str = "e8f3a2d51c9b" +down_revision: Union[str, Sequence[str], None] = "9bebcc605033" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + """Upgrade schema.""" + resource_type_enum = sa.Enum( + "milli_vcpu", "ram", "iops", "database_size", "storage_size", + name="resourcetype", + create_type=False, + ) + + # Create the organizationlimitdefault table + op.create_table( + "organizationlimitdefault", + sa.Column("id", sa.UUID(), nullable=False), + sa.Column("resource", resource_type_enum, nullable=False), + sa.Column("max_total", sa.BigInteger(), nullable=False), + sa.Column("max_per_branch", sa.BigInteger(), nullable=False), + sa.PrimaryKeyConstraint("id"), + ) + op.create_index("uq_org_limit_default_resource", "organizationlimitdefault", ["resource"], unique=True) + + # Migrate system limits → OrganizationLimitDefault + conn = op.get_bind() + system_rows = conn.execute( + sa.text("SELECT resource, max_total, max_per_branch FROM resourcelimit WHERE entity_type = 'system'") + ).fetchall() + + org_limit_default = sa.table( + "organizationlimitdefault", + sa.column("id", sa.UUID), + sa.column("resource", resource_type_enum), + sa.column("max_total", sa.BigInteger), + sa.column("max_per_branch", sa.BigInteger), + ) + for resource, max_total, max_per_branch in system_rows: + conn.execute( + sa.insert(org_limit_default).values( + id=ULID().to_uuid(), + resource=resource, + max_total=max_total, + max_per_branch=max_per_branch, 
+ ) + ) + + conn.execute(sa.text("DELETE FROM resourcelimit WHERE entity_type = 'system'")) + + +def downgrade() -> None: + """Downgrade schema.""" + # Data migration is not trivially reversible; just drop the table. + op.drop_index("uq_org_limit_default_resource", table_name="organizationlimitdefault") + op.drop_table("organizationlimitdefault") diff --git a/src/models/resources.py b/src/models/resources.py index b2357124..28815c83 100644 --- a/src/models/resources.py +++ b/src/models/resources.py @@ -29,7 +29,6 @@ class ResourceType(PyEnum): class EntityType(PyEnum): - system = "system" org = "org" project = "project" @@ -85,6 +84,13 @@ class ResourceLimit(AsyncAttrs, Model, table=True): max_per_branch: Annotated[int, Field(sa_type=BigInteger)] +class OrganizationLimitDefault(AsyncAttrs, Model, table=True): + __table_args__ = (Index("uq_org_limit_default_resource", "resource", unique=True),) + resource: ResourceType + max_total: Annotated[int, Field(sa_type=BigInteger)] + max_per_branch: Annotated[int, Field(sa_type=BigInteger)] + + class BranchProvisioning(AsyncAttrs, Model, table=True): branch_id: Identifier | None = Model.foreign_key_field("branch", ondelete="CASCADE") resource: ResourceType diff --git a/tests/test_resources.py b/tests/test_resources.py index 28b741c0..f7a6ec79 100644 --- a/tests/test_resources.py +++ b/tests/test_resources.py @@ -21,7 +21,6 @@ _FIELDS = {"milli_vcpu", "ram", "iops", "database_size", "storage_size"} # Stored during the test run to reuse in later assertions. 
-_system_available: dict = {} _org_available: dict = {} @@ -42,42 +41,34 @@ def project(make_project, org): # --------------------------------------------------------------------------- -# System +# Org default limits (must run before test_org_set_limits changes them) # --------------------------------------------------------------------------- -def test_system_limits(client) -> None: - r = client.get("resources/limits/") +def test_org_has_default_limits_on_creation(client, org) -> None: + """New org should have limits populated from OrganizationLimitDefault defaults.""" + r = client.get(f"organizations/{org}/resources/limits/") assert r.status_code == 200 data = r.json() assert set(data.keys()) == {"total", "per_branch"} for section in ("total", "per_branch"): assert set(data[section].keys()) == _FIELDS for v in data[section].values(): - assert v is not None and isinstance(v, int) - + assert v is not None and isinstance(v, int) and v > 0 -def test_system_allocations(client) -> None: - r = client.get("resources/allocations/") - assert r.status_code == 200 - data = r.json() - assert set(data.keys()) == _FIELDS - for v in data.values(): - assert v == 0 - -def test_system_available(client) -> None: - r = client.get("resources/available/") - assert r.status_code == 200 - data = r.json() - assert set(data.keys()) == _FIELDS - for v in data.values(): - assert v >= 0 - _system_available.update(data) +def test_org_default_limits_are_available(client, org) -> None: + """Available resources for a new org should equal its default limits (no branches yet).""" + limits_r = client.get(f"organizations/{org}/resources/limits/") + avail_r = client.get(f"organizations/{org}/resources/available/") + assert limits_r.status_code == 200 + assert avail_r.status_code == 200 + for f in _FIELDS: + assert avail_r.json()[f] == limits_r.json()["total"][f] # --------------------------------------------------------------------------- -# Org +# Org (override limits) # 
--------------------------------------------------------------------------- From 2a1c25c2076efc34069c38ada182b6f1ed49f44f Mon Sep 17 00:00:00 2001 From: Max Schettler Date: Fri, 20 Mar 2026 17:02:23 +0100 Subject: [PATCH 07/25] Fix staging/production redeploy The workflow simply referenced the built image tag; adding the digest ensures helm detects changes and redeploys when a new image is built. --- .github/workflows/ci-deploy.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci-deploy.yml b/.github/workflows/ci-deploy.yml index 65875fb4..3f2d3e51 100644 --- a/.github/workflows/ci-deploy.yml +++ b/.github/workflows/ci-deploy.yml @@ -17,7 +17,7 @@ jobs: needs: build uses: ./.github/workflows/_deploy.yml with: - image_tag: ${{ needs.build.outputs.image_tag }} + image_tag: '${{ needs.build.outputs.image_tag }}@${{ needs.build.outputs.image_digest }}' deployment_name: vela environment: ${{ github.ref_name == 'main' && 'prod' || 'dev' }} secrets: inherit From 1d5bc91c4c34b481f4a0f6c4dc8236bcda7cac20 Mon Sep 17 00:00:00 2001 From: Max Schettler Date: Fri, 20 Mar 2026 16:57:59 +0100 Subject: [PATCH 08/25] Fix system limit migration The previous version failed to reuse the existing enum datatype. The change makes sure it is referenced instead. --- .../migrations/versions/e8f3a2d51c9b_fix_system_limits.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/models/migrations/versions/e8f3a2d51c9b_fix_system_limits.py b/src/models/migrations/versions/e8f3a2d51c9b_fix_system_limits.py index 214940d1..8a579eb8 100644 --- a/src/models/migrations/versions/e8f3a2d51c9b_fix_system_limits.py +++ b/src/models/migrations/versions/e8f3a2d51c9b_fix_system_limits.py @@ -11,6 +11,7 @@ import sqlmodel import sqlmodel.sql from alembic import op +from sqlalchemy.dialects import postgresql from ulid import ULID # revision identifiers, used by Alembic. 
@@ -22,11 +23,7 @@ def upgrade() -> None: """Upgrade schema.""" - resource_type_enum = sa.Enum( - "milli_vcpu", "ram", "iops", "database_size", "storage_size", - name="resourcetype", - create_type=False, - ) + resource_type_enum = postgresql.ENUM(name="resourcetype", create_type=False) # Create the organizationlimitdefault table op.create_table( From bb8a0dcdecc3b163ced83c67c453082f7e7bfd01 Mon Sep 17 00:00:00 2001 From: Manohar Reddy Date: Thu, 19 Mar 2026 11:26:00 +0530 Subject: [PATCH 09/25] json logs: use python-json-logger --- pyproject.toml | 1 + src/api/__init__.py | 25 +++++++++++++++++++------ src/api/settings.py | 1 + 3 files changed, 21 insertions(+), 6 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index eb60547e..75560158 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,6 +28,7 @@ dependencies = [ 'cloudflare', 'alembic', 'httpx', + 'python-json-logger>=4.0', ] [tool.setuptools] diff --git a/src/api/__init__.py b/src/api/__init__.py index 1de6e0c1..af66fb7b 100644 --- a/src/api/__init__.py +++ b/src/api/__init__.py @@ -28,17 +28,30 @@ def _logging_config() -> dict[str, Any]: - log_format = "%(asctime)s - %(levelname)s - %(name)s - %(message)s" date_format = "%Y-%m-%dT%H:%M:%S%z" - log_level = get_settings().log_level + settings = get_settings() + log_level = settings.log_level + if settings.log_json: + formatter: dict[str, Any] = { + "()": "pythonjsonlogger.json.JsonFormatter", + "fmt": "%(asctime)s %(levelname)s %(name)s %(message)s", + "rename_fields": { + "asctime": "timestamp", + "levelname": "level", + "name": "logger", + }, + "datefmt": date_format, + } + else: + formatter = { + "format": "%(asctime)s - %(levelname)s - %(name)s - %(message)s", + "datefmt": date_format, + } return { "version": 1, "disable_existing_loggers": False, "formatters": { - "default": { - "format": log_format, - "datefmt": date_format, - }, + "default": formatter, }, "handlers": { "default": { diff --git a/src/api/settings.py b/src/api/settings.py index 
24e85ef9..b652afa9 100644 --- a/src/api/settings.py +++ b/src/api/settings.py @@ -22,6 +22,7 @@ class Settings(BaseSettings): keycloak_admin_name: str keycloak_admin_secret: str log_level: Literal["CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG"] = "INFO" + log_json: bool = True resource_monitor_interval: Annotated[timedelta, BeforeValidator(permissive_numeric_timedelta)] = timedelta( seconds=60 ) From 08c7b5a4deed815a7148af1093b1d7e928d48f9a Mon Sep 17 00:00:00 2001 From: Manohar Reddy Date: Mon, 23 Mar 2026 17:55:44 +0530 Subject: [PATCH 10/25] parse vela-controller JSON logs --- chart/templates/vector-config.yaml | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/chart/templates/vector-config.yaml b/chart/templates/vector-config.yaml index 9d0a294c..7b288427 100644 --- a/chart/templates/vector-config.yaml +++ b/chart/templates/vector-config.yaml @@ -126,16 +126,23 @@ data: inputs: - router.controller source: |- - parsed, err = parse_regex(.event_message, r'^(?P