Resolve pyright type-check errors

Bihan  Rana · Bihan  Rana · commit abba7da9eb8a · 2025-12-23T21:57:00.000+05:45
diff --git a/src/dstack/_internal/core/models/configurations.py b/src/dstack/_internal/core/models/configurations.py
@@ -838,25 +838,19 @@ class ServiceConfigurationParams(CoreModel):
         SERVICE_HTTPS_DEFAULT
     )
     auth: Annotated[bool, Field(description="Enable the authorization")] = True
-    # replicas: Annotated[
-    #     Range[int],
-    #     Field(
-    #         description="The number of replicas. Can be a number (e.g. `2`) or a range (`0..4` or `1..8`). "
-    #         "If it's a range, the `scaling` property is required"
-    #     ),
-    # ] = Range[int](min=1, max=1)
-    # scaling: Annotated[
-    #     Optional[ScalingSpec],
-    #     Field(description="The auto-scaling rules. Required if `replicas` is set to a range"),
-    # ] = None
+
+    scaling: Annotated[
+        Optional[ScalingSpec],
+        Field(description="The auto-scaling rules. Required if `replicas` is set to a range"),
+    ] = None
     rate_limits: Annotated[list[RateLimit], Field(description="Rate limiting rules")] = []
     probes: Annotated[
         list[ProbeConfig],
         Field(description="List of probes used to determine job health"),
     ] = []
 
     replicas: Annotated[
-        Optional[Union[Range[int], List[ReplicaGroup], int, str]],
+        Optional[Union[Range[int], List[ReplicaGroup]]],
         Field(
             description=(
                 "List of replica groups. Each group defines replicas with shared configuration "
@@ -882,16 +876,6 @@ def convert_model(cls, v: Optional[Union[AnyModel, str]]) -> Optional[AnyModel]:
             return OpenAIChatModel(type="chat", name=v, format="openai")
         return v
 
-    # @validator("replicas")
-    # def convert_replicas(cls, v: Range[int]) -> Range[int]:
-    #     if v.max is None:
-    #         raise ValueError("The maximum number of replicas is required")
-    #     if v.min is None:
-    #         v.min = 0
-    #     if v.min < 0:
-    #         raise ValueError("The minimum number of replicas must be greater than or equal to 0")
-    #     return v
-
     @validator("gateway")
     def validate_gateway(
         cls, v: Optional[Union[bool, str]]
@@ -902,22 +886,6 @@ def validate_gateway(
             )
         return v
 
-    # @root_validator()
-    # def validate_scaling(cls, values):
-    #     replica_groups = values.get("replica_groups")
-    #     # If replica_groups are set, we don't need to validate scaling.
-    #     # Each replica group has its own scaling.
-    #     if replica_groups:
-    #         return values
-
-    #     scaling = values.get("scaling")
-    #     replicas = values.get("replicas")
-    #     if replicas and replicas.min != replicas.max and not scaling:
-    #         raise ValueError("When you set `replicas` to a range, ensure to specify `scaling`.")
-    #     if replicas and replicas.min == replicas.max and scaling:
-    #         raise ValueError("To use `scaling`, `replicas` must be set to a range.")
-    #     return values
-
     @root_validator()
     def normalize_replicas(cls, values):
         replicas = values.get("replicas")
@@ -966,10 +934,12 @@ def validate_probes(cls, v: list[ProbeConfig]) -> list[ProbeConfig]:
         return v
 
     @validator("replicas")
-    def validate_replicas(cls, v: Optional[List[ReplicaGroup]]) -> Optional[List[ReplicaGroup]]:
+    def validate_replicas(
+        cls, v: Optional[Union[Range[int], List[ReplicaGroup]]]
+    ) -> Optional[Union[Range[int], List[ReplicaGroup]]]:
         if v is None:
             return v
-        if isinstance(v, (Range, int, str)):
+        if isinstance(v, Range):
             return v
 
         if isinstance(v, list):
@@ -1007,6 +977,18 @@ class ServiceConfiguration(
 ):
     type: Literal["service"] = "service"
 
+    @property
+    def replica_groups(self) -> Optional[List[ReplicaGroup]]:
+        """
+        Get normalized replica groups. After validation, replicas is always List[ReplicaGroup] or None.
+        Use this property for type-safe access in code.
+        """
+        if self.replicas is None:
+            return None
+        if isinstance(self.replicas, list):
+            return self.replicas
+        return None
+
 
 AnyRunConfiguration = Union[DevEnvironmentConfiguration, TaskConfiguration, ServiceConfiguration]
 
diff --git a/src/dstack/_internal/server/background/tasks/process_runs.py b/src/dstack/_internal/server/background/tasks/process_runs.py
@@ -196,10 +196,9 @@ async def _process_pending_run(session: AsyncSession, run_model: RunModel):
         logger.debug("%s: retrying run is not yet ready for resubmission", fmt(run_model))
         return
 
-    # run_model.desired_replica_count = 1
     if run.run_spec.configuration.type == "service":
         run_model.desired_replica_count = sum(
-            group.replicas.min or 0 for group in run.run_spec.configuration.replicas
+            group.replicas.min or 0 for group in (run.run_spec.configuration.replica_groups or [])
         )
         await update_service_desired_replica_count(
             session,
@@ -214,7 +213,7 @@ async def _process_pending_run(session: AsyncSession, run_model: RunModel):
             return
 
         # Per group scaling because single replica is also normalized to replica groups.
-        replicas = run.run_spec.configuration.replicas or []
+        replicas: List[ReplicaGroup] = run.run_spec.configuration.replica_groups or []
         counts = (
             json.loads(run_model.desired_replica_counts)
             if run_model.desired_replica_counts
@@ -461,7 +460,7 @@ async def _handle_run_replicas(
             # FIXME: should only include scaling events, not retries and deployments
             last_scaled_at=max((r.timestamp for r in replicas_info), default=None),
         )
-        replicas = run_spec.configuration.replicas or []
+        replicas: List[ReplicaGroup] = run_spec.configuration.replica_groups or []
         if replicas:
             counts = (
                 json.loads(run_model.desired_replica_counts)
diff --git a/src/dstack/_internal/server/services/runs/__init__.py b/src/dstack/_internal/server/services/runs/__init__.py
@@ -520,7 +520,7 @@ async def submit_run(
 
             global_replica_num = 0  # Global counter across all groups for unique replica_num
 
-            for replica_group in service_config.replicas:
+            for replica_group in service_config.replica_groups or []:
                 if run_spec.merged_profile.schedule is not None:
                     group_initial_replicas = 0
                 else:
diff --git a/src/dstack/_internal/server/services/runs/spec.py b/src/dstack/_internal/server/services/runs/spec.py
@@ -90,7 +90,7 @@ def validate_run_spec_and_set_defaults(
     if isinstance(run_spec.configuration, ServiceConfiguration):
         # Check if any group has min=0
         if run_spec.merged_profile.schedule and any(
-            group.replicas.min == 0 for group in run_spec.configuration.replicas
+            group.replicas.min == 0 for group in (run_spec.configuration.replica_groups or [])
         ):
             raise ServerClientError(
                 "Scheduled services with autoscaling to zero are not supported"
@@ -154,7 +154,7 @@ def get_nodes_required_num(run_spec: RunSpec) -> int:
         nodes_required_num = run_spec.configuration.nodes
     elif run_spec.configuration.type == "service":
         nodes_required_num = sum(
-            group.replicas.min or 0 for group in run_spec.configuration.replicas
+            group.replicas.min or 0 for group in (run_spec.configuration.replica_groups or [])
         )
     return nodes_required_num
 
diff --git a/src/dstack/_internal/server/services/services/__init__.py b/src/dstack/_internal/server/services/services/__init__.py
@@ -144,7 +144,8 @@ def _register_service_in_server(run_model: RunModel, run_spec: RunSpec) -> Servi
         )
     # Check if any group has autoscaling (min != max)
     has_autoscaling = any(
-        group.replicas.min != group.replicas.max for group in run_spec.configuration.replicas
+        group.replicas.min != group.replicas.max
+        for group in (run_spec.configuration.replica_groups or [])
     )
     if has_autoscaling:
         raise ServerClientError(
@@ -308,21 +309,17 @@ async def update_service_desired_replica_count(
     if run_model.gateway_id is not None:
         conn = await get_or_add_gateway_connection(session, run_model.gateway_id)
         stats = await conn.get_stats(run_model.project.name, run_model.run_name)
-    if configuration.replicas:
+    replica_groups = configuration.replica_groups or []
+    if replica_groups:
         desired_replica_counts = {}
         total = 0
         prev_counts = (
             json.loads(run_model.desired_replica_counts)
             if run_model.desired_replica_counts
             else {}
         )
-        for group in configuration.replicas:
-            # temp group_wise config to get the group_wise desired replica count.
-            group_config = configuration.copy(
-                exclude={"replicas"},
-                update={"replicas": group.replicas, "scaling": group.scaling},
-            )
-            scaler = get_service_scaler(group_config)
+        for group in replica_groups:
+            scaler = get_service_scaler(group.replicas, group.scaling)
             group_desired = scaler.get_desired_count(
                 current_desired_count=prev_counts.get(group.name, group.replicas.min or 0),
                 stats=stats,
@@ -334,9 +331,11 @@ async def update_service_desired_replica_count(
         run_model.desired_replica_count = total
     else:
         # Todo Not required as single replica is normalized to replicas.
-        scaler = get_service_scaler(configuration)
-        run_model.desired_replica_count = scaler.get_desired_count(
-            current_desired_count=run_model.desired_replica_count,
-            stats=stats,
-            last_scaled_at=last_scaled_at,
-        )
+        if configuration.replica_groups:
+            first_group = configuration.replica_groups[0]
+            scaler = get_service_scaler(count=first_group.replicas, scaling=first_group.scaling)
+            run_model.desired_replica_count = scaler.get_desired_count(
+                current_desired_count=run_model.desired_replica_count,
+                stats=stats,
+                last_scaled_at=last_scaled_at,
+            )
diff --git a/src/dstack/_internal/server/services/services/autoscalers.py b/src/dstack/_internal/server/services/services/autoscalers.py
@@ -6,7 +6,8 @@
 from pydantic import BaseModel
 
 import dstack._internal.utils.common as common_utils
-from dstack._internal.core.models.configurations import ServiceConfiguration
+from dstack._internal.core.models.configurations import ScalingSpec
+from dstack._internal.core.models.resources import Range
 from dstack._internal.proxy.gateway.schemas.stats import PerWindowStats
 
 
@@ -119,21 +120,21 @@ def get_desired_count(
         return new_desired_count
 
 
-def get_service_scaler(conf: ServiceConfiguration) -> BaseServiceScaler:
-    assert conf.replicas.min is not None
-    assert conf.replicas.max is not None
-    if conf.scaling is None:
+def get_service_scaler(count: Range[int], scaling: Optional[ScalingSpec]) -> BaseServiceScaler:
+    assert count.min is not None
+    assert count.max is not None
+    if scaling is None:
         return ManualScaler(
-            min_replicas=conf.replicas.min,
-            max_replicas=conf.replicas.max,
+            min_replicas=count.min,
+            max_replicas=count.max,
         )
-    if conf.scaling.metric == "rps":
+    if scaling.metric == "rps":
         return RPSAutoscaler(
             # replicas count validated by configuration model
-            min_replicas=conf.replicas.min,
-            max_replicas=conf.replicas.max,
-            target=conf.scaling.target,
-            scale_up_delay=conf.scaling.scale_up_delay,
-            scale_down_delay=conf.scaling.scale_down_delay,
+            min_replicas=count.min,
+            max_replicas=count.max,
+            target=scaling.target,
+            scale_up_delay=scaling.scale_up_delay,
+            scale_down_delay=scaling.scale_down_delay,
         )
-    raise ValueError(f"No scaler found for scaling parameters {conf.scaling}")
+    raise ValueError(f"No scaler found for scaling parameters {scaling}")