Skip to content

Commit 5f41f23

Browse files
committed
Revert job-related changes
To avoid extra testing and changes, limit the scope of the PR to instance termination reasons, as per the PR name. The introduction of `JobTerminationReason.NO_BALANCE` can be done in a separate PR. Its implementation may need to be updated to also use `JobTerminationReason.NO_BALANCE` on jobs with statuses other than `running`.
1 parent 1b20ae6 commit 5f41f23

5 files changed

Lines changed: 4 additions & 24 deletions

File tree

src/dstack/_internal/core/models/runs.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,6 @@ class JobTerminationReason(str, Enum):
139139
TERMINATED_BY_SERVER = "terminated_by_server"
140140
INACTIVITY_DURATION_EXCEEDED = "inactivity_duration_exceeded"
141141
TERMINATED_DUE_TO_UTILIZATION_POLICY = "terminated_due_to_utilization_policy"
142-
NO_BALANCE = "no_balance"
143142
# Set by the runner
144143
CONTAINER_EXITED_WITH_ERROR = "container_exited_with_error"
145144
PORTS_BINDING_FAILED = "ports_binding_failed"
@@ -163,7 +162,6 @@ def to_status(self) -> JobStatus:
163162
self.TERMINATED_BY_SERVER: JobStatus.TERMINATED,
164163
self.INACTIVITY_DURATION_EXCEEDED: JobStatus.TERMINATED,
165164
self.TERMINATED_DUE_TO_UTILIZATION_POLICY: JobStatus.TERMINATED,
166-
self.NO_BALANCE: JobStatus.TERMINATED,
167165
self.CONTAINER_EXITED_WITH_ERROR: JobStatus.FAILED,
168166
self.PORTS_BINDING_FAILED: JobStatus.FAILED,
169167
self.CREATING_CONTAINER_ERROR: JobStatus.FAILED,

src/dstack/_internal/server/background/tasks/process_running_jobs.py

Lines changed: 3 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818
from dstack._internal.core.models.files import FileArchiveMapping
1919
from dstack._internal.core.models.instances import (
2020
InstanceStatus,
21-
InstanceTerminationReason,
2221
RemoteConnectionInfo,
2322
SSHConnectionParams,
2423
)
@@ -59,7 +58,6 @@
5958
from dstack._internal.server.services.jobs import (
6059
find_job,
6160
get_job_attached_volumes,
62-
get_job_provisioning_data,
6361
get_job_runtime_data,
6462
is_master_job,
6563
job_model_to_job_submission,
@@ -390,22 +388,9 @@ async def _process_running_job(session: AsyncSession, job_model: JobModel):
390388
if job_model.disconnected_at is None:
391389
job_model.disconnected_at = common_utils.get_current_datetime()
392390
if _should_terminate_job_due_to_disconnect(job_model):
393-
if (
394-
job_model.instance is not None
395-
and job_model.instance.termination_reason
396-
== InstanceTerminationReason.NO_BALANCE
397-
):
398-
# if instance was terminated due to no balance, set job termination reason accordingly
399-
job_model.termination_reason = JobTerminationReason.NO_BALANCE
400-
else:
401-
job_provisioning_data = get_job_provisioning_data(job_model)
402-
# use JobTerminationReason.INSTANCE_UNREACHABLE for on-demand instances only
403-
job_model.termination_reason = (
404-
JobTerminationReason.INSTANCE_UNREACHABLE
405-
if job_provisioning_data
406-
and not job_provisioning_data.instance_type.resources.spot
407-
else JobTerminationReason.INTERRUPTED_BY_NO_CAPACITY
408-
)
391+
# TODO: Replace with JobTerminationReason.INSTANCE_UNREACHABLE for on-demand.
392+
job_model.termination_reason = JobTerminationReason.INTERRUPTED_BY_NO_CAPACITY
393+
job_model.termination_reason_message = "Instance is unreachable"
409394
switch_job_status(session, job_model, JobStatus.TERMINATING)
410395
else:
411396
logger.warning(

src/dstack/_internal/server/services/jobs/__init__.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -814,8 +814,6 @@ def _get_job_status_message(job_model: JobModel) -> str:
814814
return "stopped"
815815
elif job_model.termination_reason == JobTerminationReason.ABORTED_BY_USER:
816816
return "aborted"
817-
elif job_model.termination_reason == JobTerminationReason.NO_BALANCE:
818-
return "no balance"
819817
return job_model.status.value
820818

821819

src/tests/_internal/core/models/test_runs.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,6 @@ def test_get_error_returns_expected_messages():
4343
JobTerminationReason.ABORTED_BY_USER,
4444
JobTerminationReason.TERMINATED_BY_SERVER,
4545
JobTerminationReason.CONTAINER_EXITED_WITH_ERROR,
46-
JobTerminationReason.NO_BALANCE,
4746
]
4847

4948
for reason in JobTerminationReason:

src/tests/_internal/server/background/tasks/test_process_running_jobs.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -528,7 +528,7 @@ async def test_pulling_shim_failed(self, test_db, session: AsyncSession):
528528
assert SSHTunnelMock.call_count == 3
529529
await session.refresh(job)
530530
assert job.status == JobStatus.TERMINATING
531-
assert job.termination_reason == JobTerminationReason.INSTANCE_UNREACHABLE
531+
assert job.termination_reason == JobTerminationReason.INTERRUPTED_BY_NO_CAPACITY
532532
assert job.remove_at is None
533533

534534
@pytest.mark.asyncio

0 commit comments

Comments
 (0)