Skip to content

Commit 7631d71

Browse files
[UX] Make dstack fleet show more useful information
1 parent 14ef341 commit 7631d71

2 files changed

Lines changed: 806 additions & 75 deletions

File tree

Lines changed: 275 additions & 75 deletions
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,12 @@
1-
from typing import List
1+
from typing import Any, Dict, List, Optional, Union
22

33
from rich.table import Table
44

55
from dstack._internal.cli.utils.common import add_row_from_dict, console
66
from dstack._internal.core.models.backends.base import BackendType
7-
from dstack._internal.core.models.fleets import Fleet, FleetStatus
8-
from dstack._internal.core.models.instances import InstanceStatus
7+
from dstack._internal.core.models.fleets import Fleet, FleetNodesSpec, FleetStatus
8+
from dstack._internal.core.models.instances import Instance, InstanceStatus
9+
from dstack._internal.core.models.resources import GPUSpec, ResourcesSpec
910
from dstack._internal.utils.common import DateFormatter, pretty_date
1011

1112

@@ -14,93 +15,292 @@ def print_fleets_table(fleets: List[Fleet], verbose: bool = False) -> None:
1415
console.print()
1516

1617

18+
def _format_nodes(nodes: Optional[FleetNodesSpec]) -> str:
19+
"""Format nodes spec as '0..1', '3', '2..10', etc."""
20+
if nodes is None:
21+
return "-"
22+
if nodes.min == nodes.max:
23+
return str(nodes.min)
24+
if nodes.max is None:
25+
return f"{nodes.min}.."
26+
return f"{nodes.min}..{nodes.max}"
27+
28+
29+
def _format_backends(backends: Optional[List[BackendType]]) -> str:
30+
if backends is None or len(backends) == 0:
31+
return "*"
32+
return ", ".join(b.value.replace("remote", "ssh") for b in backends)
33+
34+
35+
def _format_range(min_val: Optional[Any], max_val: Optional[Any]) -> str:
36+
if min_val is None and max_val is None:
37+
return ""
38+
if min_val == max_val:
39+
return str(min_val)
40+
if max_val is None:
41+
return f"{min_val}.."
42+
if min_val is None:
43+
return f"..{max_val}"
44+
return f"{min_val}..{max_val}"
45+
46+
47+
def _format_fleet_gpu(resources: Optional[ResourcesSpec]) -> str:
    """Summarise the GPU part of a fleet's resource requirements.

    Returns "-" when there is no resources spec, no GPU spec, no GPU count,
    or the count starts at 0 with an upper bound of 0 or none. Otherwise
    returns colon-separated segments: the comma-joined GPU names (or the
    literal "gpu" when no names are set), the memory range if present, and
    the count range.
    """
    gpu: Optional[GPUSpec] = None if resources is None else resources.gpu
    if gpu is None:
        return "-"

    count = gpu.count
    if count is None:
        return "-"

    # A count of "0", "0..0" or "0.." is treated as no GPU requirement.
    upper_is_zero_or_open = count.max is None or count.max == 0
    if count.min == 0 and upper_is_zero_or_open:
        return "-"

    segments = [",".join(gpu.name) if gpu.name else "gpu"]

    if gpu.memory is not None:
        memory_range = _format_range(gpu.memory.min, gpu.memory.max)
        if memory_range:
            segments.append(memory_range)

    count_range = _format_range(count.min, count.max)
    if count_range:
        segments.append(count_range)

    return ":".join(segments)
79+
80+
81+
def _format_fleet_status(fleet: Fleet) -> str:
    """Return the fleet's status value wrapped in Rich markup.

    The colour is looked up per status, falling back to "white". Statuses
    other than TERMINATED and FAILED are additionally rendered bold.
    """
    status = fleet.status
    colors = {
        FleetStatus.SUBMITTED: "grey",
        FleetStatus.ACTIVE: "white",
        FleetStatus.TERMINATING: "deep_sky_blue1",
        FleetStatus.TERMINATED: "grey",
        FleetStatus.FAILED: "indian_red1",
    }
    color = colors.get(status, "white")
    if status in (FleetStatus.TERMINATED, FleetStatus.FAILED):
        style = color
    else:
        style = f"bold {color}"
    return f"[{style}]{status.value}[/]"
96+
97+
98+
def _format_instance_status(instance: Instance) -> str:
    """Return the instance's status, with health info, wrapped in Rich markup.

    For IDLE/BUSY instances with more than one block the text becomes
    "<busy>/<total> busy-status-value". IDLE/BUSY instances also get a
    " (unreachable)" suffix, or the health status value in parentheses when
    the instance is not healthy. Every status except TERMINATED is bold.
    """
    status = instance.status
    is_active = status in (InstanceStatus.IDLE, InstanceStatus.BUSY)

    label = status.value
    blocks = instance.total_blocks
    if is_active and blocks is not None and blocks > 1:
        label = f"{instance.busy_blocks}/{blocks} {InstanceStatus.BUSY.value}"

    # Health details are only shown for instances that are up (idle or busy);
    # unreachability takes precedence over the health status.
    suffix = ""
    if is_active:
        if instance.unreachable:
            suffix = " (unreachable)"
        elif not instance.health_status.is_healthy():
            suffix = f" ({instance.health_status.value})"

    colors = {
        InstanceStatus.PENDING: "deep_sky_blue1",
        InstanceStatus.PROVISIONING: "deep_sky_blue1",
        InstanceStatus.IDLE: "sea_green3",
        InstanceStatus.BUSY: "white",
        InstanceStatus.TERMINATING: "deep_sky_blue1",
        InstanceStatus.TERMINATED: "grey",
    }
    color = colors.get(status, "white")
    style = color if status == InstanceStatus.TERMINATED else f"bold {color}"
    return f"[{style}]{label}{suffix}[/]"
132+
133+
134+
def _format_backend(backend: Optional[BackendType], region: Optional[str]) -> str:
    """Render an instance's backend, optionally followed by "(region)".

    Returns "-" when ``backend`` is ``None``. The REMOTE backend is shown
    as "ssh". The region is appended in parentheses only when it is truthy.
    """
    if backend is None:
        return "-"
    name = "ssh" if backend == BackendType.REMOTE else backend.value
    return f"{name} ({region})" if region else name
143+
144+
145+
def _format_price(price: Optional[float]) -> str:
146+
if price is None:
147+
return "-"
148+
return f"${price:.4f}".rstrip("0").rstrip(".")
149+
150+
151+
def _format_instance_gpu(instance: Instance) -> str:
    """Return the GPU-only resource summary of an instance, or "-".

    "-" is returned when the instance has no instance type, when it is a
    REMOTE-backend instance that is still PENDING or PROVISIONING
    (presumably its resources are not known yet -- confirm), or when the
    GPU-only summary is empty.
    """
    instance_type = instance.instance_type
    if instance_type is None:
        return "-"
    not_ready_remote = instance.backend == BackendType.REMOTE and instance.status in (
        InstanceStatus.PENDING,
        InstanceStatus.PROVISIONING,
    )
    if not_ready_remote:
        return "-"
    gpu_text = instance_type.resources.pretty_format(gpu_only=True, include_spot=False)
    return gpu_text or "-"
160+
161+
162+
def _format_instance_resources(instance: Instance) -> str:
    """Return the full resource summary of an instance (without spot info).

    "-" is returned when the instance has no instance type, or when it is a
    REMOTE-backend instance that is still PENDING or PROVISIONING
    (presumably its resources are not known yet -- confirm).
    """
    instance_type = instance.instance_type
    if instance_type is None:
        return "-"
    not_ready_remote = instance.backend == BackendType.REMOTE and instance.status in (
        InstanceStatus.PENDING,
        InstanceStatus.PROVISIONING,
    )
    if not_ready_remote:
        return "-"
    return instance_type.resources.pretty_format(include_spot=False)
171+
172+
17173
def get_fleets_table(
    fleets: List[Fleet], verbose: bool = False, format_date: DateFormatter = pretty_date
) -> Table:
    """Build the Rich table listing fleets and their instances.

    Each fleet contributes one summary row, followed by one row per instance
    styled "secondary". A fleet with no instances gets a "(no instances)"
    placeholder row unless it is terminating.

    Args:
        fleets: Fleets to render, in display order.
        verbose: When true, show a RESOURCES column instead of GPU, add an
            ERROR column, mark cluster placement in NODES, and show the
            availability zone instead of the region when one is set.
        format_date: Callable used to render the CREATED values.

    Returns:
        The populated table.
    """
    table = Table(box=None)

    # The third column and the trailing ERROR column depend on verbosity.
    table.add_column("NAME", style="bold", no_wrap=True)
    table.add_column("NODES")
    table.add_column("RESOURCES" if verbose else "GPU")
    table.add_column("SPOT")
    table.add_column("BACKEND")
    table.add_column("PRICE")
    table.add_column("STATUS", no_wrap=True)
    table.add_column("CREATED", no_wrap=True)
    if verbose:
        table.add_column("ERROR")

    for fleet in fleets:
        config = fleet.spec.configuration
        profile = fleet.spec.merged_profile

        # Fleet summary row. Spot policy and max price stay "-" unless a
        # backend fleet's merged profile sets them.
        spot_policy = "-"
        max_price = "-"
        if config.ssh_config is not None:
            # SSH fleet: node count is the number of configured hosts.
            nodes = str(len(config.ssh_config.hosts))
            backend = "ssh"
        else:
            nodes = _format_nodes(config.nodes)
            backend = _format_backends(config.backends)
            if profile:
                if profile.spot_policy:
                    spot_policy = profile.spot_policy.value
                if profile.max_price is not None:
                    # Shown as a "$0..$X" range up to the configured cap.
                    max_price = f"$0..{_format_price(profile.max_price)}"

        if verbose and config.placement and config.placement.value == "cluster":
            nodes = f"{nodes} (cluster)"

        fleet_row: Dict[Union[str, int], Any] = {
            "NAME": fleet.name,
            "NODES": nodes,
            "BACKEND": backend,
            "PRICE": max_price,
            "SPOT": spot_policy,
            "STATUS": _format_fleet_status(fleet),
            "CREATED": format_date(fleet.created_at),
        }
        if verbose:
            fleet_row["RESOURCES"] = config.resources.pretty_format() if config.resources else "-"
            fleet_row["ERROR"] = ""
        else:
            fleet_row["GPU"] = _format_fleet_gpu(config.resources)
        add_row_from_dict(table, fleet_row)

        # One row per instance, listed under the fleet row.
        for instance in fleet.instances:
            # In verbose mode the availability zone, when set, replaces the region.
            location = instance.region
            if verbose and instance.availability_zone:
                location = instance.availability_zone
            backend_with_region = _format_backend(instance.backend, location)

            # Spot and price are only reported for non-SSH instances.
            instance_spot = "-"
            instance_price = "-"
            is_ssh_instance = instance.backend == BackendType.REMOTE
            if not is_ssh_instance:
                instance_type = instance.instance_type
                if instance_type is not None and instance_type.resources is not None:
                    instance_spot = "spot" if instance_type.resources.spot else "on-demand"
                instance_price = _format_price(instance.price)

            instance_row: Dict[Union[str, int], Any] = {
                "NAME": f" instance={instance.instance_num}",
                "NODES": "",
                "BACKEND": backend_with_region,
                "PRICE": instance_price,
                "SPOT": instance_spot,
                "STATUS": _format_instance_status(instance),
                "CREATED": format_date(instance.created),
            }
            if verbose:
                instance_row["RESOURCES"] = _format_instance_resources(instance)
                # The termination reason is only shown for terminated instances.
                error = ""
                if instance.status == InstanceStatus.TERMINATED and instance.termination_reason:
                    error = instance.termination_reason
                instance_row["ERROR"] = error
            else:
                instance_row["GPU"] = _format_instance_gpu(instance)
            add_row_from_dict(table, instance_row, style="secondary")

        # Placeholder row for a fleet with no instances that is not terminating.
        if not fleet.instances and fleet.status != FleetStatus.TERMINATING:
            empty_row: Dict[Union[str, int], Any] = {"NAME": " (no instances)"}
            blank_columns = ["NODES", "BACKEND", "PRICE", "SPOT", "STATUS", "CREATED"]
            blank_columns += ["RESOURCES", "ERROR"] if verbose else ["GPU"]
            empty_row.update(dict.fromkeys(blank_columns, ""))
            add_row_from_dict(table, empty_row, style="secondary")

    return table

0 commit comments

Comments
 (0)