Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions app/backup.py
Original file line number Diff line number Diff line change
Expand Up @@ -474,7 +474,8 @@ def _stop_container(self, c: Container, attempt=1) -> bool:
def _start_container(self, c: Container, attempt=1, max_attempts: Optional[int] = None) -> bool:
if max_attempts is None:
start_timeout = int(self.get_label(c, "start-timeout", str(self.env.START_TIMEOUT)))
max_attempts = max(1, start_timeout // 2)
max_attempts = max(1, (start_timeout // 2) + 1)
self.log_this(f"Container {c.name}: start-timeout={start_timeout}s (max_attempts={max_attempts})", "DEBUG")

c.reload() # Refresh the status for this container
status = c.status # Read once to avoid consuming multiple mock cycles
Expand Down Expand Up @@ -1101,9 +1102,14 @@ def backup(self):
for dir in dest_dirs:
self._backup_additional_folders(c, dir)

if c.name not in self.containers_skipped:
if c.name not in self.containers_skipped and c.name not in self.containers_failed:
self.containers_completed.add(c.name)
self.log_this(f"Backup of {c.name} complete!", "INFO")
elif c.name in self.containers_failed and c.name not in self.containers_skipped:
self.log_this(
f"Backup data for {c.name} was completed, but the container failed to restart within the start timeout.",
"WARN",
)

for dir in dest_dirs:
self._backup_additional_folders_standalone(BeforeOrAfter.AFTER, dir)
Expand Down
69 changes: 64 additions & 5 deletions pytest/test_backup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2999,10 +2999,21 @@ def test_start_container_three_restarting_states_eventually_running(
"name": "container1",
"id": "123456789",
"labels": {"nautical-backup.start-timeout": "10"},
# Needs 2 retries after c.start() to reach running.
# With START_TIMEOUT=2 (max_attempts=1) this would fail;
# the label overrides to 10s (max_attempts=5) so it succeeds.
"status_side_effect": ["running", "exited", "exited", "exited", "restarting", "restarting", "running"],
# Needs 4 retries after c.start() to reach running.
# START_TIMEOUT=2 → max_attempts=2 (formula: (2//2)+1=2), so attempt=3 sees
# "restarting" and returns False — failure without the label.
# Label start-timeout=10 → max_attempts=6 ((10//2)+1), so it succeeds.
"status_side_effect": [
"running",
"exited",
"exited",
"exited",
"restarting",
"restarting",
"restarting",
"restarting",
"running",
],
}
],
indirect=True,
Expand All @@ -3016,7 +3027,7 @@ def test_START_TIMEOUT_label_supersedes_env(
):
"""Per-container start-timeout label should override the global START_TIMEOUT env var."""
mock_subprocess_run.return_value.returncode = 0
monkeypatch.setenv("START_TIMEOUT", "2") # max_attempts=1would fail without the label
monkeypatch.setenv("START_TIMEOUT", "2") # max_attempts=2exhausted before container is running

mock_docker_client.containers.list.return_value = [mock_container1]
nb = NauticalBackup(mock_docker_client)
Expand Down Expand Up @@ -3080,6 +3091,54 @@ def test_rsync_exit_code_23_includes_symlink_hint(
assert "symlinks" in nb.error_messages[0]
assert "container1" in nb.containers_failed

@mock.patch("builtins.print")
@mock.patch("subprocess.run")
@pytest.mark.parametrize(
"mock_container1",
[
{
"name": "container1",
"id": "123456789",
# Container never reaches running after start — stays restarting through all attempts.
# START_TIMEOUT=2 → max_attempts=(2//2)+1=2.
# Reads: stop-check, after-stop, during-backup, attempt-1, post-start, attempt-2, attempt-3(→False)
"status_side_effect": [
"running",
"exited",
"exited",
"exited",
"restarting",
"restarting",
"restarting",
],
}
],
indirect=True,
)
def test_start_container_exhausted_retries_not_in_completed(
self,
mock_subprocess_run: MagicMock,
mock_print: MagicMock,
mock_docker_client: MagicMock,
mock_container1: MagicMock,
monkeypatch: pytest.MonkeyPatch,
):
"""When _start_container exhausts all retries, container must be in containers_failed,
NOT in containers_completed, and a WARN about the partial backup should be logged."""
mock_subprocess_run.return_value.returncode = 0
monkeypatch.setenv("START_TIMEOUT", "2") # max_attempts=2 → 3 total attempts before giving up

mock_docker_client.containers.list.return_value = [mock_container1]
nb = NauticalBackup(mock_docker_client)
nb.backup()

assert "container1" in nb.containers_failed
assert "container1" not in nb.containers_completed

printed = [call_args[0][0] for call_args in mock_print.call_args_list]
assert any("failed to restart within the start timeout" in line for line in printed)
assert not any("Backup of container1 complete!" in line for line in printed)

# -------------------------------------------------------------------------
# Retention policy tests
# -------------------------------------------------------------------------
Expand Down