diff --git a/app/backup.py b/app/backup.py index 49e6abe6..15c8c2e0 100644 --- a/app/backup.py +++ b/app/backup.py @@ -474,7 +474,8 @@ def _stop_container(self, c: Container, attempt=1) -> bool: def _start_container(self, c: Container, attempt=1, max_attempts: Optional[int] = None) -> bool: if max_attempts is None: start_timeout = int(self.get_label(c, "start-timeout", str(self.env.START_TIMEOUT))) - max_attempts = max(1, start_timeout // 2) + max_attempts = max(1, (start_timeout // 2) + 1) + self.log_this(f"Container {c.name}: start-timeout={start_timeout}s (max_attempts={max_attempts})", "DEBUG") c.reload() # Refresh the status for this container status = c.status # Read once to avoid consuming multiple mock cycles @@ -1101,9 +1102,14 @@ def backup(self): for dir in dest_dirs: self._backup_additional_folders(c, dir) - if c.name not in self.containers_skipped: + if c.name not in self.containers_skipped and c.name not in self.containers_failed: self.containers_completed.add(c.name) self.log_this(f"Backup of {c.name} complete!", "INFO") + elif c.name in self.containers_failed and c.name not in self.containers_skipped: + self.log_this( + f"Backup data for {c.name} was completed, but the container failed to restart within the start timeout.", + "WARN", + ) for dir in dest_dirs: self._backup_additional_folders_standalone(BeforeOrAfter.AFTER, dir) diff --git a/pytest/test_backup.py b/pytest/test_backup.py index 50faec30..0dbe07a6 100644 --- a/pytest/test_backup.py +++ b/pytest/test_backup.py @@ -2999,10 +2999,21 @@ def test_start_container_three_restarting_states_eventually_running( "name": "container1", "id": "123456789", "labels": {"nautical-backup.start-timeout": "10"}, - # Needs 2 retries after c.start() to reach running. - # With START_TIMEOUT=2 (max_attempts=1) this would fail; - # the label overrides to 10s (max_attempts=5) so it succeeds. - "status_side_effect": ["running", "exited", "exited", "exited", "restarting", "restarting", "running"], + # Needs 4 retries after c.start() to reach running. + # START_TIMEOUT=2 → max_attempts=2 (formula: (2//2)+1=2), so attempt=3 sees + # "restarting" and returns False — failure without the label. + # Label start-timeout=10 → max_attempts=6 ((10//2)+1), so it succeeds. + "status_side_effect": [ + "running", + "exited", + "exited", + "exited", + "restarting", + "restarting", + "restarting", + "restarting", + "running", + ], } ], indirect=True, @@ -3016,7 +3027,7 @@ def test_START_TIMEOUT_label_supersedes_env( ): """Per-container start-timeout label should override the global START_TIMEOUT env var.""" mock_subprocess_run.return_value.returncode = 0 - monkeypatch.setenv("START_TIMEOUT", "2") # max_attempts=1 — would fail without the label + monkeypatch.setenv("START_TIMEOUT", "2") # max_attempts=2 — exhausted before container is running mock_docker_client.containers.list.return_value = [mock_container1] nb = NauticalBackup(mock_docker_client) @@ -3080,6 +3091,54 @@ def test_rsync_exit_code_23_includes_symlink_hint( assert "symlinks" in nb.error_messages[0] assert "container1" in nb.containers_failed + @mock.patch("builtins.print") + @mock.patch("subprocess.run") + @pytest.mark.parametrize( + "mock_container1", + [ + { + "name": "container1", + "id": "123456789", + # Container never reaches running after start — stays restarting through all attempts. + # START_TIMEOUT=2 → max_attempts=(2//2)+1=2. + # Reads: stop-check, after-stop, during-backup, attempt-1, post-start, attempt-2, attempt-3(→False) + "status_side_effect": [ + "running", + "exited", + "exited", + "exited", + "restarting", + "restarting", + "restarting", + ], + } + ], + indirect=True, + ) + def test_start_container_exhausted_retries_not_in_completed( + self, + mock_subprocess_run: MagicMock, + mock_print: MagicMock, + mock_docker_client: MagicMock, + mock_container1: MagicMock, + monkeypatch: pytest.MonkeyPatch, + ): + """When _start_container exhausts all retries, container must be in containers_failed, + NOT in containers_completed, and a WARN about the partial backup should be logged.""" + mock_subprocess_run.return_value.returncode = 0 + monkeypatch.setenv("START_TIMEOUT", "2") # max_attempts=2 → 3 total attempts before giving up + + mock_docker_client.containers.list.return_value = [mock_container1] + nb = NauticalBackup(mock_docker_client) + nb.backup() + + assert "container1" in nb.containers_failed + assert "container1" not in nb.containers_completed + + printed = [call_args[0][0] for call_args in mock_print.call_args_list] + assert any("failed to restart within the start timeout" in line for line in printed) + assert not any("Backup of container1 complete!" in line for line in printed) + # ------------------------------------------------------------------------- # Retention policy tests # -------------------------------------------------------------------------