From 3d92132452b223a10dd7be9a08bc53d3edc1946a Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 10 Apr 2026 21:34:11 +0000 Subject: [PATCH 1/3] Initial plan From e751d633ee224c7136f2122b4803e6c1ae168c78 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 10 Apr 2026 21:40:27 +0000 Subject: [PATCH 2/3] Stream backup tar.gz directly to S3 to reduce ephemeral storage Agent-Logs-Url: https://github.com/sillsdev/TheCombine/sessions/aa2742e4-8a85-48c9-82a2-3d3d7ee75063 Co-authored-by: imnasnainaec <6411521+imnasnainaec@users.noreply.github.com> --- maintenance/scripts/aws_backup.py | 13 ++++++ maintenance/scripts/combine_backup.py | 61 +++++++++++++++------------ 2 files changed, 48 insertions(+), 26 deletions(-) diff --git a/maintenance/scripts/aws_backup.py b/maintenance/scripts/aws_backup.py index 5238110f90..813d59eda8 100644 --- a/maintenance/scripts/aws_backup.py +++ b/maintenance/scripts/aws_backup.py @@ -23,6 +23,19 @@ def push(self, src: Path, dest: str) -> subprocess.CompletedProcess[str]: s3_uri = f"{self.bucket}/{dest}" return run_cmd(["aws", "s3", "cp", "--no-progress", str(src), s3_uri]) + def push_stream(self, dest: str) -> subprocess.Popen[bytes]: + """Start a streaming upload to the AWS S3 bucket. + + Returns a Popen process whose stdin accepts the data to upload. + The caller must close stdin and call wait() to finalize the upload. + """ + s3_uri = f"{self.bucket}/{dest}" + return subprocess.Popen( + ["aws", "s3", "cp", "--no-progress", "-", s3_uri], + stdin=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + def pull(self, src: str, dest: Path) -> subprocess.CompletedProcess[str]: """Pull a file from the AWS S3 bucket.""" s3_uri = f"{self.bucket}/{src}" diff --git a/maintenance/scripts/combine_backup.py b/maintenance/scripts/combine_backup.py index b61c2d942b..dc755b5ea4 100755 --- a/maintenance/scripts/combine_backup.py +++ b/maintenance/scripts/combine_backup.py @@ -54,7 +54,6 @@ def main() -> None: step.print("Prepare the backup directory.") with tempfile.TemporaryDirectory() as backup_dir: - backup_file = Path("combine-backup.tar.gz") date_str = datetime.now().strftime("%Y-%m-%d-%H-%M-%S") aws_file = f"{combine_host}-{date_str}.tar.gz" @@ -90,35 +89,45 @@ def main() -> None: print("Cannot find the backend container.", file=sys.stderr) sys.exit(1) - step.print("Create the tarball for the backup.") + step.print("Create the tarball and push to AWS S3 storage.") # cd to backup_dir so that files in the tarball are relative to the backup_dir os.chdir(backup_dir) - with tarfile.open(backup_file, "x:gz") as tar: - # cp, tar, and rm the db subdir - combine.kubectl( - [ - "cp", - f"{db_pod}:/{db_files_subdir}", - str(Path(backup_dir) / db_files_subdir), - ] + # Stream the tar.gz directly to S3 to avoid writing the archive to local disk. + upload_proc = aws.push_stream(aws_file) + try: + with tarfile.open(fileobj=upload_proc.stdin, mode="w:gz") as tar: + # cp, tar, and rm the db subdir + combine.kubectl( + [ + "cp", + f"{db_pod}:/{db_files_subdir}", + str(Path(backup_dir) / db_files_subdir), + ] + ) + tar.add(db_files_subdir) + rmtree(db_files_subdir) + + # cp, tar, and rm the backend subdir + combine.kubectl( + [ + "cp", + f"{backend_pod}:/home/app/{backend_files_subdir}/", + str(Path(backup_dir) / backend_files_subdir), + ] + ) + tar.add(backend_files_subdir) + rmtree(backend_files_subdir) + finally: + if upload_proc.stdin: + upload_proc.stdin.close() + _, upload_stderr = upload_proc.communicate() + if upload_proc.returncode != 0: + print( + f"Failed to push backup to AWS S3:\n{upload_stderr.decode() if upload_stderr else ''}", + file=sys.stderr, ) - tar.add(db_files_subdir) - rmtree(db_files_subdir) - - # cp, tar, and rm the backend subdir - combine.kubectl( - [ - "cp", - f"{backend_pod}:/home/app/{backend_files_subdir}/", - str(Path(backup_dir) / backend_files_subdir), - ] - ) - tar.add(backend_files_subdir) - rmtree(backend_files_subdir) - - step.print("Push backup to AWS S3 storage.") - aws.push(backup_file, aws_file) + sys.exit(upload_proc.returncode) if __name__ == "__main__": From c4d01a2edf55e2a8eb7613da15ad03d38f494c5a Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 23 Apr 2026 18:41:26 +0000 Subject: [PATCH 3/3] Kill upload and clean up partial S3 object on backup failure Agent-Logs-Url: https://github.com/sillsdev/TheCombine/sessions/ea2b91ff-fe07-4dcc-bca0-8bc9ef35f46b Co-authored-by: imnasnainaec <6411521+imnasnainaec@users.noreply.github.com> --- maintenance/scripts/aws_backup.py | 8 ++++++++ maintenance/scripts/combine_backup.py | 13 +++++++++++++ 2 files changed, 21 insertions(+) diff --git a/maintenance/scripts/aws_backup.py b/maintenance/scripts/aws_backup.py index 813d59eda8..f1f16bed47 100644 --- a/maintenance/scripts/aws_backup.py +++ b/maintenance/scripts/aws_backup.py @@ -41,6 +41,14 @@ def pull(self, src: str, dest: Path) -> subprocess.CompletedProcess[str]: s3_uri = f"{self.bucket}/{src}" return run_cmd(["aws", "s3", "cp", "--no-progress", s3_uri, str(dest)]) + def delete(self, dest: str) -> subprocess.CompletedProcess[str]: + """Delete an object from the AWS S3 bucket. + + Uses check_results=False so that a missing object does not raise an error. + """ + s3_uri = f"{self.bucket}/{dest}" + return run_cmd(["aws", "s3", "rm", s3_uri], check_results=False) + def list(self) -> subprocess.CompletedProcess[str]: """List the objects in the S3 bucket.""" return run_cmd(["aws", "s3", "ls", f"{self.bucket}", "--recursive"]) diff --git a/maintenance/scripts/combine_backup.py b/maintenance/scripts/combine_backup.py index dc755b5ea4..63c46234b8 100755 --- a/maintenance/scripts/combine_backup.py +++ b/maintenance/scripts/combine_backup.py @@ -118,6 +118,16 @@ def main() -> None: ) tar.add(backend_files_subdir) rmtree(backend_files_subdir) + except (Exception, SystemExit): + # Kill the upload to prevent a partial object from being finalized in S3, + # then remove any partial object that may already have been written. + upload_proc.kill() + upload_proc.wait() + print(f"Cleaning up partial S3 backup {aws_file}.", file=sys.stderr) + delete_result = aws.delete(aws_file) + if delete_result.returncode != 0: + print(f"Warning: failed to delete partial S3 backup {aws_file}.", file=sys.stderr) + raise finally: if upload_proc.stdin: upload_proc.stdin.close() @@ -127,6 +137,9 @@ def main() -> None: f"Failed to push backup to AWS S3:\n{upload_stderr.decode() if upload_stderr else ''}", file=sys.stderr, ) + delete_result = aws.delete(aws_file) + if delete_result.returncode != 0: + print(f"Warning: failed to delete partial S3 backup {aws_file}.", file=sys.stderr) sys.exit(upload_proc.returncode)