Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions maintenance/scripts/aws_backup.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,32 @@ def push(self, src: Path, dest: str) -> subprocess.CompletedProcess[str]:
s3_uri = f"{self.bucket}/{dest}"
return run_cmd(["aws", "s3", "cp", "--no-progress", str(src), s3_uri])

def push_stream(self, dest: str) -> subprocess.Popen[bytes]:
    """Launch an `aws s3 cp` process that uploads its stdin to the bucket.

    The object is written to ``<bucket>/<dest>``. The returned process has
    its stdin and stderr attached to pipes: write the payload to stdin, then
    close stdin and wait for the process to exit to finalize the upload.
    Anything the AWS CLI writes to stderr is left on the stderr pipe for the
    caller to read.
    """
    # "-" as the source tells `aws s3 cp` to read the payload from stdin.
    upload_cmd = ["aws", "s3", "cp", "--no-progress", "-", f"{self.bucket}/{dest}"]
    return subprocess.Popen(upload_cmd, stdin=subprocess.PIPE, stderr=subprocess.PIPE)

def pull(self, src: str, dest: Path) -> subprocess.CompletedProcess[str]:
    """Copy the object ``<bucket>/<src>`` from S3 to the local path ``dest``."""
    download_cmd = ["aws", "s3", "cp", "--no-progress", f"{self.bucket}/{src}", str(dest)]
    return run_cmd(download_cmd)

def delete(self, dest: str) -> subprocess.CompletedProcess[str]:
    """Remove the object ``<bucket>/<dest>`` from S3 with `aws s3 rm`.

    run_cmd is invoked with check_results=False so that a failed removal
    (for example, of an object that does not exist) is not raised as an
    error; callers can inspect the returned result instead.
    """
    remove_cmd = ["aws", "s3", "rm", f"{self.bucket}/{dest}"]
    return run_cmd(remove_cmd, check_results=False)

def list(self) -> subprocess.CompletedProcess[str]:
    """Run a recursive `aws s3 ls` over the bucket and return the result."""
    listing_cmd = ["aws", "s3", "ls", f"{self.bucket}", "--recursive"]
    return run_cmd(listing_cmd)
74 changes: 48 additions & 26 deletions maintenance/scripts/combine_backup.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,6 @@ def main() -> None:

step.print("Prepare the backup directory.")
with tempfile.TemporaryDirectory() as backup_dir:
backup_file = Path("combine-backup.tar.gz")
date_str = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
aws_file = f"{combine_host}-{date_str}.tar.gz"

Expand Down Expand Up @@ -90,35 +89,58 @@ def main() -> None:
print("Cannot find the backend container.", file=sys.stderr)
sys.exit(1)

step.print("Create the tarball for the backup.")
step.print("Create the tarball and push to AWS S3 storage.")
# cd to backup_dir so that files in the tarball are relative to the backup_dir
os.chdir(backup_dir)

with tarfile.open(backup_file, "x:gz") as tar:
# cp, tar, and rm the db subdir
combine.kubectl(
[
"cp",
f"{db_pod}:/{db_files_subdir}",
str(Path(backup_dir) / db_files_subdir),
]
# Stream the tar.gz directly to S3 to avoid writing the archive to local disk.
upload_proc = aws.push_stream(aws_file)
try:
with tarfile.open(fileobj=upload_proc.stdin, mode="w:gz") as tar:
# cp, tar, and rm the db subdir
combine.kubectl(
[
"cp",
f"{db_pod}:/{db_files_subdir}",
str(Path(backup_dir) / db_files_subdir),
]
)
tar.add(db_files_subdir)
rmtree(db_files_subdir)

# cp, tar, and rm the backend subdir
combine.kubectl(
[
"cp",
f"{backend_pod}:/home/app/{backend_files_subdir}/",
str(Path(backup_dir) / backend_files_subdir),
]
)
tar.add(backend_files_subdir)
rmtree(backend_files_subdir)
except (Exception, SystemExit):
# Kill the upload to prevent a partial object from being finalized in S3,
# then remove any partial object that may already have been written.
upload_proc.kill()
upload_proc.wait()
print(f"Cleaning up partial S3 backup {aws_file}.", file=sys.stderr)
delete_result = aws.delete(aws_file)
if delete_result.returncode != 0:
print(f"Warning: failed to delete partial S3 backup {aws_file}.", file=sys.stderr)
raise
finally:
if upload_proc.stdin:
upload_proc.stdin.close()
_, upload_stderr = upload_proc.communicate()
if upload_proc.returncode != 0:
print(
f"Failed to push backup to AWS S3:\n{upload_stderr.decode() if upload_stderr else ''}",
file=sys.stderr,
)
tar.add(db_files_subdir)
rmtree(db_files_subdir)

# cp, tar, and rm the backend subdir
combine.kubectl(
[
"cp",
f"{backend_pod}:/home/app/{backend_files_subdir}/",
str(Path(backup_dir) / backend_files_subdir),
]
)
tar.add(backend_files_subdir)
rmtree(backend_files_subdir)

step.print("Push backup to AWS S3 storage.")
aws.push(backup_file, aws_file)
delete_result = aws.delete(aws_file)
if delete_result.returncode != 0:
print(f"Warning: failed to delete partial S3 backup {aws_file}.", file=sys.stderr)
sys.exit(upload_proc.returncode)


if __name__ == "__main__":
Expand Down