diff --git a/S3MP/_version.py b/S3MP/_version.py index 1ee95cb..363c446 100644 --- a/S3MP/_version.py +++ b/S3MP/_version.py @@ -1,3 +1,3 @@ """Semantic versioning for S3MP.""" -__version__ = "0.5.3" +__version__ = "0.6.0" diff --git a/S3MP/mirror_path.py b/S3MP/mirror_path.py index 9ebad63..04fe8f9 100644 --- a/S3MP/mirror_path.py +++ b/S3MP/mirror_path.py @@ -37,7 +37,11 @@ def s3_key(self) -> str: """Get s3 key.""" ret_key = "/".join([str(s.name) for s in self.key_segments]) # We'll infer folder/file based on extension - return ret_key if '.' in self.key_segments[-1].name else f"{ret_key}/" + # HACK to catch case where the "extension" is actually a part of the folder name + # (eg, a folder named "v0.1.0"), we check if the extension is actually a number + name = self.key_segments[-1].name + ext = name.split('.')[-1] + return ret_key if (ext is not name and not ext.isdigit()) else f"{ret_key}/" @property def local_path(self) -> Path: @@ -86,6 +90,10 @@ def is_file_on_s3(self) -> bool: """Check if is a file on s3.""" return key_is_file_on_s3(self.s3_key) + def is_file_and_exists_on_s3(self) -> bool: + """Check if is a file and exists on s3.""" + return self.exists_on_s3() and self.is_file_on_s3() + def update_callback_on_skipped_transfer(self): """Update the current global callback if the transfer gets skipped.""" if S3MPConfig.callback and self in S3MPConfig.callback._transfer_objs: @@ -151,28 +159,7 @@ def get_child(self, child_name: str) -> MirrorPath: def get_children_on_s3(self) -> List[MirrorPath]: """Get all children on s3.""" - child_s3_keys = [] - continuation_token = None - - while True: - resp = s3_list_child_keys( - self.s3_key, continuation_token=continuation_token - ) - - # Collect keys from the current response - if "Contents" in resp: - child_s3_keys.extend( - obj["Key"] for obj in resp["Contents"] if obj["Key"] != self.s3_key - ) - if "CommonPrefixes" in resp: - child_s3_keys.extend(obj["Prefix"] for obj in resp["CommonPrefixes"]) - - # Check if there are more pages to fetch - if "NextContinuationToken" in resp: - continuation_token = resp["NextContinuationToken"] - else: - break - return [MirrorPath.from_s3_key(key) for key in child_s3_keys] + return [MirrorPath.from_s3_key(key) for key in s3_list_child_keys(self.s3_key)] def get_parent(self) -> MirrorPath: """Get the parent of this file.""" diff --git a/S3MP/utils/s3_utils.py b/S3MP/utils/s3_utils.py index 11759fa..e89933a 100644 --- a/S3MP/utils/s3_utils.py +++ b/S3MP/utils/s3_utils.py @@ -3,8 +3,7 @@ from pathlib import Path from S3MP.global_config import S3MPConfig from S3MP.types import S3Bucket, S3Client, S3ListObjectV2Output -from botocore.exceptions import ClientError - +from typing import List def s3_list_single_key( key: str, @@ -20,10 +19,9 @@ def s3_list_single_key( def s3_list_child_keys( key: str, - bucket=None, - client=None, - continuation_token=None, -): + bucket: S3Bucket = None, + client: S3Client = None, +) -> List[str]: """List details of all child keys on S3.""" if not key.endswith("/"): warnings.warn(f"Listing child keys of {key} - key does not end with '/'") @@ -34,9 +32,29 @@ def s3_list_child_keys( "Prefix": key, "Delimiter": "/", } - if continuation_token: - params["ContinuationToken"] = continuation_token - return client.list_objects_v2(**params) + child_s3_keys: List[str] = [] + continuation_token: str = None + while True: + if continuation_token: + params["ContinuationToken"] = continuation_token + + resp = client.list_objects_v2(**params) + + # Collect keys from the current response + if "Contents" in resp: + child_s3_keys.extend( + obj["Key"] for obj in resp["Contents"] if obj["Key"] != key + ) + if "CommonPrefixes" in resp: + child_s3_keys.extend(obj["Prefix"] for obj in resp["CommonPrefixes"]) + + # Check if there are more pages to fetch + if "NextContinuationToken" in resp: + continuation_token = resp["NextContinuationToken"] + else: + break + + return child_s3_keys def download_key( key: str, @@ -47,12 +65,18 @@ def download_key( """Download a key from S3.""" bucket = bucket or S3MPConfig.bucket client = client or S3MPConfig.s3_client + + if not key_exists_on_s3(key, bucket, client): + raise ValueError(f"Key {key} does not exist on S3") + + # If the key is a file, download it + # Otherwise, download all child keys if key_is_file_on_s3(key, bucket, client): local_path.parent.mkdir(parents=True, exist_ok=True) client.download_file(bucket.name, key, str(local_path), Callback=S3MPConfig.callback, Config=S3MPConfig.transfer_config) else: - for obj in s3_list_child_keys(key, bucket, client)["Contents"]: - download_key(obj["Key"], local_path / obj["Key"].replace(key, "")) + for child_key in s3_list_child_keys(key, bucket, client): + download_key(child_key, local_path / child_key.replace(key, "")) def upload_to_key( key: str, @@ -86,20 +110,19 @@ def key_is_file_on_s3( bucket: S3Bucket = None, client: S3Client = None, ) -> bool: - """Check if a key is a file on S3, returns false if it is a folder. Raises an error if the key does not exist.""" + """Check if a key is a file on S3 by using head_object.""" bucket = bucket or S3MPConfig.bucket client = client or S3MPConfig.s3_client - if not key_exists_on_s3(key, bucket, client): - raise ValueError(f"Key {key} does not exist on S3") - res = s3_list_single_key(key, bucket, client) - # Handle case of trailing slash, but still verify - if ( - key[-1] == "/" - and len(res['Contents']) == 1 - and res['Contents'][0]['Key'] == key - ): - return False - return "Contents" in res + + try: + client.head_object(Bucket=bucket.name, Key=key) + return True + except Exception as e: + # 404 occurs if the key is a "folder" or does not exist + if "404" in str(e): + return False + else: + raise e def key_size_on_s3( @@ -124,8 +147,8 @@ def delete_child_keys_on_s3( """Delete all keys that are children of a key on S3.""" bucket = bucket or S3MPConfig.bucket client = client or S3MPConfig.s3_client - for obj in s3_list_child_keys(key, bucket, client)["Contents"]: - client.delete_object(Bucket=bucket.name, Key=obj["Key"]) + for child_key in s3_list_child_keys(key, bucket, client): + client.delete_object(Bucket=bucket.name, Key=child_key) def delete_key_on_s3( diff --git a/pyproject.toml b/pyproject.toml index 63a40ed..2e4cd9d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "S3MP" -version = "0.5.3" +version = "0.6.0" description = "" authors = [ {name = "Joshua Dean", email = "joshua.dean@sentera.com"}