From bddb86c83bcee33f5a3c6c5e808cbe1269e3056a Mon Sep 17 00:00:00 2001 From: TrevorBurgoyne Date: Mon, 20 Jan 2025 10:26:04 -0600 Subject: [PATCH 1/7] catch number in ext --- S3MP/_version.py | 2 +- S3MP/mirror_path.py | 6 +++++- pyproject.toml | 2 +- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/S3MP/_version.py b/S3MP/_version.py index 1ee95cb..54542d5 100644 --- a/S3MP/_version.py +++ b/S3MP/_version.py @@ -1,3 +1,3 @@ """Semantic versioning for S3MP.""" -__version__ = "0.5.3" +__version__ = "0.5.4" diff --git a/S3MP/mirror_path.py b/S3MP/mirror_path.py index 9ebad63..c2d4945 100644 --- a/S3MP/mirror_path.py +++ b/S3MP/mirror_path.py @@ -37,7 +37,11 @@ def s3_key(self) -> str: """Get s3 key.""" ret_key = "/".join([str(s.name) for s in self.key_segments]) # We'll infer folder/file based on extension - return ret_key if '.' in self.key_segments[-1].name else f"{ret_key}/" + # HACK to catch case where the "extension" is actually a part of the folder name + # (eg, a folder named "v0.1.0"), we check if the extension is actually a number + name = self.key_segments[-1].name + ext = name.split('.')[-1] + return ret_key if (ext is not name and not ext.isdigit()) else f"{ret_key}/" @property def local_path(self) -> Path: diff --git a/pyproject.toml b/pyproject.toml index 63a40ed..5721702 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "S3MP" -version = "0.5.3" +version = "0.5.4" description = "" authors = [ {name = "Joshua Dean", email = "joshua.dean@sentera.com"} From 1596557aad9b2421dd1b0c92c4da2841c2712ce4 Mon Sep 17 00:00:00 2001 From: TrevorBurgoyne Date: Mon, 20 Jan 2025 10:51:14 -0600 Subject: [PATCH 2/7] Move continuation logic to s3_utils --- S3MP/mirror_path.py | 23 +--------------------- S3MP/utils/s3_utils.py | 44 +++++++++++++++++++++++++++++------------- 2 files changed, 32 insertions(+), 35 deletions(-) diff --git a/S3MP/mirror_path.py b/S3MP/mirror_path.py index c2d4945..5f544f9 100644 --- a/S3MP/mirror_path.py +++ b/S3MP/mirror_path.py @@ -155,28 +155,7 @@ def get_child(self, child_name: str) -> MirrorPath: def get_children_on_s3(self) -> List[MirrorPath]: """Get all children on s3.""" - child_s3_keys = [] - continuation_token = None - - while True: - resp = s3_list_child_keys( - self.s3_key, continuation_token=continuation_token - ) - - # Collect keys from the current response - if "Contents" in resp: - child_s3_keys.extend( - obj["Key"] for obj in resp["Contents"] if obj["Key"] != self.s3_key - ) - if "CommonPrefixes" in resp: - child_s3_keys.extend(obj["Prefix"] for obj in resp["CommonPrefixes"]) - - # Check if there are more pages to fetch - if "NextContinuationToken" in resp: - continuation_token = resp["NextContinuationToken"] - else: - break - return [MirrorPath.from_s3_key(key) for key in child_s3_keys] + return [MirrorPath.from_s3_key(key) for key in s3_list_child_keys(self.s3_key)] def get_parent(self) -> MirrorPath: """Get the parent of this file.""" diff --git a/S3MP/utils/s3_utils.py b/S3MP/utils/s3_utils.py index 11759fa..7361b43 100644 --- a/S3MP/utils/s3_utils.py +++ b/S3MP/utils/s3_utils.py @@ -3,8 +3,7 @@ from pathlib import Path from S3MP.global_config import S3MPConfig from S3MP.types import S3Bucket, S3Client, S3ListObjectV2Output -from botocore.exceptions import ClientError - +from typing import List def s3_list_single_key( key: str, @@ -20,10 +19,9 @@ def s3_list_single_key( def s3_list_child_keys( key: str, - bucket=None, - client=None, - continuation_token=None, -): + bucket: S3Bucket = None, + client: S3Client = None, +) -> List[str]: """List details of all child keys on S3.""" if not key.endswith("/"): warnings.warn(f"Listing child keys of {key} - key does not end with '/'") @@ -34,9 +32,29 @@ def s3_list_child_keys( "Prefix": key, "Delimiter": "/", } - if continuation_token: - params["ContinuationToken"] = continuation_token - return client.list_objects_v2(**params) + child_s3_keys: List[str] = [] + continuation_token: str = None + while True: + if continuation_token: + params["ContinuationToken"] = continuation_token + + resp = client.list_objects_v2(**params) + + # Collect keys from the current response + if "Contents" in resp: + child_s3_keys.extend( + obj["Key"] for obj in resp["Contents"] if obj["Key"] != self.s3_key + ) + if "CommonPrefixes" in resp: + child_s3_keys.extend(obj["Prefix"] for obj in resp["CommonPrefixes"]) + + # Check if there are more pages to fetch + if "NextContinuationToken" in resp: + continuation_token = resp["NextContinuationToken"] + else: + break + + return child_s3_keys def download_key( key: str, @@ -51,8 +69,8 @@ def download_key( local_path.parent.mkdir(parents=True, exist_ok=True) client.download_file(bucket.name, key, str(local_path), Callback=S3MPConfig.callback, Config=S3MPConfig.transfer_config) else: - for obj in s3_list_child_keys(key, bucket, client)["Contents"]: - download_key(obj["Key"], local_path / obj["Key"].replace(key, "")) + for child_key in s3_list_child_keys(key, bucket, client): + download_key(child_key, local_path / child_key.replace(key, "")) def upload_to_key( key: str, @@ -124,8 +142,8 @@ def delete_child_keys_on_s3( """Delete all keys that are children of a key on S3.""" bucket = bucket or S3MPConfig.bucket client = client or S3MPConfig.s3_client - for obj in s3_list_child_keys(key, bucket, client)["Contents"]: - client.delete_object(Bucket=bucket.name, Key=obj["Key"]) + for child_key in s3_list_child_keys(key, bucket, client): + client.delete_object(Bucket=bucket.name, Key=child_key) def delete_key_on_s3( From 5d5f37776b9d73fb1f9907a0fe60134c167f2b7b Mon Sep 17 00:00:00 2001 From: TrevorBurgoyne Date: Mon, 20 Jan 2025 10:57:46 -0600 Subject: [PATCH 3/7] Try different file det method --- S3MP/utils/s3_utils.py | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/S3MP/utils/s3_utils.py b/S3MP/utils/s3_utils.py index 7361b43..2e8e699 100644 --- a/S3MP/utils/s3_utils.py +++ b/S3MP/utils/s3_utils.py @@ -4,6 +4,7 @@ from S3MP.global_config import S3MPConfig from S3MP.types import S3Bucket, S3Client, S3ListObjectV2Output from typing import List +from botocore.exceptions import ClientError def s3_list_single_key( key: str, @@ -43,7 +44,7 @@ def s3_list_child_keys( # Collect keys from the current response if "Contents" in resp: child_s3_keys.extend( - obj["Key"] for obj in resp["Contents"] if obj["Key"] != self.s3_key + obj["Key"] for obj in resp["Contents"] if obj["Key"] != key ) if "CommonPrefixes" in resp: child_s3_keys.extend(obj["Prefix"] for obj in resp["CommonPrefixes"]) @@ -107,17 +108,17 @@ def key_is_file_on_s3( """Check if a key is a file on S3, returns false if it is a folder. Raises an error if the key does not exist.""" bucket = bucket or S3MPConfig.bucket client = client or S3MPConfig.s3_client - if not key_exists_on_s3(key, bucket, client): - raise ValueError(f"Key {key} does not exist on S3") - res = s3_list_single_key(key, bucket, client) - # Handle case of trailing slash, but still verify - if ( - key[-1] == "/" - and len(res['Contents']) == 1 - and res['Contents'][0]['Key'] == key - ): - return False - return "Contents" in res + + try: + client.head_object(Bucket=bucket, Key=key) + return True + except ClientError as e: + if e.response['Error']['Code'] == '404': + raise ValueError(f"Key {key} does not exist on S3") + elif e.response['Error']['Code'] == '403': + raise ValueError(f"Access denied for key {key} on S3") + else: + return False def key_size_on_s3( From d33f1ed8c71c7dd4b6bf71347ddf79bc2578ebb2 Mon Sep 17 00:00:00 2001 From: TrevorBurgoyne Date: Mon, 20 Jan 2025 11:05:42 -0600 Subject: [PATCH 4/7] debug --- S3MP/utils/s3_utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/S3MP/utils/s3_utils.py b/S3MP/utils/s3_utils.py index 2e8e699..452beea 100644 --- a/S3MP/utils/s3_utils.py +++ b/S3MP/utils/s3_utils.py @@ -66,6 +66,7 @@ def download_key( """Download a key from S3.""" bucket = bucket or S3MPConfig.bucket client = client or S3MPConfig.s3_client + print(f"{key} is file: {key_is_file_on_s3(key, bucket, client)}") if key_is_file_on_s3(key, bucket, client): local_path.parent.mkdir(parents=True, exist_ok=True) client.download_file(bucket.name, key, str(local_path), Callback=S3MPConfig.callback, Config=S3MPConfig.transfer_config) From b430e40593433f8cc39fe6d4940c98ad71de6624 Mon Sep 17 00:00:00 2001 From: TrevorBurgoyne Date: Mon, 20 Jan 2025 11:12:18 -0600 Subject: [PATCH 5/7] Fix bucket name str --- S3MP/utils/s3_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/S3MP/utils/s3_utils.py b/S3MP/utils/s3_utils.py index 452beea..c8d9f0c 100644 --- a/S3MP/utils/s3_utils.py +++ b/S3MP/utils/s3_utils.py @@ -111,7 +111,7 @@ def key_is_file_on_s3( client = client or S3MPConfig.s3_client try: - client.head_object(Bucket=bucket, Key=key) + client.head_object(Bucket=bucket.name, Key=key) return True except ClientError as e: if e.response['Error']['Code'] == '404': From fa607f555ecadb3d6e67dc198390d425030fc82d Mon Sep 17 00:00:00 2001 From: TrevorBurgoyne Date: Mon, 20 Jan 2025 11:40:53 -0600 Subject: [PATCH 6/7] dont raise execption for 404 since that can indicate a folder --- S3MP/mirror_path.py | 4 ++++ S3MP/utils/s3_utils.py | 21 ++++++++++++--------- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/S3MP/mirror_path.py b/S3MP/mirror_path.py index 5f544f9..04fe8f9 100644 --- a/S3MP/mirror_path.py +++ b/S3MP/mirror_path.py @@ -90,6 +90,10 @@ def is_file_on_s3(self) -> bool: """Check if is a file on s3.""" return key_is_file_on_s3(self.s3_key) + def is_file_and_exists_on_s3(self) -> bool: + """Check if is a file and exists on s3.""" + return self.exists_on_s3() and self.is_file_on_s3() + def update_callback_on_skipped_transfer(self): """Update the current global callback if the transfer gets skipped.""" if S3MPConfig.callback and self in S3MPConfig.callback._transfer_objs: diff --git a/S3MP/utils/s3_utils.py b/S3MP/utils/s3_utils.py index c8d9f0c..4792921 100644 --- a/S3MP/utils/s3_utils.py +++ b/S3MP/utils/s3_utils.py @@ -4,7 +4,6 @@ from S3MP.global_config import S3MPConfig from S3MP.types import S3Bucket, S3Client, S3ListObjectV2Output from typing import List -from botocore.exceptions import ClientError def s3_list_single_key( key: str, @@ -66,7 +65,12 @@ def download_key( """Download a key from S3.""" bucket = bucket or S3MPConfig.bucket client = client or S3MPConfig.s3_client - print(f"{key} is file: {key_is_file_on_s3(key, bucket, client)}") + + if not key_exists_on_s3(key, bucket, client): + raise ValueError(f"Key {key} does not exist on S3") + + # If the key is a file, download it + # Otherwise, download all child keys if key_is_file_on_s3(key, bucket, client): local_path.parent.mkdir(parents=True, exist_ok=True) client.download_file(bucket.name, key, str(local_path), Callback=S3MPConfig.callback, Config=S3MPConfig.transfer_config) @@ -106,20 +110,19 @@ def key_is_file_on_s3( bucket: S3Bucket = None, client: S3Client = None, ) -> bool: - """Check if a key is a file on S3, returns false if it is a folder. Raises an error if the key does not exist.""" + """Check if a key is a file on S3 by using head_object.""" bucket = bucket or S3MPConfig.bucket client = client or S3MPConfig.s3_client try: client.head_object(Bucket=bucket.name, Key=key) return True - except ClientError as e: - if e.response['Error']['Code'] == '404': - raise ValueError(f"Key {key} does not exist on S3") - elif e.response['Error']['Code'] == '403': - raise ValueError(f"Access denied for key {key} on S3") - else: + except Exception as e: + # 404 occurs is the key is a "folder" or does not exist + if "404" in str(e): return False + else: + raise e def key_size_on_s3( From 2c23d6f5e0dfe2394d04f8e6dd9f5da5330645dc Mon Sep 17 00:00:00 2001 From: TrevorBurgoyne Date: Mon, 20 Jan 2025 11:50:31 -0600 Subject: [PATCH 7/7] Bump minor verison --- S3MP/_version.py | 2 +- S3MP/utils/s3_utils.py | 2 +- pyproject.toml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/S3MP/_version.py b/S3MP/_version.py index 54542d5..363c446 100644 --- a/S3MP/_version.py +++ b/S3MP/_version.py @@ -1,3 +1,3 @@ """Semantic versioning for S3MP.""" -__version__ = "0.5.4" +__version__ = "0.6.0" diff --git a/S3MP/utils/s3_utils.py b/S3MP/utils/s3_utils.py index 4792921..e89933a 100644 --- a/S3MP/utils/s3_utils.py +++ b/S3MP/utils/s3_utils.py @@ -118,7 +118,7 @@ def key_is_file_on_s3( client.head_object(Bucket=bucket.name, Key=key) return True except Exception as e: - # 404 occurs is the key is a "folder" or does not exist + # 404 occurs if the key is a "folder" or does not exist if "404" in str(e): return False else: diff --git a/pyproject.toml b/pyproject.toml index 5721702..2e4cd9d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "S3MP" -version = "0.5.4" +version = "0.6.0" description = "" authors = [ {name = "Joshua Dean", email = "joshua.dean@sentera.com"}