From 5404307834b22fb8c3e283c04c23dbd3868a641e Mon Sep 17 00:00:00 2001
From: Colin Leong <122366389+cleong110@users.noreply.github.com>
Date: Tue, 11 Feb 2025 15:19:20 -0500
Subject: [PATCH 01/22] Adding DistanceMeasure signature

---
 pose_evaluation/metrics/base.py             |  6 ++-
 pose_evaluation/metrics/distance_measure.py | 57 +++++++++++++++++++++
 2 files changed, 62 insertions(+), 1 deletion(-)
 create mode 100644 pose_evaluation/metrics/distance_measure.py

diff --git a/pose_evaluation/metrics/base.py b/pose_evaluation/metrics/base.py
index 692f04b..90a4be6 100644
--- a/pose_evaluation/metrics/base.py
+++ b/pose_evaluation/metrics/base.py
@@ -18,11 +18,15 @@ def __init__(self, name:str, args: dict):
     def update(self, key: str, value: Any):
         self.signature_info[key] = value
 
-    def update_signature_and_abbr(self, key:str, abbr:str, args:dict):
+    def update_abbr(self, key, abbr):
         self._abbreviated.update({
             key: abbr
         })
 
+
+    def update_signature_and_abbr(self, key:str, abbr:str, args:dict):
+        self.update_abbr(key, abbr)
+
         self.signature_info.update({
             key: args.get(key, None)
         })
diff --git a/pose_evaluation/metrics/distance_measure.py b/pose_evaluation/metrics/distance_measure.py
new file mode 100644
index 0000000..78bb92c
--- /dev/null
+++ b/pose_evaluation/metrics/distance_measure.py
@@ -0,0 +1,57 @@
+import numpy as np
+from pose_evaluation.metrics.base import Signature
+
+
+class DistanceMeasureSignature(Signature):
+    def __init__(self, name: str, args: dict):
+        super().__init__(name=name, args=args)
+        self.update_abbr("distance", "dist")
+
+
+class DistanceMeasure:
+    _SIGNATURE_TYPE = DistanceMeasureSignature
+
+    def __init__(self, name: str) -> None:
+        self.name = name
+
+    def get_distance(
+        self, hyp_data: np.ma.MaskedArray, ref_data: np.ma.MaskedArray
+    ) -> float:
+        raise NotImplementedError
+
+    def __call__(
+        self, hyp_data: np.ma.MaskedArray, ref_data: np.ma.MaskedArray
+    ) -> float:
+        return self.get_distance(hyp_data, ref_data)
+
+    def get_signature(self) -> Signature:
+        return self._SIGNATURE_TYPE(self.name, self.__dict__)
+
+
+class PowerDistanceSignature(DistanceMeasureSignature):
+    def __init__(self, name, args: dict):
+        super().__init__(name=name, args=args)
+        self.update_signature_and_abbr("power", "pow", args)
+        self.update_signature_and_abbr("default_distance", "dflt", args)
+
+
+class PowerDistance(DistanceMeasure):
+    _SIGNATURE_TYPE = PowerDistanceSignature
+
+    def __init__(
+        self, name: str = "power_distance", power: int = 2, default_distance=0
+    ) -> None:
+        super().__init__(name)
+        self.power = power
+        self.default_distance = default_distance
+
+    def get_distance(
+        self, hyp_data: np.ma.MaskedArray, ref_data: np.ma.MaskedArray
+    ) -> float:
+        return (
+            (hyp_data - ref_data)
+            .pow(self.power)
+            .abs()
+            .filled(self.default_distance)
+            .mean()
+        )

From 6ef9b51279f7d3dd06433188396767efa48ef768 Mon Sep 17 00:00:00 2001
From: Colin Leong <122366389+cleong110@users.noreply.github.com>
Date: Tue, 11 Feb 2025 15:19:44 -0500
Subject: [PATCH 02/22] Throwing in some .gitignores

---
 .gitignore | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index 96ee987..4edea12 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,10 @@
 .idea/
 build/
 pose_evaluation.egg-info/
-**/__pycache__/
\ No newline at end of file
+**/__pycache__/
+.coverage
+.vscode/
+coverage.lcov
+**/test_data/
+*.npz
+*.code-workspace
\ No newline at end of file

From 6606e8bd28c00693ad545c74099087a0a56b8088 Mon Sep 17 00:00:00 2001
From: Colin Leong <122366389+cleong110@users.noreply.github.com>
Date: Tue, 11 Feb 2025 17:08:29 -0500
Subject: [PATCH 03/22] Implemented and tested PowerDistance

---
 pose_evaluation/metrics/distance_measure.py   | 54 +++++++++++----
 pose_evaluation/metrics/distance_metric.py    | 32 ++-------
 .../metrics/test_distance_metric.py           | 67 +++++++++++--------
 3 files changed, 85 insertions(+), 68 deletions(-)

diff --git a/pose_evaluation/metrics/distance_measure.py b/pose_evaluation/metrics/distance_measure.py
index 78bb92c..a5d023d 100644
--- a/pose_evaluation/metrics/distance_measure.py
+++ b/pose_evaluation/metrics/distance_measure.py
@@ -1,4 +1,5 @@
 import numpy as np
+import numpy.ma as ma
 from pose_evaluation.metrics.base import Signature
 
 
@@ -31,27 +32,52 @@ def get_signature(self) -> Signature:
 class PowerDistanceSignature(DistanceMeasureSignature):
     def __init__(self, name, args: dict):
         super().__init__(name=name, args=args)
-        self.update_signature_and_abbr("power", "pow", args)
+        self.update_signature_and_abbr("order", "ord", args)
         self.update_signature_and_abbr("default_distance", "dflt", args)
+        self.update_signature_and_abbr("aggregation_strategy", "agg", args)
 
 
-class PowerDistance(DistanceMeasure):
+class AggregatedPowerDistance(DistanceMeasure):
     _SIGNATURE_TYPE = PowerDistanceSignature
 
     def __init__(
-        self, name: str = "power_distance", power: int = 2, default_distance=0
+        self,
+        order: int = 2,
+        default_distance=0,
+        aggregation_strategy = "mean"
     ) -> None:
-        super().__init__(name)
-        self.power = power
+        super().__init__(name="power_distance")
+        self.power = order
         self.default_distance = default_distance
+        self.aggregation_strategy = aggregation_strategy
+
+    def aggregate(self, distances: ma.MaskedArray)->float:
+        if self.aggregation_strategy == "mean":
+            return distances.mean()
+        if self.aggregation_strategy == "max":
+            return distances.max()
+        if self.aggregation_strategy == "min":
+            return distances.min()
+        if self.aggregation_strategy == "sum":
+            return distances.sum()
+        
+        raise NotImplementedError(f"Aggregation Strategy {self.aggregation_strategy} not implemented")
+    
+    def _calculate_distances(self, hyp_data: ma.MaskedArray, ref_data: ma.MaskedArray):
+
+        diffs = ma.abs(hyp_data - ref_data) # elementwise, for example if 3D the last dim is still 3, e.g. (2, 2, 2)
+        raised_to_power = ma.power(diffs, self.power)  # (2, 2, 2) becomes (2**power, 2**power, 2**power), for example (4, 4, 4)
+        summed_results = ma.sum(raised_to_power, axis=-1, keepdims=True) # (4, 4, 4) becomes (12). If we had (30 frames, 1 person, 137 keypoints, xyz), now we have just (30, 1, 137, 1)
+        roots = ma.power(summed_results, 1/self.power)
+
+
+        
+        filled_with_defaults = ma.filled(roots, self.default_distance)
+        # distances = ma.linalg.norm(diffs, ord=self.power, axis=-1)
+        return filled_with_defaults
+
 
     def get_distance(
-        self, hyp_data: np.ma.MaskedArray, ref_data: np.ma.MaskedArray
-    ) -> float:
-        return (
-            (hyp_data - ref_data)
-            .pow(self.power)
-            .abs()
-            .filled(self.default_distance)
-            .mean()
-        )
+        self, hyp_data: ma.MaskedArray, ref_data: ma.MaskedArray
+    ) -> float:        
+        return self.aggregate(self._calculate_distances(hyp_data, ref_data))
diff --git a/pose_evaluation/metrics/distance_metric.py b/pose_evaluation/metrics/distance_metric.py
index c3cbc16..a69cdf6 100644
--- a/pose_evaluation/metrics/distance_metric.py
+++ b/pose_evaluation/metrics/distance_metric.py
@@ -4,35 +4,13 @@
 from pose_format import Pose
 
 from pose_evaluation.metrics.base_pose_metric import PoseMetric
+from pose_evaluation.metrics.distance_measure import DistanceMeasure
 
 
 class DistanceMetric(PoseMetric):
-    def __init__(self, kind: Literal["l1", "l2"] = "l2"):
-        super().__init__(f"DistanceMetric {kind}", higher_is_better=False)
-        self.kind = kind
+    def __init__(self, name, distance_measure:DistanceMeasure):
+        super().__init__(name=name, higher_is_better=False)
+        self.distance_measure = distance_measure
 
     def score(self, hypothesis: Pose, reference: Pose) -> float:
-        arrays = [hypothesis.body.data, reference.body.data]
-        max_length = max(len(array) for array in arrays)
-        # Pad the shorter array with zeros
-        for i, array in enumerate(arrays):
-            if len(array) < max_length:
-                shape = list(array.shape)
-                shape[0] = max_length - len(array)
-                padding_tensor = ma.zeros(shape)
-                arrays[i] = ma.concatenate([array, padding_tensor], axis=0)
-
-        # Calculate the error
-        error = arrays[0] - arrays[1]
-
-        # for l2, we need to calculate the error for each point
-        if self.kind == "l2":
-            # the last dimension is the 3D coordinates
-            error = ma.power(error, 2)
-            error = error.sum(axis=-1)
-            error = ma.sqrt(error)
-        else:
-            error = ma.abs(error)
-
-        error = error.filled(0)
-        return error.sum()
+        return self.distance_measure.get_distance(hypothesis.body.data, reference.body.data)
diff --git a/pose_evaluation/metrics/test_distance_metric.py b/pose_evaluation/metrics/test_distance_metric.py
index e1d7d39..e119f4e 100644
--- a/pose_evaluation/metrics/test_distance_metric.py
+++ b/pose_evaluation/metrics/test_distance_metric.py
@@ -5,68 +5,81 @@
 from pose_format import Pose
 from pose_format.numpy import NumPyPoseBody
 
+from pose_evaluation.metrics.distance_measure import AggregatedPowerDistance
 from pose_evaluation.metrics.distance_metric import DistanceMetric
 
 
-def get_poses(length1: int, length2: int):
+def get_poses(length1: int, length2: int, conf1= None, conf2=None):
     data_tensor = np.full((length1, 3, 4, 3), fill_value=2)
     zeros_tensor = np.zeros((length2, 3, 4, 3))
     data_confidence = np.ones(data_tensor.shape[:-1])
     zeros_confidence = np.ones(zeros_tensor.shape[:-1])
+    
+    if conf1 is not None:
+        data_confidence = conf1 * data_confidence
+    if conf2 is not None: 
+        zeros_confidence = conf2 * zeros_confidence
 
     hypothesis = Pose(header=None, body=NumPyPoseBody(fps=1, data=data_tensor, confidence=data_confidence))
     reference = Pose(header=None, body=NumPyPoseBody(fps=1, data=zeros_tensor, confidence=zeros_confidence))
     return hypothesis, reference
 
-class TestDistanceMetricGeneric(unittest.TestCase):
-    def setUp(self):
-        self.metric = DistanceMetric("l2")
+# class TestDistanceMetricGeneric(unittest.TestCase):
+#     def setUp(self):
+#         self.metric = DistanceMetric("l2")
 
-    def test_scores_are_symmetric(self):
-        hypothesis, reference = get_poses(2, 2)
+#     def test_scores_are_symmetric(self):
+#         hypothesis, reference = get_poses(2, 2)
 
-        score1 = self.metric.score(hypothesis, reference)
-        # pylint: disable=arguments-out-of-order
-        score2 = self.metric.score(reference, hypothesis)
-        self.assertAlmostEqual(score1, score2)
+#         score1 = self.metric.score(hypothesis, reference)
+#         # pylint: disable=arguments-out-of-order
+#         score2 = self.metric.score(reference, hypothesis)
+#         self.assertAlmostEqual(score1, score2)
 
-    def test_score_different_length(self):
-        hypothesis, reference = get_poses(3, 2)
+#     def test_score_different_length(self):
+#         hypothesis, reference = get_poses(3, 2)
 
-        difference = 6 * np.prod(hypothesis.body.confidence.shape)
+#         difference = 6 * np.prod(hypothesis.body.confidence.shape)
 
-        score = self.metric.score(hypothesis, reference)
-        self.assertIsInstance(score, float)
-        self.assertAlmostEqual(score, difference)
+#         score = self.metric.score(hypothesis, reference)
+#         self.assertIsInstance(score, float)
+#         self.assertAlmostEqual(score, difference)
 
-class TestDistanceMetricL1(unittest.TestCase):
+class TestDistanceMetricMeanL1(unittest.TestCase):
     def setUp(self):
-        self.metric = DistanceMetric("l1")
+        self.metric = DistanceMetric("mean_l1_metric", distance_measure=AggregatedPowerDistance(1, 0))
 
     def test_score_equal_length(self):
         hypothesis, reference = get_poses(2, 2)
 
-        # calculate what the difference should be
-        difference = 6 * np.prod(hypothesis.body.confidence.shape)
+        expected_mean = 6 # absolute distance between (2, 2, 2) and (0, 0, 0)
+        
 
         score = self.metric.score(hypothesis, reference)
         self.assertIsInstance(score, float)  # Check if the score is a float
-        self.assertAlmostEqual(score, difference)
+        self.assertAlmostEqual(score, expected_mean)
 
 class TestDistanceMetricL2(unittest.TestCase):
     def setUp(self):
-        self.metric = DistanceMetric("l2")
+        self.default_distance = 17
+        self.metric = DistanceMetric("l2_metric", distance_measure=AggregatedPowerDistance(2, self.default_distance))
+
+    def _check_against_expected(self, hypothesis, reference, expected):
+        score = self.metric.score(hypothesis, reference)
+        self.assertIsInstance(score, float)  # Check if the score is a float
+        self.assertAlmostEqual(score, expected)
 
     def test_score_equal_length(self):
         hypothesis, reference = get_poses(2, 2)
 
-        # calculate what the difference should be
-        difference = math.sqrt(12) * np.prod(hypothesis.body.confidence.shape)
+        expected_mean = np.sqrt(2**2+2**2+2**2) # all pairs are (2,2,2) and (0,0,0), so the mean is the same: sqrt(12)
+        self._check_against_expected(hypothesis, reference, expected=expected_mean)
 
-        score = self.metric.score(hypothesis, reference)
-        self.assertIsInstance(score, float)  # Check if the score is a float
-        self.assertAlmostEqual(score, difference)
+    def test_score_equal_length_one_masked(self):
+        hypothesis, reference = get_poses(2, 2, conf1=0.0)
+        self._check_against_expected(hypothesis, reference, expected=self.default_distance)
 
+    # TODO: mean, max, sum, min, other powers
 
 if __name__ == '__main__':
     unittest.main()

From 28e5aba718402a22c1d3731a8bd0871b3640f5d8 Mon Sep 17 00:00:00 2001
From: Colin Leong <122366389+cleong110@users.noreply.github.com>
Date: Thu, 27 Feb 2025 16:13:45 -0500
Subject: [PATCH 04/22] Add a ScoreWithSignature, and do some pylint fixes

---
 .../examples/example_metric_construction.py   | 75 +++++++++++++++++++
 pose_evaluation/metrics/base.py               | 32 ++++++--
 pose_evaluation/metrics/distance_measure.py   | 29 ++++---
 pose_evaluation/metrics/distance_metric.py    |  3 -
 4 files changed, 119 insertions(+), 20 deletions(-)
 create mode 100644 pose_evaluation/examples/example_metric_construction.py

diff --git a/pose_evaluation/examples/example_metric_construction.py b/pose_evaluation/examples/example_metric_construction.py
new file mode 100644
index 0000000..5127b99
--- /dev/null
+++ b/pose_evaluation/examples/example_metric_construction.py
@@ -0,0 +1,75 @@
+from pathlib import Path
+
+from pose_format import Pose
+
+from pose_evaluation.metrics.distance_metric import DistanceMetric
+from pose_evaluation.metrics.distance_measure import AggregatedPowerDistance
+from pose_evaluation.metrics.base import BaseMetric
+from pose_evaluation.metrics.test_distance_metric import get_poses
+# from pose_evaluation.metrics.pose_processors import get_standard_pose_processors
+# from pose_evaluation.metrics.dynamic_time_warping_metric import DTWMetric
+
+if __name__ == "__main__":
+
+    reference_file = Path(r"pose_evaluation\utils\test\test_data\colin-1-HOUSE.pose")
+    hypothesis_file = Path(r"pose_evaluation\utils\test\test_data\colin-2-HOUSE.pose")
+
+    # poses = [Pose.read(hypothesis_file.read_bytes()),Pose.read(reference_file.read_bytes())] # not the same shape!
+    # hypothesis, reference = get_poses(2, 2, conf1=0, conf2=0)
+    hypothesis, reference = get_poses(2, 2, conf1=1, conf2=1)
+    poses = [hypothesis, reference]
+    # signature = DistanceMetric().get_signature()
+    # print(signature)
+
+    # MeanJointError = DistanceMetric(
+    #     distance_measure=PowerDistance(2),
+    #     pose_preprocessors=get_standard_pose_processors(),
+    # )
+    distance_measure = AggregatedPowerDistance(1, 17)
+    mean_l1_metric = DistanceMetric("mean_l1_metric", distance_measure=distance_measure)
+
+    metrics = [
+        BaseMetric("base"),
+        DistanceMetric("PowerDistanceMetric", AggregatedPowerDistance(2, 1)),
+        DistanceMetric("AnotherPowerDistanceMetric", distance_measure=AggregatedPowerDistance(1, 10)),
+        mean_l1_metric,
+        DistanceMetric("max_l1_metric", distance_measure=AggregatedPowerDistance(order=1, aggregation_strategy="max", default_distance=0))
+        # DTWMetric(),
+        # MeanJointError,
+    ]
+
+    for m in metrics:
+
+        print("*" * 10)
+        print(m.get_signature().format())
+        print(m.get_signature().format(short=True))
+        
+        try:
+            score = m.score(poses[0], poses[1])
+            print(f"SCORE: {score}")
+            print(f"SCORE With Signature: ")
+            score = m.score_with_signature(poses[0], poses[1])
+            print(f"{score}")
+            print(repr(score))
+            print(f"{type(score)}")
+            
+            # still behaves like a float
+            doubled = score*2
+            print(f"score * 2 = {doubled}")
+            print(type(doubled))
+
+        except NotImplementedError:
+            print(f"{m} score not implemented")
+        print("*" * 10)
+
+
+    # hypothesis, reference = get_poses(2, 2, conf1=0, conf2=0)
+    # print(reference.body.data.mask)
+
+    
+
+    # score = mean_l1_metric.score()
+
+    # print(f"SCORE: {mean_l1_metric.score(poses[0], poses[1])}")
+
+    # print(distance_measure._calculate_distances(hyp_data=hypothesis.body.data, ref_data=reference.body.data))
diff --git a/pose_evaluation/metrics/base.py b/pose_evaluation/metrics/base.py
index 90a4be6..8a4fc72 100644
--- a/pose_evaluation/metrics/base.py
+++ b/pose_evaluation/metrics/base.py
@@ -1,5 +1,5 @@
 # pylint: disable=undefined-variable
-from typing import Any, Callable
+from typing import Any, Callable, Sequence
 from tqdm import tqdm
 
 
@@ -63,6 +63,21 @@ def __str__(self):
     def __repr__(self):
         return self.format()
 
+class ScoreWithSignature(float):
+    __slots__ = ("_signature",)  # Explicitly allow the attribute
+
+    def __new__(cls, value, signature):
+        obj = super().__new__(cls, value)  # Create the float instance
+        obj._signature = signature  # Store signature object
+        return obj
+
+    def __str__(self):
+        return f"{self._signature.format()} = {float(self)}"
+
+    def __repr__(self):
+        return f"ScoreWithSignature({super().__repr__()}, signature={repr(self._signature)})"
+    
+
 class BaseMetric[T]:
     """Base class for all metrics."""
     # Each metric should define its Signature class' name here
@@ -77,24 +92,29 @@ def __call__(self, hypothesis: T, reference: T) -> float:
 
     def score(self, hypothesis: T, reference: T) -> float:
         raise NotImplementedError
+    
+    def score_with_signature(self, hypothesis: T, reference: T) -> ScoreWithSignature:
+        return ScoreWithSignature(self.score(hypothesis, reference), self.get_signature())
+
 
-    def score_max(self, hypothesis: T, references: list[T]) -> float:
+    def score_max(self, hypothesis: T, references: Sequence[T]) -> float:
         all_scores = self.score_all([hypothesis], references)
         return max(max(scores) for scores in all_scores)
 
-    def validate_corpus_score_input(self, hypotheses: list[T], references: list[list[T]]):
+    def validate_corpus_score_input(self, hypotheses: Sequence[T], references: Sequence[Sequence[T]]):
         # This method is designed to avoid mistakes in the use of the corpus_score method
         for reference in references:
             assert len(hypotheses) == len(reference), \
                 "Hypothesis and reference must have the same number of instances"
 
-    def corpus_score(self, hypotheses: list[T], references: list[list[T]]) -> float:
+    def corpus_score(self, hypotheses: Sequence[T], references: Sequence[list[T]]) -> float:
         # Default implementation: average over sentence scores
         self.validate_corpus_score_input(hypotheses, references)
         transpose_references = list(zip(*references))
-        return sum(self.score_max(h, r) for h, r in zip(hypotheses, transpose_references)) / len(hypotheses)
+        scores = [self.score_max(h, r) for h, r in zip(hypotheses, transpose_references)]
+        return sum(scores) / len(hypotheses)
 
-    def score_all(self, hypotheses: list[T], references: list[T], progress_bar=True) -> list[list[float]]:
+    def score_all(self, hypotheses: Sequence[T], references: Sequence[T], progress_bar=True) -> list[list[float]]:
         # Default implementation: call the score function for each hypothesis-reference pair
         return [[self.score(h, r) for r in references]
                 for h in tqdm(hypotheses, disable=not progress_bar or len(hypotheses) == 1)]
diff --git a/pose_evaluation/metrics/distance_measure.py b/pose_evaluation/metrics/distance_measure.py
index a5d023d..4acafd3 100644
--- a/pose_evaluation/metrics/distance_measure.py
+++ b/pose_evaluation/metrics/distance_measure.py
@@ -1,12 +1,16 @@
 import numpy as np
 import numpy.ma as ma
+from typing import Literal
 from pose_evaluation.metrics.base import Signature
 
+AggregationStrategy=Literal["max", "min", "mean", "sum"]
+
 
 class DistanceMeasureSignature(Signature):
     def __init__(self, name: str, args: dict):
         super().__init__(name=name, args=args)
         self.update_abbr("distance", "dist")
+        self.update_abbr("power", "pow")
 
 
 class DistanceMeasure:
@@ -44,10 +48,10 @@ def __init__(
         self,
         order: int = 2,
         default_distance=0,
-        aggregation_strategy = "mean"
+        aggregation_strategy:AggregationStrategy = "mean"
     ) -> None:
         super().__init__(name="power_distance")
-        self.power = order
+        self.power = float(order)
         self.default_distance = default_distance
         self.aggregation_strategy = aggregation_strategy
 
@@ -60,18 +64,21 @@ def aggregate(self, distances: ma.MaskedArray)->float:
             return distances.min()
         if self.aggregation_strategy == "sum":
             return distances.sum()
-        
+
         raise NotImplementedError(f"Aggregation Strategy {self.aggregation_strategy} not implemented")
-    
-    def _calculate_distances(self, hyp_data: ma.MaskedArray, ref_data: ma.MaskedArray):
 
-        diffs = ma.abs(hyp_data - ref_data) # elementwise, for example if 3D the last dim is still 3, e.g. (2, 2, 2)
-        raised_to_power = ma.power(diffs, self.power)  # (2, 2, 2) becomes (2**power, 2**power, 2**power), for example (4, 4, 4)
-        summed_results = ma.sum(raised_to_power, axis=-1, keepdims=True) # (4, 4, 4) becomes (12). If we had (30 frames, 1 person, 137 keypoints, xyz), now we have just (30, 1, 137, 1)
-        roots = ma.power(summed_results, 1/self.power)
+    def _calculate_distances(self, hyp_data: ma.MaskedArray, ref_data: ma.MaskedArray):
 
+        diffs = ma.abs(hyp_data - ref_data) # element-wise, for example if 3D the last dim is still 3, e.g. (2, 2, 2)
+        # (2, 2, 2) becomes (2**power, 2**power, 2**power), for example (4, 4, 4)
+        raised_to_power = ma.power(diffs,
+                                   self.power)
 
-        
+        # (4, 4, 4) becomes (12). If we had (30 frames, 1 person, 137 keypoints, xyz), now we have just (30, 1, 137, 1)
+        summed_results = ma.sum(raised_to_power,
+                                axis=-1,
+                                keepdims=True)
+        roots = ma.power(summed_results, 1/self.power)
         filled_with_defaults = ma.filled(roots, self.default_distance)
         # distances = ma.linalg.norm(diffs, ord=self.power, axis=-1)
         return filled_with_defaults
@@ -79,5 +86,5 @@ def _calculate_distances(self, hyp_data: ma.MaskedArray, ref_data: ma.MaskedArra
 
     def get_distance(
         self, hyp_data: ma.MaskedArray, ref_data: ma.MaskedArray
-    ) -> float:        
+    ) -> float:
         return self.aggregate(self._calculate_distances(hyp_data, ref_data))
diff --git a/pose_evaluation/metrics/distance_metric.py b/pose_evaluation/metrics/distance_metric.py
index a69cdf6..25d44bf 100644
--- a/pose_evaluation/metrics/distance_metric.py
+++ b/pose_evaluation/metrics/distance_metric.py
@@ -1,6 +1,3 @@
-from typing import Literal
-
-from numpy import ma
 from pose_format import Pose
 
 from pose_evaluation.metrics.base_pose_metric import PoseMetric

From 09e85cd2465f59f1c7415e0765f1e4cfcbe9306f Mon Sep 17 00:00:00 2001
From: Colin Leong <122366389+cleong110@users.noreply.github.com>
Date: Thu, 27 Feb 2025 16:20:36 -0500
Subject: [PATCH 05/22] More pylint updates

---
 pose_evaluation/metrics/base.py             | 4 ++--
 pose_evaluation/metrics/distance_measure.py | 6 ++++--
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/pose_evaluation/metrics/base.py b/pose_evaluation/metrics/base.py
index 8a4fc72..35b72e7 100644
--- a/pose_evaluation/metrics/base.py
+++ b/pose_evaluation/metrics/base.py
@@ -76,7 +76,7 @@ def __str__(self):
 
     def __repr__(self):
         return f"ScoreWithSignature({super().__repr__()}, signature={repr(self._signature)})"
-    
+
 
 class BaseMetric[T]:
     """Base class for all metrics."""
@@ -92,7 +92,7 @@ def __call__(self, hypothesis: T, reference: T) -> float:
 
     def score(self, hypothesis: T, reference: T) -> float:
         raise NotImplementedError
-    
+
     def score_with_signature(self, hypothesis: T, reference: T) -> ScoreWithSignature:
         return ScoreWithSignature(self.score(hypothesis, reference), self.get_signature())
 
diff --git a/pose_evaluation/metrics/distance_measure.py b/pose_evaluation/metrics/distance_measure.py
index 4acafd3..4a3254e 100644
--- a/pose_evaluation/metrics/distance_measure.py
+++ b/pose_evaluation/metrics/distance_measure.py
@@ -1,6 +1,8 @@
-import numpy as np
-import numpy.ma as ma
 from typing import Literal
+
+import numpy as np
+import numpy.ma as ma # pylint: disable=consider-using-from-import
+
 from pose_evaluation.metrics.base import Signature
 
 AggregationStrategy=Literal["max", "min", "mean", "sum"]

From 7cb156128bf83bc249235e981112aa7b15031eed Mon Sep 17 00:00:00 2001
From: Colin Leong <122366389+cleong110@users.noreply.github.com>
Date: Thu, 27 Feb 2025 16:27:00 -0500
Subject: [PATCH 06/22] Take out unused test

---
 pose_evaluation/utils/test_pose_utils.py | 30 ------------------------
 1 file changed, 30 deletions(-)

diff --git a/pose_evaluation/utils/test_pose_utils.py b/pose_evaluation/utils/test_pose_utils.py
index cd54cea..adc69f6 100644
--- a/pose_evaluation/utils/test_pose_utils.py
+++ b/pose_evaluation/utils/test_pose_utils.py
@@ -16,7 +16,6 @@
     reduce_poses_to_intersection,
     get_component_names_and_points_dict,
     zero_pad_shorter_poses,
-    set_masked_to_origin_position,
 )
 
 
@@ -233,35 +232,6 @@ def test_detect_format(
             detect_known_pose_format(pose)
 
 
-def test_set_masked_to_origin_pos(mediapipe_poses_test_data: List[Pose]):
-    # Create a copy of the original poses for comparison
-    originals = [pose.copy() for pose in mediapipe_poses_test_data]
-
-    # Apply the transformation
-    poses = [set_masked_to_origin_position(pose) for pose in mediapipe_poses_test_data]
-
-    for original, transformed in zip(originals, poses):
-        # 1. Ensure the transformed data is still a MaskedArray
-        assert isinstance(transformed.body.data, np.ma.MaskedArray)
-
-        # # 2. Ensure the mask is now all False, meaning data _exists_ though its _value_ is now zero
-        # assert np.ma.all(~transformed.body.data.mask)
-        # assert original.body.data.mask.sum() == 0
-        assert transformed.body.data.mask.sum() == 0
-
-        # 3. Check the shape matches the original
-        assert transformed.body.data.shape == original.body.data.shape
-
-        # 4. Validate masked positions in the original are now zeros
-        assert ma.all(transformed.body.data.data[original.body.data.mask] == 0)
-
-        # 5. Validate unmasked positions in the original remain unchanged
-        assert ma.all(
-            transformed.body.data.data[~original.body.data.mask]
-            == original.body.data.data[~original.body.data.mask]
-        )
-
-
 def test_hide_low_conf(mediapipe_poses_test_data: List[Pose]):
     copies = [pose.copy() for pose in mediapipe_poses_test_data]
     for pose, copy in zip(mediapipe_poses_test_data, copies):

From 7965943b3a6ff9212464102f696131364da8b0a8 Mon Sep 17 00:00:00 2001
From: Colin Leong <122366389+cleong110@users.noreply.github.com>
Date: Thu, 27 Feb 2025 16:33:16 -0500
Subject: [PATCH 07/22] dedupe gitignore

---
 .gitignore | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/.gitignore b/.gitignore
index 896022e..ec00402 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,14 +3,8 @@ build/
 pose_evaluation.egg-info/
 **/__pycache__/
 .coverage
-.vscode/
-coverage.lcov
 **/test_data/
 *.npz
 *.code-workspace
-.coverage
 .vscode/
-coverage.lcov
-**/test_data/
-*.npz
-*.code-workspace
\ No newline at end of file
+coverage.lcov
\ No newline at end of file

From ab3a88d3d37229218916c82dfad9055f7455dddf Mon Sep 17 00:00:00 2001
From: Colin Leong <122366389+cleong110@users.noreply.github.com>
Date: Thu, 27 Feb 2025 16:35:28 -0500
Subject: [PATCH 08/22] cross platform paths

---
 pose_evaluation/examples/example_metric_construction.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/pose_evaluation/examples/example_metric_construction.py b/pose_evaluation/examples/example_metric_construction.py
index 5127b99..cf0cbbb 100644
--- a/pose_evaluation/examples/example_metric_construction.py
+++ b/pose_evaluation/examples/example_metric_construction.py
@@ -11,8 +11,9 @@
 
 if __name__ == "__main__":
 
-    reference_file = Path(r"pose_evaluation\utils\test\test_data\colin-1-HOUSE.pose")
-    hypothesis_file = Path(r"pose_evaluation\utils\test\test_data\colin-2-HOUSE.pose")
+
+    reference_file = Path("pose_evaluation") / "utils" / "test" / "test_data" / "colin-1-HOUSE.pose"
+    hypothesis_file = Path("pose_evaluation") / "utils" / "test" / "test_data" / "colin-2-HOUSE.pose"
 
     # poses = [Pose.read(hypothesis_file.read_bytes()),Pose.read(reference_file.read_bytes())] # not the same shape!
     # hypothesis, reference = get_poses(2, 2, conf1=0, conf2=0)

From ac8f6f61dc10ab23d254c0e6b9ba0dd213e1830d Mon Sep 17 00:00:00 2001
From: Colin Leong <122366389+cleong110@users.noreply.github.com>
Date: Thu, 27 Feb 2025 16:35:40 -0500
Subject: [PATCH 09/22] np.ma to ma

---
 pose_evaluation/metrics/distance_measure.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pose_evaluation/metrics/distance_measure.py b/pose_evaluation/metrics/distance_measure.py
index 4a3254e..fb8e75c 100644
--- a/pose_evaluation/metrics/distance_measure.py
+++ b/pose_evaluation/metrics/distance_measure.py
@@ -22,12 +22,12 @@ def __init__(self, name: str) -> None:
         self.name = name
 
     def get_distance(
-        self, hyp_data: np.ma.MaskedArray, ref_data: np.ma.MaskedArray
+        self, hyp_data: ma.MaskedArray, ref_data: ma.MaskedArray
     ) -> float:
         raise NotImplementedError
 
     def __call__(
-        self, hyp_data: np.ma.MaskedArray, ref_data: np.ma.MaskedArray
+        self, hyp_data: ma.MaskedArray, ref_data: ma.MaskedArray
     ) -> float:
         return self.get_distance(hyp_data, ref_data)
 

From 6e61688007c81d7c0ac6afd2e00b6d7076ddf248 Mon Sep 17 00:00:00 2001
From: Colin Leong <122366389+cleong110@users.noreply.github.com>
Date: Fri, 28 Feb 2025 16:10:05 -0500
Subject: [PATCH 10/22] Cleaned up example metrics

---
 .../examples/example_metric_construction.py   | 82 +++++++------------
 1 file changed, 30 insertions(+), 52 deletions(-)

diff --git a/pose_evaluation/examples/example_metric_construction.py b/pose_evaluation/examples/example_metric_construction.py
index cf0cbbb..10d1f63 100644
--- a/pose_evaluation/examples/example_metric_construction.py
+++ b/pose_evaluation/examples/example_metric_construction.py
@@ -1,76 +1,54 @@
 from pathlib import Path
-
 from pose_format import Pose
-
 from pose_evaluation.metrics.distance_metric import DistanceMetric
 from pose_evaluation.metrics.distance_measure import AggregatedPowerDistance
 from pose_evaluation.metrics.base import BaseMetric
 from pose_evaluation.metrics.test_distance_metric import get_poses
-# from pose_evaluation.metrics.pose_processors import get_standard_pose_processors
-# from pose_evaluation.metrics.dynamic_time_warping_metric import DTWMetric
 
 if __name__ == "__main__":
-
-
+    # Define file paths for test pose data
     reference_file = Path("pose_evaluation") / "utils" / "test" / "test_data" / "colin-1-HOUSE.pose"
     hypothesis_file = Path("pose_evaluation") / "utils" / "test" / "test_data" / "colin-2-HOUSE.pose"
-
-    # poses = [Pose.read(hypothesis_file.read_bytes()),Pose.read(reference_file.read_bytes())] # not the same shape!
-    # hypothesis, reference = get_poses(2, 2, conf1=0, conf2=0)
-    hypothesis, reference = get_poses(2, 2, conf1=1, conf2=1)
-    poses = [hypothesis, reference]
-    # signature = DistanceMetric().get_signature()
-    # print(signature)
-
-    # MeanJointError = DistanceMetric(
-    #     distance_measure=PowerDistance(2),
-    #     pose_preprocessors=get_standard_pose_processors(),
-    # )
-    distance_measure = AggregatedPowerDistance(1, 17)
-    mean_l1_metric = DistanceMetric("mean_l1_metric", distance_measure=distance_measure)
-
+    
+    # Choose whether to load real files or generate test poses
+    use_real_files = True  # Change to False to use get_poses instead
+    
+    if use_real_files:
+        poses = [Pose.read(hypothesis_file.read_bytes()), Pose.read(reference_file.read_bytes())]
+    else:
+        hypothesis, reference = get_poses(2, 2, conf1=1, conf2=1)
+        poses = [hypothesis, reference]
+    
+    # Define distance metrics
+    mean_l1_metric = DistanceMetric("mean_l1_metric", distance_measure=AggregatedPowerDistance(1, 17))
     metrics = [
         BaseMetric("base"),
         DistanceMetric("PowerDistanceMetric", AggregatedPowerDistance(2, 1)),
-        DistanceMetric("AnotherPowerDistanceMetric", distance_measure=AggregatedPowerDistance(1, 10)),
+        DistanceMetric("AnotherPowerDistanceMetric", AggregatedPowerDistance(1, 10)),
         mean_l1_metric,
-        DistanceMetric("max_l1_metric", distance_measure=AggregatedPowerDistance(order=1, aggregation_strategy="max", default_distance=0))
-        # DTWMetric(),
-        # MeanJointError,
+        DistanceMetric("max_l1_metric", AggregatedPowerDistance(order=1, aggregation_strategy="max", default_distance=0)),
     ]
-
-    for m in metrics:
-
+    
+    # Evaluate each metric on the test poses
+    for metric in metrics:
         print("*" * 10)
-        print(m.get_signature().format())
-        print(m.get_signature().format(short=True))
+        print(metric.get_signature().format())
+        print(metric.get_signature().format(short=True))
         
         try:
-            score = m.score(poses[0], poses[1])
+            score = metric.score(poses[0], poses[1])
             print(f"SCORE: {score}")
-            print(f"SCORE With Signature: ")
-            score = m.score_with_signature(poses[0], poses[1])
-            print(f"{score}")
-            print(repr(score))
-            print(f"{type(score)}")
+            print("SCORE With Signature:")
+            score_with_sig = metric.score_with_signature(poses[0], poses[1])
+            print(score_with_sig)
+            print(repr(score_with_sig))
+            print(f"{type(score_with_sig)}")
             
-            # still behaves like a float
-            doubled = score*2
+            # Verify that score behaves like a float
+            doubled = score_with_sig * 2
             print(f"score * 2 = {doubled}")
             print(type(doubled))
-
+        
         except NotImplementedError:
-            print(f"{m} score not implemented")
+            print(f"{metric} score not implemented")
         print("*" * 10)
-
-
-    # hypothesis, reference = get_poses(2, 2, conf1=0, conf2=0)
-    # print(reference.body.data.mask)
-
-    
-
-    # score = mean_l1_metric.score()
-
-    # print(f"SCORE: {mean_l1_metric.score(poses[0], poses[1])}")
-
-    # print(distance_measure._calculate_distances(hyp_data=hypothesis.body.data, ref_data=reference.body.data))

From f2e425bd56d2fe93d6b7a96846557b397a4d169f Mon Sep 17 00:00:00 2001
From: Colin Leong <122366389+cleong110@users.noreply.github.com>
Date: Fri, 28 Feb 2025 16:16:51 -0500
Subject: [PATCH 11/22] Distance Measure cleanup and documentation, created
 with chatgpt help:
 https://chatgpt.com/share/67c227a2-4dcc-800a-b173-7d8ae34929a9

---
 pose_evaluation/metrics/distance_measure.py | 121 ++++++++++++--------
 1 file changed, 72 insertions(+), 49 deletions(-)

diff --git a/pose_evaluation/metrics/distance_measure.py b/pose_evaluation/metrics/distance_measure.py
index fb8e75c..5af07f2 100644
--- a/pose_evaluation/metrics/distance_measure.py
+++ b/pose_evaluation/metrics/distance_measure.py
@@ -1,42 +1,43 @@
-from typing import Literal
-
-import numpy as np
-import numpy.ma as ma # pylint: disable=consider-using-from-import
-
+from typing import Literal, Dict, Any
+import numpy.ma as ma  # pylint: disable=consider-using-from-import
 from pose_evaluation.metrics.base import Signature
 
-AggregationStrategy=Literal["max", "min", "mean", "sum"]
-
+AggregationStrategy = Literal["max", "min", "mean", "sum"]
 
 class DistanceMeasureSignature(Signature):
-    def __init__(self, name: str, args: dict):
+    """Signature for distance measure metrics."""
+    def __init__(self, name: str, args: Dict[str, Any]) -> None:
         super().__init__(name=name, args=args)
         self.update_abbr("distance", "dist")
         self.update_abbr("power", "pow")
 
 
 class DistanceMeasure:
+    """Abstract base class for distance measures."""
     _SIGNATURE_TYPE = DistanceMeasureSignature
 
     def __init__(self, name: str) -> None:
         self.name = name
 
-    def get_distance(
-        self, hyp_data: ma.MaskedArray, ref_data: ma.MaskedArray
-    ) -> float:
+    def get_distance(self, hyp_data: ma.MaskedArray, ref_data: ma.MaskedArray) -> float:
+        """
+        Compute the distance between hypothesis and reference data.
+        
+        This method should be implemented by subclasses.
+        """
         raise NotImplementedError
 
-    def __call__(
-        self, hyp_data: ma.MaskedArray, ref_data: ma.MaskedArray
-    ) -> float:
+    def __call__(self, hyp_data: ma.MaskedArray, ref_data: ma.MaskedArray) -> float:
         return self.get_distance(hyp_data, ref_data)
 
     def get_signature(self) -> Signature:
+        """Return the signature of the distance measure."""
         return self._SIGNATURE_TYPE(self.name, self.__dict__)
 
 
 class PowerDistanceSignature(DistanceMeasureSignature):
-    def __init__(self, name, args: dict):
+    """Signature for power distance measures."""
+    def __init__(self, name: str, args: Dict[str, Any]) -> None:
         super().__init__(name=name, args=args)
         self.update_signature_and_abbr("order", "ord", args)
         self.update_signature_and_abbr("default_distance", "dflt", args)
@@ -44,49 +45,71 @@ def __init__(self, name, args: dict):
 
 
 class AggregatedPowerDistance(DistanceMeasure):
+    """Aggregated power distance metric using a specified aggregation strategy."""
     _SIGNATURE_TYPE = PowerDistanceSignature
 
     def __init__(
         self,
         order: int = 2,
-        default_distance=0,
-        aggregation_strategy:AggregationStrategy = "mean"
+        default_distance: float = 0.0,
+        aggregation_strategy: AggregationStrategy = "mean",
     ) -> None:
+        """
+        Initialize the aggregated power distance metric.
+        
+        :param order: The exponent to which differences are raised.
+        :param default_distance: The value to fill in for masked entries.
+        :param aggregation_strategy: Strategy to aggregate computed distances.
+        """
         super().__init__(name="power_distance")
         self.power = float(order)
         self.default_distance = default_distance
         self.aggregation_strategy = aggregation_strategy
 
-    def aggregate(self, distances: ma.MaskedArray)->float:
-        if self.aggregation_strategy == "mean":
-            return distances.mean()
-        if self.aggregation_strategy == "max":
-            return distances.max()
-        if self.aggregation_strategy == "min":
-            return distances.min()
-        if self.aggregation_strategy == "sum":
-            return distances.sum()
-
-        raise NotImplementedError(f"Aggregation Strategy {self.aggregation_strategy} not implemented")
-
-    def _calculate_distances(self, hyp_data: ma.MaskedArray, ref_data: ma.MaskedArray):
-
-        diffs = ma.abs(hyp_data - ref_data) # element-wise, for example if 3D the last dim is still 3, e.g. (2, 2, 2)
-        # (2, 2, 2) becomes (2**power, 2**power, 2**power), for example (4, 4, 4)
-        raised_to_power = ma.power(diffs,
-                                   self.power)
-
-        # (4, 4, 4) becomes (12). If we had (30 frames, 1 person, 137 keypoints, xyz), now we have just (30, 1, 137, 1)
-        summed_results = ma.sum(raised_to_power,
-                                axis=-1,
-                                keepdims=True)
-        roots = ma.power(summed_results, 1/self.power)
-        filled_with_defaults = ma.filled(roots, self.default_distance)
-        # distances = ma.linalg.norm(diffs, ord=self.power, axis=-1)
-        return filled_with_defaults
-
-
-    def get_distance(
+    def _aggregate(self, distances: ma.MaskedArray) -> float:
+        """
+        Aggregate computed distances using the specified strategy.
+        
+        :param distances: A masked array of computed distances.
+        :return: A single aggregated distance value.
+        """
+        aggregation_funcs = {
+            "mean": distances.mean,
+            "max": distances.max,
+            "min": distances.min,
+            "sum": distances.sum,
+        }
+        if self.aggregation_strategy in aggregation_funcs:
+            return aggregation_funcs[self.aggregation_strategy]()
+        else:
+            raise NotImplementedError(
+                f"Aggregation Strategy {self.aggregation_strategy} not implemented"
+            )
+
+    def _calculate_distances(
         self, hyp_data: ma.MaskedArray, ref_data: ma.MaskedArray
-    ) -> float:
-        return self.aggregate(self._calculate_distances(hyp_data, ref_data))
+    ) -> ma.MaskedArray:
+        """
+        Compute distances along the last axis between hypothesis and reference data.
+        
+        Steps:
+          1. Compute the absolute differences.
+          2. Raise the differences to the specified power.
+          3. Sum the powered differences along the last axis.
+          4. Extract the root corresponding to the power.
+          5. Fill masked values with the default distance.
+        
+        :param hyp_data: Hypothesis data as a masked array.
+        :param ref_data: Reference data as a masked array.
+        :return: A plain (unmasked) array of distances, with masked entries replaced by the default distance.
+        """
+        diffs = ma.abs(hyp_data - ref_data)
+        raised_to_power = ma.power(diffs, self.power)
+        summed_results = ma.sum(raised_to_power, axis=-1, keepdims=True)
+        roots = ma.power(summed_results, 1 / self.power)
+        return ma.filled(roots, self.default_distance)
+
+    def get_distance(self, hyp_data: ma.MaskedArray, ref_data: ma.MaskedArray) -> float:
+        """Compute and aggregate the distance between hypothesis and reference data."""
+        calculated = self._calculate_distances(hyp_data, ref_data)
+        return self._aggregate(calculated)

From 8ab300e46d38c1854e329560d68630f9378e70f0 Mon Sep 17 00:00:00 2001
From: Colin Leong <122366389+cleong110@users.noreply.github.com>
Date: Fri, 28 Feb 2025 16:24:17 -0500
Subject: [PATCH 12/22] Add some docstrings to DistanceMetric

---
 pose_evaluation/metrics/distance_metric.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/pose_evaluation/metrics/distance_metric.py b/pose_evaluation/metrics/distance_metric.py
index 25d44bf..830db13 100644
--- a/pose_evaluation/metrics/distance_metric.py
+++ b/pose_evaluation/metrics/distance_metric.py
@@ -1,13 +1,15 @@
 from pose_format import Pose
-
 from pose_evaluation.metrics.base_pose_metric import PoseMetric
 from pose_evaluation.metrics.distance_measure import DistanceMeasure
 
 
 class DistanceMetric(PoseMetric):
-    def __init__(self, name, distance_measure:DistanceMeasure):
+    """Computes the distance between two poses using the provided distance measure."""
+
+    def __init__(self, name: str, distance_measure: DistanceMeasure) -> None:
         super().__init__(name=name, higher_is_better=False)
         self.distance_measure = distance_measure
 
     def score(self, hypothesis: Pose, reference: Pose) -> float:
-        return self.distance_measure.get_distance(hypothesis.body.data, reference.body.data)
+        """Calculate the distance score between hypothesis and reference poses."""
+        return self.distance_measure(hypothesis.body.data, reference.body.data)

From 3b0c8202e15e5793858770a21cd97eb59ca44f55 Mon Sep 17 00:00:00 2001
From: Colin Leong <122366389+cleong110@users.noreply.github.com>
Date: Fri, 28 Feb 2025 16:24:35 -0500
Subject: [PATCH 13/22] example metric construction uses generated poses by
 default

---
 pose_evaluation/examples/example_metric_construction.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pose_evaluation/examples/example_metric_construction.py b/pose_evaluation/examples/example_metric_construction.py
index 10d1f63..792f5d5 100644
--- a/pose_evaluation/examples/example_metric_construction.py
+++ b/pose_evaluation/examples/example_metric_construction.py
@@ -11,7 +11,7 @@
     hypothesis_file = Path("pose_evaluation") / "utils" / "test" / "test_data" / "colin-2-HOUSE.pose"
     
     # Choose whether to load real files or generate test poses
-    use_real_files = True  # Change to False to use get_poses instead
+    use_real_files = False  # Change to True to use real poses instead. They have different lengths, and so some metrics will not work
     
     if use_real_files:
         poses = [Pose.read(hypothesis_file.read_bytes()), Pose.read(reference_file.read_bytes())]

From 00415f3dda75a86bba003a3988b602796a3c60c0 Mon Sep 17 00:00:00 2001
From: Colin Leong <122366389+cleong110@users.noreply.github.com>
Date: Fri, 28 Feb 2025 16:29:01 -0500
Subject: [PATCH 14/22] More updates to example script, including zero-padding

---
 .../examples/example_metric_construction.py   | 52 ++++++++++++++-----
 1 file changed, 39 insertions(+), 13 deletions(-)

diff --git a/pose_evaluation/examples/example_metric_construction.py b/pose_evaluation/examples/example_metric_construction.py
index 792f5d5..895c763 100644
--- a/pose_evaluation/examples/example_metric_construction.py
+++ b/pose_evaluation/examples/example_metric_construction.py
@@ -4,37 +4,63 @@
 from pose_evaluation.metrics.distance_measure import AggregatedPowerDistance
 from pose_evaluation.metrics.base import BaseMetric
 from pose_evaluation.metrics.test_distance_metric import get_poses
+from pose_evaluation.utils.pose_utils import zero_pad_shorter_poses
 
 if __name__ == "__main__":
     # Define file paths for test pose data
-    reference_file = Path("pose_evaluation") / "utils" / "test" / "test_data" / "colin-1-HOUSE.pose"
-    hypothesis_file = Path("pose_evaluation") / "utils" / "test" / "test_data" / "colin-2-HOUSE.pose"
-    
+    reference_file = (
+        Path("pose_evaluation") / "utils" / "test" / "test_data" / "colin-1-HOUSE.pose"
+    )
+    hypothesis_file = (
+        Path("pose_evaluation") / "utils" / "test" / "test_data" / "colin-2-HOUSE.pose"
+    )
+
     # Choose whether to load real files or generate test poses
-    use_real_files = False  # Change to True to use real poses instead. They have different lengths, and so some metrics will not work
-    
+    # They have different lengths, and so some metrics will crash!
+    # Change to False to generate fake poses with known distances, e.g. all 0 and all 1
+    use_real_files = True
+
     if use_real_files:
-        poses = [Pose.read(hypothesis_file.read_bytes()), Pose.read(reference_file.read_bytes())]
+        poses = [
+            Pose.read(hypothesis_file.read_bytes()),
+            Pose.read(reference_file.read_bytes()),
+        ]
+        # TODO: add PosePreprocessors to PoseDistanceMetrics, with their own signatures
+        poses = zero_pad_shorter_poses(poses)
+
     else:
         hypothesis, reference = get_poses(2, 2, conf1=1, conf2=1)
         poses = [hypothesis, reference]
-    
+
     # Define distance metrics
-    mean_l1_metric = DistanceMetric("mean_l1_metric", distance_measure=AggregatedPowerDistance(1, 17))
+    mean_l1_metric = DistanceMetric(
+        "mean_l1_metric", distance_measure=AggregatedPowerDistance(1, 17)
+    )
     metrics = [
         BaseMetric("base"),
         DistanceMetric("PowerDistanceMetric", AggregatedPowerDistance(2, 1)),
         DistanceMetric("AnotherPowerDistanceMetric", AggregatedPowerDistance(1, 10)),
         mean_l1_metric,
-        DistanceMetric("max_l1_metric", AggregatedPowerDistance(order=1, aggregation_strategy="max", default_distance=0)),
+        DistanceMetric(
+            "max_l1_metric",
+            AggregatedPowerDistance(
+                order=1, aggregation_strategy="max", default_distance=0
+            ),
+        ),
+        DistanceMetric(
+            "MeanL2Score",
+            AggregatedPowerDistance(
+                order=2, aggregation_strategy="mean", default_distance=0
+            ),
+        ),
     ]
-    
+
     # Evaluate each metric on the test poses
     for metric in metrics:
         print("*" * 10)
         print(metric.get_signature().format())
         print(metric.get_signature().format(short=True))
-        
+
         try:
             score = metric.score(poses[0], poses[1])
             print(f"SCORE: {score}")
@@ -43,12 +69,12 @@
             print(score_with_sig)
             print(repr(score_with_sig))
             print(f"{type(score_with_sig)}")
-            
+
             # Verify that score behaves like a float
             doubled = score_with_sig * 2
             print(f"score * 2 = {doubled}")
             print(type(doubled))
-        
+
         except NotImplementedError:
             print(f"{metric} score not implemented")
         print("*" * 10)

From 9f37ee4c3cf69c009ee92b71fd1e880369248567 Mon Sep 17 00:00:00 2001
From: Colin Leong <122366389+cleong110@users.noreply.github.com>
Date: Fri, 28 Feb 2025 16:33:33 -0500
Subject: [PATCH 15/22] Remove unused/commented UnitTest

---
 .../metrics/test_distance_metric.py           | 21 -------------------
 1 file changed, 21 deletions(-)

diff --git a/pose_evaluation/metrics/test_distance_metric.py b/pose_evaluation/metrics/test_distance_metric.py
index e119f4e..e559312 100644
--- a/pose_evaluation/metrics/test_distance_metric.py
+++ b/pose_evaluation/metrics/test_distance_metric.py
@@ -24,27 +24,6 @@ def get_poses(length1: int, length2: int, conf1= None, conf2=None):
     reference = Pose(header=None, body=NumPyPoseBody(fps=1, data=zeros_tensor, confidence=zeros_confidence))
     return hypothesis, reference
 
-# class TestDistanceMetricGeneric(unittest.TestCase):
-#     def setUp(self):
-#         self.metric = DistanceMetric("l2")
-
-#     def test_scores_are_symmetric(self):
-#         hypothesis, reference = get_poses(2, 2)
-
-#         score1 = self.metric.score(hypothesis, reference)
-#         # pylint: disable=arguments-out-of-order
-#         score2 = self.metric.score(reference, hypothesis)
-#         self.assertAlmostEqual(score1, score2)
-
-#     def test_score_different_length(self):
-#         hypothesis, reference = get_poses(3, 2)
-
-#         difference = 6 * np.prod(hypothesis.body.confidence.shape)
-
-#         score = self.metric.score(hypothesis, reference)
-#         self.assertIsInstance(score, float)
-#         self.assertAlmostEqual(score, difference)
-
 class TestDistanceMetricMeanL1(unittest.TestCase):
     def setUp(self):
         self.metric = DistanceMetric("mean_l1_metric", distance_measure=AggregatedPowerDistance(1, 0))

From d58833a89f8aaa00071d87087fcedeea700366f3 Mon Sep 17 00:00:00 2001
From: Colin Leong <122366389+cleong110@users.noreply.github.com>
Date: Fri, 28 Feb 2025 16:34:08 -0500
Subject: [PATCH 16/22] change constant naming for pylint

---
 pose_evaluation/examples/example_metric_construction.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pose_evaluation/examples/example_metric_construction.py b/pose_evaluation/examples/example_metric_construction.py
index 895c763..5158348 100644
--- a/pose_evaluation/examples/example_metric_construction.py
+++ b/pose_evaluation/examples/example_metric_construction.py
@@ -18,9 +18,9 @@
     # Choose whether to load real files or generate test poses
     # They have different lengths, and so some metrics will crash!
     # Change to False to generate fake poses with known distances, e.g. all 0 and all 1
-    use_real_files = True
+    USE_REAL_FILES = True
 
-    if use_real_files:
+    if USE_REAL_FILES:
         poses = [
             Pose.read(hypothesis_file.read_bytes()),
             Pose.read(reference_file.read_bytes()),

From d5a34ec97673ac0bd63dcfae0e8ed7d833537717 Mon Sep 17 00:00:00 2001
From: Colin Leong <122366389+cleong110@users.noreply.github.com>
Date: Fri, 28 Feb 2025 16:40:24 -0500
Subject: [PATCH 17/22] cleanup of test_distance_metric.py

---
 .../metrics/test_distance_metric.py           | 79 ++++++++++++++-----
 1 file changed, 59 insertions(+), 20 deletions(-)

diff --git a/pose_evaluation/metrics/test_distance_metric.py b/pose_evaluation/metrics/test_distance_metric.py
index e559312..7d2278a 100644
--- a/pose_evaluation/metrics/test_distance_metric.py
+++ b/pose_evaluation/metrics/test_distance_metric.py
@@ -1,5 +1,5 @@
-import math
 import unittest
+from typing import Optional
 
 import numpy as np
 from pose_format import Pose
@@ -9,56 +9,95 @@
 from pose_evaluation.metrics.distance_metric import DistanceMetric
 
 
-def get_poses(length1: int, length2: int, conf1= None, conf2=None):
+def get_poses(
+    length1: int,
+    length2: int,
+    conf1: Optional[float] = None,
+    conf2: Optional[float] = None,
+):
+    """
+    Utility function to generate hypothesis and reference Pose objects for testing.
+
+    Args:
+        length1 (int): Number of frames in the hypothesis pose.
+        length2 (int): Number of frames in the reference pose.
+        conf1 (float, optional): Confidence multiplier for the hypothesis.
+        conf2 (float, optional): Confidence multiplier for the reference.
+
+    Returns:
+        tuple: A tuple containing (hypothesis, reference) Pose objects.
+    """
     data_tensor = np.full((length1, 3, 4, 3), fill_value=2)
     zeros_tensor = np.zeros((length2, 3, 4, 3))
     data_confidence = np.ones(data_tensor.shape[:-1])
     zeros_confidence = np.ones(zeros_tensor.shape[:-1])
-    
+
     if conf1 is not None:
         data_confidence = conf1 * data_confidence
-    if conf2 is not None: 
+    if conf2 is not None:
         zeros_confidence = conf2 * zeros_confidence
 
-    hypothesis = Pose(header=None, body=NumPyPoseBody(fps=1, data=data_tensor, confidence=data_confidence))
-    reference = Pose(header=None, body=NumPyPoseBody(fps=1, data=zeros_tensor, confidence=zeros_confidence))
+    # TODO: add an actual header, something like
+    # header = PoseHeader(1.0, PoseHeaderDimensions(10, 20, 5), [PoseHeaderComponent(...)], is_bbox=True)
+    hypothesis = Pose(
+        header=None,  # type: ignore
+        body=NumPyPoseBody(fps=1, data=data_tensor, confidence=data_confidence),
+    )
+    reference = Pose(
+        header=None,  # type: ignore
+        body=NumPyPoseBody(fps=1, data=zeros_tensor, confidence=zeros_confidence),
+    )
     return hypothesis, reference
 
+
 class TestDistanceMetricMeanL1(unittest.TestCase):
+    """Tests for the L1 (Manhattan) distance metric using mean aggregation."""
+
     def setUp(self):
-        self.metric = DistanceMetric("mean_l1_metric", distance_measure=AggregatedPowerDistance(1, 0))
+        self.metric = DistanceMetric(
+            "mean_l1_metric",
+            distance_measure=AggregatedPowerDistance(order=1, default_distance=0),
+        )
 
     def test_score_equal_length(self):
         hypothesis, reference = get_poses(2, 2)
-
-        expected_mean = 6 # absolute distance between (2, 2, 2) and (0, 0, 0)
-        
+        expected_distance = 6  # Sum of absolute differences: 2 + 2 + 2
 
         score = self.metric.score(hypothesis, reference)
-        self.assertIsInstance(score, float)  # Check if the score is a float
-        self.assertAlmostEqual(score, expected_mean)
+        self.assertIsInstance(score, float)
+        self.assertAlmostEqual(score, expected_distance)
+
 
 class TestDistanceMetricL2(unittest.TestCase):
+    """Tests for the L2 (Euclidean) distance metric with default distance substitution."""
+
     def setUp(self):
         self.default_distance = 17
-        self.metric = DistanceMetric("l2_metric", distance_measure=AggregatedPowerDistance(2, self.default_distance))
+        self.metric = DistanceMetric(
+            "l2_metric",
+            distance_measure=AggregatedPowerDistance(
+                order=2, default_distance=self.default_distance
+            ),
+        )
 
     def _check_against_expected(self, hypothesis, reference, expected):
         score = self.metric.score(hypothesis, reference)
-        self.assertIsInstance(score, float)  # Check if the score is a float
+        self.assertIsInstance(score, float)
         self.assertAlmostEqual(score, expected)
 
     def test_score_equal_length(self):
         hypothesis, reference = get_poses(2, 2)
-
-        expected_mean = np.sqrt(2**2+2**2+2**2) # all pairs are (2,2,2) and (0,0,0), so the mean is the same: sqrt(12)
-        self._check_against_expected(hypothesis, reference, expected=expected_mean)
+        expected_distance = np.sqrt(2**2 + 2**2 + 2**2)  # sqrt(12)
+        self._check_against_expected(hypothesis, reference, expected=expected_distance)
 
     def test_score_equal_length_one_masked(self):
         hypothesis, reference = get_poses(2, 2, conf1=0.0)
-        self._check_against_expected(hypothesis, reference, expected=self.default_distance)
+        self._check_against_expected(
+            hypothesis, reference, expected=self.default_distance
+        )
+
+    # TODO: Add tests for other aggregation strategies and power values
 
-    # TODO: mean, max, sum, min, other powers
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     unittest.main()

From 32233d99b7e8d8c8120f010c8ddc8986434d12f7 Mon Sep 17 00:00:00 2001
From: Colin Leong <122366389+cleong110@users.noreply.github.com>
Date: Fri, 28 Feb 2025 16:49:57 -0500
Subject: [PATCH 18/22] Some cleanup of base including slightly simplified
 format()

---
 pose_evaluation/metrics/base.py | 95 +++++++++++++++++----------------
 1 file changed, 48 insertions(+), 47 deletions(-)

diff --git a/pose_evaluation/metrics/base.py b/pose_evaluation/metrics/base.py
index 35b72e7..42a9d05 100644
--- a/pose_evaluation/metrics/base.py
+++ b/pose_evaluation/metrics/base.py
@@ -4,71 +4,60 @@
 
 
 class Signature:
-    """Represents reproducibility signatures for metrics. Inspired by sacreBLEU
-    """
-    def __init__(self, name:str, args: dict):
+    """Represents reproducibility signatures for metrics. Inspired by sacreBLEU"""
 
-        self._abbreviated = {
-            "name":"n",
-            "higher_is_better":"hb"
-        }
+    def __init__(self, name: str, args: dict):
+
+        self._abbreviated = {"name": "n", "higher_is_better": "hb"}
 
         self.signature_info = {"name": name, **args}
 
     def update(self, key: str, value: Any):
         self.signature_info[key] = value
 
-    def update_abbr(self, key, abbr):
-        self._abbreviated.update({
-            key: abbr
-        })
-
+    def update_abbr(self, key: str, abbr: str):
+        self._abbreviated.update({key: abbr})
 
-    def update_signature_and_abbr(self, key:str, abbr:str, args:dict):
+    def update_signature_and_abbr(self, key: str, abbr: str, args: dict):
         self.update_abbr(key, abbr)
 
-        self.signature_info.update({
-            key: args.get(key, None)
-        })
+        self.signature_info.update({key: args.get(key, None)})
 
     def format(self, short: bool = False) -> str:
         pairs = []
-        keys = list(self.signature_info.keys())
-        for name in keys:
-            value = self.signature_info[name]
+        for key, value in self.signature_info.items():
             if value is not None:
-                # Check for nested signature objects
+                # Check for nested signature objects and wrap them in brackets.
                 if hasattr(value, "get_signature"):
-                    # Wrap nested signatures in brackets
                     nested_signature = value.get_signature()
                     if isinstance(nested_signature, Signature):
                         nested_signature = nested_signature.format(short=short)
                     value = f"{{{nested_signature}}}"
+                # Replace booleans with yes/no.
                 if isinstance(value, bool):
-                    # Replace True/False with yes/no
                     value = "yes" if value else "no"
+                # Represent callable values by their name.
                 if isinstance(value, Callable):
                     value = value.__name__
 
-                # if the abbreviation is not defined, use the full name as a fallback.
-                abbreviated_name = self._abbreviated.get(name, name)
-                final_name = abbreviated_name if short else name
-                pairs.append(f"{final_name}:{value}")
-
+                abbreviated_key = self._abbreviated.get(key, key)
+                final_key = abbreviated_key if short else key
+                pairs.append(f"{final_key}:{value}")
         return "|".join(pairs)
 
-    def __str__(self):
+    def __str__(self) -> str:
         return self.format()
 
-    def __repr__(self):
+    def __repr__(self) -> str:
         return self.format()
 
+
 class ScoreWithSignature(float):
-    __slots__ = ("_signature",)  # Explicitly allow the attribute
+    __slots__ = ("_signature",)
 
-    def __new__(cls, value, signature):
-        obj = super().__new__(cls, value)  # Create the float instance
-        obj._signature = signature  # Store signature object
+    def __new__(cls, value, signature) -> "ScoreWithSignature":
+        obj = super().__new__(cls, value)
+        obj._signature = signature
         return obj
 
     def __str__(self):
@@ -80,7 +69,7 @@ def __repr__(self):
 
 class BaseMetric[T]:
     """Base class for all metrics."""
-    # Each metric should define its Signature class' name here
+
     _SIGNATURE_TYPE = Signature
 
     def __init__(self, name: str, higher_is_better: bool = False):
@@ -94,30 +83,42 @@ def score(self, hypothesis: T, reference: T) -> float:
         raise NotImplementedError
 
     def score_with_signature(self, hypothesis: T, reference: T) -> ScoreWithSignature:
-        return ScoreWithSignature(self.score(hypothesis, reference), self.get_signature())
-
+        return ScoreWithSignature(
+            self.score(hypothesis, reference), self.get_signature()
+        )
 
     def score_max(self, hypothesis: T, references: Sequence[T]) -> float:
         all_scores = self.score_all([hypothesis], references)
         return max(max(scores) for scores in all_scores)
 
-    def validate_corpus_score_input(self, hypotheses: Sequence[T], references: Sequence[Sequence[T]]):
+    def validate_corpus_score_input(
+        self, hypotheses: Sequence[T], references: Sequence[Sequence[T]]
+    ):
         # This method is designed to avoid mistakes in the use of the corpus_score method
         for reference in references:
-            assert len(hypotheses) == len(reference), \
-                "Hypothesis and reference must have the same number of instances"
-
-    def corpus_score(self, hypotheses: Sequence[T], references: Sequence[list[T]]) -> float:
-        # Default implementation: average over sentence scores
+            assert len(hypotheses) == len(
+                reference
+            ), "Hypothesis and reference must have the same number of instances"
+
+    def corpus_score(
+        self, hypotheses: Sequence[T], references: Sequence[list[T]]
+    ) -> float:
+        """Default implementation: average over sentence scores."""
         self.validate_corpus_score_input(hypotheses, references)
         transpose_references = list(zip(*references))
-        scores = [self.score_max(h, r) for h, r in zip(hypotheses, transpose_references)]
+        scores = [
+            self.score_max(h, r) for h, r in zip(hypotheses, transpose_references)
+        ]
         return sum(scores) / len(hypotheses)
 
-    def score_all(self, hypotheses: Sequence[T], references: Sequence[T], progress_bar=True) -> list[list[float]]:
-        # Default implementation: call the score function for each hypothesis-reference pair
-        return [[self.score(h, r) for r in references]
-                for h in tqdm(hypotheses, disable=not progress_bar or len(hypotheses) == 1)]
+    def score_all(
+        self, hypotheses: Sequence[T], references: Sequence[T], progress_bar=True
+    ) -> list[list[float]]:
+        """Call the score function for each hypothesis-reference pair."""
+        return [
+            [self.score(h, r) for r in references]
+            for h in tqdm(hypotheses, disable=not progress_bar or len(hypotheses) == 1)
+        ]
 
     def __str__(self):
         return self.name

From 7d4843c5e24bdaac48ed35675d846ae1e2d7eeb2 Mon Sep 17 00:00:00 2001
From: Colin Leong <122366389+cleong110@users.noreply.github.com>
Date: Fri, 28 Feb 2025 16:56:53 -0500
Subject: [PATCH 19/22] Don't print name: or n:

---
 pose_evaluation/metrics/base.py | 41 ++++++++++++++++++---------------
 1 file changed, 22 insertions(+), 19 deletions(-)

diff --git a/pose_evaluation/metrics/base.py b/pose_evaluation/metrics/base.py
index 42a9d05..c31fd25 100644
--- a/pose_evaluation/metrics/base.py
+++ b/pose_evaluation/metrics/base.py
@@ -24,26 +24,29 @@ def update_signature_and_abbr(self, key: str, abbr: str, args: dict):
         self.signature_info.update({key: args.get(key, None)})
 
     def format(self, short: bool = False) -> str:
-        pairs = []
+        parts = []
+        # Always print the "name" value first, if available.
+        name_value = self.signature_info.get("name")
+        if name_value is not None:
+            parts.append(str(name_value))
+        # Process all other keys.
         for key, value in self.signature_info.items():
-            if value is not None:
-                # Check for nested signature objects and wrap them in brackets.
-                if hasattr(value, "get_signature"):
-                    nested_signature = value.get_signature()
-                    if isinstance(nested_signature, Signature):
-                        nested_signature = nested_signature.format(short=short)
-                    value = f"{{{nested_signature}}}"
-                # Replace booleans with yes/no.
-                if isinstance(value, bool):
-                    value = "yes" if value else "no"
-                # Represent callable values by their name.
-                if isinstance(value, Callable):
-                    value = value.__name__
-
-                abbreviated_key = self._abbreviated.get(key, key)
-                final_key = abbreviated_key if short else key
-                pairs.append(f"{final_key}:{value}")
-        return "|".join(pairs)
+            if key == "name" or value is None:
+                continue
+            # Wrap nested signatures in curly braces, formatting Signature objects.
+            if hasattr(value, "get_signature"):
+                nested = value.get_signature()
+                is_signature = isinstance(nested, Signature)
+                value = f"{{{nested.format(short=short) if is_signature else nested}}}"
+            if isinstance(value, bool):
+                value = "yes" if value else "no"
+            if isinstance(value, Callable):
+                value = getattr(value, "__name__", type(value).__name__)
+            abbreviated_key = self._abbreviated.get(key, key) if short else key
+            parts.append(f"{abbreviated_key}:{value}")
+        return "|".join(parts)
+
+
 
     def __str__(self) -> str:
         return self.format()

From 2d8c8003234ca23d4df522aed2ce8bd22f020294 Mon Sep 17 00:00:00 2001
From: Colin Leong <122366389+cleong110@users.noreply.github.com>
Date: Wed, 5 Mar 2025 17:24:32 -0500
Subject: [PATCH 20/22] Basic Score class

---
 .../examples/example_metric_construction.py   |  5 +---
 pose_evaluation/metrics/base.py               | 28 ++++++++++---------
 2 files changed, 16 insertions(+), 17 deletions(-)

diff --git a/pose_evaluation/examples/example_metric_construction.py b/pose_evaluation/examples/example_metric_construction.py
index 5158348..c1ff2db 100644
--- a/pose_evaluation/examples/example_metric_construction.py
+++ b/pose_evaluation/examples/example_metric_construction.py
@@ -70,10 +70,7 @@
             print(repr(score_with_sig))
             print(f"{type(score_with_sig)}")
 
-            # Verify that score behaves like a float
-            doubled = score_with_sig * 2
-            print(f"score * 2 = {doubled}")
-            print(type(doubled))
+            print(metric.score_with_signature(poses[0], poses[1], short=True))
 
         except NotImplementedError:
             print(f"{metric} score not implemented")
diff --git a/pose_evaluation/metrics/base.py b/pose_evaluation/metrics/base.py
index c31fd25..1c69942 100644
--- a/pose_evaluation/metrics/base.py
+++ b/pose_evaluation/metrics/base.py
@@ -46,8 +46,6 @@ def format(self, short: bool = False) -> str:
             parts.append(f"{abbreviated_key}:{value}")
         return "|".join(parts)
 
-
-
     def __str__(self) -> str:
         return self.format()
 
@@ -55,19 +53,19 @@ def __repr__(self) -> str:
         return self.format()
 
 
-class ScoreWithSignature(float):
-    __slots__ = ("_signature",)
+class Score:
+    """Inspired by Sacrebleu, a base score class which can add signature information after the value."""
 
-    def __new__(cls, value, signature) -> "ScoreWithSignature":
-        obj = super().__new__(cls, value)
-        obj._signature = signature
-        return obj
+    def __init__(self, name: str, score: float, signature: str) -> None:
+        self.name = name
+        self.score = score
+        self._signature = signature
 
     def __str__(self):
-        return f"{self._signature.format()} = {float(self)}"
+        return f"{self._signature} = {self.score}"
 
     def __repr__(self):
-        return f"ScoreWithSignature({super().__repr__()}, signature={repr(self._signature)})"
+        return f"Score({self.score!r}, signature={repr(self._signature)})"
 
 
 class BaseMetric[T]:
@@ -85,9 +83,13 @@ def __call__(self, hypothesis: T, reference: T) -> float:
     def score(self, hypothesis: T, reference: T) -> float:
         raise NotImplementedError
 
-    def score_with_signature(self, hypothesis: T, reference: T) -> ScoreWithSignature:
-        return ScoreWithSignature(
-            self.score(hypothesis, reference), self.get_signature()
+    def score_with_signature(
+        self, hypothesis: T, reference: T, short: bool = False
+    ) -> Score:
+        return Score(
+            name=self.name,
+            score=self.score(hypothesis, reference),
+            signature=self.get_signature().format(short=short),
         )
 
     def score_max(self, hypothesis: T, references: Sequence[T]) -> float:

From 4702d6e24526655bb52d279cffae49c23917c65e Mon Sep 17 00:00:00 2001
From: Colin Leong <122366389+cleong110@users.noreply.github.com>
Date: Wed, 5 Mar 2025 17:31:00 -0500
Subject: [PATCH 21/22] Trying to fix a pytest bug

---
 pose_evaluation/metrics/test_embedding_distance_metric.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pose_evaluation/metrics/test_embedding_distance_metric.py b/pose_evaluation/metrics/test_embedding_distance_metric.py
index ab275c6..0f08bb9 100644
--- a/pose_evaluation/metrics/test_embedding_distance_metric.py
+++ b/pose_evaluation/metrics/test_embedding_distance_metric.py
@@ -87,7 +87,7 @@ def save_and_plot_distances(distances, matrix_name, num_points, dim):
     """Helper function to save distance matrix and plot distances."""
 
     distances = distances.cpu()
-    test_artifacts_dir = Path(__file__).parent / "temp"
+    test_artifacts_dir = Path(__file__).parent / "tests"
     output_path = test_artifacts_dir / f"distance_matrix_{matrix_name}_{num_points}_{dim}D.csv"
     np.savetxt(output_path, distances.numpy(), delimiter=",", fmt="%.4f")
     print(f"Distance matrix saved to {output_path}")

From 55833ad2db43a851932b9bf6c6b1e7588bbc166d Mon Sep 17 00:00:00 2001
From: Colin Leong <122366389+cleong110@users.noreply.github.com>
Date: Thu, 6 Mar 2025 11:47:26 -0500
Subject: [PATCH 22/22] Unnecessary else after return

---
 pose_evaluation/metrics/distance_measure.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/pose_evaluation/metrics/distance_measure.py b/pose_evaluation/metrics/distance_measure.py
index 5af07f2..1ab5433 100644
--- a/pose_evaluation/metrics/distance_measure.py
+++ b/pose_evaluation/metrics/distance_measure.py
@@ -81,10 +81,10 @@ def _aggregate(self, distances: ma.MaskedArray) -> float:
         }
         if self.aggregation_strategy in aggregation_funcs:
             return aggregation_funcs[self.aggregation_strategy]()
-        else:
-            raise NotImplementedError(
-                f"Aggregation Strategy {self.aggregation_strategy} not implemented"
-            )
+
+        raise NotImplementedError(
+            f"Aggregation Strategy {self.aggregation_strategy} not implemented"
+        )
 
     def _calculate_distances(
         self, hyp_data: ma.MaskedArray, ref_data: ma.MaskedArray