Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
e97d2d9
adding pose processors and base pose metric that uses them
cleong110 Mar 7, 2025
ee74ae7
Taking a pass at DTWMetric, and pose_processing
cleong110 Mar 10, 2025
bc47224
updated get_poses function to give more control of shape.
cleong110 Mar 14, 2025
ac761e0
DTW now inherits better. Also various fixes and ran black on the whol…
cleong110 Mar 14, 2025
2d00f2f
Remove PoseMetricScore
cleong110 Mar 14, 2025
1bc9558
removed another unused class
cleong110 Mar 14, 2025
e248caf
Kwargs!
cleong110 Mar 14, 2025
ffb2680
Merge branch 'main' into dtw_metrics
cleong110 Mar 17, 2025
a648bef
run black
cleong110 Mar 17, 2025
d86b036
SegmentedPoseMetric uses process_poses, and pylint changes
cleong110 Mar 17, 2025
3c100f0
Deduplicate code
cleong110 Mar 17, 2025
345f737
Add a disable to dtw's "Getting Distances for Trajectories"
cleong110 Mar 17, 2025
9e3b87b
return str(self.get_signature()) in base
cleong110 Mar 17, 2025
5a88258
call str() in PoseProcessor repr
cleong110 Mar 17, 2025
fb3a19a
PoseMetric score tqdm now says the name of the metric.
cleong110 Mar 17, 2025
f9c22dc
Add in a bit more examples, checking the repr and str functions
cleong110 Mar 17, 2025
efa404b
remove unneeded underscore
cleong110 Mar 17, 2025
5df59f1
Add Optimized DTW metric
cleong110 Mar 17, 2025
5afc615
add fastdtw dependency
cleong110 Mar 17, 2025
0cddee1
Fix pytests
cleong110 Mar 17, 2025
4937c80
add dtaidistance dep
cleong110 Mar 18, 2025
73e8817
Make BaseMetric and PoseMetric and SegmentedPoseMetric ABCs, satisfyi…
cleong110 Mar 18, 2025
2e8e1b7
ask pylint to ignore the fixture redefining outer name
cleong110 Mar 18, 2025
a4d9746
add ignore too-many-parameters on various functions
cleong110 Mar 18, 2025
aee069f
DistanceMeasure also an abc
cleong110 Mar 18, 2025
145b135
DTW Metric: new dtai metric, and remove double definition.
cleong110 Mar 18, 2025
9033472
pylint ignore on test embedding distance metric.
cleong110 Mar 18, 2025
f97de59
evaluate_signclip: Ask pylint to ignore too many variables
cleong110 Mar 18, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions pose_evaluation/evaluation/evaluate_signclip.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,9 @@ def calculate_class_means(gloss_indices, scores):
# return within_class_means_by_gloss


def evaluate_signclip(emb_dir: Path, split_file: Path, out_path: Path, kind: str = "cosine"):
def evaluate_signclip(
emb_dir: Path, split_file: Path, out_path: Path, kind: str = "cosine"
): # pylint: disable=too-many-locals, too-many-statements
"""
Evaluate SignCLIP embeddings using score_all.

Expand Down Expand Up @@ -263,7 +265,7 @@ def evaluate_signclip(emb_dir: Path, split_file: Path, out_path: Path, kind: str

save_start = time.perf_counter()
class_means_json = out_path.with_name(f"{out_path.stem}_class_means").with_suffix(".json")
with open(class_means_json, "w") as f:
with open(class_means_json, "w", encoding="utf-8") as f:
print(f"Writing class means to {f}")
json.dump(class_means, f)
np.savez(out_path, scores=scores, files=files)
Expand Down
132 changes: 113 additions & 19 deletions pose_evaluation/examples/example_metric_construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,20 @@

from pose_format import Pose

from pose_evaluation.metrics.base import BaseMetric
from pose_evaluation.metrics.distance_measure import AggregatedPowerDistance
from pose_evaluation.metrics.distance_metric import DistanceMetric
from pose_evaluation.metrics.dtw_metric import (
DTWAggregatedPowerDistanceMeasure,
DTWAggregatedScipyDistanceMeasure,
)
from pose_evaluation.metrics.test_distance_metric import get_poses
from pose_evaluation.utils.pose_utils import zero_pad_shorter_poses
from pose_evaluation.metrics.pose_processors import (
NormalizePosesProcessor,
ZeroPadShorterPosesProcessor,
HideLegsPosesProcessor,
ReduceHolisticPoseProcessor,
get_standard_pose_processors,
)

if __name__ == "__main__":
# Define file paths for test pose data
Expand All @@ -16,53 +25,138 @@

# Choose whether to load real files or generate test poses
# They have different lengths, and so some metrics will crash!
# Change to False to generate fake poses with known distances, e.g. all 0 and all 1
# Metrics with ZeroPadShorterPosesProcessor, DTWMetrics are fine.
# Change to False to generate fake poses with known distances, e.g. all 0 and all 1\
USE_REAL_FILES = True

if USE_REAL_FILES:
poses = [
Pose.read(hypothesis_file.read_bytes()),
Pose.read(reference_file.read_bytes()),
]
# TODO: add PosePreprocessors to PoseDistanceMetrics, with their own signatures
poses = zero_pad_shorter_poses(poses)

else:
hypothesis, reference = get_poses(2, 2, conf1=1, conf2=1)
poses = [hypothesis, reference]

hypotheses = [pose.copy() for pose in poses]
references = [pose.copy() for pose in poses]

#############################
# Abstract classes:

# BaseMetric does not actually have score() function
# base_metric = BaseMetric("base")

# PoseMetric calls preprocessors before scoring,
# It is also an abstract class
# PoseMetric("pose base"),

# Segments first, also abstract.
# SegmentedPoseMetric("SegmentedMetric")

# Define distance metrics
mean_l1_metric = DistanceMetric("mean_l1_metric", distance_measure=AggregatedPowerDistance(1, 17))
metrics = [
BaseMetric("base"),
DistanceMetric("PowerDistanceMetric", AggregatedPowerDistance(2, 1)),
DistanceMetric("AnotherPowerDistanceMetric", AggregatedPowerDistance(1, 10)),
mean_l1_metric,
# a DistanceMetric uses a DistanceMeasure to calculate distances between two Poses
# This one is effectively (normalized) Average Position Error (APE)
# as it by default will run zero-padding of the shorter pose, and normalization,
# and AggregatedPowerDistance does mean absolute (euclidean) distances by default.
DistanceMetric(
"max_l1_metric",
AggregatedPowerDistance(order=1, aggregation_strategy="max", default_distance=0),
"NormalizedAveragePositionError",
AggregatedPowerDistance(), #
),
# Customizing Distances
# Distance Measures have signatures as well.
# You can set options on the DistanceMeasure and they will be reflected in the signature.
# This one would be distance_measure:{power_distance|pow:1.0|dflt:1.0|agg:max}
DistanceMetric(
"MeanL2Score",
"MaxL1DistanceMetric",
AggregatedPowerDistance(order=1, default_distance=1, aggregation_strategy="max"), #
),
# Customizing Preprocessing
# A DistanceMetric is a PoseMetric, and so it will call PosePreprocessors before scoring
# get_standard_pose_processors gives you some default options,
# for example you could decide not to remove the legs
DistanceMetric(
"CustomizedPosePreprocessorsWithLegsMetric",
distance_measure=AggregatedPowerDistance("A custom name", order=1, default_distance=10),
pose_preprocessors=get_standard_pose_processors(
remove_legs=False, # If you want the legs
),
),
# Recreating Existing Metrics: Average Position Error/ Mean Joint Error
# As defined in Ham2Pose,
# APE is "the average L2 distance between the predicted and the GT pose keypoints
# across all frames and data samples. Since it compares absolute positions,
# it is sensitive to different body shapes and slight changes
# in timing or position of the performed movement"
# So we:
# - Select AggregatedPowerDistance measure
# - set the order to 2 (Euclidean distance)
# - set the aggregation strategy to mean
# - recreate the set of preprocessors from https://github.com/rotem-shalev/Ham2Pose/blob/main/metrics.py#L32-L62
# (adapting to MediaPipe Holistic keypoints format instead of OpenPose)
DistanceMetric(
"AveragePositionError",
AggregatedPowerDistance(order=2, aggregation_strategy="mean", default_distance=0),
pose_preprocessors=[
NormalizePosesProcessor(),
HideLegsPosesProcessor(),
ZeroPadShorterPosesProcessor(),
ReduceHolisticPoseProcessor(),
],
),
# Recreating Dynamic Time Warping - Mean Joint Error
# As before, only now we use the Dynamic Time Warping version!
DistanceMetric(
"DTWPowerDistance",
DTWAggregatedPowerDistanceMeasure(aggregation_strategy="mean", default_distance=0.0, order=2),
pose_preprocessors=get_standard_pose_processors(
zero_pad_shorter=False, reduce_holistic_to_face_and_upper_body=True
),
),
# We can also implement a version that uses scipy distances "cdist"
# This lets us experiment with e.g. jaccard
# Options are listed at the documentation for scipy:
# https://docs.scipy.org/doc/scipy-1.15.0/reference/generated/scipy.spatial.distance.cdist.html
DistanceMetric(
"DTWScipyDistance",
DTWAggregatedScipyDistanceMeasure(aggregation_strategy="mean", default_distance=0.0, metric="jaccard"),
pose_preprocessors=get_standard_pose_processors(
zero_pad_shorter=False, reduce_holistic_to_face_and_upper_body=True
),
),
]

# Evaluate each metric on the test poses
for metric in metrics:
print("*" * 10)
print(metric.name)

print("\nMETRIC __str__: ")
print(str(metric))

print("\nMETRIC to repr: ")
print(repr(metric))

print("\nSIGNATURE: ")
print(metric.get_signature().format())

print("\nSIGNATURE (short): ")
print(metric.get_signature().format(short=True))

try:
#
print("\nSCORE ALL with Signature (short):")
print(metric.score_all_with_signature(hypotheses, references, short=True, progress_bar=True))

score = metric.score(poses[0], poses[1])
print(f"SCORE: {score}")
print("SCORE With Signature:")
score_with_sig = metric.score_with_signature(poses[0], poses[1])
print(score_with_sig)
print(repr(score_with_sig))
print(f"{type(score_with_sig)}")
print(f"\nSCORE: {score}")

print("\nSCORE With Signature:")
print(metric.score_with_signature(poses[0], poses[1]))

print("\nSCORE with Signature (short):")
print(metric.score_with_signature(poses[0], poses[1], short=True))

except NotImplementedError:
Expand Down
57 changes: 47 additions & 10 deletions pose_evaluation/metrics/base.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
# pylint: disable=undefined-variable
from typing import Any, Callable, Sequence
from abc import ABC, abstractmethod
from typing import Any, Callable, Generic, Sequence, TypeVar

from tqdm import tqdm

T = TypeVar("T")


class Signature:
"""Represents reproducibility signatures for metrics. Inspired by sacreBLEU"""
Expand All @@ -21,7 +23,6 @@ def update_abbr(self, key: str, abbr: str):

def update_signature_and_abbr(self, key: str, abbr: str, args: dict):
self.update_abbr(key, abbr)

self.signature_info.update({key: args.get(key, None)})

def format(self, short: bool = False) -> str:
Expand All @@ -39,6 +40,9 @@ def format(self, short: bool = False) -> str:
nested_signature = value.get_signature()
if isinstance(nested_signature, Signature):
value = "{" + nested_signature.format(short=short) + "}"
elif isinstance(value, list) and all(hasattr(v, "get_signature") for v in value):
value = "[" + ",".join(v.get_signature().format(short=short) for v in value) + "]"

if isinstance(value, bool):
value = "yes" if value else "no"
if isinstance(value, Callable):
Expand All @@ -60,16 +64,31 @@ class Score:
def __init__(self, name: str, score: float, signature: str) -> None:
self.name = name
self.score = score
self._signature = signature
self.signature = signature

def __str__(self):
return f"{self._signature} = {self.score}"
return f"{self.signature} = {self.score}"

def format(
    self,
    width: int = 2,
    score_only: bool = False,
) -> str:
    """Render this score as a human-readable string.

    Args:
        width: number of decimal places used for the score value.
        score_only: if True, return only the formatted number.

    Returns:
        The bare score when ``score_only`` is set, otherwise
        ``"<signature> = <score>"`` (falling back to the metric's
        name when the signature is empty).
    """
    sc = f"{self.score:.{width}f}"

    # Prefer the reproducibility signature; an empty/falsy signature
    # falls back to the metric's plain name.
    full_score = f"{self.signature}" if self.signature else self.name
    full_score = f"{full_score} = {sc}"

    if score_only:
        return sc
    return full_score

def __repr__(self):
return f"Score({super().__repr__()}, signature={repr(self._signature)})"
return self.format()


class BaseMetric[T]:
class BaseMetric(ABC, Generic[T]): # Ensure it extends ABC
"""Base class for all metrics."""

_SIGNATURE_TYPE = Signature
Expand All @@ -81,10 +100,16 @@ def __init__(self, name: str, higher_is_better: bool = False):
def __call__(self, hypothesis: T, reference: T) -> float:
    """Allow the metric object to be used as a plain callable; delegates to score()."""
    return self.score(hypothesis, reference)

@abstractmethod
def score(self, hypothesis: T, reference: T) -> float:
    """Compute the metric score for one hypothesis/reference pair.

    Abstract: subclasses must override. The base implementation
    raises NotImplementedError if somehow invoked.
    """
    raise NotImplementedError

def score_with_signature(self, hypothesis: T, reference: T, short: bool = False) -> Score:
def score_with_signature(
self,
hypothesis: T,
reference: T,
short: bool = False,
) -> Score:
return Score(
name=self.name,
score=self.score(hypothesis, reference),
Expand All @@ -107,15 +132,27 @@ def corpus_score(self, hypotheses: Sequence[T], references: Sequence[list[T]]) -
scores = [self.score_max(h, r) for h, r in zip(hypotheses, transpose_references)]
return sum(scores) / len(hypotheses)

def score_all(self, hypotheses: Sequence[T], references: Sequence[T], progress_bar=True) -> list[list[float]]:
def score_all(self, hypotheses: Sequence[T], references: Sequence[T], progress_bar=False) -> list[list[float]]:
"""Call the score function for each hypothesis-reference pair."""
return [
[self.score(h, r) for r in references]
for h in tqdm(hypotheses, disable=not progress_bar or len(hypotheses) == 1)
]

def score_all_with_signature(
    self,
    hypotheses: Sequence[T],
    references: Sequence[T],
    progress_bar=False,
    short: bool = False,
) -> list[list[Score]]:
    """Score every hypothesis against every reference, with signatures.

    Args:
        hypotheses: candidate items, one row of scores per hypothesis.
        references: reference items, one column of scores per reference.
        progress_bar: show a tqdm bar over hypotheses when True.
        short: use the abbreviated signature format in each Score.

    Returns:
        A len(hypotheses) x len(references) matrix of Score objects.
    """
    return [
        [self.score_with_signature(h, r, short=short) for r in references]
        # The bar is suppressed when disabled or when there is only a
        # single hypothesis (nothing meaningful to show progress over).
        for h in tqdm(hypotheses, disable=not progress_bar or len(hypotheses) == 1)
    ]

def __str__(self):
return self.name
return str(self.get_signature())

def get_signature(self) -> Signature:
    """Build a reproducibility signature from this metric's name and attributes."""
    return self._SIGNATURE_TYPE(self.name, self.__dict__)
Loading