Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit — hold Shift + click to select a range
5404307
Adding DistanceMeasure signature
cleong110 Feb 11, 2025
6ef9b51
Throwing in some .gitignores
cleong110 Feb 11, 2025
6606e8b
Implemented and tested PowerDistance
cleong110 Feb 11, 2025
2150c21
Merge branch 'main' into distance_measure_signature
cleong110 Feb 27, 2025
28e5aba
Add a ScoreWithSignature, and do some pylint fixes
cleong110 Feb 27, 2025
09e85cd
More pylint updates
cleong110 Feb 27, 2025
7cb1561
Take out unused test
cleong110 Feb 27, 2025
7965943
dedupe gitignore
cleong110 Feb 27, 2025
ab3a88d
cross platform paths
cleong110 Feb 27, 2025
ac8f6f6
np.ma to ma
cleong110 Feb 27, 2025
6e61688
Cleaned up example metrics
cleong110 Feb 28, 2025
f2e425b
Distance Measure cleanup and documentation, created with chatgpt help…
cleong110 Feb 28, 2025
8ab300e
Add some docstrings to DistanceMetric
cleong110 Feb 28, 2025
3b0c820
example metric construction uses generated poses by default
cleong110 Feb 28, 2025
00415f3
More updates to example script, including zero-padding
cleong110 Feb 28, 2025
9f37ee4
Remove unused/commented UnitTest
cleong110 Feb 28, 2025
d58833a
change constant naming for pylint
cleong110 Feb 28, 2025
d5a34ec
cleanup of test_distance_metric.py
cleong110 Feb 28, 2025
32233d9
Some cleanup of base including slightly simplified format()
cleong110 Feb 28, 2025
7d4843c
Don't print name: or n:
cleong110 Feb 28, 2025
2d8c800
Basic Score class
cleong110 Mar 5, 2025
4702d6e
Trying to fix a pytest bug
cleong110 Mar 5, 2025
55833ad
Unnecessary else after return
cleong110 Mar 6, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ build/
pose_evaluation.egg-info/
**/__pycache__/
.coverage
.vscode/
coverage.lcov
**/test_data/
*.npz
*.code-workspace
.vscode/
coverage.lcov
77 changes: 77 additions & 0 deletions pose_evaluation/examples/example_metric_construction.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
from pathlib import Path
from pose_format import Pose
from pose_evaluation.metrics.distance_metric import DistanceMetric
from pose_evaluation.metrics.distance_measure import AggregatedPowerDistance
from pose_evaluation.metrics.base import BaseMetric
from pose_evaluation.metrics.test_distance_metric import get_poses
from pose_evaluation.utils.pose_utils import zero_pad_shorter_poses

if __name__ == "__main__":
    # Location of the checked-in test pose files.
    test_data_dir = Path("pose_evaluation") / "utils" / "test" / "test_data"
    reference_file = test_data_dir / "colin-1-HOUSE.pose"
    hypothesis_file = test_data_dir / "colin-2-HOUSE.pose"

    # Choose whether to load real files or generate test poses
    # They have different lengths, and so some metrics will crash!
    # Change to False to generate fake poses with known distances, e.g. all 0 and all 1
    USE_REAL_FILES = True

    if USE_REAL_FILES:
        poses = [
            Pose.read(hypothesis_file.read_bytes()),
            Pose.read(reference_file.read_bytes()),
        ]
        # TODO: add PosePreprocessors to PoseDistanceMetrics, with their own signatures
        poses = zero_pad_shorter_poses(poses)
    else:
        hypothesis, reference = get_poses(2, 2, conf1=1, conf2=1)
        poses = [hypothesis, reference]

    # Define distance metrics; mean_l1_metric is named so it reads clearly below.
    mean_l1_metric = DistanceMetric(
        "mean_l1_metric", distance_measure=AggregatedPowerDistance(1, 17)
    )

    metrics = [BaseMetric("base")]
    metrics.append(
        DistanceMetric("PowerDistanceMetric", AggregatedPowerDistance(2, 1))
    )
    metrics.append(
        DistanceMetric("AnotherPowerDistanceMetric", AggregatedPowerDistance(1, 10))
    )
    metrics.append(mean_l1_metric)
    metrics.append(
        DistanceMetric(
            "max_l1_metric",
            AggregatedPowerDistance(
                order=1, aggregation_strategy="max", default_distance=0
            ),
        )
    )
    metrics.append(
        DistanceMetric(
            "MeanL2Score",
            AggregatedPowerDistance(
                order=2, aggregation_strategy="mean", default_distance=0
            ),
        )
    )

    # Evaluate each metric on the test poses, printing both signature formats.
    for metric in metrics:
        print("*" * 10)
        signature = metric.get_signature()
        print(signature.format())
        print(signature.format(short=True))

        try:
            score = metric.score(poses[0], poses[1])
            print(f"SCORE: {score}")
            print("SCORE With Signature:")
            score_with_sig = metric.score_with_signature(poses[0], poses[1])
            print(score_with_sig)
            print(repr(score_with_sig))
            print(f"{type(score_with_sig)}")

            print(metric.score_with_signature(poses[0], poses[1], short=True))

        except NotImplementedError:
            # BaseMetric.score is abstract; report and continue.
            print(f"{metric} score not implemented")
        print("*" * 10)
140 changes: 85 additions & 55 deletions pose_evaluation/metrics/base.py
Original file line number Diff line number Diff line change
@@ -1,67 +1,76 @@
# pylint: disable=undefined-variable
from typing import Any, Callable
from typing import Any, Callable, Sequence
from tqdm import tqdm


class Signature:
    """Represents reproducibility signatures for metrics. Inspired by sacreBLEU.

    Holds a ``name`` plus arbitrary key/value pairs and renders them as a
    ``|``-joined string; ``short=True`` substitutes registered abbreviations.
    """

    def __init__(self, name: str, args: dict):
        """Create a signature named *name* with extra info from *args*.

        :param name: metric name; always rendered first, without a key prefix.
        :param args: additional key/value pairs to include in the signature.
        """
        # Built-in abbreviations; subclasses add more via update_abbr().
        self._abbreviated = {"name": "n", "higher_is_better": "hb"}
        self.signature_info = {"name": name, **args}

    def update(self, key: str, value: Any):
        """Set (or overwrite) one signature entry."""
        self.signature_info[key] = value

    def update_abbr(self, key: str, abbr: str):
        """Register *abbr* as the short form of *key* for format(short=True)."""
        self._abbreviated.update({key: abbr})

    def update_signature_and_abbr(self, key: str, abbr: str, args: dict):
        """Register an abbreviation and copy *key*'s value out of *args*.

        Missing keys are stored as None and therefore omitted by format().
        """
        self.update_abbr(key, abbr)
        self.signature_info.update({key: args.get(key, None)})

    def format(self, short: bool = False) -> str:
        """Render the signature as ``name|key:value|...``.

        None values are skipped, booleans become yes/no, callables are shown
        by __name__, and nested objects exposing get_signature() that return a
        Signature are recursively formatted inside curly braces.
        """
        parts = []
        # Always print the "name" value first, if available.
        name_value = self.signature_info.get("name")
        if name_value is not None:
            parts.append(str(name_value))
        # Process all other keys.
        for key, value in self.signature_info.items():
            if key == "name" or value is None:
                continue
            # Handle nested signature objects and wrap them in curly braces.
            # NOTE(review): if get_signature() returns something other than a
            # Signature, the value is left unchanged — confirm that is intended.
            if hasattr(value, "get_signature"):
                nested_signature = value.get_signature()
                if isinstance(nested_signature, Signature):
                    value = "{" + nested_signature.format(short=short) + "}"
            if isinstance(value, bool):
                value = "yes" if value else "no"
            if isinstance(value, Callable):
                value = value.__name__
            abbreviated_key = self._abbreviated.get(key, key) if short else key
            parts.append(f"{abbreviated_key}:{value}")
        return "|".join(parts)

    def __str__(self) -> str:
        return self.format()

    def __repr__(self) -> str:
        return self.format()


class Score:
    """Inspired by Sacrebleu, a base score class which can add signature information after the value."""

    def __init__(self, name: str, score: float, signature: str) -> None:
        """Bundle a numeric *score* with the metric *name* and its formatted *signature*."""
        self.name = name
        self.score = score
        self._signature = signature

    def __str__(self):
        # Human-readable: "<signature> = <value>".
        return f"{self._signature} = {self.score}"

    def __repr__(self):
        # Deliberately uses object.__repr__ for the instance-identity part.
        # (Removed a stray leftover line `return self.format()` — Score has no
        # format() method, so it would have raised AttributeError and made the
        # real return unreachable.)
        return f"Score({super().__repr__()}, signature={repr(self._signature)})"


class BaseMetric[T]:
"""Base class for all metrics."""
# Each metric should define its Signature class' name here

_SIGNATURE_TYPE = Signature

def __init__(self, name: str, higher_is_better: bool = False):
Expand All @@ -74,26 +83,47 @@ def __call__(self, hypothesis: T, reference: T) -> float:
def score(self, hypothesis: T, reference: T) -> float:
    """Score a single hypothesis/reference pair; subclasses must override."""
    raise NotImplementedError

def score_with_signature(
    self, hypothesis: T, reference: T, short: bool = False
) -> Score:
    """Score one pair and wrap the result in a Score carrying this metric's signature.

    :param hypothesis: hypothesis input passed straight to score().
    :param reference: reference input passed straight to score().
    :param short: if True, the bundled signature uses abbreviated keys.
    :return: a Score with the metric name, numeric value, and formatted signature.
    """
    # (Removed a stray leftover diff line — the OLD `score_max` signature —
    # that was fused onto this method in the pasted diff.)
    return Score(
        name=self.name,
        score=self.score(hypothesis, reference),
        signature=self.get_signature().format(short=short),
    )

def score_max(self, hypothesis: T, references: Sequence[T]) -> float:
    """Return the best score of *hypothesis* against any of *references*."""
    rows = self.score_all([hypothesis], references)
    best_per_row = [max(row) for row in rows]
    return max(best_per_row)

def validate_corpus_score_input(
    self, hypotheses: Sequence[T], references: Sequence[Sequence[T]]
):
    """Check that every reference list is parallel to the hypotheses list.

    This method is designed to avoid mistakes in the use of the corpus_score
    method.

    :raises AssertionError: if any reference list's length differs.
    """
    # NOTE(review): assert is stripped under `python -O`; consider raising
    # ValueError if this validation must survive optimized runs.
    for reference in references:
        assert len(hypotheses) == len(
            reference
        ), "Hypothesis and reference must have the same number of instances"

def corpus_score(
    self, hypotheses: Sequence[T], references: Sequence[list[T]]
) -> float:
    """Default implementation: average over sentence scores.

    Each hypothesis is scored against its column of references via score_max,
    and the per-hypothesis maxima are averaged.

    :raises ZeroDivisionError: if *hypotheses* is empty.
    """
    self.validate_corpus_score_input(hypotheses, references)
    # Transpose so each hypothesis lines up with its tuple of references.
    transpose_references = list(zip(*references))
    scores = [
        self.score_max(h, r) for h, r in zip(hypotheses, transpose_references)
    ]
    return sum(scores) / len(hypotheses)

def score_all(
    self, hypotheses: Sequence[T], references: Sequence[T], progress_bar=True
) -> list[list[float]]:
    """Score every hypothesis against every reference (full cross-product)."""
    results = []
    # Progress bar is suppressed when disabled or for a single hypothesis.
    for hyp in tqdm(hypotheses, disable=not progress_bar or len(hypotheses) == 1):
        results.append([self.score(hyp, ref) for ref in references])
    return results

def __str__(self):
    """Human-readable form: just the metric's name."""
    return self.name
Expand Down
115 changes: 115 additions & 0 deletions pose_evaluation/metrics/distance_measure.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
from typing import Literal, Dict, Any
import numpy.ma as ma # pylint: disable=consider-using-from-import
from pose_evaluation.metrics.base import Signature

# Names of the reduction strategies AggregatedPowerDistance accepts.
AggregationStrategy = Literal["max", "min", "mean", "sum"]

class DistanceMeasureSignature(Signature):
    """Signature for distance measure metrics."""

    def __init__(self, name: str, args: Dict[str, Any]) -> None:
        super().__init__(name=name, args=args)
        # Short forms used when format(short=True) is requested.
        for full_key, short_key in (("distance", "dist"), ("power", "pow")):
            self.update_abbr(full_key, short_key)


class DistanceMeasure:
    """Abstract base class for distance measures.

    Subclasses implement get_distance(); instances are callable and delegate
    to it.
    """

    # Subclasses may override to attach a more specific signature class.
    _SIGNATURE_TYPE = DistanceMeasureSignature

    def __init__(self, name: str) -> None:
        self.name = name

    def get_signature(self) -> Signature:
        """Return the signature of the distance measure."""
        # Every instance attribute becomes a signature argument.
        return self._SIGNATURE_TYPE(self.name, self.__dict__)

    def get_distance(self, hyp_data: ma.MaskedArray, ref_data: ma.MaskedArray) -> float:
        """Compute the distance between hypothesis and reference data.

        This method should be implemented by subclasses.
        """
        raise NotImplementedError

    def __call__(self, hyp_data: ma.MaskedArray, ref_data: ma.MaskedArray) -> float:
        """Calling the measure is shorthand for get_distance()."""
        return self.get_distance(hyp_data, ref_data)


class PowerDistanceSignature(DistanceMeasureSignature):
    """Signature for power distance measures."""

    def __init__(self, name: str, args: Dict[str, Any]) -> None:
        super().__init__(name=name, args=args)
        # Pull each configuration value out of args and register its short form.
        for key, abbr in (
            ("order", "ord"),
            ("default_distance", "dflt"),
            ("aggregation_strategy", "agg"),
        ):
            self.update_signature_and_abbr(key, abbr, args)


class AggregatedPowerDistance(DistanceMeasure):
    """Aggregated power distance metric using a specified aggregation strategy."""

    _SIGNATURE_TYPE = PowerDistanceSignature

    def __init__(
        self,
        order: int = 2,
        default_distance: float = 0.0,
        aggregation_strategy: AggregationStrategy = "mean",
    ) -> None:
        """
        Initialize the aggregated power distance metric.

        :param order: The exponent to which differences are raised.
        :param default_distance: The value to fill in for masked entries.
        :param aggregation_strategy: Strategy to aggregate computed distances.
        """
        super().__init__(name="power_distance")
        # Stored as float so the 1/power root below uses true division.
        self.power = float(order)
        self.default_distance = default_distance
        self.aggregation_strategy = aggregation_strategy

    def _aggregate(self, distances: ma.MaskedArray) -> float:
        """
        Aggregate computed distances using the specified strategy.

        :param distances: A masked array of computed distances.
        :return: A single aggregated distance value.
        :raises NotImplementedError: for strategies other than mean/max/min/sum.
        """
        if self.aggregation_strategy not in ("mean", "max", "min", "sum"):
            raise NotImplementedError(
                f"Aggregation Strategy {self.aggregation_strategy} not implemented"
            )
        # Each supported strategy maps 1:1 onto a masked-array method.
        return getattr(distances, self.aggregation_strategy)()

    def _calculate_distances(
        self, hyp_data: ma.MaskedArray, ref_data: ma.MaskedArray
    ) -> ma.MaskedArray:
        """
        Compute element-wise distances between hypothesis and reference data.

        Pipeline: absolute differences -> raise to `power` -> sum over the
        last axis -> take the 1/power root -> fill masked entries with the
        default distance.

        :param hyp_data: Hypothesis data as a masked array.
        :param ref_data: Reference data as a masked array.
        :return: array of per-point distances (masked entries filled).
        """
        abs_diffs = ma.abs(hyp_data - ref_data)
        powered = ma.power(abs_diffs, self.power)
        per_point_sums = ma.sum(powered, axis=-1, keepdims=True)
        rooted = ma.power(per_point_sums, 1 / self.power)
        return ma.filled(rooted, self.default_distance)

    def get_distance(self, hyp_data: ma.MaskedArray, ref_data: ma.MaskedArray) -> float:
        """Compute and aggregate the distance between hypothesis and reference data."""
        return self._aggregate(self._calculate_distances(hyp_data, ref_data))
Loading
Loading