sign-language-processing · AmitMY · Jan 16, 2024 · Jan 4, 2024 · Jan 4, 2024 · Jan 4, 2024
diff --git a/README.md b/README.md
@@ -22,6 +22,7 @@ the more effective the evaluation metric is in capturing the nuances of sign lan
 - ✅ [Tokenized BLEU](signwriting_evaluation/metrics/bleu.py) - BLEU score for tokenized SignWriting FSW strings.
 - ✅ [chrF](signwriting_evaluation/metrics/chrf.py) - chrF score for untokenized SignWriting FSW strings.
 - ✅ [CLIPScore](signwriting_evaluation/metrics/clipscore.py) - CLIPScore between SignWriting images. (Using the original CLIP model)
+- ✅ [SymbolDistance](signwriting_evaluation/metrics/symbol_distance.py) - symbol distance score for SignWriting FSW strings [(README)](signwriting_evaluation/metrics/symbol_distance.md).
 
 ## Qualitative Evaluation
 
@@ -80,5 +81,4 @@ For each sign and metric, either the first match is incorrect, or there is a mor
       [^4]: Jack Hessel, Ari Holtzman, Maxwell Forbes, Ronan Le Bras, and Yejin Choi.
 2021. [CLIPScore: A Reference-free Evaluation Metric for Image Captioning](https://aclanthology.org/2021.emnlp-main.595/).
       In Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing, pages 7514–7528, Online
-      and
-      Punta Cana, Dominican Republic. Association for Computational Linguistics.
+      and Punta Cana, Dominican Republic. Association for Computational Linguistics.
diff --git a/assets/equations/graph1.png b/assets/equations/graph1.png
diff --git a/assets/equations/graph2.png b/assets/equations/graph2.png
diff --git a/pyproject.toml b/pyproject.toml
@@ -7,7 +7,7 @@ authors = [
 ]
 readme = "README.md"
 dependencies = [
-    "signwriting @ git+https://github.com/sign-language-processing/signwriting",
+    "signwriting @ git+https://github.com/sign-language-processing/signwriting", # For Symbol Distance
     "sacrebleu", # For BLEU and chrF
     "transformers", # For CLIP features extraction
     "diskcache", # For CLIP features caching
@@ -20,7 +20,8 @@ dev = [
     "pylint",
     # to plot metric evaluation results
     "matplotlib",
-    "numpy"
+    "numpy",
+    "scipy"
 ]
 
 [tool.yapf]

diff --git a/signwriting_evaluation/evaluation/closest_matches.py b/signwriting_evaluation/evaluation/closest_matches.py
@@ -4,11 +4,11 @@
 import numpy as np
 
 from signwriting.visualizer.visualize import signwriting_to_image
-
 from signwriting_evaluation.metrics.base import SignWritingMetric
 from signwriting_evaluation.metrics.bleu import SignWritingBLEU
 from signwriting_evaluation.metrics.chrf import SignWritingCHRF
 from signwriting_evaluation.metrics.clip import SignWritingCLIPScore
+from signwriting_evaluation.metrics.symbol_distance import SignWritingSimilarityMetric
 
 
 CURRENT_DIR = Path(__file__).parent
@@ -84,6 +84,7 @@ def metrics_distribution(signs: list[str], metrics: list[SignWritingMetric]):
     print(f"Found {len(single_signs)} signs")
 
     all_metrics = [
+        SignWritingSimilarityMetric(),
         SignWritingBLEU(),
         SignWritingCHRF(),
         SignWritingCLIPScore(cache_directory=None),

diff --git a/signwriting_evaluation/metrics/symbol_distance.md b/signwriting_evaluation/metrics/symbol_distance.md
@@ -0,0 +1,35 @@
+# Evaluation metric for SignWriting
+### Introduction
+This code introduces a novel metric for assessing the similarity of two phrases written
+in Formal SignWriting (FSW). Unlike generic string comparison methods like BLEU and CHRF, our approach
+is tailored to the unique characteristics and rules of SignWriting, offering a task-specific evaluation.
+
+### Evaluation Method
+Our method addresses key aspects of SignWriting, such as:
+
+- Symbols are organized in the FSW dictionary to reflect their types (e.g., hand signals, motion, touch), with proximity
+indicating visual and semantic closeness.
+- The symbols forming a sign can be written in different orders while still representing the same visual output.
+- Each symbol part has distinct meaning and importance, emphasizing aspects like symbol type, facing direction, angle, and position.
+
+### Main concept
+The evaluation process is built on three main stages, each with its own purpose:
+1. Symbol Distance Function: Evaluates the similarity between two symbols based on SignWriting rules, applying custom
+weights to the different kinds of differences between symbols.
+2. Distance Normalization: Normalizes distance values using the following non-linear function for better representation.
+
+![Graph of f(x) = x^{\frac{1}{3}}](/assets/equations/graph1.png)
+
+$$
+f(x) = x^{\frac{1}{3}}
+$$
+
+3. Matching and Grading: Utilizes symbol distances to generalize similarity to entire signs. The Hungarian algorithm
+finds the lowest-cost matching between the symbols of the two signs, and the mean cost of the matched pairs is then
+combined with a penalty for the difference in the number of symbols; the weight of that penalty is calculated using the formula below.
+
+![Graph of f(x) = x^{\frac{3}{2}}](/assets/equations/graph2.png)
+
+
+$$
+f(x) = x^{\frac{3}{2}}
+$$
diff --git a/signwriting_evaluation/metrics/symbol_distance.py b/signwriting_evaluation/metrics/symbol_distance.py
@@ -0,0 +1,102 @@
+from typing import Tuple
+
+import numpy as np
+from scipy.optimize import linear_sum_assignment
+from scipy.spatial import distance as dis
+from signwriting.types import Sign, SignSymbol
+from signwriting.formats.fsw_to_sign import fsw_to_sign
+from signwriting_evaluation.metrics.base import SignWritingMetric
+
+
+class SignWritingSimilarityMetric(SignWritingMetric):
+    def __init__(self):
+        super().__init__("SymbolsDistances")
+        self.symbol_classes = {
+            'hands_shapes': range(0x100, 0x205),
+            'contact_symbols': range(0x205, 0x221),
+            'movement_paths': range(0x221, 0x2FF),
+            'head_movement': range(0x2FF, 0x30A),
+            'facial_expressions': range(0x30A, 0x36A),
+            'etc': range(0x36A, 0x38C)
+        }
+        self.weight = {
+            "shape": 5,  # same weight as switching parallelization
+            "facing": 5/3,  # more important than angle, not as much as shape and orientation
+            "angle": 5/24,  # lowest importance out of the criteria
+            "parallel": 5,  # parallelization is 3 columns compare to 1 for the facing direction
+            "positional": 1/10,  # may be big values
+            "normalized_factor": 1 / 2.5,  # fitting shape of function
+            "exp_factor": 1.5,  # exponential distribution
+            "class_penalty": 100,  # big penalty for each class type passed
+        }
+        self.max_distance = self.calculate_distance({"symbol": "S10000", "position": (250, 250)},
+                                                    {"symbol": "S38b07", "position": (750, 750)})
+
+    def get_shape_class_index(self, symbol_attribute) -> int:
+        shape = symbol_attribute[0]
+        return next((i for i, r in enumerate(self.symbol_classes.values()) if shape in r), None)
+
+    def get_attributes(self, symbol: SignSymbol) -> Tuple[int, int, int, bool]:
+        shape = int(symbol['symbol'][1:4], 16)
+        facing = int(symbol['symbol'][4], 16)
+        angle = int(symbol['symbol'][5], 16)
+        parallel = facing > 2
+        return shape, facing, angle, parallel
+
+    def weight_vector(self, vector: Tuple[int, int, int, bool]) -> Tuple[float, ...]:
+        weights = [self.weight["shape"], self.weight["angle"], self.weight["facing"], self.weight["parallel"]]
+        weighted_values = [float(val * weight) for val, weight in zip(vector, weights)]
+        return tuple(weighted_values)
+
+    # return to this
+
+    def calculate_distance(self, hyp: SignSymbol, ref: SignSymbol) -> float:
+        hyp_veq = self.get_attributes(hyp)
+        ref_veq = self.get_attributes(ref)
+
+        hyp_class = self.get_shape_class_index(hyp_veq)
+        ref_class = self.get_shape_class_index(ref_veq)
+
+        hyp_veq = self.weight_vector(hyp_veq)
+        ref_veq = self.weight_vector(ref_veq)
+        distance = (dis.euclidean(hyp_veq, ref_veq) +
+                    self.weight["positional"] * dis.euclidean(hyp["position"], ref["position"]))
+        distance = distance + abs(hyp_class - ref_class) * self.weight["class_penalty"]
+        return distance
+
+    def normalized_distance(self, unnormalized: float) -> float:
+        return pow(unnormalized / self.max_distance, self.weight["normalized_factor"])
+
+    def symbols_score(self, hyp: SignSymbol, ref: SignSymbol) -> float:
+        distance = self.calculate_distance(hyp, ref)
+        normalized = self.normalized_distance(distance)
+        return normalized
+
+    def length_acc(self, hyp: Sign, ref: Sign) -> float:
+        hyp = hyp["symbols"]
+        ref = ref["symbols"]
+        # plus 1 for the box symbol
+        return abs(len(hyp) - len(ref)) / (max(len(hyp), len(ref)) + 1)
+
+    def error_rate(self, hyp: Sign, ref: Sign) -> float:
+        # Calculate the evaluate score for a given hypothesis and ref.
+        if (not hyp["symbols"] and ref["symbols"]) or (hyp["symbols"] and not ref["symbols"]):
+            return 1
+        cost_matrix = np.array(
+            [self.symbols_score(first, second) for first in hyp["symbols"] for second in ref["symbols"]])
+        cost_matrix = cost_matrix.reshape(len(hyp["symbols"]), -1)
+        # Find the lowest cost matching
+        row_ind, col_ind = linear_sum_assignment(cost_matrix)
+        pairs = list(zip(row_ind, col_ind))
+        # Print the matching and total cost
+        values = [cost_matrix[row, col] for row, col in pairs]
+        mean_cost = sum(values) / len(values)
+        length_error = self.length_acc(hyp, ref)
+        length_weight = pow(length_error, self.weight["exp_factor"])
+        return length_weight + mean_cost * (1 - length_weight)
+
+    def score(self, hypothesis: str, reference: str) -> float:
+        # Calculate the evaluate score for a given hypothesis and ref.
+        hyp = fsw_to_sign(hypothesis)
+        ref = fsw_to_sign(reference)
+        return pow(1 - self.error_rate(hyp, ref), 2)
diff --git a/signwriting_evaluation/metrics/test_symbol_distance.py b/signwriting_evaluation/metrics/test_symbol_distance.py
@@ -0,0 +1,30 @@
+import unittest
+from signwriting_evaluation.metrics.symbol_distance import SignWritingSimilarityMetric
+
+
+class TestSignWritingSymbolDistance(unittest.TestCase):
+    def setUp(self):
+        self.metric = SignWritingSimilarityMetric()
+
+    def test_score(self):
+        hypothesis = "M530x538S37602508x462S15a11493x494S20e00488x510S22f03469x517"
+        reference = "M519x534S37900497x466S3770b497x485S15a51491x501S22f03481x513"
+        score = self.metric.score(hypothesis, reference)
+        self.assertIsInstance(score, float)  # Check if the score is a float
+        self.assertAlmostEqual(score, 0.5509574768254414)
+
+        hypothesis = "M530x538S37602508x462S15a11493x494S20e00488x510S22f03469x517"
+        reference = "M530x538S22f03469x517S37602508x462S20e00488x510S15a11493x494"
+        score = self.metric.score(hypothesis, reference)
+        self.assertIsInstance(score, float)  # Check if the score is a float
+        self.assertAlmostEqual(score, 1)
+
+        hypothesis = "M530x538S17600508x462S15a11493x494S20e00488x510S22f03469x517"
+        reference = "M530x538S17600508x462S12a11493x494S20e00488x510S22f13469x517"
+        score = self.metric.score(hypothesis, reference)
+        self.assertIsInstance(score, float)
+        self.assertAlmostEqual(score, 0.8326259781509948)
+
+
+# Run the tests of this module when the file is executed directly.
+if __name__ == '__main__':
+    unittest.main()