Merge pull request #65 from MochiXu/mochix/support_min_max_inverted_norm

Support `min-max-inverted` norm
AmenRa · Jul 1, 2024 · 1a078b7 · 1a078b7
2 parents 94fe982 + 93af953
commit 1a078b7
Show file tree

Hide file tree

Showing 4 changed files with 60 additions and 16 deletions.
diff --git a/docs/normalization.md b/docs/normalization.md
@@ -4,14 +4,16 @@
 Normalization aims at transforming the scores of a result list into new values to make them comparable with those of other normalized result lists, which is mandatory for correctly applying many of the provided fusion methods.
 The normalization strategy to apply before fusion can be defined through the `norm` parameter of the functions `fuse` and `optimize_fusion` (defaults to `min-max`).
 
-| **Normalization Strategies** | **Alias** |
-| ---------------------------- | --------- |
-| [Min-Max Norm][min-max-norm] | min-max   |
-| [Max Norm][max-norm]         | max       |
-| [Sum Norm][sum-norm]         | sum       |
-| [ZMUV Norm][zmuv-norm]       | zmuv      |
-| [Rank Norm][rank-norm]       | rank      |
-| [Borda Norm][borda-norm]     | borda     |
+| **Normalization Strategies**                   | **Alias**         |
+|------------------------------------------------|-------------------|
+| [Min-Max Norm][min-max-norm]                   | min-max           |
+| [Min-Max-Inverted Norm][min-max-norm-inverted] | min-max-inverted  |
+| [Max Norm][max-norm]                           | max               |
+| [Sum Norm][sum-norm]                           | sum               |
+| [ZMUV Norm][zmuv-norm]                         | zmuv              |
+| [Rank Norm][rank-norm]                         | rank              |
+| [Borda Norm][borda-norm]                       | borda             |
+
 
 ## Min-Max Norm
 ---
@@ -21,6 +23,14 @@ $$
 \operatorname{MinMaxNorm(s)}=\frac{s - s_{min}}{s_{max} - s_{min}}
 $$
 
+## Min-Max Inverted Norm
+---
+Min-Max Inverted Norm scales the scores (s) of a result list between 0 and 1, scaling to 1 the minimum score ($s_{min}$) and to 0 the maximum score ($s_{max}$).
+
+$$
+\operatorname{MinMaxInvertedNorm(s)}=\frac{s_{max} - s}{s_{max} - s_{min}}
+$$
+
 ## Max Norm
 ---
 Max Norm scales the scores (s) of a result list so that the maximum score ($s_{max}$) is scaled to 1.

diff --git a/ranx/normalization/__init__.py b/ranx/normalization/__init__.py
@@ -21,7 +21,9 @@ def norm_switch(method: str = "min-max"):
     elif method == "max":
         return max_norm
     elif method in {"min_max", "min-max"}:
-        return min_max_norm
+        return lambda run: min_max_norm(run, invert=False)
+    elif method in {"min_max_inverted", "min-max-inverted"}:
+        return lambda run: min_max_norm(run, invert=True)
     elif method == "rank":
         return rank_norm
     elif method == "sum":

diff --git a/ranx/normalization/min_max_norm.py b/ranx/normalization/min_max_norm.py
@@ -15,7 +15,7 @@
 
 # LOW LEVEL FUNCTIONS ==========================================================
 @njit(cache=True)
-def _min_max_norm(results):
+def _min_max_norm(results, invert):
     """Apply `min-max norm` to a given results dictionary."""
     scores = extract_scores(results)
     min_score = safe_min(scores)
@@ -26,25 +26,28 @@ def _min_max_norm(results):
     normalized_results = create_empty_results_dict()
     for doc_id in results.keys():
         doc_id = to_unicode(doc_id)
-        normalized_results[doc_id] = (results[doc_id] - min_score) / (denominator)
+        if invert:
+            normalized_results[doc_id] = (max_score - results[doc_id]) / (denominator)
+        else:
+            normalized_results[doc_id] = (results[doc_id] - min_score) / (denominator)
 
     return normalized_results
 
 
 @njit(cache=True, parallel=True)
-def _min_max_norm_parallel(run):
+def _min_max_norm_parallel(run, invert):
     """Apply `min_max norm` to a each results dictionary of a run in parallel."""
     q_ids = TypedList(run.keys())
 
     normalized_run = create_empty_results_dict_list(len(q_ids))
     for i in prange(len(q_ids)):
-        normalized_run[i] = _min_max_norm(run[q_ids[i]])
+        normalized_run[i] = _min_max_norm(run[q_ids[i]], invert)
 
     return convert_results_dict_list_to_run(q_ids, normalized_run)
 
 
 # HIGH LEVEL FUNCTIONS =========================================================
-def min_max_norm(run: Run) -> Run:
+def min_max_norm(run: Run, invert: bool = False) -> Run:
     """Apply `min_max norm` to a given run.
 
     Args:
@@ -55,5 +58,7 @@ def min_max_norm(run: Run) -> Run:
     """
     normalized_run = Run()
     normalized_run.name = run.name
-    normalized_run.run = _min_max_norm_parallel(run.run)
+    normalized_run.run = _min_max_norm_parallel(run.run, invert)
+    if invert:
+        normalized_run.sort()
     return normalized_run
diff --git a/tests/unit/ranx/normalization/min_max_norm_test.py b/tests/unit/ranx/normalization/min_max_norm_test.py
@@ -1,7 +1,7 @@
 import pytest
 
 from ranx import Run
-from ranx.normalization import min_max_norm
+from ranx.normalization import min_max_norm, norm_switch
 
 
 # FIXTURES =====================================================================
@@ -41,3 +41,30 @@ def test_min_max_norm(run):
     assert norm_run["q1"]["d3"] == (3 - 1) / (3 - 1)
     assert norm_run["q2"]["d1"] == (1 - 1) / (2 - 1)
     assert norm_run["q2"]["d2"] == (2 - 1) / (2 - 1)
+
+
+@pytest.mark.parametrize("norm_name", ["min_max_inverted", "min-max-inverted"])
+def test_min_max_norm_with_invert(run, norm_name):
+    run_copy = run.run.copy()
+    assert run.run == run_copy
+
+    assert len(run.run) == 2
+    assert len(run.run["q1"]) == 3
+    assert len(run.run["q2"]) == 2
+    assert run.run["q1"]["d1"] == 1
+    assert run.run["q1"]["d2"] == 2
+    assert run.run["q1"]["d3"] == 3
+    assert run.run["q2"]["d1"] == 1
+    assert run.run["q2"]["d2"] == 2
+    assert run.size == 2
+
+    norm_run = norm_switch(norm_name)(run)
+
+    assert len(norm_run) == 2
+    assert len(norm_run["q1"]) == 3
+    assert len(norm_run["q2"]) == 2
+    assert norm_run["q1"]["d1"] == (3 - 1) / (3 - 1)
+    assert norm_run["q1"]["d2"] == (3 - 2) / (3 - 1)
+    assert norm_run["q1"]["d3"] == (3 - 3) / (3 - 1)
+    assert norm_run["q2"]["d1"] == (2 - 1) / (2 - 1)
+    assert norm_run["q2"]["d2"] == (2 - 2) / (2 - 1)