Skip to content

Commit

Permalink
Merge pull request #65 from MochiXu/mochix/support_min_max_inverted_norm
Browse files Browse the repository at this point in the history
Support `min-max-inverted` norm
  • Loading branch information
AmenRa authored Jul 1, 2024
2 parents 94fe982 + 93af953 commit 1a078b7
Show file tree
Hide file tree
Showing 4 changed files with 60 additions and 16 deletions.
26 changes: 18 additions & 8 deletions docs/normalization.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,16 @@
Normalization aims at transforming the scores of a result list into new values to make them comparable with those of other normalized result lists, which is mandatory for correctly applying many of the provided fusion methods.
The normalization strategy to apply before fusion can be defined through the `norm` parameter of the functions `fuse` and `optimize_fusion` (defaults to `min-max`).

| **Normalization Strategies** | **Alias** |
| ---------------------------- | --------- |
| [Min-Max Norm][min-max-norm] | min-max |
| [Max Norm][max-norm] | max |
| [Sum Norm][sum-norm] | sum |
| [ZMUV Norm][zmuv-norm] | zmuv |
| [Rank Norm][rank-norm] | rank |
| [Borda Norm][borda-norm] | borda |
| **Normalization Strategies** | **Alias** |
|------------------------------------------------|-------------------|
| [Min-Max Norm][min-max-norm] | min-max |
| [Min-Max-Inverted Norm][min-max-norm-inverted] | min-max-inverted |
| [Max Norm][max-norm] | max |
| [Sum Norm][sum-norm] | sum |
| [ZMUV Norm][zmuv-norm] | zmuv |
| [Rank Norm][rank-norm] | rank |
| [Borda Norm][borda-norm] | borda |


## Min-Max Norm
---
Expand All @@ -21,6 +23,14 @@ $$
\operatorname{MinMaxNorm(s)}=\frac{s - s_{min}}{s_{max} - s_{min}}
$$

## Min-Max Inverted Norm
---
Min-Max Inverted Norm scales the scores (s) of a result list between 0 and 1, scaling the minimum score ($s_{min}$) to 1 and the maximum score ($s_{max}$) to 0.

$$
\operatorname{MinMaxInvertedNorm(s)}=\frac{s_{max} - s}{s_{max} - s_{min}}
$$

## Max Norm
---
Max Norm scales the scores (s) of a result list so that the maximum score ($s_{max}$) is scaled to 1.
Expand Down
4 changes: 3 additions & 1 deletion ranx/normalization/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,9 @@ def norm_switch(method: str = "min-max"):
elif method == "max":
return max_norm
elif method in {"min_max", "min-max"}:
return min_max_norm
return lambda run: min_max_norm(run, invert=False)
elif method in {"min_max_inverted", "min-max-inverted"}:
return lambda run: min_max_norm(run, invert=True)
elif method == "rank":
return rank_norm
elif method == "sum":
Expand Down
17 changes: 11 additions & 6 deletions ranx/normalization/min_max_norm.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

# LOW LEVEL FUNCTIONS ==========================================================
@njit(cache=True)
def _min_max_norm(results):
def _min_max_norm(results, invert):
"""Apply `min-max norm` to a given results dictionary."""
scores = extract_scores(results)
min_score = safe_min(scores)
Expand All @@ -26,25 +26,28 @@ def _min_max_norm(results):
normalized_results = create_empty_results_dict()
for doc_id in results.keys():
doc_id = to_unicode(doc_id)
normalized_results[doc_id] = (results[doc_id] - min_score) / (denominator)
if invert:
normalized_results[doc_id] = (max_score - results[doc_id]) / (denominator)
else:
normalized_results[doc_id] = (results[doc_id] - min_score) / (denominator)

return normalized_results


@njit(cache=True, parallel=True)
def _min_max_norm_parallel(run):
def _min_max_norm_parallel(run, invert):
"""Apply `min_max norm` to a each results dictionary of a run in parallel."""
q_ids = TypedList(run.keys())

normalized_run = create_empty_results_dict_list(len(q_ids))
for i in prange(len(q_ids)):
normalized_run[i] = _min_max_norm(run[q_ids[i]])
normalized_run[i] = _min_max_norm(run[q_ids[i]], invert)

return convert_results_dict_list_to_run(q_ids, normalized_run)


# HIGH LEVEL FUNCTIONS =========================================================
def min_max_norm(run: Run) -> Run:
def min_max_norm(run: Run, invert: bool = False) -> Run:
"""Apply `min_max norm` to a given run.
Args:
Expand All @@ -55,5 +58,7 @@ def min_max_norm(run: Run) -> Run:
"""
normalized_run = Run()
normalized_run.name = run.name
normalized_run.run = _min_max_norm_parallel(run.run)
normalized_run.run = _min_max_norm_parallel(run.run, invert)
if invert:
normalized_run.sort()
return normalized_run
29 changes: 28 additions & 1 deletion tests/unit/ranx/normalization/min_max_norm_test.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import pytest

from ranx import Run
from ranx.normalization import min_max_norm
from ranx.normalization import min_max_norm, norm_switch


# FIXTURES =====================================================================
Expand Down Expand Up @@ -41,3 +41,30 @@ def test_min_max_norm(run):
assert norm_run["q1"]["d3"] == (3 - 1) / (3 - 1)
assert norm_run["q2"]["d1"] == (1 - 1) / (2 - 1)
assert norm_run["q2"]["d2"] == (2 - 1) / (2 - 1)


@pytest.mark.parametrize("norm_name", ["min_max_inverted", "min-max-inverted"])
def test_min_max_norm_with_invert(run, norm_name):
    """Inverted min-max norm sends each query's highest score to 0 and lowest to 1.

    Parametrized over both alias spellings, each resolved through `norm_switch`.
    """
    # Fixture sanity checks, all performed before normalization is applied.
    snapshot = run.run.copy()
    assert run.run == snapshot

    raw_scores = {
        "q1": {"d1": 1, "d2": 2, "d3": 3},
        "q2": {"d1": 1, "d2": 2},
    }
    assert len(run.run) == 2
    for q_id, docs in raw_scores.items():
        assert len(run.run[q_id]) == len(docs)
    for q_id, docs in raw_scores.items():
        for doc_id, score in docs.items():
            assert run.run[q_id][doc_id] == score
    assert run.size == 2

    normalize = norm_switch(norm_name)
    norm_run = normalize(run)

    # Expected values follow (s_max - s) / (s_max - s_min), computed per query.
    expected = {
        "q1": {
            "d1": (3 - 1) / (3 - 1),
            "d2": (3 - 2) / (3 - 1),
            "d3": (3 - 3) / (3 - 1),
        },
        "q2": {
            "d1": (2 - 1) / (2 - 1),
            "d2": (2 - 2) / (2 - 1),
        },
    }
    assert len(norm_run) == 2
    for q_id, docs in expected.items():
        assert len(norm_run[q_id]) == len(docs)
    for q_id, docs in expected.items():
        for doc_id, value in docs.items():
            assert norm_run[q_id][doc_id] == value

0 comments on commit 1a078b7

Please sign in to comment.