Skip to content

Commit

Permalink
Fixed grouped scores and added test (#65)
Browse files Browse the repository at this point in the history
* Fixed grouped scores and added test

* Updated changelog
  • Loading branch information
wfondrie authored Jul 20, 2022
1 parent 21680cc commit 10559e4
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 5 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# Changelog for mokapot

## [0.8.3] - 2022-07-20
### Fixed
- Fixed the reported mokapot score when group FDR is used.

## [0.8.2] - 2022-07-18
### Added
- `mokapot.Model()` objects now record the CV fold that they were fit on.
Expand Down
11 changes: 6 additions & 5 deletions mokapot/confidence.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,9 +63,10 @@ def __init__(self, psms, scores, desc=True, eval_fdr=0.01):
group_psms = copy.copy(psms)
self.group_column = group_psms._group_column
group_psms._group_column = None
scores = scores * (desc * 2 - 1)

# Do TDC
# Do TDC to eliminate multiple PSMs for a spectrum that may occur
# in different groups.
keep = "last" if desc else "first"
scores = (
pd.Series(scores, index=psms._data.index)
.sample(frac=1)
Expand All @@ -74,7 +75,7 @@ def __init__(self, psms, scores, desc=True, eval_fdr=0.01):

idx = (
psms.data.loc[scores.index, :]
.drop_duplicates(psms._spectrum_columns, keep="last")
.drop_duplicates(psms._spectrum_columns, keep=keep)
.index
)

Expand All @@ -84,9 +85,9 @@ def __init__(self, psms, scores, desc=True, eval_fdr=0.01):
group_psms._data = None
tdc_winners = group_df.index.intersection(idx)
group_psms._data = group_df.loc[tdc_winners, :]
group_scores = scores.loc[group_psms._data.index].values + 1
group_scores = scores.loc[group_psms._data.index].values
res = group_psms.assign_confidence(
group_scores * (2 * desc - 1), desc=desc, eval_fdr=eval_fdr
group_scores, desc=desc, eval_fdr=eval_fdr
)
self._group_confidence_estimates[group] = res

Expand Down
38 changes: 38 additions & 0 deletions tests/unit_tests/test_confidence.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
"""Test that Confidence classes are working correctly"""
import pytest
import numpy as np
import pandas as pd
from mokapot import LinearPsmDataset


def test_one_group(psm_df_1000):
    """Check that a single group gives the same scores as no grouping.

    Every PSM is placed in group ``0`` and confidence is assigned with the
    group column set. The group column is then cleared and confidence is
    assigned again. NumPy's global RNG is seeded with the same value before
    each call, and the two "mokapot score" series must be equal.
    """
    frame, _ = psm_df_1000
    frame["group"] = 0  # a constant column puts every PSM in the same group

    # Column-role arguments for the dataset, collected in one place.
    column_roles = {
        "target_column": "target",
        "spectrum_columns": "spectrum",
        "peptide_column": "peptide",
        "feature_columns": "score",
        "filename_column": "filename",
        "scan_column": "spectrum",
        "calcmass_column": "calcmass",
        "expmass_column": "expmass",
        "rt_column": "ret_time",
        "charge_column": "charge",
        "group_column": "group",
    }
    dataset = LinearPsmDataset(psms=frame, copy_data=True, **column_roles)

    # Grouped run: read the scores reported for the only group, 0.
    np.random.seed(42)
    with_group = dataset.assign_confidence()
    grouped_scores = with_group.group_confidence_estimates[0].psms[
        "mokapot score"
    ]

    # Ungrouped run: same seed, with the group column cleared on the dataset.
    np.random.seed(42)
    dataset._group_column = None
    without_group = dataset.assign_confidence()
    plain_scores = without_group.psms["mokapot score"]

    pd.testing.assert_series_equal(grouped_scores, plain_scores)

0 comments on commit 10559e4

Please sign in to comment.