From 258edb40368fb94f0aeb9a67bb7590b5c8fc31cb Mon Sep 17 00:00:00 2001 From: VarunAnanth2003 Date: Wed, 27 Mar 2024 16:44:25 -0700 Subject: [PATCH 01/84] begin adding tests for annotate mode --- casanovo/casanovo.py | 46 +++++++++++++++++ casanovo/data/annotate_db.py | 9 ++++ tests/conftest.py | 97 ++++++++++++++++++++++++++++++++++++ tests/test_integration.py | 5 ++ 4 files changed, 157 insertions(+) create mode 100644 casanovo/data/annotate_db.py diff --git a/casanovo/casanovo.py b/casanovo/casanovo.py index 8bdfa58f..dcbecea3 100644 --- a/casanovo/casanovo.py +++ b/casanovo/casanovo.py @@ -42,6 +42,7 @@ from . import utils from .denovo import ModelRunner from .config import Config +from .data.annotate_db import annotate_mgf logger = logging.getLogger("casanovo") click.rich_click.USE_MARKDOWN = True @@ -145,6 +146,51 @@ def sequence( logger.info("DONE!") +@main.command(cls=_SharedParams) +@click.argument( + "peak_path", + required=True, + nargs=1, + type=click.Path(exists=True, dir_okay=False), +) +@click.argument( + "tide_path", + required=True, + nargs=1, + type=click.Path(exists=True, dir_okay=True), +) +def annotate( + peak_path: str, + tide_path: str, + model: Optional[str], + config: Optional[str], + output: Optional[str], + verbosity: str, +) -> None: + """Annotate a given .mgf with candidates as selected by a Tide search for Casanovo-DB. + + PEAK_PATH must be one MGF file from which to annotate spectra. + + TIDE_PATH must be one directory containing the Tide search results of the .mgf. + This directory must contain tide-search.decoy.txt and tide-search.target.txt + """ + for peak_file in peak_path: + logger.info(" %s", peak_file) + + if output is None: + output = setup_logging(output, verbosity) + logger.info( + "Output file not specified. Annotated MGF will be saved in the same directory as the input MGF." + ) + output = peak_path.replace(".mgf", "_annotated.mgf") + else: + output = setup_logging(output, verbosity) + + annotate_mgf(peak_path, tide_path, output) + + logger.info("DONE!") + + @main.command(cls=_SharedParams) @click.argument( "annotated_peak_path", diff --git a/casanovo/data/annotate_db.py b/casanovo/data/annotate_db.py new file mode 100644 index 00000000..984edee0 --- /dev/null +++ b/casanovo/data/annotate_db.py @@ -0,0 +1,9 @@ +"""Methods used to annotate an .mgf so that it can be used by Casanovo-DB""" + +from pathlib import Path +from typing import Optional, Tuple + + +def annotate_mgf(peak_path: str, tide_path: str, output: Optional[str]): + print(peak_path, tide_path, output) + ## TODO diff --git a/tests/conftest.py b/tests/conftest.py index 02a6d0f2..d6db572c 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -260,3 +260,100 @@ def tiny_config(tmp_path): yaml.dump(cfg, out_file) return cfg_file + + +@pytest.fixture +def mgf_small_unannotated(tmp_path): + """An MGF file with 2 unannotated spectra and scan numbers.""" + peptides = ["LESLIEK", "PEPTIDEK"] + mgf_file = tmp_path / "small_unannotated.mgf" + return _create_unannotated_mgf(peptides, mgf_file) + + +def _create_unannotated_mgf(peptides, mgf_file, random_state=999): + """ + Create a fake MGF file from one or more peptides. + This file will have no SEQ= parameter, but will have a SCANS= parameter. + + Parameters + ---------- + peptides : str or list of str + The peptides for which to create spectra. + mgf_file : Path + The MGF file to create. + random_state : int or numpy.random.Generator, optional + The random seed. The charge states are chosen to be 2 or 3 randomly. 
+ + Returns + ------- + mgf_file : Path + """ + rng = np.random.default_rng(random_state) + entries = [ + _create_unannotated_mgf_entry(p, idx, rng.choice([2, 3])) + for idx, p in enumerate(peptides) + ] + with mgf_file.open("w+") as mgf_ref: + mgf_ref.write("\n".join(entries)) + + return mgf_file + + +def _create_unannotated_mgf_entry(peptide, scan_num, charge): + """ + Create a MassIVE-KB style MGF entry for a single PSM. + Each entry will have no SEQ= parameter, but will have a SCANS= parameter. + + Parameters + ---------- + peptide : str + A peptide sequence. + scan_num : int + The scan number. + charge : int, optional + The peptide charge state. + + Returns + ------- + str + The PSM entry in an MGF file format. + """ + precursor_mz = calculate_mass(peptide, charge=int(charge)) + mzs, intensities = _peptide_to_peaks(peptide, charge) + frags = "\n".join([f"{m} {i}" for m, i in zip(mzs, intensities)]) + + mgf = [ + "BEGIN IONS", + f"PEPMASS={precursor_mz}", + f"CHARGE={charge}+", + f"SCANS={scan_num}", + f"{frags}", + "END IONS", + ] + return "\n".join(mgf) + + +@pytest.fixture +def tide_dir_small(tmp_path): + """A directory with a very small TIDE search result.""" + tide_dir = tmp_path / "tide_results" + tide_dir.mkdir() + + _create_tide_results_target(tide_dir) + _create_tide_results_decoy(tide_dir) + + return tide_dir + + +def _create_tide_results_target(tide_dir): + """Create a fake TIDE search result file (target).""" + out_file = tide_dir / "tide-search.target.txt" + ## TODO + pass + + +def _create_tide_results_decoy(tide_dir): + """Create a fake TIDE search result file (decoy).""" + out_file = tide_dir / "tide-search.decoy.txt" + ## TODO + pass diff --git a/tests/test_integration.py b/tests/test_integration.py index e5d4b285..50bd1791 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -7,6 +7,11 @@ from casanovo import casanovo +def test_annotate(mgf_small_unannotated, tide_dir_small, tmp_path): + ## TODO + pass + + def test_train_and_run( mgf_small, mzml_small, tiny_config, tmp_path, monkeypatch ): From 30f598481377a69094b500250acbe9852c17b4fb Mon Sep 17 00:00:00 2001 From: VarunAnanth2003 Date: Fri, 29 Mar 2024 15:26:34 -0700 Subject: [PATCH 02/84] add basic test for annotate mode --- casanovo/data/annotate_db.py | 125 ++++++++++++++++++++++++++++++++++- tests/conftest.py | 48 +++++++++++--- tests/test_integration.py | 28 +++++++- 3 files changed, 188 insertions(+), 13 deletions(-) diff --git a/casanovo/data/annotate_db.py b/casanovo/data/annotate_db.py index 984edee0..3e154dfb 100644 --- a/casanovo/data/annotate_db.py +++ b/casanovo/data/annotate_db.py @@ -2,8 +2,129 @@ from pathlib import Path from typing import Optional, Tuple +import os +import re +import logging + +import pandas as pd +import pyteomics.mgf as mgf + + +def _normalize_mods(seq: str) -> str: + """ + Turns tide-style modifications into the format used by Casanovo-DB. + + Parameters + ---------- + seq : str + The peptide sequence with tide-style modifications. + + Returns + ------- + str + The peptide sequence with Casanovo-DB-style modifications. 
+ """ + seq = seq.replace("C", "C+57.021") + seq = re.sub(r"M\[15\..*\]", r"M+15.995", seq) + seq = re.sub(r"N\[0\.9.*\]", r"N+0.984", seq) + seq = re.sub(r"Q\[0\.9.*\]", r"Q+0.984", seq) + seq = re.sub(r"(.*)\[42\..*\]", r"+42.011\1", seq) + seq = re.sub(r"(.*)\[43\..*\]", r"+43.006\1", seq) + seq = re.sub(r"(.*)\[\-17\..*\]", r"-17.027\1", seq) + seq = re.sub(r"(.*)\[25\..*\]", r"+43.006-17.027\1", seq) + return seq def annotate_mgf(peak_path: str, tide_path: str, output: Optional[str]): - print(peak_path, tide_path, output) - ## TODO + """ + Accepts a directory containing the results of a successful tide search, and an .mgf file containing MS/MS spectra. + The .mgf file is then annotated in the SEQ field with all of the candidate peptides for each spectrum, as well as their target/decoy status. + This annotated .mgf can be given directly to Casanovo-DB to perfrom a database search. + + Parameters + ---------- + tide_dir_path : str + Path to the directory containing the results of a successful tide search. + mgf_file : str + Path to the .mgf file containing MS/MS spectra. + output_file : str + Path to where the annotated .mgf will be written. + + """ + logger = logging.getLogger("casanovo") + # Get paths to tide search text files + tdf_path = os.path.join(tide_path, "tide-search.target.txt") + ddf_path = os.path.join(tide_path, "tide-search.decoy.txt") + try: + target_df = pd.read_csv( + tdf_path, sep="\t", usecols=["scan", "sequence", "target/decoy"] + ) + decoy_df = pd.read_csv( + ddf_path, sep="\t", usecols=["scan", "sequence", "target/decoy"] + ) + except FileNotFoundError as e: + logger.error( + "Could not find tide search results in the specified directory. " + "Please ensure that the directory contains the following files: " + "tide-search.target.txt and tide-search.decoy.txt" + ) + raise e + + logger.info("Successfully read tide search results from %s.", tide_path) + + df = pd.concat([target_df, decoy_df]) + scan_groups = df.groupby("scan")[["sequence", "target/decoy"]] + + scan_map = {} + + for scan, item in scan_groups: + td_group = item.groupby("target/decoy")["sequence"].apply(list) + if "target" in td_group.index: + target_candidate_list = list( + map( + _normalize_mods, + td_group["target"], + ) + ) + else: + target_candidate_list = [] + logger.warn(f"No target peptides found for scan {scan}.") + if "decoy" in td_group.index: + decoy_candidate_list = list( + map( + _normalize_mods, + td_group["decoy"], + ) + ) + decoy_candidate_list = list( + map(lambda x: "decoy_" + str(x), decoy_candidate_list) + ) + else: + decoy_candidate_list = [] + logger.warn(f"No decoy peptides found for scan {scan}.") + + scan_map[scan] = target_candidate_list + decoy_candidate_list + + all_spec = [] + for idx, spec_dict in enumerate(mgf.read(peak_path)): + try: + scan = int(spec_dict["params"]["scans"]) + except KeyError as e: + logger.error( + "Could not find the scan number in the .mgf file. Please ensure that the .mgf file contains the scan number in the 'SCANS' field." + ) + raise e + try: + spec_dict["params"]["seq"] = ",".join(list(scan_map[scan])) + all_spec.append(spec_dict) + except KeyError as e: + # No need to do anything if the scan is not found in the scan map + pass + try: + output = str(output) + logger.info(output) + mgf.write(all_spec, output, file_mode="w") + logger.info("Annotated .mgf file written to %s.", output) + except Exception as e: + print(f"Write to {output} failed. 
Check if the file path is correct.") + print(e) diff --git a/tests/conftest.py b/tests/conftest.py index d6db572c..237d0292 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,6 +1,7 @@ """Fixtures used for testing.""" import numpy as np +import pandas as pd import psims import pytest import yaml @@ -324,6 +325,7 @@ def _create_unannotated_mgf_entry(peptide, scan_num, charge): mgf = [ "BEGIN IONS", + f"TITLE=title::{scan_num}", f"PEPMASS={precursor_mz}", f"CHARGE={charge}+", f"SCANS={scan_num}", @@ -339,21 +341,51 @@ def tide_dir_small(tmp_path): tide_dir = tmp_path / "tide_results" tide_dir.mkdir() - _create_tide_results_target(tide_dir) - _create_tide_results_decoy(tide_dir) + # Key is the scan number + built_dict = { + 0: { + "targets": ["LESLIEK", "PEPTIDEK"], + "decoys": ["KEILSEL", "KEDITEPP"], + }, + 1: { + "targets": ["LESLIEK", "PEPTIDEK"], + "decoys": ["KEILSEL", "KEDITEPP"], + }, + } + + _create_tide_results_target(tide_dir, built_dict) + _create_tide_results_decoy(tide_dir, built_dict) return tide_dir -def _create_tide_results_target(tide_dir): +def _create_tide_results_target(tide_dir, built_dict): """Create a fake TIDE search result file (target).""" out_file = tide_dir / "tide-search.target.txt" - ## TODO - pass + df = pd.DataFrame(columns=["scan", "sequence", "target/decoy"]) + for scan, peptides in built_dict.items(): + entry = pd.DataFrame.from_dict( + { + "scan": [scan] * len(peptides["targets"]), + "sequence": peptides["targets"], + "target/decoy": ["target"] * len(peptides["targets"]), + } + ) + df = pd.concat([df, entry], ignore_index=True) + df.to_csv(out_file, sep="\t", index=True) -def _create_tide_results_decoy(tide_dir): +def _create_tide_results_decoy(tide_dir, built_dict): """Create a fake TIDE search result file (decoy).""" out_file = tide_dir / "tide-search.decoy.txt" - ## TODO - pass + df = pd.DataFrame(columns=["scan", "sequence", "target/decoy"]) + for scan, peptides in built_dict.items(): + entry = pd.DataFrame.from_dict( + { + "scan": [scan] * len(peptides["decoys"]), + "sequence": peptides["decoys"], + "target/decoy": ["decoy"] * len(peptides["decoys"]), + } + ) + df = pd.concat([df, entry], ignore_index=True) + df.to_csv(out_file, sep="\t", index=True) diff --git a/tests/test_integration.py b/tests/test_integration.py index 50bd1791..8228432e 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -7,9 +7,31 @@ from casanovo import casanovo -def test_annotate(mgf_small_unannotated, tide_dir_small, tmp_path): - ## TODO - pass +def test_annotate( + mgf_small_unannotated, tide_dir_small, tiny_config, tmp_path +): + + # Run a command: + run = functools.partial( + CliRunner().invoke, casanovo.main, catch_exceptions=False + ) + + annotate_args = [ + "annotate", + str(mgf_small_unannotated), + str(tide_dir_small), + "--config", + tiny_config, + "--output", + str(tmp_path / "annotated_mgf.mgf"), + ] + + result = run(annotate_args) + + assert result.exit_code == 0 + assert (tmp_path / "annotated_mgf.mgf").exists() + + ## TODO: Write rest of test to verify the output file. 
def test_train_and_run( From 186bc0fa353517095ff043b91881420438451616 Mon Sep 17 00:00:00 2001 From: VarunAnanth2003 Date: Mon, 8 Apr 2024 22:05:22 -0700 Subject: [PATCH 03/84] added test case for annotate mode and modified method --- casanovo/data/annotate_db.py | 15 ++++++++------- tests/conftest.py | 13 ++++++++++++- tests/test_integration.py | 20 +++++++++++++++++++- 3 files changed, 39 insertions(+), 9 deletions(-) diff --git a/casanovo/data/annotate_db.py b/casanovo/data/annotate_db.py index 3e154dfb..3ff3c4b7 100644 --- a/casanovo/data/annotate_db.py +++ b/casanovo/data/annotate_db.py @@ -24,14 +24,15 @@ def _normalize_mods(seq: str) -> str: str The peptide sequence with Casanovo-DB-style modifications. """ + logger = logging.getLogger("casanovo") seq = seq.replace("C", "C+57.021") - seq = re.sub(r"M\[15\..*\]", r"M+15.995", seq) - seq = re.sub(r"N\[0\.9.*\]", r"N+0.984", seq) - seq = re.sub(r"Q\[0\.9.*\]", r"Q+0.984", seq) - seq = re.sub(r"(.*)\[42\..*\]", r"+42.011\1", seq) - seq = re.sub(r"(.*)\[43\..*\]", r"+43.006\1", seq) - seq = re.sub(r"(.*)\[\-17\..*\]", r"-17.027\1", seq) - seq = re.sub(r"(.*)\[25\..*\]", r"+43.006-17.027\1", seq) + seq = re.sub(r"M\[15\.[0-9]*\]", r"M+15.995", seq) + seq = re.sub(r"N\[0\.9[0-9]*\]", r"N+0.984", seq) + seq = re.sub(r"Q\[0\.9[0-9]*\]", r"Q+0.984", seq) + seq = re.sub(r"(.*)\[42\.[0-9]*\]", r"+42.011\1", seq) + seq = re.sub(r"(.*)\[43\.[0-9]*\]", r"+43.006\1", seq) + seq = re.sub(r"(.*)\[\-17\.[0-9]*\]", r"-17.027\1", seq) + seq = re.sub(r"(.*)\[25\.[0-9]*\]", r"+43.006-17.027\1", seq) return seq diff --git a/tests/conftest.py b/tests/conftest.py index 237d0292..eed4f39a 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -266,7 +266,7 @@ def tiny_config(tmp_path): @pytest.fixture def mgf_small_unannotated(tmp_path): """An MGF file with 2 unannotated spectra and scan numbers.""" - peptides = ["LESLIEK", "PEPTIDEK"] + peptides = ["LESLIEK", "PEPTIDEK", "LESTIEK"] mgf_file = tmp_path / "small_unannotated.mgf" return _create_unannotated_mgf(peptides, mgf_file) @@ -351,6 +351,17 @@ def tide_dir_small(tmp_path): "targets": ["LESLIEK", "PEPTIDEK"], "decoys": ["KEILSEL", "KEDITEPP"], }, + 2: { + "targets": [ + "L[42.011]EM[15.9]SLIM[15.995]EK", + "P[43.01]EN[0.99]PTIQ[0.984]DEK", + ], + "decoys": [ + "K[-17.03]M[15.995]EILSEL", + "K[25.1]EDITEPP", + "KEDIQ[0.984]TEPPQ[0.984]", + ], + }, } _create_tide_results_target(tide_dir, built_dict) diff --git a/tests/test_integration.py b/tests/test_integration.py index 8228432e..d4d86d7d 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -31,7 +31,25 @@ def test_annotate( assert result.exit_code == 0 assert (tmp_path / "annotated_mgf.mgf").exists() - ## TODO: Write rest of test to verify the output file. 
+ # Read in the annotated file + with open(tmp_path / "annotated_mgf.mgf") as f: + annotated_lines = f.readlines() + + # Get each SEQ= line + seq_lines = [line for line in annotated_lines if line.startswith("SEQ=")] + assert len(seq_lines) == 3 + assert ( + seq_lines[0].strip() + == "SEQ=LESLIEK,PEPTIDEK,decoy_KEILSEL,decoy_KEDITEPP" + ) + assert ( + seq_lines[1].strip() + == "SEQ=LESLIEK,PEPTIDEK,decoy_KEILSEL,decoy_KEDITEPP" + ) + assert ( + seq_lines[2].strip() + == "SEQ=+42.011LEM+15.995SLIM+15.995EK,+43.006PEN+0.984PTIQ+0.984DEK,decoy_-17.027KM+15.995EILSEL,decoy_+43.006-17.027KEDITEPP,decoy_KEDIQ+0.984TEPPQ+0.984" + ) def test_train_and_run( From a8f50f473f6b8f1fb5da505c51b62ef0bc3fb24e Mon Sep 17 00:00:00 2001 From: VarunAnanth2003 Date: Wed, 10 Apr 2024 23:58:30 -0700 Subject: [PATCH 04/84] very rough sketch of db upgrade (untested) --- casanovo/casanovo.py | 26 +++++ casanovo/data/datasets.py | 71 ++++++++++++++ casanovo/data/ms_io.py | 48 ++++++++++ casanovo/denovo/dataloaders.py | 96 ++++++++++++++++++- casanovo/denovo/model.py | 165 ++++++++++++++++++++++++++++++++ casanovo/denovo/model_runner.py | 115 +++++++++++++++++++++- 6 files changed, 518 insertions(+), 3 deletions(-) diff --git a/casanovo/casanovo.py b/casanovo/casanovo.py index dcbecea3..c2bce3ef 100644 --- a/casanovo/casanovo.py +++ b/casanovo/casanovo.py @@ -191,6 +191,32 @@ def annotate( logger.info("DONE!") +@main.command(cls=_SharedParams) +@click.argument( + "peak_path", + required=True, + nargs=-1, + type=click.Path(exists=True, dir_okay=False), +) +def db_search( + peak_path: Tuple[str], + model: Optional[str], + config: Optional[str], + output: Optional[str], + verbosity: str, +) -> None: + """Perform a search using Casanovo-DB. + + PEAK_PATH must be one MGF file that has ANNOTATED spectra, as output by annotate mode. + """ + output = setup_logging(output, verbosity) + config, model = setup_model(model, config, output, False) + with ModelRunner(config, model) as runner: + runner.db_search(peak_path, output) + + logger.info("DONE!") + + @main.command(cls=_SharedParams) @click.argument( "annotated_peak_path", diff --git a/casanovo/data/datasets.py b/casanovo/data/datasets.py index 6244e88f..d0c6f347 100644 --- a/casanovo/data/datasets.py +++ b/casanovo/data/datasets.py @@ -265,3 +265,74 @@ def __getitem__(self, idx: int) -> Tuple[torch.Tensor, float, int, str]: mz_array, int_array, precursor_mz, precursor_charge ) return spectrum, precursor_mz, precursor_charge, peptide + + +class DBSpectrumDataset(AnnotatedSpectrumDataset): + """ + Parse and retrieve collections of annotated MS/MS spectra, additionally keep track of spectrum ids for Casanovo-DB. + + Parameters + ---------- + annotated_spectrum_index : depthcharge.data.SpectrumIndex + The MS/MS spectra to use as a dataset. + n_peaks : Optional[int] + The number of top-n most intense peaks to keep in each spectrum. `None` + retains all peaks. + min_mz : float + The minimum m/z to include. The default is 140 m/z, in order to exclude + TMT and iTRAQ reporter ions. + max_mz : float + The maximum m/z to include. + min_intensity : float + Remove peaks whose intensity is below `min_intensity` percentage of the + base peak intensity. + remove_precursor_tol : float + Remove peaks within the given mass tolerance in Dalton around the + precursor mass. + random_state : Optional[int] + The NumPy random state. ``None`` leaves mass spectra in the order they + were parsed. 
+ """ + + def __getitem__( + self, idx: int + ) -> Tuple[torch.Tensor, float, int, str, Tuple[str, str]]: + """ + Return the annotated MS/MS spectrum with the given index. + + Parameters + ---------- + idx : int + The index of the spectrum to return. + + Returns + ------- + spectrum : torch.Tensor of shape (n_peaks, 2) + A tensor of the spectrum with the m/z and intensity peak values. + precursor_mz : float + The precursor m/z. + precursor_charge : int + The precursor charge. + annotation : str + The peptide annotation of the spectrum. + spectrum_id: Tuple[str, str] + The unique spectrum identifier, formed by its original peak file and + identifier (index or scan number) therein. + """ + ( + mz_array, + int_array, + precursor_mz, + precursor_charge, + peptide, + ) = self.index[idx] + spectrum = self._process_peaks( + mz_array, int_array, precursor_mz, precursor_charge + ) + return ( + spectrum, + precursor_mz, + precursor_charge, + peptide, + self.get_spectrum_id(idx), + ) diff --git a/casanovo/data/ms_io.py b/casanovo/data/ms_io.py index de69592e..1ae8cd16 100644 --- a/casanovo/data/ms_io.py +++ b/casanovo/data/ms_io.py @@ -209,3 +209,51 @@ def save(self) -> None: psm[6], # opt_ms_run[1]_aa_scores ] ) + + +class DBWriter(MztabWriter): + """ + Export DB search results to an mzTab file. + + Parameters + ---------- + filename : str + The name of the mzTab file. + """ + + def save(self) -> None: + """ + Export the DB search results to the mzTab file. + """ + with open(self.filename, "w", newline="") as f: + writer = csv.writer(f, delimiter="\t", lineterminator=os.linesep) + # Write metadata. + for row in self.metadata: + writer.writerow(["MTD", *row]) + # Write PSMs. + writer.writerow( + [ + "PSH", + "spectrum_index", + "sequence", + "precursor", + "score", + "target", + "aa_scores", + ] + ) + for i, psm in enumerate( + natsort.natsorted(self.psms, key=operator.itemgetter(1)), 1 + ): + for psm in list(zip(*psm)): + writer.writerow( + [ + "PSM", + psm[0], # spectrum_index + psm[1], # sequence + psm[2], # precursor + psm[3], # score + bool(psm[4]), # target + psm[5], # aa_scores + ] + ) diff --git a/casanovo/denovo/dataloaders.py b/casanovo/denovo/dataloaders.py index fe5d6237..760b0509 100644 --- a/casanovo/denovo/dataloaders.py +++ b/casanovo/denovo/dataloaders.py @@ -9,7 +9,11 @@ import torch from depthcharge.data import AnnotatedSpectrumIndex -from ..data.datasets import AnnotatedSpectrumDataset, SpectrumDataset +from ..data.datasets import ( + AnnotatedSpectrumDataset, + SpectrumDataset, + DBSpectrumDataset, +) class DeNovoDataModule(pl.LightningDataModule): @@ -88,7 +92,7 @@ def setup(self, stage: str = None, annotated: bool = True) -> None: Parameters ---------- - stage : str {"fit", "validate", "test"} + stage : str {"fit", "validate", "test", "db"} The stage indicating which Datasets to prepare. All are prepared by default. 
annotated: bool @@ -122,6 +126,17 @@ def setup(self, stage: str = None, annotated: bool = True) -> None: ) if self.test_index is not None: self.test_dataset = make_dataset(self.test_index) + if stage in (None, "db"): + make_dataset = functools.partial( + DBSpectrumDataset, + n_peaks=self.n_peaks, + min_mz=self.min_mz, + max_mz=self.max_mz, + min_intensity=self.min_intensity, + remove_precursor_tol=self.remove_precursor_tol, + ) + if self.test_index is not None: + self.test_dataset = make_dataset(self.test_index) def _make_loader( self, @@ -155,6 +170,35 @@ def _make_loader( shuffle=shuffle, ) + def _make_db_loader( + self, dataset: torch.utils.data.Dataset, batch_size: int + ) -> torch.utils.data.DataLoader: + """ + Create a PyTorch DataLoader. + + Parameters + ---------- + dataset : torch.utils.data.Dataset + A PyTorch Dataset. + + Returns + ------- + torch.utils.data.DataLoader + A PyTorch DataLoader. + """ + # Calculate new batch size to saturate previous batch size with PSMs + pep_per_spec = [] + for i in range(min(10, len(dataset))): + pep_per_spec.append(len(dataset[i][3].split(","))) + new_batch_size = int(batch_size // np.mean(pep_per_spec)) + return torch.utils.data.DataLoader( + dataset, + batch_size=new_batch_size, + collate_fn=prepare_db_batch, + pin_memory=True, + num_workers=self.n_workers, + ) + def train_dataloader(self) -> torch.utils.data.DataLoader: """Get the training DataLoader.""" return self._make_loader( @@ -173,6 +217,10 @@ def predict_dataloader(self) -> torch.utils.data.DataLoader: """Get the predict DataLoader.""" return self._make_loader(self.test_dataset, self.eval_batch_size) + def db_dataloader(self) -> torch.utils.data.DataLoader: + """Get the predict DataLoader.""" + return self._make_db_loader(self.test_dataset, self.eval_batch_size) + def prepare_batch( batch: List[Tuple[torch.Tensor, float, int, str]] @@ -211,3 +259,47 @@ def prepare_batch( [precursor_masses, precursor_charges, precursor_mzs] ).T.float() return spectra, precursors, np.asarray(spectrum_ids) + + +def prepare_db_batch( + batch: List[Tuple[torch.Tensor, float, int, str, Tuple[str, str]]] +) -> Tuple[torch.Tensor, torch.Tensor, np.ndarray, Tuple[str, str]]: + """ + Collate MS/MS spectra into a batch meant for Casanovo-DB. + + Parameters + ---------- + batch : List[Tuple[torch.Tensor, float, int, str, Tuple[str, str]]] + A batch of data from an AnnotatedSpectrumDataset, consisting of for each + spectrum (i) a tensor with the m/z and intensity peak values, (ii), the + precursor m/z, (iii) the precursor charge, (iv) the spectrum identifier (peptide), (v) + spectrum identifiers (file and scan). + + Returns + ------- + spectra : torch.Tensor of shape (batch_size, n_peaks, 2) + The padded mass spectra tensor with the m/z and intensity peak values + for each spectrum. + precursors : torch.Tensor of shape (batch_size, 3) + A tensor with the precursor neutral mass, precursor charge, and + precursor m/z. 
+ spectrum_peps : np.ndarray + Peptide sequences + spectrum_ids : Tuple[str, str] + Peak file and spectrum identifier + """ + ( + spectra, + precursor_mzs, + precursor_charges, + spectrum_peps, + spectrum_ids, + ) = list(zip(*batch)) + spectra = torch.nn.utils.rnn.pad_sequence(spectra, batch_first=True) + precursor_mzs = torch.tensor(precursor_mzs) + precursor_charges = torch.tensor(precursor_charges) + precursor_masses = (precursor_mzs - 1.007276) * precursor_charges + precursors = torch.vstack( + [precursor_masses, precursor_charges, precursor_mzs] + ).T.float() + return spectra, precursors, np.asarray(spectrum_peps), spectrum_ids diff --git a/casanovo/denovo/model.py b/casanovo/denovo/model.py index 77df6df5..f1466907 100644 --- a/casanovo/denovo/model.py +++ b/casanovo/denovo/model.py @@ -989,6 +989,171 @@ def configure_optimizers( return [optimizer], {"scheduler": lr_scheduler, "interval": "step"} +class DBSpec2Pep(Spec2Pep): + """ + Inherits Spec2Pep + + Hijacks teacher-forcing implemented in Spec2Pep and uses it to predict scores between a spectra and associated peptide. + Input format is .mgf, with comma-separated targets and decoys in the SEQ field. Decoys should have a prefix of "decoy_". + """ + + num_pairs = 1024 + decoy_prefix = "decoy_" + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + def predict_step(self, batch, *args): + batch_res = [] + for ( + indexes, + t_or_d, + peptides, + precursors, + encoded_ms, + ) in self.smart_batch_gen(batch): + with torch.set_grad_enabled(True): + pred, truth = self.decoder(peptides, precursors, *encoded_ms) + sm = torch.nn.Softmax(dim=2) + pred = sm(pred) + score_result, per_aa_score = calc_match_score( + pred, truth + ) # Calculate the score between spectra + peptide list + batch_res.append( + ( + indexes, + t_or_d, + peptides, + score_result, + per_aa_score, + precursors, + ) + ) + return batch_res + + def smart_batch_gen(self, batch): + all_psm = [] + enc = self.encoder(batch[0]) + precursors = batch[1] + indexes = batch[3] + enc = list(zip(*enc)) + for idx, _ in enumerate(batch[0]): + spec_peptides = batch[2][idx].split(",") + # Check for decoy prefixes and create a bit-vector indicating targets (1) or decoys (0) + t_or_ds = [ + 0 if p.startswith(self.decoy_prefix) else 1 + for p in spec_peptides + ] + # Remove decoy prefix + spec_peptides = [ + s[len(self.decoy_prefix) :] + if s.startswith(self.decoy_prefix) + else s + for s in spec_peptides + ] + spec_precursors = [precursors[idx]] * len(spec_peptides) + spec_enc = [enc[idx]] * len(spec_peptides) + spec_idx = [indexes[idx]] * len(spec_peptides) + all_psm.extend( + list( + zip( + spec_enc, + spec_precursors, + spec_peptides, + spec_idx, + t_or_ds, + ) + ) + ) + # Continually grab num_pairs items from all_psm until list is exhausted + while len(all_psm) > 0: + batch = all_psm[: self.num_pairs] + all_psm = all_psm[self.num_pairs :] + batch = list(zip(*batch)) + encoded_ms = ( + torch.stack([a[0] for a in batch[0]]), + torch.stack([a[1] for a in batch[0]]), + ) + prec_data = torch.stack(batch[1]) + pep_str = list(batch[2]) + indexes = [a[1] for a in batch[3]] + t_or_ds = batch[4] + yield (indexes, t_or_ds, pep_str, prec_data, encoded_ms) + + def on_predict_batch_end( + self, + outputs: List[Tuple[np.ndarray, List[str], torch.Tensor]], + *args, + ) -> None: + if self.out_writer is None: + return + ( + indexes, + t_or_d, + peptides, + score_result, + per_aa_score, + precursors, + ) = list(zip(*outputs)) + for index, t_or_d, peptide, score, per_aa_scores, 
precursor in zip( + indexes, t_or_d, peptides, score_result, per_aa_score, precursors + ): + per_aa_scores = per_aa_scores.cpu().numpy() + per_aa_scores = list(per_aa_scores[per_aa_scores != 0]) + score = score.cpu().numpy() + precursor = precursor.cpu().numpy() + self.out_writer.psms.append( + (index, peptide, precursor, score, t_or_d, per_aa_scores), + ) + + +def calc_match_score( + batch_all_aa_scores: torch.Tensor, truth_aa_indicies: torch.Tensor +) -> List[float]: + """ + Take in teacher-forced scoring of amino acids of the peptides (in a batch) and use the truth labels + to calculate a score between the input spectra and associated peptide. The score is the geometric + mean of the AA probabilities + + Parameters + ---------- + batch_all_aa_scores : torch.Tensor + Amino acid scores for all amino acids in the vocabulary for every prediction made to generate the associated peptide (for an entire batch) + truth_aa_indicies : torch.Tensor + Indicies of the score for each actual amino acid in the peptide (for an entire batch) + + Returns + ------- + score : list[float], list[list[float]] + The score between the input spectra and associated peptide (for an entire batch) + a list of lists of per amino acid scores (for an entire batch) + """ + # Remove trailing tokens from predictions, + batch_all_aa_scores = batch_all_aa_scores[:, :-1] + + # Vectorized scoring using efficient indexing. + rows = ( + torch.arange(batch_all_aa_scores.shape[0]) + .unsqueeze(-1) + .expand(-1, batch_all_aa_scores.shape[1]) + ) + cols = torch.arange(0, batch_all_aa_scores.shape[1]).expand_as(rows) + + per_aa_scores = batch_all_aa_scores[rows, cols, truth_aa_indicies] + + score_mask = truth_aa_indicies != 0 + masked_per_aa_scores = per_aa_scores * score_mask + # all_scores = masked_per_aa_scores.sum(dim=1) / score_mask.sum(dim=1) # Calculated arithmetic score + all_scores = torch.where( + torch.log(masked_per_aa_scores) == float("-inf"), + torch.tensor(0.0), + torch.log(masked_per_aa_scores), + ).sum(dim=1) / score_mask.sum( + dim=1 + ) # Calculates geometric score + return all_scores, masked_per_aa_scores + + class CosineWarmupScheduler(torch.optim.lr_scheduler._LRScheduler): """ Learning rate scheduler with linear warm-up followed by cosine shaped decay. 
diff --git a/casanovo/denovo/model_runner.py b/casanovo/denovo/model_runner.py index 4bd2165e..f70e2be6 100644 --- a/casanovo/denovo/model_runner.py +++ b/casanovo/denovo/model_runner.py @@ -20,7 +20,7 @@ from ..config import Config from ..data import ms_io from ..denovo.dataloaders import DeNovoDataModule -from ..denovo.model import Spec2Pep +from ..denovo.model import Spec2Pep, DBSpec2Pep logger = logging.getLogger("casanovo") @@ -79,6 +79,25 @@ def __exit__(self, exc_type, exc_value, traceback): if self.writer is not None: self.writer.save() + def db_search(self, peak_path: Iterable[str], output: str) -> None: + """Casanovo-DB TODO DOCS""" + self.writer = ms_io.DBWriter(Path(output).with_suffix(".mztab")) + self.writer.set_metadata( + self.config, + model=str(self.model_filename), + config_filename=self.config.file, + ) + + self.initialize_trainer(train=True) + self.initialize_db_model() + self.model.out_writer = self.writer + + test_index = self._get_index(peak_path, True, "db search") + self.writer.set_ms_run(test_index.ms_files) + self.initialize_data_module(test_index=test_index) + self.loaders.setup(stage="db") + self.trainer.predict(self.model, self.loaders.db_dataloader()) + def train( self, train_peak_path: Iterable[str], @@ -198,6 +217,100 @@ def initialize_trainer(self, train: bool) -> None: self.trainer = pl.Trainer(**trainer_cfg) + def initialize_db_model(self) -> None: + """Initialize the Casanovo-DB model. + Required because the DB search model is a unique subclass of the Spec2Pep model. + """ + model_params = dict( + dim_model=self.config.dim_model, + n_head=self.config.n_head, + dim_feedforward=self.config.dim_feedforward, + n_layers=self.config.n_layers, + dropout=self.config.dropout, + dim_intensity=self.config.dim_intensity, + max_length=self.config.max_length, + residues=self.config.residues, + max_charge=self.config.max_charge, + precursor_mass_tol=self.config.precursor_mass_tol, + isotope_error_range=self.config.isotope_error_range, + min_peptide_len=self.config.min_peptide_len, + n_beams=self.config.n_beams, + top_match=self.config.top_match, + n_log=self.config.n_log, + tb_summarywriter=self.config.tb_summarywriter, + train_label_smoothing=self.config.train_label_smoothing, + warmup_iters=self.config.warmup_iters, + cosine_schedule_period_iters=self.config.cosine_schedule_period_iters, + lr=self.config.learning_rate, + weight_decay=self.config.weight_decay, + out_writer=self.writer, + calculate_precision=self.config.calculate_precision, + ) + + # Reconfigurable non-architecture related parameters for a loaded model. 
+ loaded_model_params = dict( + max_length=self.config.max_length, + precursor_mass_tol=self.config.precursor_mass_tol, + isotope_error_range=self.config.isotope_error_range, + n_beams=self.config.n_beams, + min_peptide_len=self.config.min_peptide_len, + top_match=self.config.top_match, + n_log=self.config.n_log, + tb_summarywriter=self.config.tb_summarywriter, + train_label_smoothing=self.config.train_label_smoothing, + warmup_iters=self.config.warmup_iters, + cosine_schedule_period_iters=self.config.cosine_schedule_period_iters, + lr=self.config.learning_rate, + weight_decay=self.config.weight_decay, + out_writer=self.writer, + calculate_precision=self.config.calculate_precision, + ) + + # Model file must exist for DB search + if self.model_filename is None: + logger.error("A model file must be provided") + raise ValueError("A model file must be provided") + + if not Path(self.model_filename).exists(): + logger.error( + "Could not find the model weights at file %s", + self.model_filename, + ) + raise FileNotFoundError("Could not find the model weights file") + + # First try loading model details from the weights file, otherwise use + # the provided configuration. + device = torch.empty(1).device # Use the default device. + try: + self.model = DBSpec2Pep.load_from_checkpoint( + self.model_filename, map_location=device, **loaded_model_params + ) + + architecture_params = set(model_params.keys()) - set( + loaded_model_params.keys() + ) + for param in architecture_params: + if model_params[param] != self.model.hparams[param]: + warnings.warn( + f"Mismatching {param} parameter in " + f"model checkpoint ({self.model.hparams[param]}) " + f"vs config file ({model_params[param]}); " + "using the checkpoint." + ) + except RuntimeError: + # This only doesn't work if the weights are from an older version + try: + self.model = DBSpec2Pep.load_from_checkpoint( + self.model_filename, + map_location=device, + **model_params, + ) + except RuntimeError: + raise RuntimeError( + "Weights file incompatible with the current version of " + "Casanovo." + ) + def initialize_model(self, train: bool) -> None: """Initialize the Casanovo model. From dae9c8a78b712575ed699e690ce674b7f6a46377 Mon Sep 17 00:00:00 2001 From: VarunAnanth2003 Date: Sun, 14 Apr 2024 21:27:41 -0700 Subject: [PATCH 05/84] small upgrades to documentation --- casanovo/denovo/dataloaders.py | 2 +- casanovo/denovo/model.py | 48 +++++++++++++++------------------ casanovo/denovo/model_runner.py | 19 ++++++++++++- 3 files changed, 41 insertions(+), 28 deletions(-) diff --git a/casanovo/denovo/dataloaders.py b/casanovo/denovo/dataloaders.py index 760b0509..efb346ab 100644 --- a/casanovo/denovo/dataloaders.py +++ b/casanovo/denovo/dataloaders.py @@ -190,7 +190,7 @@ def _make_db_loader( pep_per_spec = [] for i in range(min(10, len(dataset))): pep_per_spec.append(len(dataset[i][3].split(","))) - new_batch_size = int(batch_size // np.mean(pep_per_spec)) + new_batch_size = max(1, int(batch_size // np.mean(pep_per_spec))) return torch.utils.data.DataLoader( dataset, batch_size=new_batch_size, diff --git a/casanovo/denovo/model.py b/casanovo/denovo/model.py index f1466907..d31820f2 100644 --- a/casanovo/denovo/model.py +++ b/casanovo/denovo/model.py @@ -997,13 +997,13 @@ class DBSpec2Pep(Spec2Pep): Input format is .mgf, with comma-separated targets and decoys in the SEQ field. Decoys should have a prefix of "decoy_". 
""" - num_pairs = 1024 - decoy_prefix = "decoy_" + num_pairs = None # Modified to be predict_batch_size from config def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) def predict_step(self, batch, *args): + logger.info("New batch") batch_res = [] for ( indexes, @@ -1012,23 +1012,22 @@ def predict_step(self, batch, *args): precursors, encoded_ms, ) in self.smart_batch_gen(batch): - with torch.set_grad_enabled(True): - pred, truth = self.decoder(peptides, precursors, *encoded_ms) - sm = torch.nn.Softmax(dim=2) - pred = sm(pred) - score_result, per_aa_score = calc_match_score( - pred, truth - ) # Calculate the score between spectra + peptide list - batch_res.append( - ( - indexes, - t_or_d, - peptides, - score_result, - per_aa_score, - precursors, - ) + pred, truth = self.decoder(peptides, precursors, *encoded_ms) + sm = torch.nn.Softmax(dim=2) + pred = sm(pred) + score_result, per_aa_score = calc_match_score( + pred, truth + ) # Calculate the score between spectra + peptide list + batch_res.append( + ( + indexes, + t_or_d, + peptides, + score_result.cpu().detach().numpy(), + per_aa_score.cpu().detach().numpy(), + precursors.cpu().detach().numpy(), ) + ) return batch_res def smart_batch_gen(self, batch): @@ -1040,15 +1039,13 @@ def smart_batch_gen(self, batch): for idx, _ in enumerate(batch[0]): spec_peptides = batch[2][idx].split(",") # Check for decoy prefixes and create a bit-vector indicating targets (1) or decoys (0) + decoy_prefix = "decoy_" # Decoy prefix t_or_ds = [ - 0 if p.startswith(self.decoy_prefix) else 1 - for p in spec_peptides + 0 if p.startswith(decoy_prefix) else 1 for p in spec_peptides ] # Remove decoy prefix spec_peptides = [ - s[len(self.decoy_prefix) :] - if s.startswith(self.decoy_prefix) - else s + s[len(decoy_prefix) :] if s.startswith(decoy_prefix) else s for s in spec_peptides ] spec_precursors = [precursors[idx]] * len(spec_peptides) @@ -1066,6 +1063,8 @@ def smart_batch_gen(self, batch): ) ) # Continually grab num_pairs items from all_psm until list is exhausted + logger.info(f"Received {len(all_psm)} PSMs") + logger.info(f"Processing num_pairs: {self.num_pairs}") while len(all_psm) > 0: batch = all_psm[: self.num_pairs] all_psm = all_psm[self.num_pairs :] @@ -1098,10 +1097,7 @@ def on_predict_batch_end( for index, t_or_d, peptide, score, per_aa_scores, precursor in zip( indexes, t_or_d, peptides, score_result, per_aa_score, precursors ): - per_aa_scores = per_aa_scores.cpu().numpy() per_aa_scores = list(per_aa_scores[per_aa_scores != 0]) - score = score.cpu().numpy() - precursor = precursor.cpu().numpy() self.out_writer.psms.append( (index, peptide, precursor, score, t_or_d, per_aa_scores), ) diff --git a/casanovo/denovo/model_runner.py b/casanovo/denovo/model_runner.py index f70e2be6..e6956049 100644 --- a/casanovo/denovo/model_runner.py +++ b/casanovo/denovo/model_runner.py @@ -80,7 +80,19 @@ def __exit__(self, exc_type, exc_value, traceback): self.writer.save() def db_search(self, peak_path: Iterable[str], output: str) -> None: - """Casanovo-DB TODO DOCS""" + """Perform database search with Casanovo. + + Parameters + ---------- + peak_path : iterable of str + The path to the annotated .mgf data files for database search. + output : str + Where should the output be saved? 
+ + Returns + ------- + self + """ self.writer = ms_io.DBWriter(Path(output).with_suffix(".mztab")) self.writer.set_metadata( self.config, @@ -286,6 +298,9 @@ def initialize_db_model(self) -> None: self.model_filename, map_location=device, **loaded_model_params ) + # Pass in information about predict_batch_size to the model for batch saturation + self.model.num_pairs = self.config.predict_batch_size + architecture_params = set(model_params.keys()) - set( loaded_model_params.keys() ) @@ -305,6 +320,8 @@ def initialize_db_model(self) -> None: map_location=device, **model_params, ) + # Pass in information about predict_batch_size to the model for batch saturation + self.model.num_pairs = self.config.predict_batch_size except RuntimeError: raise RuntimeError( "Weights file incompatible with the current version of " From 7f95ae5a0c07f76cfea3004daabfe9bb832423fd Mon Sep 17 00:00:00 2001 From: VarunAnanth2003 Date: Sun, 14 Apr 2024 23:01:31 -0700 Subject: [PATCH 06/84] better output formatting --- casanovo/data/ms_io.py | 30 +++++++++++++++++++++--------- casanovo/denovo/model.py | 6 +----- 2 files changed, 22 insertions(+), 14 deletions(-) diff --git a/casanovo/data/ms_io.py b/casanovo/data/ms_io.py index 1ae8cd16..922a6de7 100644 --- a/casanovo/data/ms_io.py +++ b/casanovo/data/ms_io.py @@ -236,24 +236,36 @@ def save(self) -> None: "PSH", "spectrum_index", "sequence", - "precursor", + "precursor_mass", + "precursor_charge", + "precursor_mz", "score", "target", "aa_scores", ] ) for i, psm in enumerate( - natsort.natsorted(self.psms, key=operator.itemgetter(1)), 1 + natsort.natsorted(self.psms, key=operator.itemgetter(0)), 1 ): - for psm in list(zip(*psm)): + # [precursor_masses, precursor_charges, precursor_mzs] + for rowinfo in list(zip(*psm)): writer.writerow( [ "PSM", - psm[0], # spectrum_index - psm[1], # sequence - psm[2], # precursor - psm[3], # score - bool(psm[4]), # target - psm[5], # aa_scores + rowinfo[0], # spectrum_index + rowinfo[1], # sequence + rowinfo[2][0], # precursor mass + int(rowinfo[2][1]), # precursor charge + rowinfo[2][2], # precursor m/z + rowinfo[3], # score + bool(rowinfo[4]), # target + ",".join( + list( + map( + "{:.5f}".format, + rowinfo[5][rowinfo[5] != 0], + ) + ) + ), # aa_scores including stop token ] ) diff --git a/casanovo/denovo/model.py b/casanovo/denovo/model.py index d31820f2..30e8862e 100644 --- a/casanovo/denovo/model.py +++ b/casanovo/denovo/model.py @@ -1003,7 +1003,6 @@ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) def predict_step(self, batch, *args): - logger.info("New batch") batch_res = [] for ( indexes, @@ -1063,8 +1062,6 @@ def smart_batch_gen(self, batch): ) ) # Continually grab num_pairs items from all_psm until list is exhausted - logger.info(f"Received {len(all_psm)} PSMs") - logger.info(f"Processing num_pairs: {self.num_pairs}") while len(all_psm) > 0: batch = all_psm[: self.num_pairs] all_psm = all_psm[self.num_pairs :] @@ -1097,7 +1094,6 @@ def on_predict_batch_end( for index, t_or_d, peptide, score, per_aa_scores, precursor in zip( indexes, t_or_d, peptides, score_result, per_aa_score, precursors ): - per_aa_scores = list(per_aa_scores[per_aa_scores != 0]) self.out_writer.psms.append( (index, peptide, precursor, score, t_or_d, per_aa_scores), ) @@ -1139,7 +1135,7 @@ def calc_match_score( score_mask = truth_aa_indicies != 0 masked_per_aa_scores = per_aa_scores * score_mask - # all_scores = masked_per_aa_scores.sum(dim=1) / score_mask.sum(dim=1) # Calculated arithmetic score + # all_scores = 
masked_per_aa_scores.sum(dim=1) / score_mask.sum(dim=1) # Calculated arithmetic score that was used before all_scores = torch.where( torch.log(masked_per_aa_scores) == float("-inf"), torch.tensor(0.0), From 278436b2b93be01f49632b5c7a00d4f16c31d8f0 Mon Sep 17 00:00:00 2001 From: VarunAnanth2003 Date: Sat, 27 Apr 2024 13:16:23 -0700 Subject: [PATCH 07/84] all tests added --- casanovo/config.yaml | 3 +- casanovo/data/ms_io.py | 83 ++++++++++++++++++++++------------- casanovo/denovo/model.py | 40 ++++++++++++----- tests/test_integration.py | 77 ++++++++++++++++++++++++++++++++ tests/unit_tests/test_unit.py | 50 ++++++++++++++++++++- 5 files changed, 209 insertions(+), 44 deletions(-) diff --git a/casanovo/config.yaml b/casanovo/config.yaml index c7186ff7..7b207664 100644 --- a/casanovo/config.yaml +++ b/casanovo/config.yaml @@ -15,7 +15,8 @@ precursor_mass_tol: 50 # ppm isotope_error_range: [0, 1] # The minimum length of predicted peptides. min_peptide_len: 6 -# Number of spectra in one inference batch. +# Number of spectra in one inference batch. +# Also the number of PSMs processed by Casanovo-DB per batch. predict_batch_size: 1024 # Number of beams used in beam search. n_beams: 1 diff --git a/casanovo/data/ms_io.py b/casanovo/data/ms_io.py index 922a6de7..ae4f3b54 100644 --- a/casanovo/data/ms_io.py +++ b/casanovo/data/ms_io.py @@ -224,6 +224,7 @@ class DBWriter(MztabWriter): def save(self) -> None: """ Export the DB search results to the mzTab file. + Outputs PSMs in the order they were scored (i.e. the order in the annotated .mgf file). """ with open(self.filename, "w", newline="") as f: writer = csv.writer(f, delimiter="\t", lineterminator=os.linesep) @@ -234,38 +235,58 @@ def save(self) -> None: writer.writerow( [ "PSH", - "spectrum_index", "sequence", - "precursor_mass", - "precursor_charge", - "precursor_mz", - "score", - "target", - "aa_scores", + "PSM_ID", + "accession", + "unique", + "database", + "database_version", + "search_engine", + "search_engine_score[1]", + "modifications", + "retention_time", + "charge", + "exp_mass_to_charge", + "calc_mass_to_charge", + "spectra_ref", + "pre", + "post", + "start", + "end", + "opt_ms_run[1]_aa_scores", + "opt_target", ] ) - for i, psm in enumerate( - natsort.natsorted(self.psms, key=operator.itemgetter(0)), 1 - ): - # [precursor_masses, precursor_charges, precursor_mzs] - for rowinfo in list(zip(*psm)): - writer.writerow( - [ - "PSM", - rowinfo[0], # spectrum_index - rowinfo[1], # sequence - rowinfo[2][0], # precursor mass - int(rowinfo[2][1]), # precursor charge - rowinfo[2][2], # precursor m/z - rowinfo[3], # score - bool(rowinfo[4]), # target - ",".join( - list( - map( - "{:.5f}".format, - rowinfo[5][rowinfo[5] != 0], - ) + for i, psm in enumerate(self.psms): + writer.writerow( + [ + "PSM", + psm[0], # sequence + f"{psm[5]}:{i}", # spectra_ref + "null", # accession + "null", # unique + "null", # database + "null", # database_version + "null", # search_engine + psm[1], # search_engine_score[1] + "null", # modifications + "null", # retention_time + int(psm[2]), # charge + psm[3], # exp_mass_to_charge + psm[4], # calc_mass_to_charge + psm[5], # spectra_ref + "null", # pre + "null", # post + "null", # start + "null", # end + ",".join( + list( + map( + "{:.5f}".format, + psm[6][psm[6] != 0], ) - ), # aa_scores including stop token - ] - ) + ) + ), # opt_ms_run[1]_aa_scores + bool(psm[7]), # opt_target + ] + ) diff --git a/casanovo/denovo/model.py b/casanovo/denovo/model.py index 30e8862e..4efe0f92 100644 --- a/casanovo/denovo/model.py +++ 
b/casanovo/denovo/model.py @@ -1014,7 +1014,7 @@ def predict_step(self, batch, *args): pred, truth = self.decoder(peptides, precursors, *encoded_ms) sm = torch.nn.Softmax(dim=2) pred = sm(pred) - score_result, per_aa_score = calc_match_score( + score_result, per_aa_score = _calc_match_score( pred, truth ) # Calculate the score between spectra + peptide list batch_res.append( @@ -1083,23 +1083,40 @@ def on_predict_batch_end( ) -> None: if self.out_writer is None: return - ( + for ( indexes, t_or_d, peptides, score_result, per_aa_score, precursors, - ) = list(zip(*outputs)) - for index, t_or_d, peptide, score, per_aa_scores, precursor in zip( - indexes, t_or_d, peptides, score_result, per_aa_score, precursors - ): - self.out_writer.psms.append( - (index, peptide, precursor, score, t_or_d, per_aa_scores), - ) + ) in outputs: + for index, t_or_d, peptide, score, per_aa_scores, precursor in zip( + indexes, + t_or_d, + peptides, + score_result, + per_aa_score, + precursors, + ): + prec_charge = precursor[1] + prec_mz = precursor[2] + calc_mz = precursor[2] + self.out_writer.psms.append( + ( + peptide, + score, + prec_charge, + prec_mz, + calc_mz, + index, + per_aa_scores, + t_or_d, + ), + ) -def calc_match_score( +def _calc_match_score( batch_all_aa_scores: torch.Tensor, truth_aa_indicies: torch.Tensor ) -> List[float]: """ @@ -1135,7 +1152,8 @@ def calc_match_score( score_mask = truth_aa_indicies != 0 masked_per_aa_scores = per_aa_scores * score_mask - # all_scores = masked_per_aa_scores.sum(dim=1) / score_mask.sum(dim=1) # Calculated arithmetic score that was used before + # Arithmetic score that was used before + ## all_scores = masked_per_aa_scores.sum(dim=1) / score_mask.sum(dim=1) all_scores = torch.where( torch.log(masked_per_aa_scores) == float("-inf"), torch.tensor(0.0), diff --git a/tests/test_integration.py b/tests/test_integration.py index d4d86d7d..73232fa7 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -52,6 +52,83 @@ def test_annotate( ) +def test_db_search( + mgf_small_unannotated, tide_dir_small, tiny_config, tmp_path +): + # Run a command: + run = functools.partial( + CliRunner().invoke, casanovo.main, catch_exceptions=False + ) + + annotate_args = [ + "annotate", + str(mgf_small_unannotated), + str(tide_dir_small), + "--config", + tiny_config, + "--output", + str(tmp_path / "annotated_mgf.mgf"), + ] + + result = run(annotate_args) + + assert result.exit_code == 0 + assert (tmp_path / "annotated_mgf.mgf").exists() + + # Follow up annotate run with db search + + output_path = tmp_path / "db_search.mztab" + + search_args = [ + "db-search", + str(tmp_path / "annotated_mgf.mgf"), + "--config", + tiny_config, + "--output", + str(output_path), + ] + + result = run(search_args) + + assert result.exit_code == 0 + assert output_path.exists() + assert output_path.is_file() + + mztab = pyteomics.mztab.MzTab(str(output_path)) + + psms = mztab.spectrum_match_table + assert list(psms.sequence) == [ + "LESLIEK", + "PEPTIDEK", + "KEILSEL", + "KEDITEPP", + "LESLIEK", + "PEPTIDEK", + "KEILSEL", + "KEDITEPP", + "+42.011LEM+15.995SLIM+15.995EK", + "+43.006PEN+0.984PTIQ+0.984DEK", + "-17.027KM+15.995EILSEL", + "+43.006-17.027KEDITEPP", + "KEDIQ+0.984TEPPQ+0.984", + ] + assert list(psms.opt_target) == [ + "True", + "True", + "False", + "False", + "True", + "True", + "False", + "False", + "True", + "True", + "False", + "False", + "False", + ] + + def test_train_and_run( mgf_small, mzml_small, tiny_config, tmp_path, monkeypatch ): diff --git a/tests/unit_tests/test_unit.py 
b/tests/unit_tests/test_unit.py index f615a099..bcc61446 100644 --- a/tests/unit_tests/test_unit.py +++ b/tests/unit_tests/test_unit.py @@ -16,7 +16,7 @@ from casanovo.data import ms_io from casanovo.data.datasets import SpectrumDataset, AnnotatedSpectrumDataset from casanovo.denovo.evaluate import aa_match_batch, aa_match_metrics -from casanovo.denovo.model import Spec2Pep, _aa_pep_score +from casanovo.denovo.model import Spec2Pep, _aa_pep_score, _calc_match_score from depthcharge.data import SpectrumIndex, AnnotatedSpectrumIndex @@ -139,6 +139,54 @@ def test_aa_pep_score(): assert peptide_score == pytest.approx(-0.5) +def test_calc_match_score(): + """ + Test the calculation of geometric scores using teacher-forced + decoder output probabilities and ground truth amino acid sequences. + """ + first_slot_prob = torch.zeros(29) + first_slot_prob[1] = 1.0 # A + second_slot_prob = torch.zeros(29) + second_slot_prob[2] = 1.0 # B + third_slot_prob = torch.zeros(29) + third_slot_prob[3] = 1.0 # C + stop_slot_prob = torch.zeros(29) + stop_slot_prob[28] = 1.0 # $ + blank_slot_prob = torch.zeros(29) + + pep_1_aa = torch.stack( + [ + first_slot_prob, + second_slot_prob, + third_slot_prob, + stop_slot_prob, + blank_slot_prob, + ] + ) + pep_2_aa = torch.stack( + [ + third_slot_prob, + second_slot_prob, + stop_slot_prob, + blank_slot_prob, + blank_slot_prob, + ] + ) + + batch_all_aa_scores = torch.stack([pep_1_aa, pep_2_aa]) + truth_aa_indices = torch.tensor([[1, 2, 3, 28], [3, 2, 28, 0]]) + + all_scores, masked_per_aa_scores = _calc_match_score( + batch_all_aa_scores, truth_aa_indices + ) + + assert all_scores.numpy()[0] == pytest.approx(0) + assert all_scores.numpy()[1] == pytest.approx(0) + + assert np.sum(masked_per_aa_scores.numpy()[0]) == pytest.approx(4) + assert np.sum(masked_per_aa_scores.numpy()[1]) == pytest.approx(3) + + def test_beam_search_decode(): """ Test beam search decoding and its sub-functions. From 949ea9392c625b63b20fceba25e1bea973d68cba Mon Sep 17 00:00:00 2001 From: VarunAnanth2003 Date: Sat, 27 Apr 2024 13:25:49 -0700 Subject: [PATCH 08/84] remove minor debugging print statement --- casanovo/data/annotate_db.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/casanovo/data/annotate_db.py b/casanovo/data/annotate_db.py index 3ff3c4b7..db27b05f 100644 --- a/casanovo/data/annotate_db.py +++ b/casanovo/data/annotate_db.py @@ -127,5 +127,6 @@ def annotate_mgf(peak_path: str, tide_path: str, output: Optional[str]): mgf.write(all_spec, output, file_mode="w") logger.info("Annotated .mgf file written to %s.", output) except Exception as e: - print(f"Write to {output} failed. Check if the file path is correct.") - print(e) + logger.error( + "Write to %s failed. 
Check if the file path is correct.", output + ) From da5ef5e29f8337974815b9c09c27e09967182e25 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Sat, 27 Apr 2024 20:28:46 +0000 Subject: [PATCH 09/84] Generate new screengrabs with rich-codex --- docs/images/help.svg | 213 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 213 insertions(+) create mode 100644 docs/images/help.svg diff --git a/docs/images/help.svg b/docs/images/help.svg new file mode 100644 index 00000000..533b5f70 --- /dev/null +++ b/docs/images/help.svg @@ -0,0 +1,213 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + $ casanovo --help + +Usage:casanovo [OPTIONSCOMMAND [ARGS]...                                     + + ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓  + ┃                                  Casanovo                                  ┃  + ┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛  + Casanovo de novo sequences peptides from tandem mass spectra using a            + Transformer model. Casanovo currently supports mzML, mzXML, and MGF files for   + de novo sequencing and annotated MGF files, such as those from MassIVE-KB, for  + training new models.                                                            + + Links:                                                                          + + • Documentation: https://casanovo.readthedocs.io + • Official code repository: https://github.com/Noble-Lab/casanovo + + If you use Casanovo in your work, please cite:                                  + + • Yilmaz, M., Fondrie, W. E., Bittremieux, W., Oh, S. & Noble, W. S. De novo   +mass spectrometry peptide sequencing with a transformer model. Proceedings   +of the 39th International Conference on Machine Learning - ICML '22 (2022)   +doi:10.1101/2022.02.07.479481.                                               + +╭─ Options ────────────────────────────────────────────────────────────────────╮ +--help-h    Show this message and exit.                                     +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Commands ───────────────────────────────────────────────────────────────────╮ +annotate  Annotate a given .mgf with candidates as selected by a Tide        + search for Casanovo-DB.                                            +configure Generate a Casanovo configuration file to customize.               +db-search Perform a search using Casanovo-DB.                                +evaluate  Evaluate de novo peptide sequencing performance.                   +sequence  De novo sequence peptides from tandem mass spectra.                +train     Train a Casanovo model on your own data.                           
+version   Get the Casanovo version information                               +╰──────────────────────────────────────────────────────────────────────────────╯ + + + + + From 53f6bec021f2dd19285278317b0f56370b0652ab Mon Sep 17 00:00:00 2001 From: VarunAnanth2003 Date: Sat, 27 Apr 2024 13:52:39 -0700 Subject: [PATCH 10/84] remove excess info logs, add monkeypatch to tests --- casanovo/data/annotate_db.py | 1 - tests/test_integration.py | 6 ++++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/casanovo/data/annotate_db.py b/casanovo/data/annotate_db.py index db27b05f..4f33b798 100644 --- a/casanovo/data/annotate_db.py +++ b/casanovo/data/annotate_db.py @@ -123,7 +123,6 @@ def annotate_mgf(peak_path: str, tide_path: str, output: Optional[str]): pass try: output = str(output) - logger.info(output) mgf.write(all_spec, output, file_mode="w") logger.info("Annotated .mgf file written to %s.", output) except Exception as e: diff --git a/tests/test_integration.py b/tests/test_integration.py index 73232fa7..56fb0790 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -8,10 +8,11 @@ def test_annotate( - mgf_small_unannotated, tide_dir_small, tiny_config, tmp_path + mgf_small_unannotated, tide_dir_small, tiny_config, tmp_path, monkeypatch ): # Run a command: + monkeypatch.setattr(casanovo, "__version__", "3.0.1") run = functools.partial( CliRunner().invoke, casanovo.main, catch_exceptions=False ) @@ -53,9 +54,10 @@ def test_annotate( def test_db_search( - mgf_small_unannotated, tide_dir_small, tiny_config, tmp_path + mgf_small_unannotated, tide_dir_small, tiny_config, tmp_path, monkeypatch ): # Run a command: + monkeypatch.setattr(casanovo, "__version__", "3.0.1") run = functools.partial( CliRunner().invoke, casanovo.main, catch_exceptions=False ) From 81aa073c55510cb7842bdb5a1b983bcda1a66457 Mon Sep 17 00:00:00 2001 From: VarunAnanth2003 Date: Sat, 27 Apr 2024 13:59:22 -0700 Subject: [PATCH 11/84] mp fix --- tests/test_integration.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/test_integration.py b/tests/test_integration.py index 56fb0790..e8654c68 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -8,11 +8,10 @@ def test_annotate( - mgf_small_unannotated, tide_dir_small, tiny_config, tmp_path, monkeypatch + mgf_small_unannotated, tide_dir_small, tiny_config, tmp_path ): # Run a command: - monkeypatch.setattr(casanovo, "__version__", "3.0.1") run = functools.partial( CliRunner().invoke, casanovo.main, catch_exceptions=False ) @@ -57,7 +56,7 @@ def test_db_search( mgf_small_unannotated, tide_dir_small, tiny_config, tmp_path, monkeypatch ): # Run a command: - monkeypatch.setattr(casanovo, "__version__", "3.0.1") + monkeypatch.setattr(casanovo, "__version__", "4.1.1") run = functools.partial( CliRunner().invoke, casanovo.main, catch_exceptions=False ) From 0ecbd80c9e209d3b796e6596bbb8b665bd05900c Mon Sep 17 00:00:00 2001 From: VarunAnanth2003 Date: Mon, 6 May 2024 23:26:39 -0700 Subject: [PATCH 12/84] fix line lengths and modify test --- casanovo/denovo/model.py | 25 +++++++++++++++++-------- tests/test_integration.py | 7 ++++--- 2 files changed, 21 insertions(+), 11 deletions(-) diff --git a/casanovo/denovo/model.py b/casanovo/denovo/model.py index 4efe0f92..ec234691 100644 --- a/casanovo/denovo/model.py +++ b/casanovo/denovo/model.py @@ -993,8 +993,10 @@ class DBSpec2Pep(Spec2Pep): """ Inherits Spec2Pep - Hijacks teacher-forcing implemented in Spec2Pep and uses it to predict scores between a spectra and 
associated peptide. - Input format is .mgf, with comma-separated targets and decoys in the SEQ field. Decoys should have a prefix of "decoy_". + Hijacks teacher-forcing implemented in Spec2Pep and + uses it to predict scores between a spectra and associated peptide. + Input format is .mgf, with comma-separated targets + and decoys in the SEQ field. Decoys should have a prefix of "decoy_". """ num_pairs = None # Modified to be predict_batch_size from config @@ -1120,22 +1122,29 @@ def _calc_match_score( batch_all_aa_scores: torch.Tensor, truth_aa_indicies: torch.Tensor ) -> List[float]: """ - Take in teacher-forced scoring of amino acids of the peptides (in a batch) and use the truth labels - to calculate a score between the input spectra and associated peptide. The score is the geometric + Take in teacher-forced scoring of amino acids + of the peptides (in a batch) and use the truth labels + to calculate a score between the input spectra and + associated peptide. The score is the geometric mean of the AA probabilities Parameters ---------- batch_all_aa_scores : torch.Tensor - Amino acid scores for all amino acids in the vocabulary for every prediction made to generate the associated peptide (for an entire batch) + Amino acid scores for all amino acids in + the vocabulary for every prediction made to generate + the associated peptide (for an entire batch) truth_aa_indicies : torch.Tensor - Indicies of the score for each actual amino acid in the peptide (for an entire batch) + Indicies of the score for each actual amino acid + in the peptide (for an entire batch) Returns ------- score : list[float], list[list[float]] - The score between the input spectra and associated peptide (for an entire batch) - a list of lists of per amino acid scores (for an entire batch) + The score between the input spectra and associated peptide + (for an entire batch) + a list of lists of per amino acid scores + (for an entire batch) """ # Remove trailing tokens from predictions, batch_all_aa_scores = batch_all_aa_scores[:, :-1] diff --git a/tests/test_integration.py b/tests/test_integration.py index e8654c68..3ad1a4f4 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -47,8 +47,9 @@ def test_annotate( == "SEQ=LESLIEK,PEPTIDEK,decoy_KEILSEL,decoy_KEDITEPP" ) assert ( - seq_lines[2].strip() - == "SEQ=+42.011LEM+15.995SLIM+15.995EK,+43.006PEN+0.984PTIQ+0.984DEK,decoy_-17.027KM+15.995EILSEL,decoy_+43.006-17.027KEDITEPP,decoy_KEDIQ+0.984TEPPQ+0.984" + seq_lines[2].strip() == "SEQ=+42.011LEM+15.995SLIM+15.995EK," + "+43.006PEN+0.984PTIQ+0.984DEK,decoy_-17.027KM+15.995EILSEL," + "decoy_+43.006-17.027KEDITEPP,decoy_KEDIQ+0.984TEPPQ+0.984" ) @@ -56,7 +57,7 @@ def test_db_search( mgf_small_unannotated, tide_dir_small, tiny_config, tmp_path, monkeypatch ): # Run a command: - monkeypatch.setattr(casanovo, "__version__", "4.1.1") + monkeypatch.setattr(casanovo, "__version__", "4.1.0") run = functools.partial( CliRunner().invoke, casanovo.main, catch_exceptions=False ) From ee6638e70c30e27727f7b409d70fa3672ca2ee11 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Tue, 7 May 2024 06:30:44 +0000 Subject: [PATCH 13/84] Generate new screengrabs with rich-codex --- docs/images/configure-help.svg | 64 ++++++------ docs/images/evaluate-help.svg | 123 +++++++++++------------ docs/images/help.svg | 169 +++++++++++++++---------------- docs/images/sequence-help.svg | 123 +++++++++++------------ docs/images/train-help.svg | 175 ++++++++++++++++----------------- 5 files changed, 307 insertions(+), 347 
 deletions(-)

diff --git a/docs/images/configure-help.svg b/docs/images/configure-help.svg
index d5dd7aa8..0822927a 100644
diff --git a/docs/images/evaluate-help.svg b/docs/images/evaluate-help.svg
index e220664b..b16c4ffd 100644
diff --git a/docs/images/help.svg b/docs/images/help.svg
index 533b5f70..67dca83e 100644
diff --git a/docs/images/sequence-help.svg b/docs/images/sequence-help.svg
index d493e2b2..f5799766 100644
diff --git a/docs/images/train-help.svg b/docs/images/train-help.svg
index 82c30122..fccd4140 100644
[SVG terminal screengrabs: the regenerated configure, evaluate, main, sequence, and train --help screens. The main help screen lists the annotate, configure, db-search, evaluate, sequence, train, and version commands; the tail of the regenerated train-help.svg text follows.]
│ +│    --config               -c  FILE                    The YAML configuration │ +│                                                       file overriding the    │ +│                                                       default options.       │ +│    --verbosity            -v  [debug|info|warning|er  Set the verbosity of   │ +│                               ror]                    console logging        │ +│                                                       messages. Log files    │ +│                                                       are always set to      │ +│                                                       'debug'.               │ +│    --help                 -h                          Show this message and  │ +│                                                       exit.                  │ +╰──────────────────────────────────────────────────────────────────────────────╯ + From 2d57513fbcfed50e848ca47b9f82ac68baa884a2 Mon Sep 17 00:00:00 2001 From: VarunAnanth2003 Date: Tue, 7 May 2024 12:05:34 -0700 Subject: [PATCH 14/84] justins requested fixes --- casanovo/config.yaml | 1 - casanovo/data/annotate_db.py | 15 +++++++++++---- casanovo/denovo/dataloaders.py | 2 +- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/casanovo/config.yaml b/casanovo/config.yaml index 7b207664..a92a7ffa 100644 --- a/casanovo/config.yaml +++ b/casanovo/config.yaml @@ -16,7 +16,6 @@ isotope_error_range: [0, 1] # The minimum length of predicted peptides. min_peptide_len: 6 # Number of spectra in one inference batch. -# Also the number of PSMs processed by Casanovo-DB per batch. predict_batch_size: 1024 # Number of beams used in beam search. n_beams: 1 diff --git a/casanovo/data/annotate_db.py b/casanovo/data/annotate_db.py index 4f33b798..dd2e6c64 100644 --- a/casanovo/data/annotate_db.py +++ b/casanovo/data/annotate_db.py @@ -38,8 +38,10 @@ def _normalize_mods(seq: str) -> str: def annotate_mgf(peak_path: str, tide_path: str, output: Optional[str]): """ - Accepts a directory containing the results of a successful tide search, and an .mgf file containing MS/MS spectra. - The .mgf file is then annotated in the SEQ field with all of the candidate peptides for each spectrum, as well as their target/decoy status. + Accepts a directory containing the results of a successful tide search, + and an .mgf file containing MS/MS spectra. + The .mgf file is then annotated in the SEQ field with + all of the candidate peptides for each spectrum, as well as their target/decoy status. This annotated .mgf can be given directly to Casanovo-DB to perfrom a database search. Parameters @@ -104,7 +106,11 @@ def annotate_mgf(peak_path: str, tide_path: str, output: Optional[str]): decoy_candidate_list = [] logger.warn(f"No decoy peptides found for scan {scan}.") - scan_map[scan] = target_candidate_list + decoy_candidate_list + pep_list = target_candidate_list + decoy_candidate_list + if len(pep_list) == 0: + logger.warn(f"No peptides found for scan {scan}.") + else: + scan_map[scan] = target_candidate_list + decoy_candidate_list all_spec = [] for idx, spec_dict in enumerate(mgf.read(peak_path)): @@ -112,7 +118,8 @@ def annotate_mgf(peak_path: str, tide_path: str, output: Optional[str]): scan = int(spec_dict["params"]["scans"]) except KeyError as e: logger.error( - "Could not find the scan number in the .mgf file. Please ensure that the .mgf file contains the scan number in the 'SCANS' field." + "Could not find the scan number in the .mgf file." 
+ "Please ensure that the .mgf file contains the scan number in the 'SCANS' field." ) raise e try: diff --git a/casanovo/denovo/dataloaders.py b/casanovo/denovo/dataloaders.py index efb346ab..284aaeb7 100644 --- a/casanovo/denovo/dataloaders.py +++ b/casanovo/denovo/dataloaders.py @@ -126,7 +126,7 @@ def setup(self, stage: str = None, annotated: bool = True) -> None: ) if self.test_index is not None: self.test_dataset = make_dataset(self.test_index) - if stage in (None, "db"): + if stage == "db": make_dataset = functools.partial( DBSpectrumDataset, n_peaks=self.n_peaks, From 3cfb7954a20cf2dd8ba79245db3291b1e2eaab3b Mon Sep 17 00:00:00 2001 From: VarunAnanth2003 Date: Mon, 17 Jun 2024 12:11:24 -0700 Subject: [PATCH 15/84] added minor changes as requested by Wout --- casanovo/casanovo.py | 34 +++++++++++++++++------ casanovo/config.yaml | 2 +- casanovo/data/datasets.py | 5 ++-- casanovo/data/ms_io.py | 4 +-- casanovo/denovo/dataloaders.py | 4 +-- casanovo/denovo/model.py | 48 ++++++++++++++++----------------- casanovo/denovo/model_runner.py | 6 ++--- tests/test_integration.py | 8 +----- 8 files changed, 61 insertions(+), 50 deletions(-) diff --git a/casanovo/casanovo.py b/casanovo/casanovo.py index c2bce3ef..7db5faa8 100644 --- a/casanovo/casanovo.py +++ b/casanovo/casanovo.py @@ -146,7 +146,7 @@ def sequence( logger.info("DONE!") -@main.command(cls=_SharedParams) +@main.command() @click.argument( "peak_path", required=True, @@ -159,11 +159,28 @@ def sequence( nargs=1, type=click.Path(exists=True, dir_okay=True), ) +@click.option( + "-o", + "--output", + help="The output annotated MGF file.", + type=click.Path(dir_okay=False), +) +@click.option( + "-v", + "--verbosity", + help=""" + Set the verbosity of console logging messages. Log files are + always set to 'debug'. + """, + type=click.Choice( + ["debug", "info", "warning", "error"], + case_sensitive=False, + ), + default="info", +) def annotate( peak_path: str, tide_path: str, - model: Optional[str], - config: Optional[str], output: Optional[str], verbosity: str, ) -> None: @@ -174,13 +191,12 @@ def annotate( TIDE_PATH must be one directory containing the Tide search results of the .mgf. This directory must contain tide-search.decoy.txt and tide-search.target.txt """ - for peak_file in peak_path: - logger.info(" %s", peak_file) - if output is None: output = setup_logging(output, verbosity) logger.info( - "Output file not specified. Annotated MGF will be saved in the same directory as the input MGF." + "Output file not specified. \ + Annotated MGF will be saved in the same directory \ + as the input MGF." ) output = peak_path.replace(".mgf", "_annotated.mgf") else: @@ -207,11 +223,13 @@ def db_search( ) -> None: """Perform a search using Casanovo-DB. - PEAK_PATH must be one MGF file that has ANNOTATED spectra, as output by annotate mode. + PEAK_PATH must be one MGF file that has ANNOTATED spectra, + as output by annotate mode. """ output = setup_logging(output, verbosity) config, model = setup_model(model, config, output, False) with ModelRunner(config, model) as runner: + logger.info("DB-searching peptides from: %s", peak_path) runner.db_search(peak_path, output) logger.info("DONE!") diff --git a/casanovo/config.yaml b/casanovo/config.yaml index a92a7ffa..c7186ff7 100644 --- a/casanovo/config.yaml +++ b/casanovo/config.yaml @@ -15,7 +15,7 @@ precursor_mass_tol: 50 # ppm isotope_error_range: [0, 1] # The minimum length of predicted peptides. min_peptide_len: 6 -# Number of spectra in one inference batch. 
+# Number of spectra in one inference batch. predict_batch_size: 1024 # Number of beams used in beam search. n_beams: 1 diff --git a/casanovo/data/datasets.py b/casanovo/data/datasets.py index d0c6f347..665d69e0 100644 --- a/casanovo/data/datasets.py +++ b/casanovo/data/datasets.py @@ -267,9 +267,10 @@ def __getitem__(self, idx: int) -> Tuple[torch.Tensor, float, int, str]: return spectrum, precursor_mz, precursor_charge, peptide -class DBSpectrumDataset(AnnotatedSpectrumDataset): +class DbSpectrumDataset(AnnotatedSpectrumDataset): """ - Parse and retrieve collections of annotated MS/MS spectra, additionally keep track of spectrum ids for Casanovo-DB. + Parse and retrieve collections of annotated MS/MS spectra, + additionally keep track of spectrum ids for Casanovo-DB. Parameters ---------- diff --git a/casanovo/data/ms_io.py b/casanovo/data/ms_io.py index ae4f3b54..de12e768 100644 --- a/casanovo/data/ms_io.py +++ b/casanovo/data/ms_io.py @@ -254,7 +254,7 @@ def save(self) -> None: "start", "end", "opt_ms_run[1]_aa_scores", - "opt_target", + "opt_cv_MS:1002217_decoy_peptide", ] ) for i, psm in enumerate(self.psms): @@ -287,6 +287,6 @@ def save(self) -> None: ) ) ), # opt_ms_run[1]_aa_scores - bool(psm[7]), # opt_target + bool(psm[7]), # opt_cv_MS:1002217_decoy_peptide ] ) diff --git a/casanovo/denovo/dataloaders.py b/casanovo/denovo/dataloaders.py index 284aaeb7..f9865572 100644 --- a/casanovo/denovo/dataloaders.py +++ b/casanovo/denovo/dataloaders.py @@ -12,7 +12,7 @@ from ..data.datasets import ( AnnotatedSpectrumDataset, SpectrumDataset, - DBSpectrumDataset, + DbSpectrumDataset, ) @@ -128,7 +128,7 @@ def setup(self, stage: str = None, annotated: bool = True) -> None: self.test_dataset = make_dataset(self.test_index) if stage == "db": make_dataset = functools.partial( - DBSpectrumDataset, + DbSpectrumDataset, n_peaks=self.n_peaks, min_mz=self.min_mz, max_mz=self.max_mz, diff --git a/casanovo/denovo/model.py b/casanovo/denovo/model.py index ec234691..6c440ce0 100644 --- a/casanovo/denovo/model.py +++ b/casanovo/denovo/model.py @@ -989,17 +989,16 @@ def configure_optimizers( return [optimizer], {"scheduler": lr_scheduler, "interval": "step"} -class DBSpec2Pep(Spec2Pep): +class DbSpec2Pep(Spec2Pep): """ Inherits Spec2Pep Hijacks teacher-forcing implemented in Spec2Pep and uses it to predict scores between a spectra and associated peptide. - Input format is .mgf, with comma-separated targets - and decoys in the SEQ field. Decoys should have a prefix of "decoy_". + Decoys should have a prefix of "decoy_". """ - num_pairs = None # Modified to be predict_batch_size from config + num_pairs = None def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) @@ -1014,11 +1013,8 @@ def predict_step(self, batch, *args): encoded_ms, ) in self.smart_batch_gen(batch): pred, truth = self.decoder(peptides, precursors, *encoded_ms) - sm = torch.nn.Softmax(dim=2) - pred = sm(pred) - score_result, per_aa_score = _calc_match_score( - pred, truth - ) # Calculate the score between spectra + peptide list + pred = self.softmax(pred) + score_result, per_aa_score = _calc_match_score(pred, truth) batch_res.append( ( indexes, @@ -1122,29 +1118,31 @@ def _calc_match_score( batch_all_aa_scores: torch.Tensor, truth_aa_indicies: torch.Tensor ) -> List[float]: """ + Calculate the score between the input spectra and associated peptide. + Take in teacher-forced scoring of amino acids of the peptides (in a batch) and use the truth labels to calculate a score between the input spectra and associated peptide. 
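A minimal numerical sketch of the scoring described here (illustrative values only, not taken from the patch): the value reported per peptide-spectrum match is the mean of the log-transformed amino acid probabilities, which equals the log of their geometric mean.

import numpy as np

# Hypothetical softmax scores gathered at the ground-truth amino acid
# indices of a single peptide-spectrum match.
per_aa_probs = np.array([0.9, 0.8, 0.95, 0.7])
mean_log_prob = np.log(per_aa_probs).mean()
geometric_mean = per_aa_probs.prod() ** (1 / per_aa_probs.size)
assert np.isclose(mean_log_prob, np.log(geometric_mean))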
The score is the geometric mean of the AA probabilities - Parameters - ---------- - batch_all_aa_scores : torch.Tensor - Amino acid scores for all amino acids in - the vocabulary for every prediction made to generate - the associated peptide (for an entire batch) - truth_aa_indicies : torch.Tensor - Indicies of the score for each actual amino acid - in the peptide (for an entire batch) + Parameters + ---------- + batch_all_aa_scores : torch.Tensor + Amino acid scores for all amino acids in + the vocabulary for every prediction made to generate + the associated peptide (for an entire batch) + truth_aa_indicies : torch.Tensor + Indicies of the score for each actual amino acid + in the peptide (for an entire batch) - Returns - ------- - score : list[float], list[list[float]] - The score between the input spectra and associated peptide - (for an entire batch) - a list of lists of per amino acid scores - (for an entire batch) + Returns + ------- + score : list[float], list[list[float]] + The score between the input spectra and associated peptide + (for an entire batch) + a list of lists of per amino acid scores + (for an entire batch) """ # Remove trailing tokens from predictions, batch_all_aa_scores = batch_all_aa_scores[:, :-1] diff --git a/casanovo/denovo/model_runner.py b/casanovo/denovo/model_runner.py index e6956049..fd17378f 100644 --- a/casanovo/denovo/model_runner.py +++ b/casanovo/denovo/model_runner.py @@ -20,7 +20,7 @@ from ..config import Config from ..data import ms_io from ..denovo.dataloaders import DeNovoDataModule -from ..denovo.model import Spec2Pep, DBSpec2Pep +from ..denovo.model import Spec2Pep, DbSpec2Pep logger = logging.getLogger("casanovo") @@ -294,7 +294,7 @@ def initialize_db_model(self) -> None: # the provided configuration. device = torch.empty(1).device # Use the default device. try: - self.model = DBSpec2Pep.load_from_checkpoint( + self.model = DbSpec2Pep.load_from_checkpoint( self.model_filename, map_location=device, **loaded_model_params ) @@ -315,7 +315,7 @@ def initialize_db_model(self) -> None: except RuntimeError: # This only doesn't work if the weights are from an older version try: - self.model = DBSpec2Pep.load_from_checkpoint( + self.model = DbSpec2Pep.load_from_checkpoint( self.model_filename, map_location=device, **model_params, diff --git a/tests/test_integration.py b/tests/test_integration.py index 3ad1a4f4..aacc6d15 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -7,9 +7,7 @@ from casanovo import casanovo -def test_annotate( - mgf_small_unannotated, tide_dir_small, tiny_config, tmp_path -): +def test_annotate(mgf_small_unannotated, tide_dir_small, tmp_path): # Run a command: run = functools.partial( @@ -20,8 +18,6 @@ def test_annotate( "annotate", str(mgf_small_unannotated), str(tide_dir_small), - "--config", - tiny_config, "--output", str(tmp_path / "annotated_mgf.mgf"), ] @@ -66,8 +62,6 @@ def test_db_search( "annotate", str(mgf_small_unannotated), str(tide_dir_small), - "--config", - tiny_config, "--output", str(tmp_path / "annotated_mgf.mgf"), ] From 49f44ada6452a2769d0e8bddd04f03995f7a8a1c Mon Sep 17 00:00:00 2001 From: VarunAnanth2003 Date: Mon, 17 Jun 2024 17:15:14 -0700 Subject: [PATCH 16/84] partial fixes requested by wout. 
Lots of subclassing removed --- casanovo/data/datasets.py | 84 +++--------------- casanovo/data/ms_io.py | 27 +++--- casanovo/denovo/dataloaders.py | 42 ++------- casanovo/denovo/model.py | 12 +-- casanovo/denovo/model_runner.py | 152 +++++++++----------------------- tests/test_integration.py | 2 +- tests/unit_tests/test_unit.py | 46 ++++++++-- 7 files changed, 121 insertions(+), 244 deletions(-) diff --git a/casanovo/data/datasets.py b/casanovo/data/datasets.py index 665d69e0..aff6af85 100644 --- a/casanovo/data/datasets.py +++ b/casanovo/data/datasets.py @@ -212,6 +212,8 @@ class AnnotatedSpectrumDataset(SpectrumDataset): random_state : Optional[int] The NumPy random state. ``None`` leaves mass spectra in the order they were parsed. + track_spectrum_id : Optional[bool] + Whether to keep track of the identifier of the MS/MS spectra. """ def __init__( @@ -223,6 +225,7 @@ def __init__( min_intensity: float = 0.01, remove_precursor_tol: float = 2.0, random_state: Optional[int] = None, + track_spectrum_id: Optional[bool] = False, ): super().__init__( annotated_spectrum_index, @@ -233,6 +236,7 @@ def __init__( remove_precursor_tol=remove_precursor_tol, random_state=random_state, ) + self.track_spectrum_id = track_spectrum_id def __getitem__(self, idx: int) -> Tuple[torch.Tensor, float, int, str]: """ @@ -264,76 +268,12 @@ def __getitem__(self, idx: int) -> Tuple[torch.Tensor, float, int, str]: spectrum = self._process_peaks( mz_array, int_array, precursor_mz, precursor_charge ) + if self.track_spectrum_id: + return ( + spectrum, + precursor_mz, + precursor_charge, + peptide, + self.get_spectrum_id(idx), + ) return spectrum, precursor_mz, precursor_charge, peptide - - -class DbSpectrumDataset(AnnotatedSpectrumDataset): - """ - Parse and retrieve collections of annotated MS/MS spectra, - additionally keep track of spectrum ids for Casanovo-DB. - - Parameters - ---------- - annotated_spectrum_index : depthcharge.data.SpectrumIndex - The MS/MS spectra to use as a dataset. - n_peaks : Optional[int] - The number of top-n most intense peaks to keep in each spectrum. `None` - retains all peaks. - min_mz : float - The minimum m/z to include. The default is 140 m/z, in order to exclude - TMT and iTRAQ reporter ions. - max_mz : float - The maximum m/z to include. - min_intensity : float - Remove peaks whose intensity is below `min_intensity` percentage of the - base peak intensity. - remove_precursor_tol : float - Remove peaks within the given mass tolerance in Dalton around the - precursor mass. - random_state : Optional[int] - The NumPy random state. ``None`` leaves mass spectra in the order they - were parsed. - """ - - def __getitem__( - self, idx: int - ) -> Tuple[torch.Tensor, float, int, str, Tuple[str, str]]: - """ - Return the annotated MS/MS spectrum with the given index. - - Parameters - ---------- - idx : int - The index of the spectrum to return. - - Returns - ------- - spectrum : torch.Tensor of shape (n_peaks, 2) - A tensor of the spectrum with the m/z and intensity peak values. - precursor_mz : float - The precursor m/z. - precursor_charge : int - The precursor charge. - annotation : str - The peptide annotation of the spectrum. - spectrum_id: Tuple[str, str] - The unique spectrum identifier, formed by its original peak file and - identifier (index or scan number) therein. 
- """ - ( - mz_array, - int_array, - precursor_mz, - precursor_charge, - peptide, - ) = self.index[idx] - spectrum = self._process_peaks( - mz_array, int_array, precursor_mz, precursor_charge - ) - return ( - spectrum, - precursor_mz, - precursor_charge, - peptide, - self.get_spectrum_id(idx), - ) diff --git a/casanovo/data/ms_io.py b/casanovo/data/ms_io.py index de12e768..c4cfc7cb 100644 --- a/casanovo/data/ms_io.py +++ b/casanovo/data/ms_io.py @@ -22,10 +22,13 @@ class MztabWriter: ---------- filename : str The name of the mzTab file. + is_db_variant : bool + Whether the mzTab file is for a Casanovo-DB search. """ - def __init__(self, filename: str): + def __init__(self, filename: str, is_db_variant: bool = False): self.filename = filename + self.is_db_variant = is_db_variant self.metadata = [ ("mzTab-version", "1.0.0"), ("mzTab-mode", "Summary"), @@ -147,6 +150,9 @@ def save(self) -> None: """ Export the spectrum identifications to the mzTab file. """ + if self.is_db_variant: + self.save_db_variant() + return with open(self.filename, "w", newline="") as f: writer = csv.writer(f, delimiter="\t", lineterminator=os.linesep) # Write metadata. @@ -210,21 +216,12 @@ def save(self) -> None: ] ) - -class DBWriter(MztabWriter): - """ - Export DB search results to an mzTab file. - - Parameters - ---------- - filename : str - The name of the mzTab file. - """ - - def save(self) -> None: + def save_db_variant(self) -> None: """ - Export the DB search results to the mzTab file. - Outputs PSMs in the order they were scored (i.e. the order in the annotated .mgf file). + Export the Casanovo-DB search results to the mzTab file. + + Outputs PSMs in the order they were scored + (i.e. the order in the annotated .mgf file). """ with open(self.filename, "w", newline="") as f: writer = csv.writer(f, delimiter="\t", lineterminator=os.linesep) diff --git a/casanovo/denovo/dataloaders.py b/casanovo/denovo/dataloaders.py index f9865572..6731e532 100644 --- a/casanovo/denovo/dataloaders.py +++ b/casanovo/denovo/dataloaders.py @@ -12,7 +12,6 @@ from ..data.datasets import ( AnnotatedSpectrumDataset, SpectrumDataset, - DbSpectrumDataset, ) @@ -128,12 +127,13 @@ def setup(self, stage: str = None, annotated: bool = True) -> None: self.test_dataset = make_dataset(self.test_index) if stage == "db": make_dataset = functools.partial( - DbSpectrumDataset, + AnnotatedSpectrumDataset, n_peaks=self.n_peaks, min_mz=self.min_mz, max_mz=self.max_mz, min_intensity=self.min_intensity, remove_precursor_tol=self.remove_precursor_tol, + track_spectrum_id=True, ) if self.test_index is not None: self.test_dataset = make_dataset(self.test_index) @@ -143,6 +143,7 @@ def _make_loader( dataset: torch.utils.data.Dataset, batch_size: int, shuffle: bool = False, + db_mode: bool = False, ) -> torch.utils.data.DataLoader: """ Create a PyTorch DataLoader. @@ -155,6 +156,8 @@ def _make_loader( The batch size to use. shuffle : bool Option to shuffle the batches. + db_mode : bool + Option to use the DataLoader for Casanovo-DB. Returns ------- @@ -164,41 +167,12 @@ def _make_loader( return torch.utils.data.DataLoader( dataset, batch_size=batch_size, - collate_fn=prepare_batch, + collate_fn=prepare_batch if not db_mode else prepare_db_batch, pin_memory=True, num_workers=self.n_workers, shuffle=shuffle, ) - def _make_db_loader( - self, dataset: torch.utils.data.Dataset, batch_size: int - ) -> torch.utils.data.DataLoader: - """ - Create a PyTorch DataLoader. - - Parameters - ---------- - dataset : torch.utils.data.Dataset - A PyTorch Dataset. 
- - Returns - ------- - torch.utils.data.DataLoader - A PyTorch DataLoader. - """ - # Calculate new batch size to saturate previous batch size with PSMs - pep_per_spec = [] - for i in range(min(10, len(dataset))): - pep_per_spec.append(len(dataset[i][3].split(","))) - new_batch_size = max(1, int(batch_size // np.mean(pep_per_spec))) - return torch.utils.data.DataLoader( - dataset, - batch_size=new_batch_size, - collate_fn=prepare_db_batch, - pin_memory=True, - num_workers=self.n_workers, - ) - def train_dataloader(self) -> torch.utils.data.DataLoader: """Get the training DataLoader.""" return self._make_loader( @@ -219,7 +193,9 @@ def predict_dataloader(self) -> torch.utils.data.DataLoader: def db_dataloader(self) -> torch.utils.data.DataLoader: """Get the predict DataLoader.""" - return self._make_db_loader(self.test_dataset, self.eval_batch_size) + return self._make_loader( + self.test_dataset, self.eval_batch_size, db_mode=True + ) def prepare_batch( diff --git a/casanovo/denovo/model.py b/casanovo/denovo/model.py index 6c440ce0..31d90e24 100644 --- a/casanovo/denovo/model.py +++ b/casanovo/denovo/model.py @@ -1157,18 +1157,18 @@ def _calc_match_score( per_aa_scores = batch_all_aa_scores[rows, cols, truth_aa_indicies] + per_aa_scores[per_aa_scores == 0] += 1e-10 score_mask = truth_aa_indicies != 0 - masked_per_aa_scores = per_aa_scores * score_mask - # Arithmetic score that was used before - ## all_scores = masked_per_aa_scores.sum(dim=1) / score_mask.sum(dim=1) + per_aa_scores[~score_mask] = 0 + log_per_aa_scores = torch.log(per_aa_scores) all_scores = torch.where( - torch.log(masked_per_aa_scores) == float("-inf"), + log_per_aa_scores == float("-inf"), torch.tensor(0.0), - torch.log(masked_per_aa_scores), + log_per_aa_scores, ).sum(dim=1) / score_mask.sum( dim=1 ) # Calculates geometric score - return all_scores, masked_per_aa_scores + return all_scores, per_aa_scores class CosineWarmupScheduler(torch.optim.lr_scheduler._LRScheduler): diff --git a/casanovo/denovo/model_runner.py b/casanovo/denovo/model_runner.py index fd17378f..14aebf8d 100644 --- a/casanovo/denovo/model_runner.py +++ b/casanovo/denovo/model_runner.py @@ -93,7 +93,9 @@ def db_search(self, peak_path: Iterable[str], output: str) -> None: ------- self """ - self.writer = ms_io.DBWriter(Path(output).with_suffix(".mztab")) + self.writer = ms_io.MztabWriter( + Path(output).with_suffix(".mztab"), is_db_variant=True + ) self.writer.set_metadata( self.config, model=str(self.model_filename), @@ -101,7 +103,7 @@ def db_search(self, peak_path: Iterable[str], output: str) -> None: ) self.initialize_trainer(train=True) - self.initialize_db_model() + self.initialize_model(train=False, db_search=True) self.model.out_writer = self.writer test_index = self._get_index(peak_path, True, "db search") @@ -229,106 +231,9 @@ def initialize_trainer(self, train: bool) -> None: self.trainer = pl.Trainer(**trainer_cfg) - def initialize_db_model(self) -> None: - """Initialize the Casanovo-DB model. - Required because the DB search model is a unique subclass of the Spec2Pep model. 
- """ - model_params = dict( - dim_model=self.config.dim_model, - n_head=self.config.n_head, - dim_feedforward=self.config.dim_feedforward, - n_layers=self.config.n_layers, - dropout=self.config.dropout, - dim_intensity=self.config.dim_intensity, - max_length=self.config.max_length, - residues=self.config.residues, - max_charge=self.config.max_charge, - precursor_mass_tol=self.config.precursor_mass_tol, - isotope_error_range=self.config.isotope_error_range, - min_peptide_len=self.config.min_peptide_len, - n_beams=self.config.n_beams, - top_match=self.config.top_match, - n_log=self.config.n_log, - tb_summarywriter=self.config.tb_summarywriter, - train_label_smoothing=self.config.train_label_smoothing, - warmup_iters=self.config.warmup_iters, - cosine_schedule_period_iters=self.config.cosine_schedule_period_iters, - lr=self.config.learning_rate, - weight_decay=self.config.weight_decay, - out_writer=self.writer, - calculate_precision=self.config.calculate_precision, - ) - - # Reconfigurable non-architecture related parameters for a loaded model. - loaded_model_params = dict( - max_length=self.config.max_length, - precursor_mass_tol=self.config.precursor_mass_tol, - isotope_error_range=self.config.isotope_error_range, - n_beams=self.config.n_beams, - min_peptide_len=self.config.min_peptide_len, - top_match=self.config.top_match, - n_log=self.config.n_log, - tb_summarywriter=self.config.tb_summarywriter, - train_label_smoothing=self.config.train_label_smoothing, - warmup_iters=self.config.warmup_iters, - cosine_schedule_period_iters=self.config.cosine_schedule_period_iters, - lr=self.config.learning_rate, - weight_decay=self.config.weight_decay, - out_writer=self.writer, - calculate_precision=self.config.calculate_precision, - ) - - # Model file must exist for DB search - if self.model_filename is None: - logger.error("A model file must be provided") - raise ValueError("A model file must be provided") - - if not Path(self.model_filename).exists(): - logger.error( - "Could not find the model weights at file %s", - self.model_filename, - ) - raise FileNotFoundError("Could not find the model weights file") - - # First try loading model details from the weights file, otherwise use - # the provided configuration. - device = torch.empty(1).device # Use the default device. - try: - self.model = DbSpec2Pep.load_from_checkpoint( - self.model_filename, map_location=device, **loaded_model_params - ) - - # Pass in information about predict_batch_size to the model for batch saturation - self.model.num_pairs = self.config.predict_batch_size - - architecture_params = set(model_params.keys()) - set( - loaded_model_params.keys() - ) - for param in architecture_params: - if model_params[param] != self.model.hparams[param]: - warnings.warn( - f"Mismatching {param} parameter in " - f"model checkpoint ({self.model.hparams[param]}) " - f"vs config file ({model_params[param]}); " - "using the checkpoint." - ) - except RuntimeError: - # This only doesn't work if the weights are from an older version - try: - self.model = DbSpec2Pep.load_from_checkpoint( - self.model_filename, - map_location=device, - **model_params, - ) - # Pass in information about predict_batch_size to the model for batch saturation - self.model.num_pairs = self.config.predict_batch_size - except RuntimeError: - raise RuntimeError( - "Weights file incompatible with the current version of " - "Casanovo." 
- ) - - def initialize_model(self, train: bool) -> None: + def initialize_model( + self, train: bool, db_search: Optional[bool] = False + ) -> None: """Initialize the Casanovo model. Parameters @@ -336,6 +241,8 @@ def initialize_model(self, train: bool) -> None: train : bool Determines whether to set the model up for model training or evaluation / inference. + db_search : Optional[bool] + Determines whether to use the DB search model subclass. """ model_params = dict( dim_model=self.config.dim_model, @@ -385,6 +292,11 @@ def initialize_model(self, train: bool) -> None: if self.model_filename is None: # Train a model from scratch if no model file is provided. if train: + if db_search: + logger.error("Db search mode requires a model file.") + raise ValueError( + "A model file must be provided for DB search mode" + ) self.model = Spec2Pep(**model_params) return # Else we're not training, so a model file must be provided. @@ -404,9 +316,20 @@ def initialize_model(self, train: bool) -> None: # the provided configuration. device = torch.empty(1).device # Use the default device. try: - self.model = Spec2Pep.load_from_checkpoint( - self.model_filename, map_location=device, **loaded_model_params - ) + if db_search: + self.model = DbSpec2Pep.load_from_checkpoint( + self.model_filename, + map_location=device, + **loaded_model_params, + ) + ## TODO move? + self.model.num_pairs = self.config.predict_batch_size + else: + self.model = Spec2Pep.load_from_checkpoint( + self.model_filename, + map_location=device, + **loaded_model_params, + ) architecture_params = set(model_params.keys()) - set( loaded_model_params.keys() @@ -422,11 +345,20 @@ def initialize_model(self, train: bool) -> None: except RuntimeError: # This only doesn't work if the weights are from an older version try: - self.model = Spec2Pep.load_from_checkpoint( - self.model_filename, - map_location=device, - **model_params, - ) + if db_search: + self.model = DbSpec2Pep.load_from_checkpoint( + self.model_filename, + map_location=device, + **model_params, + ) + ## TODO move? 
+ self.model.num_pairs = self.config.predict_batch_size + else: + self.model = Spec2Pep.load_from_checkpoint( + self.model_filename, + map_location=device, + **model_params, + ) except RuntimeError: raise RuntimeError( "Weights file incompatible with the current version of " diff --git a/tests/test_integration.py b/tests/test_integration.py index aacc6d15..60e3977b 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -108,7 +108,7 @@ def test_db_search( "+43.006-17.027KEDITEPP", "KEDIQ+0.984TEPPQ+0.984", ] - assert list(psms.opt_target) == [ + assert list(psms["opt_cv_MS:1002217_decoy_peptide"]) == [ "True", "True", "False", diff --git a/tests/unit_tests/test_unit.py b/tests/unit_tests/test_unit.py index bcc61446..ec9085c0 100644 --- a/tests/unit_tests/test_unit.py +++ b/tests/unit_tests/test_unit.py @@ -153,6 +153,11 @@ def test_calc_match_score(): stop_slot_prob = torch.zeros(29) stop_slot_prob[28] = 1.0 # $ blank_slot_prob = torch.zeros(29) + blank_slot_prob[0] = 0.42 # Should never come into play + fourth_slot_prob = torch.zeros(29) + fourth_slot_prob[4] = 0.5 # D + fifth_slot_prob = torch.zeros(29) + fifth_slot_prob[5] = 0.5 # E pep_1_aa = torch.stack( [ @@ -172,19 +177,46 @@ def test_calc_match_score(): blank_slot_prob, ] ) - - batch_all_aa_scores = torch.stack([pep_1_aa, pep_2_aa]) - truth_aa_indices = torch.tensor([[1, 2, 3, 28], [3, 2, 28, 0]]) + pep_3_aa = torch.stack( + [ + fourth_slot_prob, + fifth_slot_prob, + first_slot_prob, + stop_slot_prob, + blank_slot_prob, + ] + ) + pep_4_aa = torch.stack( + [ + first_slot_prob, + second_slot_prob, + third_slot_prob, + stop_slot_prob, + blank_slot_prob, + ] + ) + batch_all_aa_scores = torch.stack([pep_1_aa, pep_2_aa, pep_3_aa, pep_4_aa]) + truth_aa_indices = torch.tensor( + [[1, 2, 3, 28], [3, 2, 28, 0], [4, 5, 1, 28], [2, 2, 3, 28]] + ) all_scores, masked_per_aa_scores = _calc_match_score( batch_all_aa_scores, truth_aa_indices ) - assert all_scores.numpy()[0] == pytest.approx(0) - assert all_scores.numpy()[1] == pytest.approx(0) + assert all_scores.numpy()[0] == 0 + assert all_scores.numpy()[1] == 0 + assert all_scores.numpy()[2] == pytest.approx( + np.log(0.5 * 0.5 * 1 * 1) / 4 + ) + assert all_scores.numpy()[3] == pytest.approx( + np.log(1e-10 * 1 * 1 * 1) / 4 + ) - assert np.sum(masked_per_aa_scores.numpy()[0]) == pytest.approx(4) - assert np.sum(masked_per_aa_scores.numpy()[1]) == pytest.approx(3) + assert np.sum(masked_per_aa_scores.numpy()[0]) == 4 + assert np.sum(masked_per_aa_scores.numpy()[1]) == 3 + assert np.sum(masked_per_aa_scores.numpy()[2]) == 3 + assert np.sum(masked_per_aa_scores.numpy()[3]) == 3 def test_beam_search_decode(): From d967c4218bf6f84d25e87e874c807e11c958cfbb Mon Sep 17 00:00:00 2001 From: VarunAnanth2003 Date: Tue, 18 Jun 2024 12:25:09 -0700 Subject: [PATCH 17/84] documentation fixes and starting to cleanup batching code --- casanovo/denovo/dataloaders.py | 9 +++--- casanovo/denovo/model.py | 53 +++++++++++++++++++++++---------- casanovo/denovo/model_runner.py | 4 --- 3 files changed, 43 insertions(+), 23 deletions(-) diff --git a/casanovo/denovo/dataloaders.py b/casanovo/denovo/dataloaders.py index 6731e532..aff860a1 100644 --- a/casanovo/denovo/dataloaders.py +++ b/casanovo/denovo/dataloaders.py @@ -245,11 +245,12 @@ def prepare_db_batch( Parameters ---------- - batch : List[Tuple[torch.Tensor, float, int, str, Tuple[str, str]]] + batch : List[Tuple[torch.Tensor, Tuple[float, int, float], str, Tuple[str, str]]] A batch of data from an AnnotatedSpectrumDataset, consisting of for each 
- spectrum (i) a tensor with the m/z and intensity peak values, (ii), the - precursor m/z, (iii) the precursor charge, (iv) the spectrum identifier (peptide), (v) - spectrum identifiers (file and scan). + spectrum (i) a tensor with the m/z and intensity peak values, + (ii) the precursor information [mass, charge, m/z], (iii) the + peptide sequence, the precursor m/z, (iv) spectrum identifiers + (file and scan). Returns ------- diff --git a/casanovo/denovo/model.py b/casanovo/denovo/model.py index 31d90e24..9dcb3e7e 100644 --- a/casanovo/denovo/model.py +++ b/casanovo/denovo/model.py @@ -995,30 +995,45 @@ class DbSpec2Pep(Spec2Pep): Hijacks teacher-forcing implemented in Spec2Pep and uses it to predict scores between a spectra and associated peptide. - Decoys should have a prefix of "decoy_". """ - num_pairs = None - def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) def predict_step(self, batch, *args): + """ + A single prediction step for Casanovo-DB + + Parameters + ---------- + batch : Tuple[torch.Tensor, torch.Tensor, torch.Tensor] + A batch of (i) MS/MS spectra, (ii) precursor information, (iii) + spectrum identifiers as torch Tensors, (iv) scan numbers. + + Returns + ------- + predictions: List[Tuple[int, bool, str, float, np.ndarray, np.ndarray]] + Model predictions for the given batch of spectra containing spectrum + scan number, decoy flag, peptide sequence, Casanovo-DB score, + amino acid-level confidence scores, and precursor information. + """ batch_res = [] for ( indexes, - t_or_d, + is_decoy, peptides, precursors, encoded_ms, ) in self.smart_batch_gen(batch): pred, truth = self.decoder(peptides, precursors, *encoded_ms) pred = self.softmax(pred) - score_result, per_aa_score = _calc_match_score(pred, truth) + score_result, per_aa_score = _calc_match_score( + pred, truth, self.decoder.reverse + ) batch_res.append( ( indexes, - t_or_d, + is_decoy, peptides, score_result.cpu().detach().numpy(), per_aa_score.cpu().detach().numpy(), @@ -1028,6 +1043,7 @@ def predict_step(self, batch, *args): return batch_res def smart_batch_gen(self, batch): + batch_size = len(batch[0]) all_psm = [] enc = self.encoder(batch[0]) precursors = batch[1] @@ -1037,7 +1053,7 @@ def smart_batch_gen(self, batch): spec_peptides = batch[2][idx].split(",") # Check for decoy prefixes and create a bit-vector indicating targets (1) or decoys (0) decoy_prefix = "decoy_" # Decoy prefix - t_or_ds = [ + decoy_mask = [ 0 if p.startswith(decoy_prefix) else 1 for p in spec_peptides ] # Remove decoy prefix @@ -1055,14 +1071,14 @@ def smart_batch_gen(self, batch): spec_precursors, spec_peptides, spec_idx, - t_or_ds, + decoy_mask, ) ) ) # Continually grab num_pairs items from all_psm until list is exhausted while len(all_psm) > 0: - batch = all_psm[: self.num_pairs] - all_psm = all_psm[self.num_pairs :] + batch = all_psm[:batch_size] + all_psm = all_psm[batch_size:] batch = list(zip(*batch)) encoded_ms = ( torch.stack([a[0] for a in batch[0]]), @@ -1071,8 +1087,8 @@ def smart_batch_gen(self, batch): prec_data = torch.stack(batch[1]) pep_str = list(batch[2]) indexes = [a[1] for a in batch[3]] - t_or_ds = batch[4] - yield (indexes, t_or_ds, pep_str, prec_data, encoded_ms) + is_decoy = batch[4] + yield (indexes, is_decoy, pep_str, prec_data, encoded_ms) def on_predict_batch_end( self, @@ -1115,7 +1131,9 @@ def on_predict_batch_end( def _calc_match_score( - batch_all_aa_scores: torch.Tensor, truth_aa_indicies: torch.Tensor + batch_all_aa_scores: torch.Tensor, + truth_aa_indicies: torch.Tensor, + 
decoder_reverse: bool = False, ) -> List[float]: """ Calculate the score between the input spectra and associated peptide. @@ -1135,6 +1153,8 @@ def _calc_match_score( truth_aa_indicies : torch.Tensor Indicies of the score for each actual amino acid in the peptide (for an entire batch) + decoder_reverse : bool + Whether the decoder is reversed. Returns ------- @@ -1144,8 +1164,11 @@ def _calc_match_score( a list of lists of per amino acid scores (for an entire batch) """ - # Remove trailing tokens from predictions, - batch_all_aa_scores = batch_all_aa_scores[:, :-1] + # Remove trailing tokens from predictions based on decoder reversal + if decoder_reverse: + batch_all_aa_scores = batch_all_aa_scores[:, 1:] + elif not decoder_reverse: + batch_all_aa_scores = batch_all_aa_scores[:, :-1] # Vectorized scoring using efficient indexing. rows = ( diff --git a/casanovo/denovo/model_runner.py b/casanovo/denovo/model_runner.py index 14aebf8d..e150ab2d 100644 --- a/casanovo/denovo/model_runner.py +++ b/casanovo/denovo/model_runner.py @@ -322,8 +322,6 @@ def initialize_model( map_location=device, **loaded_model_params, ) - ## TODO move? - self.model.num_pairs = self.config.predict_batch_size else: self.model = Spec2Pep.load_from_checkpoint( self.model_filename, @@ -351,8 +349,6 @@ def initialize_model( map_location=device, **model_params, ) - ## TODO move? - self.model.num_pairs = self.config.predict_batch_size else: self.model = Spec2Pep.load_from_checkpoint( self.model_filename, From ea1f97df98724fa3bd9b5128792eccff11040bed Mon Sep 17 00:00:00 2001 From: VarunAnanth2003 Date: Tue, 18 Jun 2024 18:23:55 -0700 Subject: [PATCH 18/84] cleaned up on_predict_batch_end, TODOs for calc_mz --- casanovo/denovo/model.py | 54 +++++++++++++++++++--------------------- 1 file changed, 25 insertions(+), 29 deletions(-) diff --git a/casanovo/denovo/model.py b/casanovo/denovo/model.py index 9dcb3e7e..2f3f9aed 100644 --- a/casanovo/denovo/model.py +++ b/casanovo/denovo/model.py @@ -12,6 +12,7 @@ import numpy as np import lightning.pytorch as pl from torch.utils.tensorboard import SummaryWriter +from pyteomics import mass from depthcharge.components import ModelMixin, PeptideDecoder, SpectrumEncoder from . 
import evaluate @@ -1043,24 +1044,26 @@ def predict_step(self, batch, *args): return batch_res def smart_batch_gen(self, batch): - batch_size = len(batch[0]) all_psm = [] + batch_size = len(batch[0]) enc = self.encoder(batch[0]) precursors = batch[1] indexes = batch[3] enc = list(zip(*enc)) - for idx, _ in enumerate(batch[0]): + for idx in range(batch_size): spec_peptides = batch[2][idx].split(",") # Check for decoy prefixes and create a bit-vector indicating targets (1) or decoys (0) decoy_prefix = "decoy_" # Decoy prefix - decoy_mask = [ - 0 if p.startswith(decoy_prefix) else 1 for p in spec_peptides - ] - # Remove decoy prefix - spec_peptides = [ - s[len(decoy_prefix) :] if s.startswith(decoy_prefix) else s - for s in spec_peptides - ] + id_decoys = np.array( + [ + (0, p.removeprefix(decoy_prefix)) + if p.startswith(decoy_prefix) + else (1, p) + for p in spec_peptides + ] + ) + decoy_mask = np.array(id_decoys[:, 0], dtype=bool) + spec_peptides = list(id_decoys[:, 1]) spec_precursors = [precursors[idx]] * len(spec_peptides) spec_enc = [enc[idx]] * len(spec_peptides) spec_idx = [indexes[idx]] * len(spec_peptides) @@ -1105,29 +1108,22 @@ def on_predict_batch_end( per_aa_score, precursors, ) in outputs: - for index, t_or_d, peptide, score, per_aa_scores, precursor in zip( - indexes, - t_or_d, + prec_mass = precursors[:, 0] + prec_charge = precursors[:, 1] + prec_mz = precursors[:, 2] + # calc_mz = [mass.fast_mass(pep, charge=int(pc)) for pep, pc in zip(peptides, prec_charge)] + calc_mz = prec_mass # TODO: Replace with actual calc_mz + for row in zip( peptides, score_result, + prec_charge, + prec_mz, + calc_mz, + indexes, per_aa_score, - precursors, + t_or_d, ): - prec_charge = precursor[1] - prec_mz = precursor[2] - calc_mz = precursor[2] - self.out_writer.psms.append( - ( - peptide, - score, - prec_charge, - prec_mz, - calc_mz, - index, - per_aa_scores, - t_or_d, - ), - ) + self.out_writer.psms.append(row) def _calc_match_score( From 8825506da091aa7aaa7dac0da78608b07fc48978 Mon Sep 17 00:00:00 2001 From: VarunAnanth2003 Date: Wed, 26 Jun 2024 18:12:17 -0700 Subject: [PATCH 19/84] add proper calc_mz calculation with depthcharge --- casanovo/denovo/model.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/casanovo/denovo/model.py b/casanovo/denovo/model.py index 2f3f9aed..71f4a6fa 100644 --- a/casanovo/denovo/model.py +++ b/casanovo/denovo/model.py @@ -1111,8 +1111,10 @@ def on_predict_batch_end( prec_mass = precursors[:, 0] prec_charge = precursors[:, 1] prec_mz = precursors[:, 2] - # calc_mz = [mass.fast_mass(pep, charge=int(pc)) for pep, pc in zip(peptides, prec_charge)] - calc_mz = prec_mass # TODO: Replace with actual calc_mz + calc_mz = [ + self.peptide_mass_calculator.mass(peptide, charge) + for peptide, charge in zip(peptides, prec_charge) + ] for row in zip( peptides, score_result, From f25ace84af22ea0eefb0a96a705cfc7957912d13 Mon Sep 17 00:00:00 2001 From: VarunAnanth2003 Date: Tue, 2 Jul 2024 16:22:02 -0700 Subject: [PATCH 20/84] rough implementation --- casanovo/casanovo.py | 90 +++++++++++++++++- casanovo/data/datasets.py | 14 +-- casanovo/data/db_utils.py | 156 ++++++++++++++++++++++++++++++++ casanovo/data/ms_io.py | 4 +- casanovo/denovo/dataloaders.py | 57 +----------- casanovo/denovo/model.py | 62 ++++++------- casanovo/denovo/model_runner.py | 39 ++++++-- 7 files changed, 307 insertions(+), 115 deletions(-) create mode 100644 casanovo/data/db_utils.py diff --git a/casanovo/casanovo.py b/casanovo/casanovo.py index 7db5faa8..df3cc79f 100644 --- 
a/casanovo/casanovo.py +++ b/casanovo/casanovo.py @@ -214,8 +214,74 @@ def annotate( nargs=-1, type=click.Path(exists=True, dir_okay=False), ) +@click.argument( + "fasta_path", + required=True, + nargs=1, + type=click.Path(exists=True, dir_okay=False), +) +@click.option( + "--enzyme", + help="Enzyme for in silico digestion, see pyteomics.parser.expasy_rules", + type=str, + default="trypsin", +) +@click.option( + "--digestion", + help="Digestion: full, partial", + type=click.Choice( + ["full", "partial"], + case_sensitive=False, + ), + default="full", +) +@click.option( + "--missed_cleavages", + help="Number of allowed missed cleavages", + type=int, + default=0, +) +@click.option( + "--max_mods", + help="Maximum number of modifications per peptide", + type=int, + default=0, +) +@click.option( + "--min_length", + help="Minimum peptide length", + type=int, + default=6, +) +@click.option( + "--max_length", + help="Maximum peptide length", + type=int, + default=50, +) +@click.option( + "--precursor_tolerance", + help="Precursor tolerance window size (ppm)", + type=int, + default=20, +) +@click.option( + "--isotope_error", + help="Isotope error levels to consider (list of ints, e.g: 1,2)", + type=str, + default="0", +) def db_search( peak_path: Tuple[str], + fasta_path: str, + enzyme: str, + digestion: str, + missed_cleavages: int, + max_mods: int, + min_length: int, + max_length: int, + precursor_tolerance: int, + isotope_error: str, model: Optional[str], config: Optional[str], output: Optional[str], @@ -223,14 +289,30 @@ def db_search( ) -> None: """Perform a search using Casanovo-DB. - PEAK_PATH must be one MGF file that has ANNOTATED spectra, - as output by annotate mode. + PEAK_PATH must be one MGF file. FASTA_PATH must be one FASTA file. """ output = setup_logging(output, verbosity) config, model = setup_model(model, config, output, False) with ModelRunner(config, model) as runner: - logger.info("DB-searching peptides from: %s", peak_path) - runner.db_search(peak_path, output) + logger.info("Performing database search on:") + for peak_file in peak_path: + logger.info(" %s", peak_file) + logger.info("Using the following FASTA file:") + logger.info(" %s", fasta_path) + + runner.db_search( + peak_path, + fasta_path, + enzyme, + digestion, + missed_cleavages, + max_mods, + min_length, + max_length, + precursor_tolerance, + isotope_error, + output, + ) logger.info("DONE!") diff --git a/casanovo/data/datasets.py b/casanovo/data/datasets.py index aff6af85..59f56b68 100644 --- a/casanovo/data/datasets.py +++ b/casanovo/data/datasets.py @@ -134,6 +134,8 @@ def _process_peaks( The precursor m/z. precursor_charge : int The precursor charge. + track_spectrum_id : Optional[bool] + Whether to keep track of the identifier of the MS/MS spectra. Returns ------- @@ -212,8 +214,6 @@ class AnnotatedSpectrumDataset(SpectrumDataset): random_state : Optional[int] The NumPy random state. ``None`` leaves mass spectra in the order they were parsed. - track_spectrum_id : Optional[bool] - Whether to keep track of the identifier of the MS/MS spectra. 
""" def __init__( @@ -225,7 +225,6 @@ def __init__( min_intensity: float = 0.01, remove_precursor_tol: float = 2.0, random_state: Optional[int] = None, - track_spectrum_id: Optional[bool] = False, ): super().__init__( annotated_spectrum_index, @@ -236,7 +235,6 @@ def __init__( remove_precursor_tol=remove_precursor_tol, random_state=random_state, ) - self.track_spectrum_id = track_spectrum_id def __getitem__(self, idx: int) -> Tuple[torch.Tensor, float, int, str]: """ @@ -268,12 +266,4 @@ def __getitem__(self, idx: int) -> Tuple[torch.Tensor, float, int, str]: spectrum = self._process_peaks( mz_array, int_array, precursor_mz, precursor_charge ) - if self.track_spectrum_id: - return ( - spectrum, - precursor_mz, - precursor_charge, - peptide, - self.get_spectrum_id(idx), - ) return spectrum, precursor_mz, precursor_charge, peptide diff --git a/casanovo/data/db_utils.py b/casanovo/data/db_utils.py new file mode 100644 index 00000000..c961e35e --- /dev/null +++ b/casanovo/data/db_utils.py @@ -0,0 +1,156 @@ +"""Unique methods used within db-search mode""" + +import os +import depthcharge.masses +from pyteomics import fasta, parser +import bisect + +HYDROGEN = 1.007825035 +OXYGEN = 15.99491463 +H2O = 2 * HYDROGEN + OXYGEN +PROTON = 1.00727646677 +ISOTOPE_SPACING = 1.003355 # - 0.00288 + +var_mods = { + "d": ["N", "Q"], + "ox": ["M"], + "ace-": True, + "carb-": True, + "nh3x-": True, + "carbnh3x-": True, +} +fixed_mods = {"carbm": ["C"]} + + +def convert_from_modx(seq): + """Converts peptide sequence from modX format to Casanovo-acceptable modifications. + + Args: + seq (str): Peptide in modX format + """ + seq = seq.replace("carbmC", "C+57.021") # Fixed modification + seq = seq.replace("oxM", "M+15.995") + seq = seq.replace("dN", "N+0.984") + seq = seq.replace("dQ", "Q+0.984") + seq = seq.replace("ace-", "+42.011") + seq = seq.replace("carbnh3x-", "+43.006-17.027") + seq = seq.replace("carb-", "+43.006") + seq = seq.replace("nh3x-", "-17.027") + return seq + + +def digest_fasta( + fasta_filename, + enzyme, + digestion, + missed_cleavages, + max_mods, + min_length, + max_length, +): + """TODO: Add docstring""" + + # Verify the eistence of the file: + if not os.path.isfile(fasta_filename): + print(f"File {fasta_filename} does not exist.") + raise FileNotFoundError(f"File {fasta_filename} does not exist.") + + fasta_data = fasta.read(fasta_filename) + peptide_list = [] + if digestion in ["full", "partial"]: + semi = True if digestion == "partial" else False + for header, seq in fasta_data: + pep_set = parser.cleave( + seq, + rule=parser.expasy_rules[enzyme], + missed_cleavages=missed_cleavages, + semi=semi, + ) + protein = header.split()[0] + peptide_list.extend([(pep, protein) for pep in pep_set]) + else: + raise ValueError(f"Digestion type {digestion} not recognized.") + + # Generate modified peptides + mass_calculator = depthcharge.masses.PeptideMass(residues="massivekb") + mass_calculator.masses.update({"X": 0.0}) # TODO: REMOVE? + mod_peptide_list = [] + for pep, prot in peptide_list: + if len(pep) < min_length or len(pep) > max_length: + continue + peptide_isoforms = parser.isoforms( + pep, + variable_mods=var_mods, + fixed_mods=fixed_mods, + max_mods=max_mods, + ) + peptide_isoforms = list(map(convert_from_modx, peptide_isoforms)) + mod_peptide_list.extend( + (mod_pep, mass_calculator.mass(mod_pep), prot) + for mod_pep in peptide_isoforms + ) + + # Sort the peptides by mass and return. 
+ mod_peptide_list.sort(key=lambda x: x[1]) + return mod_peptide_list + + +def get_candidates( + precursor_mass, charge, peptide_list, precursor_tolerance, isotope_error +): + """TODO: ADD DOCSTRING""" + + candidates = set() + + isotope_error = [int(x) for x in isotope_error.split(",")] + for e in isotope_error: + iso_shift = ISOTOPE_SPACING * e + upper_bound = (_to_raw_mass(precursor_mass, charge) - iso_shift) * ( + 1 + (precursor_tolerance / 1e6) + ) + lower_bound = (_to_raw_mass(precursor_mass, charge) - iso_shift) * ( + 1 - (precursor_tolerance / 1e6) + ) + + start, end = get_mass_indices( + [x[1] for x in peptide_list], lower_bound, upper_bound + ) + + candidates.update(peptide_list[start:end]) + + candidates = list(candidates) + candidates.sort(key=lambda x: x[1]) + return candidates + + +def _to_mz(precursor_mass, charge): + """TODO: ADD DOCSTRING""" + return (precursor_mass + (charge * PROTON)) / charge + + +def _to_raw_mass(mz_mass, charge): + """TODO: ADD DOCSTRING""" + return charge * (mz_mass - PROTON) + + +def get_mass_indices(masses, m_low, m_high): + """Grabs mass indices from a list of mass values that fall within a specified range. + Requires that the mass values are sorted in ascending order. + + Parameters + ---------- + masses : List[int] + List of mass values + m_low : int + Lower bound of mass range (inclusive) + m_high : int + Upper bound of mass range (inclusive) + + Return + ------ + indices : Tuple[int, int] + Indices of mass values that fall within the specified range + """ + start = bisect.bisect_left(masses, m_low) + end = bisect.bisect_right(masses, m_high) + return start, end diff --git a/casanovo/data/ms_io.py b/casanovo/data/ms_io.py index c4cfc7cb..d47b9b04 100644 --- a/casanovo/data/ms_io.py +++ b/casanovo/data/ms_io.py @@ -251,7 +251,6 @@ def save_db_variant(self) -> None: "start", "end", "opt_ms_run[1]_aa_scores", - "opt_cv_MS:1002217_decoy_peptide", ] ) for i, psm in enumerate(self.psms): @@ -259,7 +258,7 @@ def save_db_variant(self) -> None: [ "PSM", psm[0], # sequence - f"{psm[5]}:{i}", # spectra_ref + f"{psm[5]}:{i}", # PSM_ID (spectrum # :candidate #) "null", # accession "null", # unique "null", # database @@ -284,6 +283,5 @@ def save_db_variant(self) -> None: ) ) ), # opt_ms_run[1]_aa_scores - bool(psm[7]), # opt_cv_MS:1002217_decoy_peptide ] ) diff --git a/casanovo/denovo/dataloaders.py b/casanovo/denovo/dataloaders.py index aff860a1..ba02936c 100644 --- a/casanovo/denovo/dataloaders.py +++ b/casanovo/denovo/dataloaders.py @@ -127,13 +127,12 @@ def setup(self, stage: str = None, annotated: bool = True) -> None: self.test_dataset = make_dataset(self.test_index) if stage == "db": make_dataset = functools.partial( - AnnotatedSpectrumDataset, + SpectrumDataset, n_peaks=self.n_peaks, min_mz=self.min_mz, max_mz=self.max_mz, min_intensity=self.min_intensity, remove_precursor_tol=self.remove_precursor_tol, - track_spectrum_id=True, ) if self.test_index is not None: self.test_dataset = make_dataset(self.test_index) @@ -143,7 +142,6 @@ def _make_loader( dataset: torch.utils.data.Dataset, batch_size: int, shuffle: bool = False, - db_mode: bool = False, ) -> torch.utils.data.DataLoader: """ Create a PyTorch DataLoader. 
@@ -167,7 +165,7 @@ def _make_loader( return torch.utils.data.DataLoader( dataset, batch_size=batch_size, - collate_fn=prepare_batch if not db_mode else prepare_db_batch, + collate_fn=prepare_batch, pin_memory=True, num_workers=self.n_workers, shuffle=shuffle, @@ -191,12 +189,6 @@ def predict_dataloader(self) -> torch.utils.data.DataLoader: """Get the predict DataLoader.""" return self._make_loader(self.test_dataset, self.eval_batch_size) - def db_dataloader(self) -> torch.utils.data.DataLoader: - """Get the predict DataLoader.""" - return self._make_loader( - self.test_dataset, self.eval_batch_size, db_mode=True - ) - def prepare_batch( batch: List[Tuple[torch.Tensor, float, int, str]] @@ -235,48 +227,3 @@ def prepare_batch( [precursor_masses, precursor_charges, precursor_mzs] ).T.float() return spectra, precursors, np.asarray(spectrum_ids) - - -def prepare_db_batch( - batch: List[Tuple[torch.Tensor, float, int, str, Tuple[str, str]]] -) -> Tuple[torch.Tensor, torch.Tensor, np.ndarray, Tuple[str, str]]: - """ - Collate MS/MS spectra into a batch meant for Casanovo-DB. - - Parameters - ---------- - batch : List[Tuple[torch.Tensor, Tuple[float, int, float], str, Tuple[str, str]]] - A batch of data from an AnnotatedSpectrumDataset, consisting of for each - spectrum (i) a tensor with the m/z and intensity peak values, - (ii) the precursor information [mass, charge, m/z], (iii) the - peptide sequence, the precursor m/z, (iv) spectrum identifiers - (file and scan). - - Returns - ------- - spectra : torch.Tensor of shape (batch_size, n_peaks, 2) - The padded mass spectra tensor with the m/z and intensity peak values - for each spectrum. - precursors : torch.Tensor of shape (batch_size, 3) - A tensor with the precursor neutral mass, precursor charge, and - precursor m/z. - spectrum_peps : np.ndarray - Peptide sequences - spectrum_ids : Tuple[str, str] - Peak file and spectrum identifier - """ - ( - spectra, - precursor_mzs, - precursor_charges, - spectrum_peps, - spectrum_ids, - ) = list(zip(*batch)) - spectra = torch.nn.utils.rnn.pad_sequence(spectra, batch_first=True) - precursor_mzs = torch.tensor(precursor_mzs) - precursor_charges = torch.tensor(precursor_charges) - precursor_masses = (precursor_mzs - 1.007276) * precursor_charges - precursors = torch.vstack( - [precursor_masses, precursor_charges, precursor_mzs] - ).T.float() - return spectra, precursors, np.asarray(spectrum_peps), spectrum_ids diff --git a/casanovo/denovo/model.py b/casanovo/denovo/model.py index 71f4a6fa..be7dba9a 100644 --- a/casanovo/denovo/model.py +++ b/casanovo/denovo/model.py @@ -17,7 +17,7 @@ from . import evaluate from .. import config -from ..data import ms_io +from ..data import ms_io, db_utils logger = logging.getLogger("casanovo") @@ -1009,19 +1009,18 @@ def predict_step(self, batch, *args): ---------- batch : Tuple[torch.Tensor, torch.Tensor, torch.Tensor] A batch of (i) MS/MS spectra, (ii) precursor information, (iii) - spectrum identifiers as torch Tensors, (iv) scan numbers. + spectrum identifiers as torch Tensors Returns ------- - predictions: List[Tuple[int, bool, str, float, np.ndarray, np.ndarray]] + predictions: List[Tuple[int, str, float, np.ndarray, np.ndarray]] Model predictions for the given batch of spectra containing spectrum - scan number, decoy flag, peptide sequence, Casanovo-DB score, + scan number, peptide sequence, Casanovo-DB score, amino acid-level confidence scores, and precursor information. 
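As a simplified illustration of the scoring performed here (the real _calc_match_score is vectorized over the whole batch, masks padding, and handles decoder reversal; the function name, shapes, and token indices below are toy values, not Casanovo's real vocabulary): the predicted probability of every ground-truth residue is gathered and the mean log-probability is reported per candidate peptide.

    import torch

    def toy_match_score(aa_probs, truth_idx):
        # aa_probs: (batch, seq_len, vocab) softmax output; truth_idx: (batch, seq_len)
        per_aa = torch.gather(aa_probs, 2, truth_idx.unsqueeze(-1)).squeeze(-1)
        per_aa = per_aa.clamp_min(1e-10)      # avoid log(0) for zero-probability residues
        return torch.log(per_aa).mean(dim=1)  # one score per candidate PSM

    probs = torch.full((1, 4, 29), 1e-10)
    probs[0, [0, 1, 2, 3], [1, 2, 3, 28]] = torch.tensor([1.0, 1.0, 0.5, 1.0])
    print(toy_match_score(probs, torch.tensor([[1, 2, 3, 28]])))  # ~= log(0.5) / 4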
""" batch_res = [] for ( indexes, - is_decoy, peptides, precursors, encoded_ms, @@ -1034,7 +1033,6 @@ def predict_step(self, batch, *args): batch_res.append( ( indexes, - is_decoy, peptides, score_result.cpu().detach().numpy(), per_aa_score.cpu().detach().numpy(), @@ -1043,27 +1041,25 @@ def predict_step(self, batch, *args): ) return batch_res - def smart_batch_gen(self, batch): + def smart_batch_gen(self, spectrum_batch): + """TODO: ADD DOCSTRING""" all_psm = [] - batch_size = len(batch[0]) - enc = self.encoder(batch[0]) - precursors = batch[1] - indexes = batch[3] + batch_size = len(spectrum_batch[0]) + enc = self.encoder(spectrum_batch[0]) enc = list(zip(*enc)) + precursors = spectrum_batch[1] + indexes = spectrum_batch[2] for idx in range(batch_size): - spec_peptides = batch[2][idx].split(",") - # Check for decoy prefixes and create a bit-vector indicating targets (1) or decoys (0) - decoy_prefix = "decoy_" # Decoy prefix - id_decoys = np.array( - [ - (0, p.removeprefix(decoy_prefix)) - if p.startswith(decoy_prefix) - else (1, p) - for p in spec_peptides - ] + spec_peptides = db_utils.get_candidates( + precursors[idx][2], + precursors[idx][1], + self.digest, + self.precursor_tolerance, + self.isotope_error, ) - decoy_mask = np.array(id_decoys[:, 0], dtype=bool) - spec_peptides = list(id_decoys[:, 1]) + spec_peptides = [ + a[0] for a in spec_peptides + ] # TODO: USE MASS AND PROTEIN INFORMATION spec_precursors = [precursors[idx]] * len(spec_peptides) spec_enc = [enc[idx]] * len(spec_peptides) spec_idx = [indexes[idx]] * len(spec_peptides) @@ -1074,24 +1070,22 @@ def smart_batch_gen(self, batch): spec_precursors, spec_peptides, spec_idx, - decoy_mask, ) ) ) # Continually grab num_pairs items from all_psm until list is exhausted while len(all_psm) > 0: - batch = all_psm[:batch_size] + psm_batch = all_psm[:batch_size] all_psm = all_psm[batch_size:] - batch = list(zip(*batch)) + psm_batch = list(zip(*psm_batch)) encoded_ms = ( - torch.stack([a[0] for a in batch[0]]), - torch.stack([a[1] for a in batch[0]]), + torch.stack([a[0] for a in psm_batch[0]]), + torch.stack([a[1] for a in psm_batch[0]]), ) - prec_data = torch.stack(batch[1]) - pep_str = list(batch[2]) - indexes = [a[1] for a in batch[3]] - is_decoy = batch[4] - yield (indexes, is_decoy, pep_str, prec_data, encoded_ms) + prec_data = torch.stack(psm_batch[1]) + pep_str = list(psm_batch[2]) + indexes = [a[1] for a in psm_batch[3]] + yield (indexes, pep_str, prec_data, encoded_ms) def on_predict_batch_end( self, @@ -1102,7 +1096,6 @@ def on_predict_batch_end( return for ( indexes, - t_or_d, peptides, score_result, per_aa_score, @@ -1123,7 +1116,6 @@ def on_predict_batch_end( calc_mz, indexes, per_aa_score, - t_or_d, ): self.out_writer.psms.append(row) diff --git a/casanovo/denovo/model_runner.py b/casanovo/denovo/model_runner.py index e150ab2d..73dfdff2 100644 --- a/casanovo/denovo/model_runner.py +++ b/casanovo/denovo/model_runner.py @@ -18,7 +18,7 @@ from lightning.pytorch.callbacks import ModelCheckpoint from ..config import Config -from ..data import ms_io +from ..data import ms_io, db_utils from ..denovo.dataloaders import DeNovoDataModule from ..denovo.model import Spec2Pep, DbSpec2Pep @@ -79,13 +79,29 @@ def __exit__(self, exc_type, exc_value, traceback): if self.writer is not None: self.writer.save() - def db_search(self, peak_path: Iterable[str], output: str) -> None: + def db_search( + self, + peak_path: Iterable[str], + fasta_path: str, + enzyme: str, + digestion: str, + missed_cleavages: int, + max_mods: int, + min_length: 
int, + max_length: int, + precursor_tolerance: float, + isotope_error: float, + output: str, + ) -> None: """Perform database search with Casanovo. Parameters ---------- - peak_path : iterable of str - The path to the annotated .mgf data files for database search. + peak_path : Iterable[str] + The path to the .mgf data file for database search. + fasta_path : str + The path to the FASTA file for database search. + # TODO: ADD ALL DOCUMENTATION output : str Where should the output be saved? @@ -105,12 +121,23 @@ def db_search(self, peak_path: Iterable[str], output: str) -> None: self.initialize_trainer(train=True) self.initialize_model(train=False, db_search=True) self.model.out_writer = self.writer + self.model.digest = db_utils.digest_fasta( + fasta_path, + enzyme, + digestion, + missed_cleavages, + max_mods, + min_length, + max_length, + ) + self.model.precursor_tolerance = precursor_tolerance + self.model.isotope_error = isotope_error - test_index = self._get_index(peak_path, True, "db search") + test_index = self._get_index(peak_path, False, "db search") self.writer.set_ms_run(test_index.ms_files) self.initialize_data_module(test_index=test_index) self.loaders.setup(stage="db") - self.trainer.predict(self.model, self.loaders.db_dataloader()) + self.trainer.predict(self.model, self.loaders.predict_dataloader()) def train( self, From f7dfbc8356d8993c219dbfaeccf59753f555fa07 Mon Sep 17 00:00:00 2001 From: VarunAnanth2003 Date: Tue, 2 Jul 2024 19:54:56 -0700 Subject: [PATCH 21/84] tested implementation of db search --- casanovo/casanovo.py | 107 ++++---- casanovo/data/annotate_db.py | 138 ---------- casanovo/data/db_utils.py | 109 ++++++-- casanovo/data/ms_io.py | 2 +- casanovo/denovo/model.py | 32 ++- casanovo/denovo/model_runner.py | 19 +- tests/conftest.py | 51 +++- tests/test_integration.py | 99 +------- tests/unit_tests/test_unit.py | 430 +++++++++++++++++++++++++++++++- 9 files changed, 666 insertions(+), 321 deletions(-) delete mode 100644 casanovo/data/annotate_db.py diff --git a/casanovo/casanovo.py b/casanovo/casanovo.py index df3cc79f..8ae9a81b 100644 --- a/casanovo/casanovo.py +++ b/casanovo/casanovo.py @@ -42,7 +42,6 @@ from . import utils from .denovo import ModelRunner from .config import Config -from .data.annotate_db import annotate_mgf logger = logging.getLogger("casanovo") click.rich_click.USE_MARKDOWN = True @@ -146,67 +145,6 @@ def sequence( logger.info("DONE!") -@main.command() -@click.argument( - "peak_path", - required=True, - nargs=1, - type=click.Path(exists=True, dir_okay=False), -) -@click.argument( - "tide_path", - required=True, - nargs=1, - type=click.Path(exists=True, dir_okay=True), -) -@click.option( - "-o", - "--output", - help="The output annotated MGF file.", - type=click.Path(dir_okay=False), -) -@click.option( - "-v", - "--verbosity", - help=""" - Set the verbosity of console logging messages. Log files are - always set to 'debug'. - """, - type=click.Choice( - ["debug", "info", "warning", "error"], - case_sensitive=False, - ), - default="info", -) -def annotate( - peak_path: str, - tide_path: str, - output: Optional[str], - verbosity: str, -) -> None: - """Annotate a given .mgf with candidates as selected by a Tide search for Casanovo-DB. - - PEAK_PATH must be one MGF file from which to annotate spectra. - - TIDE_PATH must be one directory containing the Tide search results of the .mgf. 
- This directory must contain tide-search.decoy.txt and tide-search.target.txt - """ - if output is None: - output = setup_logging(output, verbosity) - logger.info( - "Output file not specified. \ - Annotated MGF will be saved in the same directory \ - as the input MGF." - ) - output = peak_path.replace(".mgf", "_annotated.mgf") - else: - output = setup_logging(output, verbosity) - - annotate_mgf(peak_path, tide_path, output) - - logger.info("DONE!") - - @main.command(cls=_SharedParams) @click.argument( "peak_path", @@ -222,8 +160,47 @@ def annotate( ) @click.option( "--enzyme", - help="Enzyme for in silico digestion, see pyteomics.parser.expasy_rules", - type=str, + help="Enzyme for in silico digestion, \ + See pyteomics.parser.expasy_rules for valid enzymes", + type=click.Choice( + [ + "arg-c", + "asp-n", + "bnps-skatole", + "caspase 1", + "caspase 2", + "caspase 3", + "caspase 4", + "caspase 5", + "caspase 6", + "caspase 7", + "caspase 8", + "caspase 9", + "caspase 10", + "chymotrypsin high specificity", + "chymotrypsin low specificity", + "clostripain", + "cnbr", + "enterokinase", + "factor xa", + "formic acid", + "glutamyl endopeptidase", + "granzyme b", + "hydroxylamine", + "iodosobenzoic acid", + "lysc", + "ntcb", + "pepsin ph1.3", + "pepsin ph2.0", + "proline endopeptidase", + "proteinase k", + "staphylococcal peptidase i", + "thermolysin", + "thrombin", + "trypsin", + "trypsin_exception", + ] + ), default="trypsin", ) @click.option( @@ -287,7 +264,7 @@ def db_search( output: Optional[str], verbosity: str, ) -> None: - """Perform a search using Casanovo-DB. + """Perform a database search on MS/MS data using Casanovo-DB. PEAK_PATH must be one MGF file. FASTA_PATH must be one FASTA file. """ diff --git a/casanovo/data/annotate_db.py b/casanovo/data/annotate_db.py deleted file mode 100644 index dd2e6c64..00000000 --- a/casanovo/data/annotate_db.py +++ /dev/null @@ -1,138 +0,0 @@ -"""Methods used to annotate an .mgf so that it can be used by Casanovo-DB""" - -from pathlib import Path -from typing import Optional, Tuple -import os -import re -import logging - -import pandas as pd -import pyteomics.mgf as mgf - - -def _normalize_mods(seq: str) -> str: - """ - Turns tide-style modifications into the format used by Casanovo-DB. - - Parameters - ---------- - seq : str - The peptide sequence with tide-style modifications. - - Returns - ------- - str - The peptide sequence with Casanovo-DB-style modifications. - """ - logger = logging.getLogger("casanovo") - seq = seq.replace("C", "C+57.021") - seq = re.sub(r"M\[15\.[0-9]*\]", r"M+15.995", seq) - seq = re.sub(r"N\[0\.9[0-9]*\]", r"N+0.984", seq) - seq = re.sub(r"Q\[0\.9[0-9]*\]", r"Q+0.984", seq) - seq = re.sub(r"(.*)\[42\.[0-9]*\]", r"+42.011\1", seq) - seq = re.sub(r"(.*)\[43\.[0-9]*\]", r"+43.006\1", seq) - seq = re.sub(r"(.*)\[\-17\.[0-9]*\]", r"-17.027\1", seq) - seq = re.sub(r"(.*)\[25\.[0-9]*\]", r"+43.006-17.027\1", seq) - return seq - - -def annotate_mgf(peak_path: str, tide_path: str, output: Optional[str]): - """ - Accepts a directory containing the results of a successful tide search, - and an .mgf file containing MS/MS spectra. - The .mgf file is then annotated in the SEQ field with - all of the candidate peptides for each spectrum, as well as their target/decoy status. - This annotated .mgf can be given directly to Casanovo-DB to perfrom a database search. - - Parameters - ---------- - tide_dir_path : str - Path to the directory containing the results of a successful tide search. 
- mgf_file : str - Path to the .mgf file containing MS/MS spectra. - output_file : str - Path to where the annotated .mgf will be written. - - """ - logger = logging.getLogger("casanovo") - # Get paths to tide search text files - tdf_path = os.path.join(tide_path, "tide-search.target.txt") - ddf_path = os.path.join(tide_path, "tide-search.decoy.txt") - try: - target_df = pd.read_csv( - tdf_path, sep="\t", usecols=["scan", "sequence", "target/decoy"] - ) - decoy_df = pd.read_csv( - ddf_path, sep="\t", usecols=["scan", "sequence", "target/decoy"] - ) - except FileNotFoundError as e: - logger.error( - "Could not find tide search results in the specified directory. " - "Please ensure that the directory contains the following files: " - "tide-search.target.txt and tide-search.decoy.txt" - ) - raise e - - logger.info("Successfully read tide search results from %s.", tide_path) - - df = pd.concat([target_df, decoy_df]) - scan_groups = df.groupby("scan")[["sequence", "target/decoy"]] - - scan_map = {} - - for scan, item in scan_groups: - td_group = item.groupby("target/decoy")["sequence"].apply(list) - if "target" in td_group.index: - target_candidate_list = list( - map( - _normalize_mods, - td_group["target"], - ) - ) - else: - target_candidate_list = [] - logger.warn(f"No target peptides found for scan {scan}.") - if "decoy" in td_group.index: - decoy_candidate_list = list( - map( - _normalize_mods, - td_group["decoy"], - ) - ) - decoy_candidate_list = list( - map(lambda x: "decoy_" + str(x), decoy_candidate_list) - ) - else: - decoy_candidate_list = [] - logger.warn(f"No decoy peptides found for scan {scan}.") - - pep_list = target_candidate_list + decoy_candidate_list - if len(pep_list) == 0: - logger.warn(f"No peptides found for scan {scan}.") - else: - scan_map[scan] = target_candidate_list + decoy_candidate_list - - all_spec = [] - for idx, spec_dict in enumerate(mgf.read(peak_path)): - try: - scan = int(spec_dict["params"]["scans"]) - except KeyError as e: - logger.error( - "Could not find the scan number in the .mgf file." - "Please ensure that the .mgf file contains the scan number in the 'SCANS' field." - ) - raise e - try: - spec_dict["params"]["seq"] = ",".join(list(scan_map[scan])) - all_spec.append(spec_dict) - except KeyError as e: - # No need to do anything if the scan is not found in the scan map - pass - try: - output = str(output) - mgf.write(all_spec, output, file_mode="w") - logger.info("Annotated .mgf file written to %s.", output) - except Exception as e: - logger.error( - "Write to %s failed. Check if the file path is correct.", output - ) diff --git a/casanovo/data/db_utils.py b/casanovo/data/db_utils.py index c961e35e..341a6162 100644 --- a/casanovo/data/db_utils.py +++ b/casanovo/data/db_utils.py @@ -5,11 +5,14 @@ from pyteomics import fasta, parser import bisect +from typing import List, Tuple + +# CONSTANTS HYDROGEN = 1.007825035 OXYGEN = 15.99491463 H2O = 2 * HYDROGEN + OXYGEN PROTON = 1.00727646677 -ISOTOPE_SPACING = 1.003355 # - 0.00288 +ISOTOPE_SPACING = 1.003355 var_mods = { "d": ["N", "Q"], @@ -22,7 +25,7 @@ fixed_mods = {"carbm": ["C"]} -def convert_from_modx(seq): +def convert_from_modx(seq: str): """Converts peptide sequence from modX format to Casanovo-acceptable modifications. 
Args: @@ -40,15 +43,41 @@ def convert_from_modx(seq): def digest_fasta( - fasta_filename, - enzyme, - digestion, - missed_cleavages, - max_mods, - min_length, - max_length, + fasta_filename: str, + enzyme: str, + digestion: str, + missed_cleavages: int, + max_mods: int, + min_length: int, + max_length: int, ): - """TODO: Add docstring""" + """ + Digests a FASTA file and returns the peptides, their masses, and associated protein. + + Parameters + ---------- + fasta_filename : str + Path to the FASTA file. + enzyme : str + The enzyme to use for digestion. + See pyteomics.parser.expasy_rules for valid enzymes. + digestion : str + The type of digestion to perform. Either 'full' or 'partial'. + missed_cleavages : int + The number of missed cleavages to allow. + max_mods : int + The maximum number of modifications to allow per peptide. + min_length : int + The minimum length of peptides to consider. + max_length : int + The maximum length of peptides to consider. + + Returns + ------- + mod_peptide_list : List[Tuple[str, float, str]] + A list of tuples containing the peptide sequence, mass, + and associated protein. Sorted by neutral mass in ascending order. + """ # Verify the eistence of the file: if not os.path.isfile(fasta_filename): @@ -96,19 +125,39 @@ def digest_fasta( def get_candidates( - precursor_mass, charge, peptide_list, precursor_tolerance, isotope_error + precursor_mz: float, + charge: int, + peptide_list: List[Tuple[str, float, str]], + precursor_tolerance: int, + isotope_error: str, ): - """TODO: ADD DOCSTRING""" + """ + Returns a list of candidate peptides that fall within the specified mass range. + + Parameters + ---------- + precursor_mz : float + The precursor mass-to-charge ratio. + charge : int + The precursor charge. + peptide_list : List[Tuple[str, float, str]] + A list of tuples containing the peptide sequence, mass, and associated protein. + Must be sorted by mass in ascending order. Uses neutral masses. + precursor_tolerance : float + The precursor mass tolerance in parts-per-million. + isotope_error : str + The isotope error levels to consider. + """ candidates = set() isotope_error = [int(x) for x in isotope_error.split(",")] for e in isotope_error: iso_shift = ISOTOPE_SPACING * e - upper_bound = (_to_raw_mass(precursor_mass, charge) - iso_shift) * ( + upper_bound = (_to_raw_mass(precursor_mz, charge) - iso_shift) * ( 1 + (precursor_tolerance / 1e6) ) - lower_bound = (_to_raw_mass(precursor_mass, charge) - iso_shift) * ( + lower_bound = (_to_raw_mass(precursor_mz, charge) - iso_shift) * ( 1 - (precursor_tolerance / 1e6) ) @@ -124,12 +173,40 @@ def get_candidates( def _to_mz(precursor_mass, charge): - """TODO: ADD DOCSTRING""" + """ + Convert precursor neutral mass to m/z value. + + Parameters + ---------- + precursor_mass : float + The precursor neutral mass. + charge : int + The precursor charge. + + Returns + ------- + mz : float + The calculated precursor mass-to-charge ratio. + """ return (precursor_mass + (charge * PROTON)) / charge def _to_raw_mass(mz_mass, charge): - """TODO: ADD DOCSTRING""" + """ + Convert precursor m/z value to neutral mass. + + Parameters + ---------- + mz_mass : float + The precursor mass-to-charge ratio. + charge : int + The precursor charge. + + Returns + ------- + mass : float + The calculated precursor neutral mass. 
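For example, a minimal standalone sketch of the digestion step, using only pyteomics and mirroring the cleave/isoforms calls made in this function; the protein string is illustrative and the commented results are what those calls are expected to produce:

    from pyteomics import parser

    protein = "MEAPAQLLFLLLLWLPDTTREIVMTQSPPTLSLSPGERVTLSCR"
    peptides = parser.cleave(
        protein,
        rule=parser.expasy_rules["trypsin"],  # ExPASy rule: cut after K/R, generally not before P
        missed_cleavages=0,
    )
    # {'MEAPAQLLFLLLLWLPDTTR', 'EIVMTQSPPTLSLSPGER', 'VTLSCR'}

    # Fixed/variable modifications expand each peptide into modX isoforms,
    # which convert_from_modx() then rewrites into Casanovo notation:
    isoforms = list(parser.isoforms("VTLSCR", fixed_mods={"carbm": ["C"]}))
    # e.g. ['VTLScarbmCR'] -> 'VTLSC+57.021R' after convert_from_modx()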
+ """ return charge * (mz_mass - PROTON) diff --git a/casanovo/data/ms_io.py b/casanovo/data/ms_io.py index d47b9b04..a701b627 100644 --- a/casanovo/data/ms_io.py +++ b/casanovo/data/ms_io.py @@ -221,7 +221,7 @@ def save_db_variant(self) -> None: Export the Casanovo-DB search results to the mzTab file. Outputs PSMs in the order they were scored - (i.e. the order in the annotated .mgf file). + (i.e. the order in the .mgf file). """ with open(self.filename, "w", newline="") as f: writer = csv.writer(f, delimiter="\t", lineterminator=os.linesep) diff --git a/casanovo/denovo/model.py b/casanovo/denovo/model.py index be7dba9a..4d9bd41b 100644 --- a/casanovo/denovo/model.py +++ b/casanovo/denovo/model.py @@ -1009,7 +1009,7 @@ def predict_step(self, batch, *args): ---------- batch : Tuple[torch.Tensor, torch.Tensor, torch.Tensor] A batch of (i) MS/MS spectra, (ii) precursor information, (iii) - spectrum identifiers as torch Tensors + spectrum identifiers as torch Tensors. Returns ------- @@ -1042,7 +1042,21 @@ def predict_step(self, batch, *args): return batch_res def smart_batch_gen(self, spectrum_batch): - """TODO: ADD DOCSTRING""" + """ + Transforms a batch of spectra into multiple equally-sized batches of PSMs. + + Parameters + ---------- + spectrum batch : Tuple[torch.Tensor, torch.Tensor, torch.Tensor] + A batch of (i) MS/MS spectra, (ii) precursor information, (iii) + spectrum identifiers as torch Tensors. + + Yields + ------- + psm_batch: Tuple[List[int], List[str], torch.Tensor, Tuple[torch.Tensor, torch.Tensor]] + A batch of PSMs containing the spectrum index, peptide sequence, + precursor information, and encoded MS/MS spectra. + """ all_psm = [] batch_size = len(spectrum_batch[0]) enc = self.encoder(spectrum_batch[0]) @@ -1050,16 +1064,22 @@ def smart_batch_gen(self, spectrum_batch): precursors = spectrum_batch[1] indexes = spectrum_batch[2] for idx in range(batch_size): - spec_peptides = db_utils.get_candidates( + digest_data = db_utils.get_candidates( precursors[idx][2], precursors[idx][1], self.digest, self.precursor_tolerance, self.isotope_error, ) - spec_peptides = [ - a[0] for a in spec_peptides - ] # TODO: USE MASS AND PROTEIN INFORMATION + logger.debug("%s", digest_data) + try: + spec_peptides, pep_masses, pep_protein = list( + zip(*digest_data) + ) + except ValueError: + logger.info( + "No peptides found for precursor %s", precursors[idx] + ) spec_precursors = [precursors[idx]] * len(spec_peptides) spec_enc = [enc[idx]] * len(spec_peptides) spec_idx = [indexes[idx]] * len(spec_peptides) diff --git a/casanovo/denovo/model_runner.py b/casanovo/denovo/model_runner.py index 73dfdff2..284acbe8 100644 --- a/casanovo/denovo/model_runner.py +++ b/casanovo/denovo/model_runner.py @@ -90,7 +90,7 @@ def db_search( min_length: int, max_length: int, precursor_tolerance: float, - isotope_error: float, + isotope_error: str, output: str, ) -> None: """Perform database search with Casanovo. @@ -101,7 +101,22 @@ def db_search( The path to the .mgf data file for database search. fasta_path : str The path to the FASTA file for database search. - # TODO: ADD ALL DOCUMENTATION + enzyme : str + The enzyme used for digestion. + digestion : str + The digestion type, full or partial. + missed_cleavages : int + The number of missed cleavages allowed. + max_mods : int + The maximum number of modifications allowed per peptide. + min_length : int + The minimum peptide length. + max_length : int + The maximum peptide length. + precursor_tolerance : float + The precursor mass tolerance in ppm. 
+ isotope_error : str + Isotope error levels to consider, in comma-delineated string form. output : str Where should the output be saved? diff --git a/tests/conftest.py b/tests/conftest.py index eed4f39a..cac1a873 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -5,7 +5,7 @@ import psims import pytest import yaml -from pyteomics.mass import calculate_mass +from pyteomics.mass import calculate_mass, fast_mass, std_aa_mass @pytest.fixture @@ -263,6 +263,36 @@ def tiny_config(tmp_path): return cfg_file +@pytest.fixture +def tiny_fasta_file(tmp_path, fasta_raw_data): + fasta_file = tmp_path / "tiny_fasta.fasta" + with fasta_file.open("w+") as fasta_ref: + fasta_ref.write(fasta_raw_data) + + return fasta_file + + +@pytest.fixture +def fasta_raw_data(): + return ">foo\nMEAPAQLLFLLLLWLPDTTREIVMTQSPPTLSLSPGERVTLSCRASQSVSSSYLTWYQQKPGQAPRLLIYGASTRATSIPARFSGSGSGTDFTLTISSLQPEDFAVYYCQQDYNLP" + + +@pytest.fixture +def mgf_db_search(tmp_path): + """An MGF file with 2 unannotated spectra and scan numbers.""" + peptides = [ + "ATSIPAR", + "VTLSCR", + "LLIYGASTR", + "EIVMTQSPPTLSLSPGER", + "MEAPAQLLFLLLLWLPDTTR", + "ASQSVSSSYLTWYQQKPGQAPR", + "FSGSGSGTDFTLTISSLQPEDFAVYYCQQDYNLP", + ] + mgf_file = tmp_path / "db_search.mgf" + return _create_unannotated_mgf(peptides, mgf_file, c_mod=True) + + @pytest.fixture def mgf_small_unannotated(tmp_path): """An MGF file with 2 unannotated spectra and scan numbers.""" @@ -271,7 +301,7 @@ def mgf_small_unannotated(tmp_path): return _create_unannotated_mgf(peptides, mgf_file) -def _create_unannotated_mgf(peptides, mgf_file, random_state=999): +def _create_unannotated_mgf(peptides, mgf_file, random_state=999, c_mod=False): """ Create a fake MGF file from one or more peptides. This file will have no SEQ= parameter, but will have a SCANS= parameter. @@ -284,6 +314,9 @@ def _create_unannotated_mgf(peptides, mgf_file, random_state=999): The MGF file to create. random_state : int or numpy.random.Generator, optional The random seed. The charge states are chosen to be 2 or 3 randomly. + c_mod : bool, optional + Whether to use the constant carbamidomethylation + of C in mass calculations. Returns ------- @@ -291,7 +324,7 @@ def _create_unannotated_mgf(peptides, mgf_file, random_state=999): """ rng = np.random.default_rng(random_state) entries = [ - _create_unannotated_mgf_entry(p, idx, rng.choice([2, 3])) + _create_unannotated_mgf_entry(p, idx, rng.choice([2, 3]), c_mod=c_mod) for idx, p in enumerate(peptides) ] with mgf_file.open("w+") as mgf_ref: @@ -300,7 +333,7 @@ def _create_unannotated_mgf(peptides, mgf_file, random_state=999): return mgf_file -def _create_unannotated_mgf_entry(peptide, scan_num, charge): +def _create_unannotated_mgf_entry(peptide, scan_num, charge, c_mod=False): """ Create a MassIVE-KB style MGF entry for a single PSM. Each entry will have no SEQ= parameter, but will have a SCANS= parameter. @@ -313,13 +346,21 @@ def _create_unannotated_mgf_entry(peptide, scan_num, charge): The scan number. charge : int, optional The peptide charge state. + c_mod : bool, optional + Whether to use the constant carbamidomethylation + of C in mass calculations. Returns ------- str The PSM entry in an MGF file format. 
""" - precursor_mz = calculate_mass(peptide, charge=int(charge)) + if not c_mod: + precursor_mz = calculate_mass(peptide, charge=int(charge)) + else: + aa_mass = std_aa_mass + aa_mass.update({"C": 160.030649}) # Carbamidomethylated C mass + precursor_mz = fast_mass(peptide, charge=int(charge), aa_mass=aa_mass) mzs, intensities = _peptide_to_peaks(peptide, charge) frags = "\n".join([f"{m} {i}" for m, i in zip(mzs, intensities)]) diff --git a/tests/test_integration.py b/tests/test_integration.py index 60e3977b..4bd55174 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -7,50 +7,8 @@ from casanovo import casanovo -def test_annotate(mgf_small_unannotated, tide_dir_small, tmp_path): - - # Run a command: - run = functools.partial( - CliRunner().invoke, casanovo.main, catch_exceptions=False - ) - - annotate_args = [ - "annotate", - str(mgf_small_unannotated), - str(tide_dir_small), - "--output", - str(tmp_path / "annotated_mgf.mgf"), - ] - - result = run(annotate_args) - - assert result.exit_code == 0 - assert (tmp_path / "annotated_mgf.mgf").exists() - - # Read in the annotated file - with open(tmp_path / "annotated_mgf.mgf") as f: - annotated_lines = f.readlines() - - # Get each SEQ= line - seq_lines = [line for line in annotated_lines if line.startswith("SEQ=")] - assert len(seq_lines) == 3 - assert ( - seq_lines[0].strip() - == "SEQ=LESLIEK,PEPTIDEK,decoy_KEILSEL,decoy_KEDITEPP" - ) - assert ( - seq_lines[1].strip() - == "SEQ=LESLIEK,PEPTIDEK,decoy_KEILSEL,decoy_KEDITEPP" - ) - assert ( - seq_lines[2].strip() == "SEQ=+42.011LEM+15.995SLIM+15.995EK," - "+43.006PEN+0.984PTIQ+0.984DEK,decoy_-17.027KM+15.995EILSEL," - "decoy_+43.006-17.027KEDITEPP,decoy_KEDIQ+0.984TEPPQ+0.984" - ) - - def test_db_search( - mgf_small_unannotated, tide_dir_small, tiny_config, tmp_path, monkeypatch + mgf_db_search, tiny_fasta_file, tiny_config, tmp_path, monkeypatch ): # Run a command: monkeypatch.setattr(casanovo, "__version__", "4.1.0") @@ -58,30 +16,18 @@ def test_db_search( CliRunner().invoke, casanovo.main, catch_exceptions=False ) - annotate_args = [ - "annotate", - str(mgf_small_unannotated), - str(tide_dir_small), - "--output", - str(tmp_path / "annotated_mgf.mgf"), - ] - - result = run(annotate_args) - - assert result.exit_code == 0 - assert (tmp_path / "annotated_mgf.mgf").exists() - - # Follow up annotate run with db search - output_path = tmp_path / "db_search.mztab" search_args = [ "db-search", - str(tmp_path / "annotated_mgf.mgf"), "--config", tiny_config, "--output", str(output_path), + "--precursor_tolerance", + str(100), + str(mgf_db_search), + str(tiny_fasta_file), ] result = run(search_args) @@ -94,34 +40,13 @@ def test_db_search( psms = mztab.spectrum_match_table assert list(psms.sequence) == [ - "LESLIEK", - "PEPTIDEK", - "KEILSEL", - "KEDITEPP", - "LESLIEK", - "PEPTIDEK", - "KEILSEL", - "KEDITEPP", - "+42.011LEM+15.995SLIM+15.995EK", - "+43.006PEN+0.984PTIQ+0.984DEK", - "-17.027KM+15.995EILSEL", - "+43.006-17.027KEDITEPP", - "KEDIQ+0.984TEPPQ+0.984", - ] - assert list(psms["opt_cv_MS:1002217_decoy_peptide"]) == [ - "True", - "True", - "False", - "False", - "True", - "True", - "False", - "False", - "True", - "True", - "False", - "False", - "False", + "ATSIPAR", + "VTLSC+57.021R", + "LLIYGASTR", + "EIVMTQSPPTLSLSPGER", + "MEAPAQLLFLLLLWLPDTTR", + "ASQSVSSSYLTWYQQKPGQAPR", + "FSGSGSGTDFTLTISSLQPEDFAVYYC+57.021QQDYNLP", ] diff --git a/tests/unit_tests/test_unit.py b/tests/unit_tests/test_unit.py index ec9085c0..e3707917 100644 --- a/tests/unit_tests/test_unit.py +++ 
b/tests/unit_tests/test_unit.py @@ -10,10 +10,11 @@ import numpy as np import pytest import torch +import re from casanovo import casanovo from casanovo import utils -from casanovo.data import ms_io +from casanovo.data import ms_io, db_utils from casanovo.data.datasets import SpectrumDataset, AnnotatedSpectrumDataset from casanovo.denovo.evaluate import aa_match_batch, aa_match_metrics from casanovo.denovo.model import Spec2Pep, _aa_pep_score, _calc_match_score @@ -219,6 +220,433 @@ def test_calc_match_score(): assert np.sum(masked_per_aa_scores.numpy()[3]) == 3 +def test_digest_fasta_cleave(fasta_raw_data): + + with open("temp_fasta", "w") as file: + file.write(fasta_raw_data) + + # No missed cleavages + expected_normal = [ + "ATSIPAR", + "VTLSC+57.021R", + "LLIYGASTR", + "EIVMTQSPPTLSLSPGER", + "MEAPAQLLFLLLLWLPDTTR", + "ASQSVSSSYLTWYQQKPGQAPR", + "FSGSGSGTDFTLTISSLQPEDFAVYYC+57.021QQDYNLP", + ] + + # 1 missed cleavage + expected_1missedcleavage = [ + "ATSIPAR", + "VTLSC+57.021R", + "LLIYGASTR", + "LLIYGASTRATSIPAR", + "EIVMTQSPPTLSLSPGER", + "MEAPAQLLFLLLLWLPDTTR", + "ASQSVSSSYLTWYQQKPGQAPR", + "EIVMTQSPPTLSLSPGERVTLSC+57.021R", + "VTLSC+57.021RASQSVSSSYLTWYQQKPGQAPR", + "ASQSVSSSYLTWYQQKPGQAPRLLIYGASTR", + "FSGSGSGTDFTLTISSLQPEDFAVYYC+57.021QQDYNLP", + "MEAPAQLLFLLLLWLPDTTREIVMTQSPPTLSLSPGER", + "ATSIPARFSGSGSGTDFTLTISSLQPEDFAVYYC+57.021QQDYNLP", + ] + + # 3 missed cleavages + expected_3missedcleavage = [ + "ATSIPAR", + "VTLSC+57.021R", + "LLIYGASTR", + "LLIYGASTRATSIPAR", + "EIVMTQSPPTLSLSPGER", + "MEAPAQLLFLLLLWLPDTTR", + "ASQSVSSSYLTWYQQKPGQAPR", + "EIVMTQSPPTLSLSPGERVTLSC+57.021R", + "VTLSC+57.021RASQSVSSSYLTWYQQKPGQAPR", + "ASQSVSSSYLTWYQQKPGQAPRLLIYGASTR", + "FSGSGSGTDFTLTISSLQPEDFAVYYC+57.021QQDYNLP", + "ASQSVSSSYLTWYQQKPGQAPRLLIYGASTRATSIPAR", + "VTLSC+57.021RASQSVSSSYLTWYQQKPGQAPRLLIYGASTR", + "MEAPAQLLFLLLLWLPDTTREIVMTQSPPTLSLSPGER", + "ATSIPARFSGSGSGTDFTLTISSLQPEDFAVYYC+57.021QQDYNLP", + "VTLSC+57.021RASQSVSSSYLTWYQQKPGQAPRLLIYGASTRATSIPAR", + "MEAPAQLLFLLLLWLPDTTREIVMTQSPPTLSLSPGERVTLSC+57.021R", + "EIVMTQSPPTLSLSPGERVTLSC+57.021RASQSVSSSYLTWYQQKPGQAPR", + "LLIYGASTRATSIPARFSGSGSGTDFTLTISSLQPEDFAVYYC+57.021QQDYNLP", + ] + + peptide_list = db_utils.digest_fasta( + fasta_filename="temp_fasta", + enzyme="trypsin", + digestion="full", + missed_cleavages=0, + max_mods=0, + min_length=6, + max_length=50, + ) + peptide_list = [x[0] for x in peptide_list] + assert peptide_list == expected_normal + + peptide_list = db_utils.digest_fasta( + fasta_filename="temp_fasta", + enzyme="trypsin", + digestion="full", + missed_cleavages=1, + max_mods=0, + min_length=6, + max_length=50, + ) + peptide_list = [x[0] for x in peptide_list] + assert peptide_list == expected_1missedcleavage + + peptide_list = db_utils.digest_fasta( + fasta_filename="temp_fasta", + enzyme="trypsin", + digestion="full", + missed_cleavages=3, + max_mods=0, + min_length=6, + max_length=50, + ) + peptide_list = [x[0] for x in peptide_list] + assert peptide_list == expected_3missedcleavage + + +def test_digest_fasta_mods(fasta_raw_data): + + with open("temp_fasta", "w") as file: + file.write(fasta_raw_data) + + # 1 modification allowed + # fixed: C+57.02146 + # variable: 1M+15.994915,1N+0.984016,1Q+0.984016 + # nterm: 1X+42.010565,1X+43.005814,1X-17.026549,1X+25.980265 + expected_1mod = [ + "-17.027ATSIPAR", + "ATSIPAR", + "-17.027VTLSC+57.021R", + "VTLSC+57.021R", + "+43.006-17.027ATSIPAR", + "+42.011ATSIPAR", + "+43.006ATSIPAR", + "+43.006-17.027VTLSC+57.021R", + "+42.011VTLSC+57.021R", + "+43.006VTLSC+57.021R", + 
"-17.027LLIYGASTR", + "LLIYGASTR", + "+43.006-17.027LLIYGASTR", + "+42.011LLIYGASTR", + "+43.006LLIYGASTR", + "-17.027EIVMTQSPPTLSLSPGER", + "EIVMTQSPPTLSLSPGER", + "EIVMTQ+0.984SPPTLSLSPGER", + "EIVM+15.995TQSPPTLSLSPGER", + "+43.006-17.027EIVMTQSPPTLSLSPGER", + "+42.011EIVMTQSPPTLSLSPGER", + "+43.006EIVMTQSPPTLSLSPGER", + "-17.027MEAPAQLLFLLLLWLPDTTR", + "MEAPAQLLFLLLLWLPDTTR", + "MEAPAQ+0.984LLFLLLLWLPDTTR", + "M+15.995EAPAQLLFLLLLWLPDTTR", + "+43.006-17.027MEAPAQLLFLLLLWLPDTTR", + "+42.011MEAPAQLLFLLLLWLPDTTR", + "+43.006MEAPAQLLFLLLLWLPDTTR", + "-17.027ASQSVSSSYLTWYQQKPGQAPR", + "ASQSVSSSYLTWYQQKPGQAPR", + "ASQ+0.984SVSSSYLTWYQQKPGQAPR", + "ASQSVSSSYLTWYQ+0.984QKPGQAPR", + "ASQSVSSSYLTWYQQ+0.984KPGQAPR", + "ASQSVSSSYLTWYQQKPGQ+0.984APR", + "+43.006-17.027ASQSVSSSYLTWYQQKPGQAPR", + "+42.011ASQSVSSSYLTWYQQKPGQAPR", + "+43.006ASQSVSSSYLTWYQQKPGQAPR", + "-17.027FSGSGSGTDFTLTISSLQPEDFAVYYC+57.021QQDYNLP", + "FSGSGSGTDFTLTISSLQPEDFAVYYC+57.021QQDYNLP", + "FSGSGSGTDFTLTISSLQ+0.984PEDFAVYYC+57.021QQDYNLP", + "FSGSGSGTDFTLTISSLQPEDFAVYYC+57.021Q+0.984QDYNLP", + "FSGSGSGTDFTLTISSLQPEDFAVYYC+57.021QQ+0.984DYNLP", + "FSGSGSGTDFTLTISSLQPEDFAVYYC+57.021QQDYN+0.984LP", + "+43.006-17.027FSGSGSGTDFTLTISSLQPEDFAVYYC+57.021QQDYNLP", + "+42.011FSGSGSGTDFTLTISSLQPEDFAVYYC+57.021QQDYNLP", + "+43.006FSGSGSGTDFTLTISSLQPEDFAVYYC+57.021QQDYNLP", + ] + + peptide_list = db_utils.digest_fasta( + fasta_filename="temp_fasta", + enzyme="trypsin", + digestion="full", + missed_cleavages=0, + max_mods=1, + min_length=6, + max_length=50, + ) + peptide_list = [x[0] for x in peptide_list] + peptide_list = [ + x + for x in peptide_list + if not re.search( + r"(\+42\.011|\+43\.006|\-17\.027|\+43\.006\-17\.027)+[A-Z]\+", x + ) + ] + assert peptide_list == expected_1mod + + +def test_length_restrictions(fasta_raw_data): + + with open("temp_fasta", "w") as file: + file.write(fasta_raw_data) + + # length between 20 and 50 + expected_long = [ + "MEAPAQLLFLLLLWLPDTTR", + "ASQSVSSSYLTWYQQKPGQAPR", + "FSGSGSGTDFTLTISSLQPEDFAVYYC+57.021QQDYNLP", + ] + + # length between 6 and 8 + expected_short = ["ATSIPAR", "VTLSC+57.021R"] + + peptide_list = db_utils.digest_fasta( + fasta_filename="temp_fasta", + enzyme="trypsin", + digestion="full", + missed_cleavages=0, + max_mods=0, + min_length=20, + max_length=50, + ) + peptide_list = [x[0] for x in peptide_list] + assert peptide_list == expected_long + + peptide_list = db_utils.digest_fasta( + fasta_filename="temp_fasta", + enzyme="trypsin", + digestion="full", + missed_cleavages=0, + max_mods=0, + min_length=6, + max_length=8, + ) + peptide_list = [x[0] for x in peptide_list] + assert peptide_list == expected_short + + +def test_digest_fasta_enzyme(fasta_raw_data): + + with open("temp_fasta", "w") as file: + file.write(fasta_raw_data) + + # arg-c enzyme + expected_argc = [ + "ATSIPAR", + "VTLSC+57.021R", + "LLIYGASTR", + "EIVMTQSPPTLSLSPGER", + "MEAPAQLLFLLLLWLPDTTR", + "ASQSVSSSYLTWYQQKPGQAPR", + "FSGSGSGTDFTLTISSLQPEDFAVYYC+57.021QQDYNLP", + ] + + # asp-n enzyme + expected_aspn = ["DFAVYYC+57.021QQ", "DFTLTISSLQPE", "MEAPAQLLFLLLLWLP"] + + peptide_list = db_utils.digest_fasta( + fasta_filename="temp_fasta", + enzyme="arg-c", + digestion="full", + missed_cleavages=0, + max_mods=0, + min_length=6, + max_length=50, + ) + peptide_list = [x[0] for x in peptide_list] + assert peptide_list == expected_argc + + peptide_list = db_utils.digest_fasta( + fasta_filename="temp_fasta", + enzyme="asp-n", + digestion="full", + missed_cleavages=0, + max_mods=0, + min_length=6, + max_length=50, + ) + 
peptide_list = [x[0] for x in peptide_list] + assert peptide_list == expected_aspn + + +def test_get_candidates(fasta_raw_data): + + with open("temp_fasta", "w") as file: + file.write(fasta_raw_data) + + # precursor_window is 10000 + expected_smallwindow = ["LLIYGASTR"] + + # precursor window is 150000 + expected_midwindow = ["LLIYGASTR"] + + # precursor window is 600000 + expected_widewindow = ["ATSIPAR", "VTLSC+57.021R", "LLIYGASTR"] + + peptide_list = db_utils.digest_fasta( + fasta_filename="temp_fasta", + enzyme="trypsin", + digestion="full", + missed_cleavages=1, + max_mods=0, + min_length=6, + max_length=50, + ) + + candidates = db_utils.get_candidates( + precursor_mz=496.2, + charge=2, + peptide_list=peptide_list, + precursor_tolerance=10000, + isotope_error="0", + ) + candidates = [x[0] for x in candidates] + assert expected_smallwindow == candidates + + peptide_list = db_utils.digest_fasta( + fasta_filename="temp_fasta", + enzyme="trypsin", + digestion="full", + missed_cleavages=1, + max_mods=0, + min_length=6, + max_length=50, + ) + + candidates = db_utils.get_candidates( + precursor_mz=496.2, + charge=2, + peptide_list=peptide_list, + precursor_tolerance=150000, + isotope_error="0", + ) + candidates = [x[0] for x in candidates] + assert expected_midwindow == candidates + + peptide_list = db_utils.digest_fasta( + fasta_filename="temp_fasta", + enzyme="trypsin", + digestion="full", + missed_cleavages=1, + max_mods=0, + min_length=6, + max_length=50, + ) + + candidates = db_utils.get_candidates( + precursor_mz=496.2, + charge=2, + peptide_list=peptide_list, + precursor_tolerance=600000, + isotope_error="0", + ) + candidates = [x[0] for x in candidates] + assert expected_widewindow == candidates + + +def test_get_candidates_isotope_error(): + + # Tide isotope error windows for 496.2, 2+: + # 0: [980.481617, 1000.289326] + # 1: [979.491114, 999.278813] + # 2: [978.500611, 998.268300] + # 3: [977.510108, 997.257787] + + peptide_list = [ + ("A", 1001), + ("B", 1000), + ("C", 999), + ("D", 998), + ("E", 997), + ("F", 996), + ("G", 995), + ("H", 994), + ("I", 993), + ("J", 992), + ("K", 991), + ("L", 990), + ("M", 989), + ("N", 988), + ("O", 987), + ("P", 986), + ("Q", 985), + ("R", 984), + ("S", 983), + ("T", 982), + ("U", 981), + ("V", 980), + ("W", 979), + ("X", 978), + ("Y", 977), + ("Z", 976), + ] + + peptide_list.sort(key=lambda x: x[1]) + + expected_isotope0 = list("UTSRQPONMLKJIHGFEDCB") + expected_isotope1 = list("VUTSRQPONMLKJIHGFEDC") + expected_isotope2 = list("WVUTSRQPONMLKJIHGFED") + expected_isotope3 = list("XWVUTSRQPONMLKJIHGFE") + expected_isotope0123 = list("XWVUTSRQPONMLKJIHGFEDCB") + + candidates = db_utils.get_candidates( + precursor_mz=496.2, + charge=2, + peptide_list=peptide_list, + precursor_tolerance=10000, + isotope_error="0", + ) + candidates = [x[0] for x in candidates] + assert expected_isotope0 == candidates + + candidates = db_utils.get_candidates( + precursor_mz=496.2, + charge=2, + peptide_list=peptide_list, + precursor_tolerance=10000, + isotope_error="1", + ) + candidates = [x[0] for x in candidates] + assert expected_isotope1 == candidates + + candidates = db_utils.get_candidates( + precursor_mz=496.2, + charge=2, + peptide_list=peptide_list, + precursor_tolerance=10000, + isotope_error="2", + ) + candidates = [x[0] for x in candidates] + assert expected_isotope2 == candidates + + candidates = db_utils.get_candidates( + precursor_mz=496.2, + charge=2, + peptide_list=peptide_list, + precursor_tolerance=10000, + isotope_error="3", + ) + candidates = 
[x[0] for x in candidates] + assert expected_isotope3 == candidates + + candidates = db_utils.get_candidates( + precursor_mz=496.2, + charge=2, + peptide_list=peptide_list, + precursor_tolerance=10000, + isotope_error="0,1,2,3", + ) + candidates = [x[0] for x in candidates] + assert expected_isotope0123 == candidates + + def test_beam_search_decode(): """ Test beam search decoding and its sub-functions. From e2ce3172c89a5c4fc74256689fa3cdf6b01d1faf Mon Sep 17 00:00:00 2001 From: VarunAnanth2003 Date: Tue, 2 Jul 2024 20:20:25 -0700 Subject: [PATCH 22/84] fix for issue with 0 candidates --- casanovo/denovo/model.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/casanovo/denovo/model.py b/casanovo/denovo/model.py index 4d9bd41b..02a324d3 100644 --- a/casanovo/denovo/model.py +++ b/casanovo/denovo/model.py @@ -1071,15 +1071,13 @@ def smart_batch_gen(self, spectrum_batch): self.precursor_tolerance, self.isotope_error, ) - logger.debug("%s", digest_data) try: spec_peptides, pep_masses, pep_protein = list( zip(*digest_data) ) except ValueError: - logger.info( - "No peptides found for precursor %s", precursors[idx] - ) + logger.info("No peptides found for spectrum %s", indexes[idx]) + continue spec_precursors = [precursors[idx]] * len(spec_peptides) spec_enc = [enc[idx]] * len(spec_peptides) spec_idx = [indexes[idx]] * len(spec_peptides) From 5ef27e0c7dfffd219e5b248205a7ced0187ce4bb Mon Sep 17 00:00:00 2001 From: VarunAnanth2003 Date: Wed, 3 Jul 2024 11:33:36 -0700 Subject: [PATCH 23/84] minor fixes added --- casanovo/data/datasets.py | 2 - casanovo/denovo/dataloaders.py | 13 --- casanovo/denovo/model.py | 31 +++--- casanovo/denovo/model_runner.py | 2 +- tests/conftest.py | 164 +++++++++----------------------- 5 files changed, 67 insertions(+), 145 deletions(-) diff --git a/casanovo/data/datasets.py b/casanovo/data/datasets.py index 59f56b68..6244e88f 100644 --- a/casanovo/data/datasets.py +++ b/casanovo/data/datasets.py @@ -134,8 +134,6 @@ def _process_peaks( The precursor m/z. precursor_charge : int The precursor charge. - track_spectrum_id : Optional[bool] - Whether to keep track of the identifier of the MS/MS spectra. Returns ------- diff --git a/casanovo/denovo/dataloaders.py b/casanovo/denovo/dataloaders.py index ba02936c..97bfb2fc 100644 --- a/casanovo/denovo/dataloaders.py +++ b/casanovo/denovo/dataloaders.py @@ -125,17 +125,6 @@ def setup(self, stage: str = None, annotated: bool = True) -> None: ) if self.test_index is not None: self.test_dataset = make_dataset(self.test_index) - if stage == "db": - make_dataset = functools.partial( - SpectrumDataset, - n_peaks=self.n_peaks, - min_mz=self.min_mz, - max_mz=self.max_mz, - min_intensity=self.min_intensity, - remove_precursor_tol=self.remove_precursor_tol, - ) - if self.test_index is not None: - self.test_dataset = make_dataset(self.test_index) def _make_loader( self, @@ -154,8 +143,6 @@ def _make_loader( The batch size to use. shuffle : bool Option to shuffle the batches. - db_mode : bool - Option to use the DataLoader for Casanovo-DB. Returns ------- diff --git a/casanovo/denovo/model.py b/casanovo/denovo/model.py index 02a324d3..312e7f92 100644 --- a/casanovo/denovo/model.py +++ b/casanovo/denovo/model.py @@ -12,7 +12,6 @@ import numpy as np import lightning.pytorch as pl from torch.utils.tensorboard import SummaryWriter -from pyteomics import mass from depthcharge.components import ModelMixin, PeptideDecoder, SpectrumEncoder from . 
import evaluate @@ -992,10 +991,19 @@ def configure_optimizers( class DbSpec2Pep(Spec2Pep): """ - Inherits Spec2Pep + Subclass of Spec2Pep for the use of Casanovo as an MS/MS database search score function. - Hijacks teacher-forcing implemented in Spec2Pep and - uses it to predict scores between a spectra and associated peptide. + Uses teacher forcing to 'query' Casanovo for its score for each AA + within a candidate peptide, and takes the geometric average of these scores + and reports this as the score for the spectrum-peptide pair. Note that the + geometric mean of the AA scores is actually calculated by a + summation and average of the log of the scores, to preserve numerical + stability. This does not affect PSM ranking. + + Also note that although teacher-forcing is used within this method, + there is *no training* involved. This is a prediction-only method. + + Output is provided in .mztab format. """ def __init__(self, *args, **kwargs): @@ -1119,7 +1127,6 @@ def on_predict_batch_end( per_aa_score, precursors, ) in outputs: - prec_mass = precursors[:, 0] prec_charge = precursors[:, 1] prec_mz = precursors[:, 2] calc_mz = [ @@ -1140,9 +1147,9 @@ def on_predict_batch_end( def _calc_match_score( batch_all_aa_scores: torch.Tensor, - truth_aa_indicies: torch.Tensor, + truth_aa_indices: torch.Tensor, decoder_reverse: bool = False, -) -> List[float]: +) -> Tuple[torch.Tensor, torch.Tensor]: """ Calculate the score between the input spectra and associated peptide. @@ -1158,7 +1165,7 @@ def _calc_match_score( Amino acid scores for all amino acids in the vocabulary for every prediction made to generate the associated peptide (for an entire batch) - truth_aa_indicies : torch.Tensor + truth_aa_indices : torch.Tensor Indicies of the score for each actual amino acid in the peptide (for an entire batch) decoder_reverse : bool @@ -1166,7 +1173,7 @@ def _calc_match_score( Returns ------- - score : list[float], list[list[float]] + (all_scores, per_aa_scores) : Tuple[torch.Tensor, torch.Tensor] The score between the input spectra and associated peptide (for an entire batch) a list of lists of per amino acid scores @@ -1175,7 +1182,7 @@ def _calc_match_score( # Remove trailing tokens from predictions based on decoder reversal if decoder_reverse: batch_all_aa_scores = batch_all_aa_scores[:, 1:] - elif not decoder_reverse: + else: batch_all_aa_scores = batch_all_aa_scores[:, :-1] # Vectorized scoring using efficient indexing. 
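# ---------------------------------------------------------------------------
# [Editor's note: the sketch below is an illustration added for clarity and is
# not part of the patch. It shows the scoring idea used by DbSpec2Pep /
# _calc_match_score: each candidate peptide is scored by the mean of the
# log-probabilities of its residues (the log of their geometric mean), with
# padded positions masked out. All names here (geometric_mean_score,
# per_aa_probs, pad_mask) are hypothetical and chosen for illustration only;
# they are not Casanovo's actual API.]
import torch


def geometric_mean_score(
    per_aa_probs: torch.Tensor, pad_mask: torch.Tensor
) -> torch.Tensor:
    # per_aa_probs: (batch, seq_len) probability assigned to the true residue.
    # pad_mask: (batch, seq_len) True for real residues, False for padding.
    probs = per_aa_probs.clamp_min(1e-10)  # avoid log(0), as in the patch
    log_probs = torch.log(probs) * pad_mask  # zero out padded positions
    lengths = pad_mask.sum(dim=1).clamp_min(1)  # residues per candidate
    return log_probs.sum(dim=1) / lengths  # mean log-probability per peptide


# Two candidate peptides of lengths 3 and 2 (the second padded to length 3);
# a higher (less negative) value means a better spectrum-peptide match.
probs = torch.tensor([[0.9, 0.8, 0.7], [0.6, 0.5, 0.0]])
mask = torch.tensor([[True, True, True], [True, True, False]])
print(geometric_mean_score(probs, mask))
# ---------------------------------------------------------------------------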
@@ -1186,10 +1193,10 @@ def _calc_match_score( ) cols = torch.arange(0, batch_all_aa_scores.shape[1]).expand_as(rows) - per_aa_scores = batch_all_aa_scores[rows, cols, truth_aa_indicies] + per_aa_scores = batch_all_aa_scores[rows, cols, truth_aa_indices] per_aa_scores[per_aa_scores == 0] += 1e-10 - score_mask = truth_aa_indicies != 0 + score_mask = truth_aa_indices != 0 per_aa_scores[~score_mask] = 0 log_per_aa_scores = torch.log(per_aa_scores) all_scores = torch.where( diff --git a/casanovo/denovo/model_runner.py b/casanovo/denovo/model_runner.py index 284acbe8..865df71b 100644 --- a/casanovo/denovo/model_runner.py +++ b/casanovo/denovo/model_runner.py @@ -151,7 +151,7 @@ def db_search( test_index = self._get_index(peak_path, False, "db search") self.writer.set_ms_run(test_index.ms_files) self.initialize_data_module(test_index=test_index) - self.loaders.setup(stage="db") + self.loaders.setup(stage="test", annotated=False) self.trainer.predict(self.model, self.loaders.predict_dataloader()) def train( diff --git a/tests/conftest.py b/tests/conftest.py index cac1a873..b2244308 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -16,7 +16,37 @@ def mgf_small(tmp_path): return _create_mgf(peptides, mgf_file) -def _create_mgf(peptides, mgf_file, random_state=42): +@pytest.fixture +def tiny_fasta_file(tmp_path, fasta_raw_data): + fasta_file = tmp_path / "tiny_fasta.fasta" + with fasta_file.open("w+") as fasta_ref: + fasta_ref.write(fasta_raw_data) + + return fasta_file + + +@pytest.fixture +def fasta_raw_data(): + return ">foo\nMEAPAQLLFLLLLWLPDTTREIVMTQSPPTLSLSPGERVTLSCRASQSVSSSYLTWYQQKPGQAPRLLIYGASTRATSIPARFSGSGSGTDFTLTISSLQPEDFAVYYCQQDYNLP" + + +@pytest.fixture +def mgf_db_search(tmp_path): + """An MGF file with 7 spectra and scan numbers, C+57.021 mass modification considered""" + peptides = [ + "ATSIPAR", + "VTLSCR", + "LLIYGASTR", + "EIVMTQSPPTLSLSPGER", + "MEAPAQLLFLLLLWLPDTTR", + "ASQSVSSSYLTWYQQKPGQAPR", + "FSGSGSGTDFTLTISSLQPEDFAVYYCQQDYNLP", + ] + mgf_file = tmp_path / "db_search.mgf" + return _create_mgf(peptides, mgf_file, c_mod=True) + + +def _create_mgf(peptides, mgf_file, random_state=42, c_mod=False): """ Create a fake MGF file from one or more peptides. @@ -28,20 +58,25 @@ def _create_mgf(peptides, mgf_file, random_state=42): The MGF file to create. random_state : int or numpy.random.Generator, optional The random seed. The charge states are chosen to be 2 or 3 randomly. + c_mod : bool, optional + Whether to use the constant carbamidomethylation + of C in mass calculations. Returns ------- mgf_file : Path """ rng = np.random.default_rng(random_state) - entries = [_create_mgf_entry(p, rng.choice([2, 3])) for p in peptides] + entries = [ + _create_mgf_entry(p, rng.choice([2, 3]), c_mod) for p in peptides + ] with mgf_file.open("w+") as mgf_ref: mgf_ref.write("\n".join(entries)) return mgf_file -def _create_mgf_entry(peptide, charge=2): +def _create_mgf_entry(peptide, charge=2, c_mod=False): """ Create a MassIVE-KB style MGF entry for a single PSM. @@ -51,13 +86,21 @@ def _create_mgf_entry(peptide, charge=2): A peptide sequence. charge : int, optional The peptide charge state. + c_mod : bool, optional + Whether to use the constant carbamidomethylation + of C in mass calculations. Returns ------- str The PSM entry in an MGF file format. 
""" - precursor_mz = calculate_mass(peptide, charge=int(charge)) + if not c_mod: + precursor_mz = calculate_mass(peptide, charge=int(charge)) + else: + aa_mass = std_aa_mass + aa_mass.update({"C": 160.030649}) # Carbamidomethylated C mass + precursor_mz = fast_mass(peptide, charge=int(charge), aa_mass=aa_mass) mzs, intensities = _peptide_to_peaks(peptide, charge) frags = "\n".join([f"{m} {i}" for m, i in zip(mzs, intensities)]) @@ -263,119 +306,6 @@ def tiny_config(tmp_path): return cfg_file -@pytest.fixture -def tiny_fasta_file(tmp_path, fasta_raw_data): - fasta_file = tmp_path / "tiny_fasta.fasta" - with fasta_file.open("w+") as fasta_ref: - fasta_ref.write(fasta_raw_data) - - return fasta_file - - -@pytest.fixture -def fasta_raw_data(): - return ">foo\nMEAPAQLLFLLLLWLPDTTREIVMTQSPPTLSLSPGERVTLSCRASQSVSSSYLTWYQQKPGQAPRLLIYGASTRATSIPARFSGSGSGTDFTLTISSLQPEDFAVYYCQQDYNLP" - - -@pytest.fixture -def mgf_db_search(tmp_path): - """An MGF file with 2 unannotated spectra and scan numbers.""" - peptides = [ - "ATSIPAR", - "VTLSCR", - "LLIYGASTR", - "EIVMTQSPPTLSLSPGER", - "MEAPAQLLFLLLLWLPDTTR", - "ASQSVSSSYLTWYQQKPGQAPR", - "FSGSGSGTDFTLTISSLQPEDFAVYYCQQDYNLP", - ] - mgf_file = tmp_path / "db_search.mgf" - return _create_unannotated_mgf(peptides, mgf_file, c_mod=True) - - -@pytest.fixture -def mgf_small_unannotated(tmp_path): - """An MGF file with 2 unannotated spectra and scan numbers.""" - peptides = ["LESLIEK", "PEPTIDEK", "LESTIEK"] - mgf_file = tmp_path / "small_unannotated.mgf" - return _create_unannotated_mgf(peptides, mgf_file) - - -def _create_unannotated_mgf(peptides, mgf_file, random_state=999, c_mod=False): - """ - Create a fake MGF file from one or more peptides. - This file will have no SEQ= parameter, but will have a SCANS= parameter. - - Parameters - ---------- - peptides : str or list of str - The peptides for which to create spectra. - mgf_file : Path - The MGF file to create. - random_state : int or numpy.random.Generator, optional - The random seed. The charge states are chosen to be 2 or 3 randomly. - c_mod : bool, optional - Whether to use the constant carbamidomethylation - of C in mass calculations. - - Returns - ------- - mgf_file : Path - """ - rng = np.random.default_rng(random_state) - entries = [ - _create_unannotated_mgf_entry(p, idx, rng.choice([2, 3]), c_mod=c_mod) - for idx, p in enumerate(peptides) - ] - with mgf_file.open("w+") as mgf_ref: - mgf_ref.write("\n".join(entries)) - - return mgf_file - - -def _create_unannotated_mgf_entry(peptide, scan_num, charge, c_mod=False): - """ - Create a MassIVE-KB style MGF entry for a single PSM. - Each entry will have no SEQ= parameter, but will have a SCANS= parameter. - - Parameters - ---------- - peptide : str - A peptide sequence. - scan_num : int - The scan number. - charge : int, optional - The peptide charge state. - c_mod : bool, optional - Whether to use the constant carbamidomethylation - of C in mass calculations. - - Returns - ------- - str - The PSM entry in an MGF file format. 
- """ - if not c_mod: - precursor_mz = calculate_mass(peptide, charge=int(charge)) - else: - aa_mass = std_aa_mass - aa_mass.update({"C": 160.030649}) # Carbamidomethylated C mass - precursor_mz = fast_mass(peptide, charge=int(charge), aa_mass=aa_mass) - mzs, intensities = _peptide_to_peaks(peptide, charge) - frags = "\n".join([f"{m} {i}" for m, i in zip(mzs, intensities)]) - - mgf = [ - "BEGIN IONS", - f"TITLE=title::{scan_num}", - f"PEPMASS={precursor_mz}", - f"CHARGE={charge}+", - f"SCANS={scan_num}", - f"{frags}", - "END IONS", - ] - return "\n".join(mgf) - - @pytest.fixture def tide_dir_small(tmp_path): """A directory with a very small TIDE search result.""" From 5f0675f032579e2976718c619969bdfd47cc68c5 Mon Sep 17 00:00:00 2001 From: VarunAnanth2003 Date: Wed, 3 Jul 2024 14:20:56 -0700 Subject: [PATCH 24/84] reordered and renamed variables for consistency --- casanovo/denovo/model.py | 45 ++++++++++++++++++--------------- casanovo/denovo/model_runner.py | 10 ++++---- 2 files changed, 29 insertions(+), 26 deletions(-) diff --git a/casanovo/denovo/model.py b/casanovo/denovo/model.py index 312e7f92..8bb0dbee 100644 --- a/casanovo/denovo/model.py +++ b/casanovo/denovo/model.py @@ -1021,30 +1021,34 @@ def predict_step(self, batch, *args): Returns ------- - predictions: List[Tuple[int, str, float, np.ndarray, np.ndarray]] + predictions: List[Tuple[int, int, float, str, np.ndarray, np.ndarray]] Model predictions for the given batch of spectra containing spectrum - scan number, peptide sequence, Casanovo-DB score, - amino acid-level confidence scores, and precursor information. + ids, precursor charge and m/z, candidate peptide sequences, peptide + scores, and amino acid-level scores. """ batch_res = [] for ( - indexes, + spectrum_i, peptides, precursors, encoded_ms, ) in self.smart_batch_gen(batch): pred, truth = self.decoder(peptides, precursors, *encoded_ms) pred = self.softmax(pred) - score_result, per_aa_score = _calc_match_score( + peptide_scores, aa_scores = _calc_match_score( pred, truth, self.decoder.reverse ) + precursor_info = precursors.cpu().detach().numpy() + precursor_charge = precursor_info[:, 1] + precursor_mz = precursor_info[:, 2] batch_res.append( ( - indexes, + spectrum_i, + precursor_charge, + precursor_mz, peptides, - score_result.cpu().detach().numpy(), - per_aa_score.cpu().detach().numpy(), - precursors.cpu().detach().numpy(), + peptide_scores.cpu().detach().numpy(), + aa_scores.cpu().detach().numpy(), ) ) return batch_res @@ -1121,26 +1125,25 @@ def on_predict_batch_end( if self.out_writer is None: return for ( - indexes, + spectrum_i, + precursor_charge, + precursor_mz, peptides, - score_result, - per_aa_score, - precursors, + peptide_scores, + aa_scores, ) in outputs: - prec_charge = precursors[:, 1] - prec_mz = precursors[:, 2] calc_mz = [ self.peptide_mass_calculator.mass(peptide, charge) - for peptide, charge in zip(peptides, prec_charge) + for peptide, charge in zip(peptides, precursor_charge) ] for row in zip( peptides, - score_result, - prec_charge, - prec_mz, + peptide_scores, + precursor_charge, + precursor_mz, calc_mz, - indexes, - per_aa_score, + spectrum_i, + aa_scores, ): self.out_writer.psms.append(row) diff --git a/casanovo/denovo/model_runner.py b/casanovo/denovo/model_runner.py index 865df71b..1457df38 100644 --- a/casanovo/denovo/model_runner.py +++ b/casanovo/denovo/model_runner.py @@ -333,12 +333,12 @@ def initialize_model( if self.model_filename is None: # Train a model from scratch if no model file is provided. 
+ if db_search: + logger.error("DB search mode requires a model file") + raise ValueError( + "A model file must be provided for DB search mode" + ) if train: - if db_search: - logger.error("Db search mode requires a model file.") - raise ValueError( - "A model file must be provided for DB search mode" - ) self.model = Spec2Pep(**model_params) return # Else we're not training, so a model file must be provided. From b4fd8ff05eaebcf62351627c8ceee2fee3bc23a1 Mon Sep 17 00:00:00 2001 From: VarunAnanth2003 Date: Thu, 4 Jul 2024 14:39:36 -0700 Subject: [PATCH 25/84] casanovo-db full working version with code simplification --- casanovo/data/db_utils.py | 16 +++- casanovo/data/ms_io.py | 80 +--------------- casanovo/denovo/dataloaders.py | 111 ++++++++++++++++++++++ casanovo/denovo/model.py | 159 +++++++++++--------------------- casanovo/denovo/model_runner.py | 32 ++++--- 5 files changed, 198 insertions(+), 200 deletions(-) diff --git a/casanovo/data/db_utils.py b/casanovo/data/db_utils.py index 341a6162..921c75bd 100644 --- a/casanovo/data/db_utils.py +++ b/casanovo/data/db_utils.py @@ -4,9 +4,12 @@ import depthcharge.masses from pyteomics import fasta, parser import bisect +import logging from typing import List, Tuple +logger = logging.getLogger("casanovo") + # CONSTANTS HYDROGEN = 1.007825035 OXYGEN = 15.99491463 @@ -96,17 +99,22 @@ def digest_fasta( semi=semi, ) protein = header.split()[0] - peptide_list.extend([(pep, protein) for pep in pep_set]) + for pep in pep_set: + if len(pep) < min_length or len(pep) > max_length: + continue + if "X" in pep or "U" in pep: + logger.warn( + "Skipping peptide with ambiguous amino acids: %s", pep + ) + continue + peptide_list.append((pep, protein)) else: raise ValueError(f"Digestion type {digestion} not recognized.") # Generate modified peptides mass_calculator = depthcharge.masses.PeptideMass(residues="massivekb") - mass_calculator.masses.update({"X": 0.0}) # TODO: REMOVE? mod_peptide_list = [] for pep, prot in peptide_list: - if len(pep) < min_length or len(pep) > max_length: - continue peptide_isoforms = parser.isoforms( pep, variable_mods=var_mods, diff --git a/casanovo/data/ms_io.py b/casanovo/data/ms_io.py index a701b627..b27f083b 100644 --- a/casanovo/data/ms_io.py +++ b/casanovo/data/ms_io.py @@ -22,13 +22,10 @@ class MztabWriter: ---------- filename : str The name of the mzTab file. - is_db_variant : bool - Whether the mzTab file is for a Casanovo-DB search. """ - def __init__(self, filename: str, is_db_variant: bool = False): + def __init__(self, filename: str): self.filename = filename - self.is_db_variant = is_db_variant self.metadata = [ ("mzTab-version", "1.0.0"), ("mzTab-mode", "Summary"), @@ -150,9 +147,6 @@ def save(self) -> None: """ Export the spectrum identifications to the mzTab file. """ - if self.is_db_variant: - self.save_db_variant() - return with open(self.filename, "w", newline="") as f: writer = csv.writer(f, delimiter="\t", lineterminator=os.linesep) # Write metadata. @@ -192,7 +186,7 @@ def save(self) -> None: "PSM", psm[0], # sequence i, # PSM_ID - "null", # accession + "null" if len(psm) < 8 else psm[7], # accession "null", # unique "null", # database "null", # database_version @@ -215,73 +209,3 @@ def save(self) -> None: psm[6], # opt_ms_run[1]_aa_scores ] ) - - def save_db_variant(self) -> None: - """ - Export the Casanovo-DB search results to the mzTab file. - - Outputs PSMs in the order they were scored - (i.e. the order in the .mgf file). 
- """ - with open(self.filename, "w", newline="") as f: - writer = csv.writer(f, delimiter="\t", lineterminator=os.linesep) - # Write metadata. - for row in self.metadata: - writer.writerow(["MTD", *row]) - # Write PSMs. - writer.writerow( - [ - "PSH", - "sequence", - "PSM_ID", - "accession", - "unique", - "database", - "database_version", - "search_engine", - "search_engine_score[1]", - "modifications", - "retention_time", - "charge", - "exp_mass_to_charge", - "calc_mass_to_charge", - "spectra_ref", - "pre", - "post", - "start", - "end", - "opt_ms_run[1]_aa_scores", - ] - ) - for i, psm in enumerate(self.psms): - writer.writerow( - [ - "PSM", - psm[0], # sequence - f"{psm[5]}:{i}", # PSM_ID (spectrum # :candidate #) - "null", # accession - "null", # unique - "null", # database - "null", # database_version - "null", # search_engine - psm[1], # search_engine_score[1] - "null", # modifications - "null", # retention_time - int(psm[2]), # charge - psm[3], # exp_mass_to_charge - psm[4], # calc_mass_to_charge - psm[5], # spectra_ref - "null", # pre - "null", # post - "null", # start - "null", # end - ",".join( - list( - map( - "{:.5f}".format, - psm[6][psm[6] != 0], - ) - ) - ), # opt_ms_run[1]_aa_scores - ] - ) diff --git a/casanovo/denovo/dataloaders.py b/casanovo/denovo/dataloaders.py index 97bfb2fc..80a4f7dc 100644 --- a/casanovo/denovo/dataloaders.py +++ b/casanovo/denovo/dataloaders.py @@ -3,6 +3,8 @@ import functools import os from typing import List, Optional, Tuple +from functools import partial +import logging import lightning.pytorch as pl import numpy as np @@ -13,6 +15,9 @@ AnnotatedSpectrumDataset, SpectrumDataset, ) +from ..data import db_utils + +logger = logging.getLogger("casanovo") class DeNovoDataModule(pl.LightningDataModule): @@ -176,6 +181,22 @@ def predict_dataloader(self) -> torch.utils.data.DataLoader: """Get the predict DataLoader.""" return self._make_loader(self.test_dataset, self.eval_batch_size) + def db_dataloader(self) -> torch.utils.data.DataLoader: + """Get a special dataloader for DB search""" + return torch.utils.data.DataLoader( + self.test_dataset, + batch_size=self.eval_batch_size, + collate_fn=partial( + prepare_psm_batch, + digest=self.digest, + precursor_tolerance=self.precursor_tolerance, + isotope_error=self.isotope_error, + ), + pin_memory=True, + num_workers=self.n_workers, + shuffle=False, + ) + def prepare_batch( batch: List[Tuple[torch.Tensor, float, int, str]] @@ -214,3 +235,93 @@ def prepare_batch( [precursor_masses, precursor_charges, precursor_mzs] ).T.float() return spectra, precursors, np.asarray(spectrum_ids) + + +def prepare_psm_batch( + batch: List[Tuple[torch.Tensor, float, int, str]], + digest: List[Tuple[str, float, str]], + precursor_tolerance: float, + isotope_error: str, +): + """ + Collate MS/MS spectra into a batch for DB search. + + The MS/MS spectra will be padded so that they fit nicely as a tensor. + However, the padded elements are ignored during the subsequent steps. + + Parameters + ---------- + batch : List[Tuple[torch.Tensor, float, int, str]] + A batch of data from an AnnotatedSpectrumDataset, consisting of for each + spectrum (i) a tensor with the m/z and intensity peak values, (ii), the + precursor m/z, (iii) the precursor charge, (iv) the spectrum identifier. + digest : List[Tuple[str, float, str]] + A list of tuples containing the peptide sequence, mass, and associated protein + from digesting a .fasta file. Sorted by mass in ascending order. Uses neutral masses. 
+ precursor_tolerance : float + The precursor mass tolerance in parts-per-million. + isotope_error : str + The isotope error levels to consider. + + Returns + ------- + all_spectra : torch.Tensor of shape (batch_size, n_peaks, 2) + The padded mass spectra tensor with the m/z and intensity peak values + for each spectrum. + all_precursors : torch.Tensor of shape (batch_size, 3) + A tensor with the precursor neutral mass, precursor charge, and + precursor m/z. + all_spectrum_ids : np.ndarray + The spectrum identifiers. + all_peptides : List[str] + The candidate peptides for each spectrum. + all_proteins : List[str] + The associated proteins for each candidate peptide. + """ + spectra, precursor_mzs, precursor_charges, spectrum_ids = list(zip(*batch)) + spectra = torch.nn.utils.rnn.pad_sequence(spectra, batch_first=True) + + precursor_mzs = torch.tensor(precursor_mzs) + precursor_charges = torch.tensor(precursor_charges) + precursor_masses = (precursor_mzs - 1.007276) * precursor_charges + precursors = torch.vstack( + [precursor_masses, precursor_charges, precursor_mzs] + ).T.float() + + all_spectra = [] + all_precursors = [] + all_spectrum_ids = [] + all_peptides = [] + all_proteins = [] + for idx in range(len(batch)): + digest_data = db_utils.get_candidates( + precursor_mzs[idx], + precursor_charges[idx], + digest, + precursor_tolerance, + isotope_error, + ) + try: + spec_peptides, _, pep_protein = list(zip(*digest_data)) + all_spectra.append( + spectra[idx].unsqueeze(0).repeat(len(spec_peptides), 1, 1) + ) + all_precursors.append( + precursors[idx].unsqueeze(0).repeat(len(spec_peptides), 1) + ) + all_spectrum_ids.extend([spectrum_ids[idx]] * len(spec_peptides)) + all_peptides.extend(spec_peptides) + all_proteins.extend(pep_protein) + except ValueError: + logger.warning( + "No candidates found for spectrum %s", spectrum_ids[idx] + ) + continue + + return ( + torch.cat(all_spectra, dim=0), + torch.cat(all_precursors, dim=0), + all_spectrum_ids, + all_peptides, + all_proteins, + ) diff --git a/casanovo/denovo/model.py b/casanovo/denovo/model.py index 8bb0dbee..2256946c 100644 --- a/casanovo/denovo/model.py +++ b/casanovo/denovo/model.py @@ -1008,6 +1008,7 @@ class DbSpec2Pep(Spec2Pep): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) + self.total_psms = 0 def predict_step(self, batch, *args): """ @@ -1015,137 +1016,85 @@ def predict_step(self, batch, *args): Parameters ---------- - batch : Tuple[torch.Tensor, torch.Tensor, torch.Tensor] + batch : Tuple[torch.Tensor, torch.Tensor, np.array, List[str], List[str]] A batch of (i) MS/MS spectra, (ii) precursor information, (iii) - spectrum identifiers as torch Tensors. + spectrum identifiers, (iv) candidate peptides, (v) associated proteins. Returns ------- - predictions: List[Tuple[int, int, float, str, np.ndarray, np.ndarray]] + predictions: List[Tuple[int, int, float, str, np.ndarray, np.ndarray, str]] Model predictions for the given batch of spectra containing spectrum ids, precursor charge and m/z, candidate peptide sequences, peptide - scores, and amino acid-level scores. + scores, amino acid-level scores, and associated proteins. 
""" - batch_res = [] + predictions = [] + pred, truth = self.decoder(batch[3], batch[1], *self.encoder(batch[0])) + pred = self.softmax(pred) + all_scores, per_aa_scores = _calc_match_score( + pred, truth, self.decoder.reverse + ) for ( + precursor_charge, + precursor_mz, spectrum_i, - peptides, - precursors, - encoded_ms, - ) in self.smart_batch_gen(batch): - pred, truth = self.decoder(peptides, precursors, *encoded_ms) - pred = self.softmax(pred) - peptide_scores, aa_scores = _calc_match_score( - pred, truth, self.decoder.reverse - ) - precursor_info = precursors.cpu().detach().numpy() - precursor_charge = precursor_info[:, 1] - precursor_mz = precursor_info[:, 2] - batch_res.append( + peptide_score, + aa_scores, + peptide, + protein, + ) in zip( + batch[1][:, 1].cpu().detach().numpy(), + batch[1][:, 2].cpu().detach().numpy(), + batch[2], + all_scores.cpu().detach().numpy(), + per_aa_scores.cpu().detach().numpy(), + batch[3], + batch[4], + ): + predictions.append( ( spectrum_i, precursor_charge, precursor_mz, - peptides, - peptide_scores.cpu().detach().numpy(), - aa_scores.cpu().detach().numpy(), - ) - ) - return batch_res - - def smart_batch_gen(self, spectrum_batch): - """ - Transforms a batch of spectra into multiple equally-sized batches of PSMs. - - Parameters - ---------- - spectrum batch : Tuple[torch.Tensor, torch.Tensor, torch.Tensor] - A batch of (i) MS/MS spectra, (ii) precursor information, (iii) - spectrum identifiers as torch Tensors. - - Yields - ------- - psm_batch: Tuple[List[int], List[str], torch.Tensor, Tuple[torch.Tensor, torch.Tensor]] - A batch of PSMs containing the spectrum index, peptide sequence, - precursor information, and encoded MS/MS spectra. - """ - all_psm = [] - batch_size = len(spectrum_batch[0]) - enc = self.encoder(spectrum_batch[0]) - enc = list(zip(*enc)) - precursors = spectrum_batch[1] - indexes = spectrum_batch[2] - for idx in range(batch_size): - digest_data = db_utils.get_candidates( - precursors[idx][2], - precursors[idx][1], - self.digest, - self.precursor_tolerance, - self.isotope_error, - ) - try: - spec_peptides, pep_masses, pep_protein = list( - zip(*digest_data) - ) - except ValueError: - logger.info("No peptides found for spectrum %s", indexes[idx]) - continue - spec_precursors = [precursors[idx]] * len(spec_peptides) - spec_enc = [enc[idx]] * len(spec_peptides) - spec_idx = [indexes[idx]] * len(spec_peptides) - all_psm.extend( - list( - zip( - spec_enc, - spec_precursors, - spec_peptides, - spec_idx, - ) + peptide, + peptide_score, + aa_scores, + protein, ) ) - # Continually grab num_pairs items from all_psm until list is exhausted - while len(all_psm) > 0: - psm_batch = all_psm[:batch_size] - all_psm = all_psm[batch_size:] - psm_batch = list(zip(*psm_batch)) - encoded_ms = ( - torch.stack([a[0] for a in psm_batch[0]]), - torch.stack([a[1] for a in psm_batch[0]]), - ) - prec_data = torch.stack(psm_batch[1]) - pep_str = list(psm_batch[2]) - indexes = [a[1] for a in psm_batch[3]] - yield (indexes, pep_str, prec_data, encoded_ms) + self.total_psms += len(predictions) + return predictions def on_predict_batch_end( self, outputs: List[Tuple[np.ndarray, List[str], torch.Tensor]], *args, ) -> None: - if self.out_writer is None: - return + """ + Write the database search results to the output file. 
+ """ for ( spectrum_i, - precursor_charge, + charge, precursor_mz, - peptides, - peptide_scores, + peptide, + peptide_score, aa_scores, + protein, ) in outputs: - calc_mz = [ - self.peptide_mass_calculator.mass(peptide, charge) - for peptide, charge in zip(peptides, precursor_charge) - ] - for row in zip( - peptides, - peptide_scores, - precursor_charge, - precursor_mz, - calc_mz, - spectrum_i, - aa_scores, - ): - self.out_writer.psms.append(row) + if len(peptide) == 0: + continue + self.out_writer.psms.append( + ( + peptide, + tuple(spectrum_i), + peptide_score, + charge, + precursor_mz, + self.peptide_mass_calculator.mass(peptide, charge), + ",".join(list(map("{:.5f}".format, aa_scores))), + protein, + ), + ) def _calc_match_score( diff --git a/casanovo/denovo/model_runner.py b/casanovo/denovo/model_runner.py index 1457df38..3286f4b8 100644 --- a/casanovo/denovo/model_runner.py +++ b/casanovo/denovo/model_runner.py @@ -10,6 +10,8 @@ from pathlib import Path from typing import Iterable, List, Optional, Union +import time + import lightning.pytorch as pl import numpy as np import torch @@ -124,19 +126,21 @@ def db_search( ------- self """ - self.writer = ms_io.MztabWriter( - Path(output).with_suffix(".mztab"), is_db_variant=True - ) + self.writer = ms_io.MztabWriter(Path(output).with_suffix(".mztab")) self.writer.set_metadata( self.config, model=str(self.model_filename), config_filename=self.config.file, ) - self.initialize_trainer(train=True) self.initialize_model(train=False, db_search=True) self.model.out_writer = self.writer - self.model.digest = db_utils.digest_fasta( + test_index = self._get_index(peak_path, False, "db search") + self.writer.set_ms_run(test_index.ms_files) + + self.initialize_data_module(test_index=test_index) + self.loaders.setup(stage="test", annotated=False) + self.loaders.digest = db_utils.digest_fasta( fasta_path, enzyme, digestion, @@ -145,14 +149,16 @@ def db_search( min_length, max_length, ) - self.model.precursor_tolerance = precursor_tolerance - self.model.isotope_error = isotope_error - - test_index = self._get_index(peak_path, False, "db search") - self.writer.set_ms_run(test_index.ms_files) - self.initialize_data_module(test_index=test_index) - self.loaders.setup(stage="test", annotated=False) - self.trainer.predict(self.model, self.loaders.predict_dataloader()) + self.loaders.precursor_tolerance = precursor_tolerance + self.loaders.isotope_error = isotope_error + + t1 = time.time() + self.trainer.predict(self.model, self.loaders.db_dataloader()) + t2 = time.time() + logger.info("Database search took %.3f seconds", t2 - t1) + logger.info("Scored %s PSMs", self.model.total_psms) + logger.info("%.3f PSMs per second", self.model.total_psms / (t2 - t1)) + logger.info("%s seconds per PSM", (t2 - t1) / self.model.total_psms) def train( self, From 35ba7d497cbc0c044ca5e13fd8e6e09162f77590 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Thu, 4 Jul 2024 21:50:44 +0000 Subject: [PATCH 26/84] Generate new screengrabs with rich-codex --- docs/images/configure-help.svg | 154 +++++++++++++++------- docs/images/evaluate-help.svg | 182 ++++++++++++++------------ docs/images/help.svg | 224 ++++++++++++++------------------ docs/images/sequence-help.svg | 182 ++++++++++++++------------ docs/images/train-help.svg | 228 ++++++++++++++------------------- 5 files changed, 493 insertions(+), 477 deletions(-) diff --git a/docs/images/configure-help.svg b/docs/images/configure-help.svg index 0822927a..4092bce3 100644 --- a/docs/images/configure-help.svg +++ 
b/docs/images/configure-help.svg
[Editor's note: the bodies of the five regenerated screengrab SVGs
(configure-help.svg, evaluate-help.svg, help.svg, sequence-help.svg, and
train-help.svg) are omitted here. In each file, the previous rendering of the
corresponding `casanovo ... --help` output is replaced by the same Python
traceback: importing casanovo fails via depthcharge -> tensorboard with
"AttributeError: `np.string_` was removed in the NumPy 2.0 release. Use
`np.bytes_` instead.", i.e. the rich-codex screengrab run crashed under
NumPy 2.0 instead of rendering the help text.]

From f8a1a8964f929b793cd58844072d76656b4ac0f1 Mon Sep 17 00:00:00 2001
From: VarunAnanth2003
Date: Mon, 8 Jul 2024 12:14:52 -0700
Subject: [PATCH 27/84] fix batching issues

---
 casanovo/denovo/model.py        | 71 ++++++++++++++++++---------------
 casanovo/denovo/model_runner.py |  1 +
 2 files changed, 40 insertions(+), 32 deletions(-)

diff --git a/casanovo/denovo/model.py b/casanovo/denovo/model.py
index 2256946c..3a069dcd 100644
--- a/casanovo/denovo/model.py
+++ b/casanovo/denovo/model.py
@@ -1009,6 +1009,7 @@ class DbSpec2Pep(Spec2Pep):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
         self.total_psms = 0
+        self.psm_batch_size = 1024
 
     def predict_step(self, batch, *args):
         """
@@ -1028,39 +1029,45 @@ def predict_step(self, batch, *args):
             scores, amino acid-level scores, and associated proteins.
""" predictions = [] - pred, truth = self.decoder(batch[3], batch[1], *self.encoder(batch[0])) - pred = self.softmax(pred) - all_scores, per_aa_scores = _calc_match_score( - pred, truth, self.decoder.reverse - ) - for ( - precursor_charge, - precursor_mz, - spectrum_i, - peptide_score, - aa_scores, - peptide, - protein, - ) in zip( - batch[1][:, 1].cpu().detach().numpy(), - batch[1][:, 2].cpu().detach().numpy(), - batch[2], - all_scores.cpu().detach().numpy(), - per_aa_scores.cpu().detach().numpy(), - batch[3], - batch[4], - ): - predictions.append( - ( - spectrum_i, - precursor_charge, - precursor_mz, - peptide, - peptide_score, - aa_scores, - protein, - ) + while len(batch[0]) > 0: + next_batch = [b[self.psm_batch_size :] for b in batch] + batch = [b[: self.psm_batch_size] for b in batch] + pred, truth = self.decoder( + batch[3], batch[1], *self.encoder(batch[0]) ) + pred = self.softmax(pred) + all_scores, per_aa_scores = _calc_match_score( + pred, truth, self.decoder.reverse + ) + for ( + precursor_charge, + precursor_mz, + spectrum_i, + peptide_score, + aa_scores, + peptide, + protein, + ) in zip( + batch[1][:, 1].cpu().detach().numpy(), + batch[1][:, 2].cpu().detach().numpy(), + batch[2], + all_scores.cpu().detach().numpy(), + per_aa_scores.cpu().detach().numpy(), + batch[3], + batch[4], + ): + predictions.append( + ( + spectrum_i, + precursor_charge, + precursor_mz, + peptide, + peptide_score, + aa_scores, + protein, + ) + ) + batch = next_batch self.total_psms += len(predictions) return predictions diff --git a/casanovo/denovo/model_runner.py b/casanovo/denovo/model_runner.py index 3286f4b8..a6b59ed9 100644 --- a/casanovo/denovo/model_runner.py +++ b/casanovo/denovo/model_runner.py @@ -135,6 +135,7 @@ def db_search( self.initialize_trainer(train=True) self.initialize_model(train=False, db_search=True) self.model.out_writer = self.writer + self.model.psm_batch_size = self.config.predict_batch_size test_index = self._get_index(peak_path, False, "db search") self.writer.set_ms_run(test_index.ms_files) From 7cb8e141ccab5b865a3af00711d290cd6cab788d Mon Sep 17 00:00:00 2001 From: VarunAnanth2003 Date: Mon, 12 Aug 2024 14:50:18 -0700 Subject: [PATCH 28/84] small fixes regarding documentation, import syntax, etc. --- casanovo/casanovo.py | 39 ++++++---- casanovo/data/db_utils.py | 71 +++++++++-------- casanovo/denovo/dataloaders.py | 10 +-- casanovo/denovo/model.py | 31 ++++---- casanovo/denovo/model_runner.py | 24 ++---- tests/conftest.py | 11 +-- tests/unit_tests/test_unit.py | 132 +++++++++++--------------------- 7 files changed, 137 insertions(+), 181 deletions(-) diff --git a/casanovo/casanovo.py b/casanovo/casanovo.py index 8ae9a81b..4b9b4e38 100644 --- a/casanovo/casanovo.py +++ b/casanovo/casanovo.py @@ -130,7 +130,7 @@ def sequence( ) -> None: """De novo sequence peptides from tandem mass spectra. - PEAK_PATH must be one or more mzMl, mzXML, or MGF files from which + PEAK_PATH must be one or more mzML, mzXML, or MGF files from which to sequence peptides. """ output = setup_logging(output, verbosity) @@ -205,7 +205,7 @@ def sequence( ) @click.option( "--digestion", - help="Digestion: full, partial", + help="Full: standard digestion. 
Semi: Include products of semi-specific cleavage", type=click.Choice( ["full", "partial"], case_sensitive=False, @@ -214,37 +214,41 @@ def sequence( ) @click.option( "--missed_cleavages", - help="Number of allowed missed cleavages", + help="Number of allowed missed cleavages when digesting protein", type=int, default=0, ) @click.option( "--max_mods", - help="Maximum number of modifications per peptide", + help="Maximum number of amino acid modifications per peptide", type=int, default=0, ) @click.option( - "--min_length", - help="Minimum peptide length", + "--min_peptide_length", + help="Minimum peptide length to consider", type=int, default=6, ) @click.option( - "--max_length", - help="Maximum peptide length", + "--max_peptide_length", + help="Maximum peptide length to consider", type=int, default=50, ) @click.option( "--precursor_tolerance", - help="Precursor tolerance window size (ppm)", - type=int, + help="Precursor tolerance window size (units: ppm)", + type=float, default=20, ) @click.option( "--isotope_error", - help="Isotope error levels to consider (list of ints, e.g: 1,2)", + help="Isotope error levels to consider. \ + Creates multiple mass windows to consider per spectrum \ + to account for observed mass not matching monoisotopic mass \ + due to the instrument assigning the 13C isotope \ + peak as the precursor (list of ints, e.g: 1,2)", type=str, default="0", ) @@ -255,9 +259,9 @@ def db_search( digestion: str, missed_cleavages: int, max_mods: int, - min_length: int, - max_length: int, - precursor_tolerance: int, + min_peptide_length: int, + max_peptide_length: int, + precursor_tolerance: float, isotope_error: str, model: Optional[str], config: Optional[str], @@ -266,7 +270,8 @@ def db_search( ) -> None: """Perform a database search on MS/MS data using Casanovo-DB. - PEAK_PATH must be one MGF file. FASTA_PATH must be one FASTA file. + PEAK_PATH must be one or more mzML, mzXML, or MGF files. + FASTA_PATH must be one FASTA file. """ output = setup_logging(output, verbosity) config, model = setup_model(model, config, output, False) @@ -284,8 +289,8 @@ def db_search( digestion, missed_cleavages, max_mods, - min_length, - max_length, + min_peptide_length, + max_peptide_length, precursor_tolerance, isotope_error, output, diff --git a/casanovo/data/db_utils.py b/casanovo/data/db_utils.py index 921c75bd..1af09a47 100644 --- a/casanovo/data/db_utils.py +++ b/casanovo/data/db_utils.py @@ -1,15 +1,16 @@ """Unique methods used within db-search mode""" -import os -import depthcharge.masses -from pyteomics import fasta, parser import bisect import logging - +import os from typing import List, Tuple +import depthcharge.masses +from pyteomics import fasta, parser + logger = logging.getLogger("casanovo") + # CONSTANTS HYDROGEN = 1.007825035 OXYGEN = 15.99491463 @@ -51,8 +52,8 @@ def digest_fasta( digestion: str, missed_cleavages: int, max_mods: int, - min_length: int, - max_length: int, + min_peptide_length: int, + max_peptide_length: int, ): """ Digests a FASTA file and returns the peptides, their masses, and associated protein. @@ -70,9 +71,9 @@ def digest_fasta( The number of missed cleavages to allow. max_mods : int The maximum number of modifications to allow per peptide. - min_length : int + min_peptide_length : int The minimum length of peptides to consider. - max_length : int + max_peptide_length : int The maximum length of peptides to consider. Returns @@ -81,35 +82,36 @@ def digest_fasta( A list of tuples containing the peptide sequence, mass, and associated protein. 
Sorted by neutral mass in ascending order. """ - - # Verify the eistence of the file: + # Verify the existence of the file: if not os.path.isfile(fasta_filename): - print(f"File {fasta_filename} does not exist.") + logger.error("File %s does not exist.", fasta_filename) raise FileNotFoundError(f"File {fasta_filename} does not exist.") fasta_data = fasta.read(fasta_filename) peptide_list = [] - if digestion in ["full", "partial"]: - semi = True if digestion == "partial" else False - for header, seq in fasta_data: - pep_set = parser.cleave( - seq, - rule=parser.expasy_rules[enzyme], - missed_cleavages=missed_cleavages, - semi=semi, - ) - protein = header.split()[0] - for pep in pep_set: - if len(pep) < min_length or len(pep) > max_length: - continue - if "X" in pep or "U" in pep: - logger.warn( - "Skipping peptide with ambiguous amino acids: %s", pep - ) - continue - peptide_list.append((pep, protein)) - else: + if digestion not in ["full", "partial"]: + logger.error("Digestion type %s not recognized.", digestion) raise ValueError(f"Digestion type {digestion} not recognized.") + semi = digestion == "partial" + for header, seq in fasta_data: + pep_set = parser.cleave( + seq, + rule=parser.expasy_rules[enzyme], + missed_cleavages=missed_cleavages, + semi=semi, + ) + protein = header.split()[0] + for pep in pep_set: + if len(pep) < min_peptide_length or len(pep) > max_peptide_length: + continue + if any( + aa in pep for aa in "BJOUXZ" + ): # Check for incorrect AA letters + logger.warn( + "Skipping peptide with ambiguous amino acids: %s", pep + ) + continue + peptide_list.append((pep, protein)) # Generate modified peptides mass_calculator = depthcharge.masses.PeptideMass(residues="massivekb") @@ -136,7 +138,7 @@ def get_candidates( precursor_mz: float, charge: int, peptide_list: List[Tuple[str, float, str]], - precursor_tolerance: int, + precursor_tolerance: float, isotope_error: str, ): """ @@ -156,7 +158,6 @@ def get_candidates( isotope_error : str The isotope error levels to consider. """ - candidates = set() isotope_error = [int(x) for x in isotope_error.split(",")] @@ -219,7 +220,9 @@ def _to_raw_mass(mz_mass, charge): def get_mass_indices(masses, m_low, m_high): - """Grabs mass indices from a list of mass values that fall within a specified range. + """Grabs mass indices that fall within a specified range. + + Pulls from masses, a list of mass values. Requires that the mass values are sorted in ascending order. 
Parameters diff --git a/casanovo/denovo/dataloaders.py b/casanovo/denovo/dataloaders.py index 80a4f7dc..14a0ff99 100644 --- a/casanovo/denovo/dataloaders.py +++ b/casanovo/denovo/dataloaders.py @@ -2,20 +2,20 @@ import functools import os -from typing import List, Optional, Tuple -from functools import partial import logging +from typing import List, Optional, Tuple +from depthcharge.data import AnnotatedSpectrumIndex import lightning.pytorch as pl import numpy as np import torch -from depthcharge.data import AnnotatedSpectrumIndex +from ..data import db_utils from ..data.datasets import ( AnnotatedSpectrumDataset, SpectrumDataset, ) -from ..data import db_utils + logger = logging.getLogger("casanovo") @@ -186,7 +186,7 @@ def db_dataloader(self) -> torch.utils.data.DataLoader: return torch.utils.data.DataLoader( self.test_dataset, batch_size=self.eval_batch_size, - collate_fn=partial( + collate_fn=functools.partial( prepare_psm_batch, digest=self.digest, precursor_tolerance=self.precursor_tolerance, diff --git a/casanovo/denovo/model.py b/casanovo/denovo/model.py index 3a069dcd..79848682 100644 --- a/casanovo/denovo/model.py +++ b/casanovo/denovo/model.py @@ -16,7 +16,7 @@ from . import evaluate from .. import config -from ..data import ms_io, db_utils +from ..data import ms_io logger = logging.getLogger("casanovo") @@ -991,7 +991,8 @@ def configure_optimizers( class DbSpec2Pep(Spec2Pep): """ - Subclass of Spec2Pep for the use of Casanovo as an MS/MS database search score function. + Subclass of Spec2Pep for the use of Casanovo as an \ + MS/MS database search score function. Uses teacher forcing to 'query' Casanovo for its score for each AA within a candidate peptide, and takes the geometric average of these scores @@ -1008,7 +1009,6 @@ class DbSpec2Pep(Spec2Pep): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - self.total_psms = 0 self.psm_batch_size = 1024 def predict_step(self, batch, *args): @@ -1029,11 +1029,14 @@ def predict_step(self, batch, *args): scores, amino acid-level scores, and associated proteins. 
""" predictions = [] - while len(batch[0]) > 0: - next_batch = [b[self.psm_batch_size :] for b in batch] - batch = [b[: self.psm_batch_size] for b in batch] + for start_idx in range(0, len(batch[0]), self.psm_batch_size): + current_batch = [ + b[start_idx : start_idx + self.psm_batch_size] for b in batch + ] pred, truth = self.decoder( - batch[3], batch[1], *self.encoder(batch[0]) + current_batch[3], + current_batch[1], + *self.encoder(current_batch[0]), ) pred = self.softmax(pred) all_scores, per_aa_scores = _calc_match_score( @@ -1048,13 +1051,13 @@ def predict_step(self, batch, *args): peptide, protein, ) in zip( - batch[1][:, 1].cpu().detach().numpy(), - batch[1][:, 2].cpu().detach().numpy(), - batch[2], + current_batch[1][:, 1].cpu().detach().numpy(), + current_batch[1][:, 2].cpu().detach().numpy(), + current_batch[2], all_scores.cpu().detach().numpy(), per_aa_scores.cpu().detach().numpy(), - batch[3], - batch[4], + current_batch[3], + current_batch[4], ): predictions.append( ( @@ -1067,8 +1070,6 @@ def predict_step(self, batch, *args): protein, ) ) - batch = next_batch - self.total_psms += len(predictions) return predictions def on_predict_batch_end( @@ -1088,8 +1089,6 @@ def on_predict_batch_end( aa_scores, protein, ) in outputs: - if len(peptide) == 0: - continue self.out_writer.psms.append( ( peptide, diff --git a/casanovo/denovo/model_runner.py b/casanovo/denovo/model_runner.py index a6b59ed9..c2b71098 100644 --- a/casanovo/denovo/model_runner.py +++ b/casanovo/denovo/model_runner.py @@ -10,8 +10,6 @@ from pathlib import Path from typing import Iterable, List, Optional, Union -import time - import lightning.pytorch as pl import numpy as np import torch @@ -20,7 +18,7 @@ from lightning.pytorch.callbacks import ModelCheckpoint from ..config import Config -from ..data import ms_io, db_utils +from ..data import db_utils, ms_io from ..denovo.dataloaders import DeNovoDataModule from ..denovo.model import Spec2Pep, DbSpec2Pep @@ -89,8 +87,8 @@ def db_search( digestion: str, missed_cleavages: int, max_mods: int, - min_length: int, - max_length: int, + min_peptide_length: int, + max_peptide_length: int, precursor_tolerance: float, isotope_error: str, output: str, @@ -100,7 +98,7 @@ def db_search( Parameters ---------- peak_path : Iterable[str] - The path to the .mgf data file for database search. + The paths to the .mgf data files for database search. fasta_path : str The path to the FASTA file for database search. enzyme : str @@ -111,9 +109,9 @@ def db_search( The number of missed cleavages allowed. max_mods : int The maximum number of modifications allowed per peptide. - min_length : int + min_peptide_length : int The minimum peptide length. - max_length : int + max_peptide_length : int The maximum peptide length. precursor_tolerance : float The precursor mass tolerance in ppm. 
@@ -147,19 +145,13 @@ def db_search( digestion, missed_cleavages, max_mods, - min_length, - max_length, + min_peptide_length, + max_peptide_length, ) self.loaders.precursor_tolerance = precursor_tolerance self.loaders.isotope_error = isotope_error - t1 = time.time() self.trainer.predict(self.model, self.loaders.db_dataloader()) - t2 = time.time() - logger.info("Database search took %.3f seconds", t2 - t1) - logger.info("Scored %s PSMs", self.model.total_psms) - logger.info("%.3f PSMs per second", self.model.total_psms / (t2 - t1)) - logger.info("%s seconds per PSM", (t2 - t1) / self.model.total_psms) def train( self, diff --git a/tests/conftest.py b/tests/conftest.py index b2244308..60afcd83 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -17,19 +17,16 @@ def mgf_small(tmp_path): @pytest.fixture -def tiny_fasta_file(tmp_path, fasta_raw_data): +def tiny_fasta_file(tmp_path): fasta_file = tmp_path / "tiny_fasta.fasta" with fasta_file.open("w+") as fasta_ref: - fasta_ref.write(fasta_raw_data) + fasta_ref.write( + ">foo\nMEAPAQLLFLLLLWLPDTTREIVMTQSPPTLSLSPGERVTLSCRASQSVSSSYLTWYQQKPGQAPRLLIYGASTRATSIPARFSGSGSGTDFTLTISSLQPEDFAVYYCQQDYNLP" + ) return fasta_file -@pytest.fixture -def fasta_raw_data(): - return ">foo\nMEAPAQLLFLLLLWLPDTTREIVMTQSPPTLSLSPGERVTLSCRASQSVSSSYLTWYQQKPGQAPRLLIYGASTRATSIPARFSGSGSGTDFTLTISSLQPEDFAVYYCQQDYNLP" - - @pytest.fixture def mgf_db_search(tmp_path): """An MGF file with 7 spectra and scan numbers, C+57.021 mass modification considered""" diff --git a/tests/unit_tests/test_unit.py b/tests/unit_tests/test_unit.py index e3707917..419cf3ef 100644 --- a/tests/unit_tests/test_unit.py +++ b/tests/unit_tests/test_unit.py @@ -2,6 +2,7 @@ import heapq import os import platform +import re import shutil import tempfile @@ -10,11 +11,10 @@ import numpy as np import pytest import torch -import re from casanovo import casanovo from casanovo import utils -from casanovo.data import ms_io, db_utils +from casanovo.data import db_utils, ms_io from casanovo.data.datasets import SpectrumDataset, AnnotatedSpectrumDataset from casanovo.denovo.evaluate import aa_match_batch, aa_match_metrics from casanovo.denovo.model import Spec2Pep, _aa_pep_score, _calc_match_score @@ -220,10 +220,7 @@ def test_calc_match_score(): assert np.sum(masked_per_aa_scores.numpy()[3]) == 3 -def test_digest_fasta_cleave(fasta_raw_data): - - with open("temp_fasta", "w") as file: - file.write(fasta_raw_data) +def test_digest_fasta_cleave(tiny_fasta_file): # No missed cleavages expected_normal = [ @@ -275,49 +272,24 @@ def test_digest_fasta_cleave(fasta_raw_data): "EIVMTQSPPTLSLSPGERVTLSC+57.021RASQSVSSSYLTWYQQKPGQAPR", "LLIYGASTRATSIPARFSGSGSGTDFTLTISSLQPEDFAVYYC+57.021QQDYNLP", ] + for missed_cleavages, expected in zip( + (0, 1, 3), + (expected_normal, expected_1missedcleavage, expected_3missedcleavage), + ): + peptide_list = db_utils.digest_fasta( + fasta_filename=str(tiny_fasta_file), + enzyme="trypsin", + digestion="full", + missed_cleavages=missed_cleavages, + max_mods=0, + min_peptide_length=6, + max_peptide_length=50, + ) + peptide_list = [x[0] for x in peptide_list] + assert peptide_list == expected - peptide_list = db_utils.digest_fasta( - fasta_filename="temp_fasta", - enzyme="trypsin", - digestion="full", - missed_cleavages=0, - max_mods=0, - min_length=6, - max_length=50, - ) - peptide_list = [x[0] for x in peptide_list] - assert peptide_list == expected_normal - - peptide_list = db_utils.digest_fasta( - fasta_filename="temp_fasta", - enzyme="trypsin", - digestion="full", - missed_cleavages=1, - 
max_mods=0, - min_length=6, - max_length=50, - ) - peptide_list = [x[0] for x in peptide_list] - assert peptide_list == expected_1missedcleavage - - peptide_list = db_utils.digest_fasta( - fasta_filename="temp_fasta", - enzyme="trypsin", - digestion="full", - missed_cleavages=3, - max_mods=0, - min_length=6, - max_length=50, - ) - peptide_list = [x[0] for x in peptide_list] - assert peptide_list == expected_3missedcleavage - - -def test_digest_fasta_mods(fasta_raw_data): - - with open("temp_fasta", "w") as file: - file.write(fasta_raw_data) +def test_digest_fasta_mods(tiny_fasta_file): # 1 modification allowed # fixed: C+57.02146 # variable: 1M+15.994915,1N+0.984016,1Q+0.984016 @@ -373,13 +345,13 @@ def test_digest_fasta_mods(fasta_raw_data): ] peptide_list = db_utils.digest_fasta( - fasta_filename="temp_fasta", + fasta_filename=str(tiny_fasta_file), enzyme="trypsin", digestion="full", missed_cleavages=0, max_mods=1, - min_length=6, - max_length=50, + min_peptide_length=6, + max_peptide_length=50, ) peptide_list = [x[0] for x in peptide_list] peptide_list = [ @@ -392,11 +364,7 @@ def test_digest_fasta_mods(fasta_raw_data): assert peptide_list == expected_1mod -def test_length_restrictions(fasta_raw_data): - - with open("temp_fasta", "w") as file: - file.write(fasta_raw_data) - +def test_length_restrictions(tiny_fasta_file): # length between 20 and 50 expected_long = [ "MEAPAQLLFLLLLWLPDTTR", @@ -408,35 +376,31 @@ def test_length_restrictions(fasta_raw_data): expected_short = ["ATSIPAR", "VTLSC+57.021R"] peptide_list = db_utils.digest_fasta( - fasta_filename="temp_fasta", + fasta_filename=str(tiny_fasta_file), enzyme="trypsin", digestion="full", missed_cleavages=0, max_mods=0, - min_length=20, - max_length=50, + min_peptide_length=20, + max_peptide_length=50, ) peptide_list = [x[0] for x in peptide_list] assert peptide_list == expected_long peptide_list = db_utils.digest_fasta( - fasta_filename="temp_fasta", + fasta_filename=str(tiny_fasta_file), enzyme="trypsin", digestion="full", missed_cleavages=0, max_mods=0, - min_length=6, - max_length=8, + min_peptide_length=6, + max_peptide_length=8, ) peptide_list = [x[0] for x in peptide_list] assert peptide_list == expected_short -def test_digest_fasta_enzyme(fasta_raw_data): - - with open("temp_fasta", "w") as file: - file.write(fasta_raw_data) - +def test_digest_fasta_enzyme(tiny_fasta_file): # arg-c enzyme expected_argc = [ "ATSIPAR", @@ -452,35 +416,31 @@ def test_digest_fasta_enzyme(fasta_raw_data): expected_aspn = ["DFAVYYC+57.021QQ", "DFTLTISSLQPE", "MEAPAQLLFLLLLWLP"] peptide_list = db_utils.digest_fasta( - fasta_filename="temp_fasta", + fasta_filename=str(tiny_fasta_file), enzyme="arg-c", digestion="full", missed_cleavages=0, max_mods=0, - min_length=6, - max_length=50, + min_peptide_length=6, + max_peptide_length=50, ) peptide_list = [x[0] for x in peptide_list] assert peptide_list == expected_argc peptide_list = db_utils.digest_fasta( - fasta_filename="temp_fasta", + fasta_filename=str(tiny_fasta_file), enzyme="asp-n", digestion="full", missed_cleavages=0, max_mods=0, - min_length=6, - max_length=50, + min_peptide_length=6, + max_peptide_length=50, ) peptide_list = [x[0] for x in peptide_list] assert peptide_list == expected_aspn -def test_get_candidates(fasta_raw_data): - - with open("temp_fasta", "w") as file: - file.write(fasta_raw_data) - +def test_get_candidates(tiny_fasta_file): # precursor_window is 10000 expected_smallwindow = ["LLIYGASTR"] @@ -491,13 +451,13 @@ def test_get_candidates(fasta_raw_data): expected_widewindow = 
["ATSIPAR", "VTLSC+57.021R", "LLIYGASTR"] peptide_list = db_utils.digest_fasta( - fasta_filename="temp_fasta", + fasta_filename=str(tiny_fasta_file), enzyme="trypsin", digestion="full", missed_cleavages=1, max_mods=0, - min_length=6, - max_length=50, + min_peptide_length=6, + max_peptide_length=50, ) candidates = db_utils.get_candidates( @@ -511,13 +471,13 @@ def test_get_candidates(fasta_raw_data): assert expected_smallwindow == candidates peptide_list = db_utils.digest_fasta( - fasta_filename="temp_fasta", + fasta_filename=str(tiny_fasta_file), enzyme="trypsin", digestion="full", missed_cleavages=1, max_mods=0, - min_length=6, - max_length=50, + min_peptide_length=6, + max_peptide_length=50, ) candidates = db_utils.get_candidates( @@ -531,13 +491,13 @@ def test_get_candidates(fasta_raw_data): assert expected_midwindow == candidates peptide_list = db_utils.digest_fasta( - fasta_filename="temp_fasta", + fasta_filename=str(tiny_fasta_file), enzyme="trypsin", digestion="full", missed_cleavages=1, max_mods=0, - min_length=6, - max_length=50, + min_peptide_length=6, + max_peptide_length=50, ) candidates = db_utils.get_candidates( From b2f08ac307f50c4dabc458745cd79b3ec2058f35 Mon Sep 17 00:00:00 2001 From: VarunAnanth2003 Date: Mon, 19 Aug 2024 19:09:26 -0700 Subject: [PATCH 29/84] add proteindatabase --- casanovo/casanovo.py | 110 -------- casanovo/config.yaml | 36 ++- casanovo/data/datasets.py | 2 +- casanovo/data/db_utils.py | 442 +++++++++++++++++--------------- casanovo/denovo/dataloaders.py | 28 +- casanovo/denovo/model_runner.py | 45 +--- tests/conftest.py | 5 + tests/test_integration.py | 2 - tests/unit_tests/test_unit.py | 200 +++++++++------ 9 files changed, 404 insertions(+), 466 deletions(-) diff --git a/casanovo/casanovo.py b/casanovo/casanovo.py index 4b9b4e38..b153512d 100644 --- a/casanovo/casanovo.py +++ b/casanovo/casanovo.py @@ -158,111 +158,9 @@ def sequence( nargs=1, type=click.Path(exists=True, dir_okay=False), ) -@click.option( - "--enzyme", - help="Enzyme for in silico digestion, \ - See pyteomics.parser.expasy_rules for valid enzymes", - type=click.Choice( - [ - "arg-c", - "asp-n", - "bnps-skatole", - "caspase 1", - "caspase 2", - "caspase 3", - "caspase 4", - "caspase 5", - "caspase 6", - "caspase 7", - "caspase 8", - "caspase 9", - "caspase 10", - "chymotrypsin high specificity", - "chymotrypsin low specificity", - "clostripain", - "cnbr", - "enterokinase", - "factor xa", - "formic acid", - "glutamyl endopeptidase", - "granzyme b", - "hydroxylamine", - "iodosobenzoic acid", - "lysc", - "ntcb", - "pepsin ph1.3", - "pepsin ph2.0", - "proline endopeptidase", - "proteinase k", - "staphylococcal peptidase i", - "thermolysin", - "thrombin", - "trypsin", - "trypsin_exception", - ] - ), - default="trypsin", -) -@click.option( - "--digestion", - help="Full: standard digestion. 
Semi: Include products of semi-specific cleavage", - type=click.Choice( - ["full", "partial"], - case_sensitive=False, - ), - default="full", -) -@click.option( - "--missed_cleavages", - help="Number of allowed missed cleavages when digesting protein", - type=int, - default=0, -) -@click.option( - "--max_mods", - help="Maximum number of amino acid modifications per peptide", - type=int, - default=0, -) -@click.option( - "--min_peptide_length", - help="Minimum peptide length to consider", - type=int, - default=6, -) -@click.option( - "--max_peptide_length", - help="Maximum peptide length to consider", - type=int, - default=50, -) -@click.option( - "--precursor_tolerance", - help="Precursor tolerance window size (units: ppm)", - type=float, - default=20, -) -@click.option( - "--isotope_error", - help="Isotope error levels to consider. \ - Creates multiple mass windows to consider per spectrum \ - to account for observed mass not matching monoisotopic mass \ - due to the instrument assigning the 13C isotope \ - peak as the precursor (list of ints, e.g: 1,2)", - type=str, - default="0", -) def db_search( peak_path: Tuple[str], fasta_path: str, - enzyme: str, - digestion: str, - missed_cleavages: int, - max_mods: int, - min_peptide_length: int, - max_peptide_length: int, - precursor_tolerance: float, - isotope_error: str, model: Optional[str], config: Optional[str], output: Optional[str], @@ -285,14 +183,6 @@ def db_search( runner.db_search( peak_path, fasta_path, - enzyme, - digestion, - missed_cleavages, - max_mods, - min_peptide_length, - max_peptide_length, - precursor_tolerance, - isotope_error, output, ) diff --git a/casanovo/config.yaml b/casanovo/config.yaml index c7186ff7..860cfabb 100644 --- a/casanovo/config.yaml +++ b/casanovo/config.yaml @@ -5,18 +5,26 @@ ### # The following parameters can be modified when running inference or when -# fine-tuning an existing Casanovo model. +# fine-tuning an existing Casanovo model. They also affect database search +# parameters when running Casanovo in DB-search mode. ### # Max absolute difference allowed with respect to observed precursor m/z. -# Predictions outside the tolerance range are assigned a negative peptide score. +# denovo: Predictions outside the tolerance range are assigned a negative peptide score. +# db-search: Used to create mas windows for candidate generation. precursor_mass_tol: 50 # ppm # Isotopes to consider when comparing predicted and observed precursor m/z's. isotope_error_range: [0, 1] -# The minimum length of predicted peptides. +# The minimum length of predicted/scored peptides. min_peptide_len: 6 -# Number of spectra in one inference batch. +# Number of spectra or psms in one inference batch. predict_batch_size: 1024 + + +### +# The following parameters are unique to Casanovo's inference/finetuning mode. +### + # Number of beams used in beam search. n_beams: 1 # Number of PSMs for each spectrum. @@ -29,6 +37,26 @@ accelerator: "auto" # number will be automatically selected for based on the chosen accelerator. devices: + +### +# The following parameters are unique to Casanovo's database search mode. +### + +# Enzyme for in silico digestion, used to generate candidate peptides. +# See pyteomics.parser.expasy_rules for valid enzymes +enzyme: "trypsin" +# Digestion type for candidate peptide generation. +# Full: standard digestion. 
Semi: Include products of semi-specific cleavage +digestion: "full" +# Number of allowed missed cleavages when digesting protein +missed_cleavages: 0 +# Maximum number of amino acid modifications per peptide. +# None generates all possible isoforms as candidates. +max_mods: +# Maximum peptide length to consider +max_peptide_len: 50 + + ### # The following parameters should only be modified if you are training a new # Casanovo model from scratch. diff --git a/casanovo/data/datasets.py b/casanovo/data/datasets.py index 6244e88f..3f05811f 100644 --- a/casanovo/data/datasets.py +++ b/casanovo/data/datasets.py @@ -1,6 +1,6 @@ """A PyTorch Dataset class for annotated spectra.""" -from typing import Optional, Tuple +from typing import List, Optional, Tuple import depthcharge import numpy as np diff --git a/casanovo/data/db_utils.py b/casanovo/data/db_utils.py index 1af09a47..a7b5e850 100644 --- a/casanovo/data/db_utils.py +++ b/casanovo/data/db_utils.py @@ -6,15 +6,12 @@ from typing import List, Tuple import depthcharge.masses +from numba import jit from pyteomics import fasta, parser logger = logging.getLogger("casanovo") - # CONSTANTS -HYDROGEN = 1.007825035 -OXYGEN = 15.99491463 -H2O = 2 * HYDROGEN + OXYGEN PROTON = 1.00727646677 ISOTOPE_SPACING = 1.003355 @@ -29,216 +26,243 @@ fixed_mods = {"carbm": ["C"]} -def convert_from_modx(seq: str): - """Converts peptide sequence from modX format to Casanovo-acceptable modifications. - - Args: - seq (str): Peptide in modX format - """ - seq = seq.replace("carbmC", "C+57.021") # Fixed modification - seq = seq.replace("oxM", "M+15.995") - seq = seq.replace("dN", "N+0.984") - seq = seq.replace("dQ", "Q+0.984") - seq = seq.replace("ace-", "+42.011") - seq = seq.replace("carbnh3x-", "+43.006-17.027") - seq = seq.replace("carb-", "+43.006") - seq = seq.replace("nh3x-", "-17.027") - return seq - - -def digest_fasta( - fasta_filename: str, - enzyme: str, - digestion: str, - missed_cleavages: int, - max_mods: int, - min_peptide_length: int, - max_peptide_length: int, -): - """ - Digests a FASTA file and returns the peptides, their masses, and associated protein. - - Parameters - ---------- - fasta_filename : str - Path to the FASTA file. - enzyme : str - The enzyme to use for digestion. - See pyteomics.parser.expasy_rules for valid enzymes. - digestion : str - The type of digestion to perform. Either 'full' or 'partial'. - missed_cleavages : int - The number of missed cleavages to allow. - max_mods : int - The maximum number of modifications to allow per peptide. - min_peptide_length : int - The minimum length of peptides to consider. - max_peptide_length : int - The maximum length of peptides to consider. - - Returns - ------- - mod_peptide_list : List[Tuple[str, float, str]] - A list of tuples containing the peptide sequence, mass, - and associated protein. Sorted by neutral mass in ascending order. 
- """ - # Verify the existence of the file: - if not os.path.isfile(fasta_filename): - logger.error("File %s does not exist.", fasta_filename) - raise FileNotFoundError(f"File {fasta_filename} does not exist.") - - fasta_data = fasta.read(fasta_filename) - peptide_list = [] - if digestion not in ["full", "partial"]: - logger.error("Digestion type %s not recognized.", digestion) - raise ValueError(f"Digestion type {digestion} not recognized.") - semi = digestion == "partial" - for header, seq in fasta_data: - pep_set = parser.cleave( - seq, - rule=parser.expasy_rules[enzyme], - missed_cleavages=missed_cleavages, - semi=semi, - ) - protein = header.split()[0] - for pep in pep_set: - if len(pep) < min_peptide_length or len(pep) > max_peptide_length: - continue - if any( - aa in pep for aa in "BJOUXZ" - ): # Check for incorrect AA letters - logger.warn( - "Skipping peptide with ambiguous amino acids: %s", pep - ) - continue - peptide_list.append((pep, protein)) - - # Generate modified peptides - mass_calculator = depthcharge.masses.PeptideMass(residues="massivekb") - mod_peptide_list = [] - for pep, prot in peptide_list: - peptide_isoforms = parser.isoforms( - pep, - variable_mods=var_mods, - fixed_mods=fixed_mods, - max_mods=max_mods, - ) - peptide_isoforms = list(map(convert_from_modx, peptide_isoforms)) - mod_peptide_list.extend( - (mod_pep, mass_calculator.mass(mod_pep), prot) - for mod_pep in peptide_isoforms - ) - - # Sort the peptides by mass and return. - mod_peptide_list.sort(key=lambda x: x[1]) - return mod_peptide_list - - -def get_candidates( - precursor_mz: float, - charge: int, - peptide_list: List[Tuple[str, float, str]], - precursor_tolerance: float, - isotope_error: str, -): +class ProteinDatabase: """ - Returns a list of candidate peptides that fall within the specified mass range. + TODO Parameters ---------- - precursor_mz : float - The precursor mass-to-charge ratio. - charge : int - The precursor charge. - peptide_list : List[Tuple[str, float, str]] - A list of tuples containing the peptide sequence, mass, and associated protein. - Must be sorted by mass in ascending order. Uses neutral masses. - precursor_tolerance : float - The precursor mass tolerance in parts-per-million. - isotope_error : str - The isotope error levels to consider. + TODO """ - candidates = set() - isotope_error = [int(x) for x in isotope_error.split(",")] - for e in isotope_error: - iso_shift = ISOTOPE_SPACING * e - upper_bound = (_to_raw_mass(precursor_mz, charge) - iso_shift) * ( - 1 + (precursor_tolerance / 1e6) - ) - lower_bound = (_to_raw_mass(precursor_mz, charge) - iso_shift) * ( - 1 - (precursor_tolerance / 1e6) + def __init__( + self, + fasta_path: str, + enzyme: str, + digestion: str, + missed_cleavages: int, + min_peptide_len: int, + max_peptide_len: int, + max_mods: int, + precursor_tolerance: float, + isotope_error: List[int], + ): + self.digest = self._digest_fasta( + fasta_path, + enzyme, + digestion, + missed_cleavages, + max_mods, + min_peptide_len, + max_peptide_len, ) - - start, end = get_mass_indices( - [x[1] for x in peptide_list], lower_bound, upper_bound + self.precursor_tolerance = precursor_tolerance + self.isotope_error = isotope_error + + def get_candidates( + self, + precursor_mz: float, + charge: int, + ): + """ + Returns a list of candidate peptides that fall within the specified mass range. + + Parameters + ---------- + precursor_mz : float + The precursor mass-to-charge ratio. + charge : int + The precursor charge. 
+ """ + candidates = set() + + for e in self.isotope_error: + iso_shift = ISOTOPE_SPACING * e + upper_bound = ( + ProteinDatabase._to_raw_mass(precursor_mz, charge) - iso_shift + ) * (1 + (self.precursor_tolerance / 1e6)) + lower_bound = ( + ProteinDatabase._to_raw_mass(precursor_mz, charge) - iso_shift + ) * (1 - (self.precursor_tolerance / 1e6)) + + start, end = ProteinDatabase._get_mass_indices( + [x[1] for x in self.digest], lower_bound, upper_bound + ) + + candidates.update(self.digest[start:end]) + + candidates = list(candidates) + candidates.sort(key=lambda x: x[1]) + return candidates + + def _digest_fasta( + self, + fasta_filename: str, + enzyme: str, + digestion: str, + missed_cleavages: int, + max_mods: int, + min_peptide_length: int, + max_peptide_length: int, + ): + """ + Digests a FASTA file and returns the peptides, their masses, and associated protein. + + Parameters + ---------- + fasta_filename : str + Path to the FASTA file. + enzyme : str + The enzyme to use for digestion. + See pyteomics.parser.expasy_rules for valid enzymes. + digestion : str + The type of digestion to perform. Either 'full' or 'partial'. + missed_cleavages : int + The number of missed cleavages to allow. + max_mods : int + The maximum number of modifications to allow per peptide. + min_peptide_length : int + The minimum length of peptides to consider. + max_peptide_length : int + The maximum length of peptides to consider. + + Returns + ------- + mod_peptide_list : List[Tuple[str, float, str]] + A list of tuples containing the peptide sequence, mass, + and associated protein. Sorted by neutral mass in ascending order. + """ + # Verify the existence of the file: + if not os.path.isfile(fasta_filename): + logger.error("File %s does not exist.", fasta_filename) + raise FileNotFoundError(f"File {fasta_filename} does not exist.") + + fasta_data = fasta.read(fasta_filename) + peptide_list = [] + if digestion not in ["full", "partial"]: + logger.error("Digestion type %s not recognized.", digestion) + raise ValueError(f"Digestion type {digestion} not recognized.") + semi = digestion == "partial" + for header, seq in fasta_data: + pep_set = parser.cleave( + seq, + rule=parser.expasy_rules[enzyme], + missed_cleavages=missed_cleavages, + semi=semi, + ) + protein = header.split()[0] + for pep in pep_set: + if ( + len(pep) < min_peptide_length + or len(pep) > max_peptide_length + ): + continue + if any( + aa in pep for aa in "BJOUXZ" + ): # Check for incorrect AA letters + logger.warn( + "Skipping peptide with ambiguous amino acids: %s", pep + ) + continue + peptide_list.append((pep, protein)) + + # Generate modified peptides + mass_calculator = depthcharge.masses.PeptideMass(residues="massivekb") + mod_peptide_list = [] + for pep, prot in peptide_list: + peptide_isoforms = parser.isoforms( + pep, + variable_mods=var_mods, + fixed_mods=fixed_mods, + max_mods=max_mods, + ) + peptide_isoforms = list( + map(ProteinDatabase._convert_from_modx, peptide_isoforms) + ) + mod_peptide_list.extend( + (mod_pep, mass_calculator.mass(mod_pep), prot) + for mod_pep in peptide_isoforms + ) + + # Sort the peptides by mass and return. + mod_peptide_list.sort(key=lambda x: x[1]) + logger.info( + "Digestion complete. %d peptides generated.", len(mod_peptide_list) ) - - candidates.update(peptide_list[start:end]) - - candidates = list(candidates) - candidates.sort(key=lambda x: x[1]) - return candidates - - -def _to_mz(precursor_mass, charge): - """ - Convert precursor neutral mass to m/z value. 
- - Parameters - ---------- - precursor_mass : float - The precursor neutral mass. - charge : int - The precursor charge. - - Returns - ------- - mz : float - The calculated precursor mass-to-charge ratio. - """ - return (precursor_mass + (charge * PROTON)) / charge - - -def _to_raw_mass(mz_mass, charge): - """ - Convert precursor m/z value to neutral mass. - - Parameters - ---------- - mz_mass : float - The precursor mass-to-charge ratio. - charge : int - The precursor charge. - - Returns - ------- - mass : float - The calculated precursor neutral mass. - """ - return charge * (mz_mass - PROTON) - - -def get_mass_indices(masses, m_low, m_high): - """Grabs mass indices that fall within a specified range. - - Pulls from masses, a list of mass values. - Requires that the mass values are sorted in ascending order. - - Parameters - ---------- - masses : List[int] - List of mass values - m_low : int - Lower bound of mass range (inclusive) - m_high : int - Upper bound of mass range (inclusive) - - Return - ------ - indices : Tuple[int, int] - Indices of mass values that fall within the specified range - """ - start = bisect.bisect_left(masses, m_low) - end = bisect.bisect_right(masses, m_high) - return start, end + return mod_peptide_list + + def _to_mz(precursor_mass, charge): + """ + Convert precursor neutral mass to m/z value. + + Parameters + ---------- + precursor_mass : float + The precursor neutral mass. + charge : int + The precursor charge. + + Returns + ------- + mz : float + The calculated precursor mass-to-charge ratio. + """ + return (precursor_mass + (charge * PROTON)) / charge + + def _to_raw_mass(mz_mass, charge): + """ + Convert precursor m/z value to neutral mass. + + Parameters + ---------- + mz_mass : float + The precursor mass-to-charge ratio. + charge : int + The precursor charge. + + Returns + ------- + mass : float + The calculated precursor neutral mass. + """ + return charge * (mz_mass - PROTON) + + def _get_mass_indices(masses, m_low, m_high): + """Grabs mass indices that fall within a specified range. + + Pulls from masses, a list of mass values. + Requires that the mass values are sorted in ascending order. + + Parameters + ---------- + masses : List[int] + List of mass values + m_low : int + Lower bound of mass range (inclusive) + m_high : int + Upper bound of mass range (inclusive) + + Return + ------ + indices : Tuple[int, int] + Indices of mass values that fall within the specified range + """ + start = bisect.bisect_left(masses, m_low) + end = bisect.bisect_right(masses, m_high) + return start, end + + def _convert_from_modx(seq: str): + """Converts peptide sequence from modX format to Casanovo-acceptable modifications. 
+ + Args: + seq (str): Peptide in modX format + """ + seq = seq.replace("carbmC", "C+57.021") # Fixed modification + seq = seq.replace("oxM", "M+15.995") + seq = seq.replace("dN", "N+0.984") + seq = seq.replace("dQ", "Q+0.984") + seq = seq.replace("ace-", "+42.011") + seq = seq.replace("carbnh3x-", "+43.006-17.027") + seq = seq.replace("carb-", "+43.006") + seq = seq.replace("nh3x-", "-17.027") + return seq diff --git a/casanovo/denovo/dataloaders.py b/casanovo/denovo/dataloaders.py index 14a0ff99..4d5524f4 100644 --- a/casanovo/denovo/dataloaders.py +++ b/casanovo/denovo/dataloaders.py @@ -89,6 +89,7 @@ def __init__( self.train_dataset = None self.valid_dataset = None self.test_dataset = None + self.pdb = None def setup(self, stage: str = None, annotated: bool = True) -> None: """ @@ -96,7 +97,7 @@ def setup(self, stage: str = None, annotated: bool = True) -> None: Parameters ---------- - stage : str {"fit", "validate", "test", "db"} + stage : str {"fit", "validate", "test"} The stage indicating which Datasets to prepare. All are prepared by default. annotated: bool @@ -186,12 +187,7 @@ def db_dataloader(self) -> torch.utils.data.DataLoader: return torch.utils.data.DataLoader( self.test_dataset, batch_size=self.eval_batch_size, - collate_fn=functools.partial( - prepare_psm_batch, - digest=self.digest, - precursor_tolerance=self.precursor_tolerance, - isotope_error=self.isotope_error, - ), + collate_fn=functools.partial(prepare_psm_batch, pdb=self.pdb), pin_memory=True, num_workers=self.n_workers, shuffle=False, @@ -239,9 +235,7 @@ def prepare_batch( def prepare_psm_batch( batch: List[Tuple[torch.Tensor, float, int, str]], - digest: List[Tuple[str, float, str]], - precursor_tolerance: float, - isotope_error: str, + pdb: db_utils.ProteinDatabase, ): """ Collate MS/MS spectra into a batch for DB search. @@ -255,13 +249,8 @@ def prepare_psm_batch( A batch of data from an AnnotatedSpectrumDataset, consisting of for each spectrum (i) a tensor with the m/z and intensity peak values, (ii), the precursor m/z, (iii) the precursor charge, (iv) the spectrum identifier. - digest : List[Tuple[str, float, str]] - A list of tuples containing the peptide sequence, mass, and associated protein - from digesting a .fasta file. Sorted by mass in ascending order. Uses neutral masses. - precursor_tolerance : float - The precursor mass tolerance in parts-per-million. - isotope_error : str - The isotope error levels to consider. + pdb : db_utils.ProteinDatabase + The protein database to use for candidate peptide retrieval. Returns ------- @@ -294,12 +283,9 @@ def prepare_psm_batch( all_peptides = [] all_proteins = [] for idx in range(len(batch)): - digest_data = db_utils.get_candidates( + digest_data = pdb.get_candidates( precursor_mzs[idx], precursor_charges[idx], - digest, - precursor_tolerance, - isotope_error, ) try: spec_peptides, _, pep_protein = list(zip(*digest_data)) diff --git a/casanovo/denovo/model_runner.py b/casanovo/denovo/model_runner.py index c2b71098..b90f06b0 100644 --- a/casanovo/denovo/model_runner.py +++ b/casanovo/denovo/model_runner.py @@ -83,14 +83,6 @@ def db_search( self, peak_path: Iterable[str], fasta_path: str, - enzyme: str, - digestion: str, - missed_cleavages: int, - max_mods: int, - min_peptide_length: int, - max_peptide_length: int, - precursor_tolerance: float, - isotope_error: str, output: str, ) -> None: """Perform database search with Casanovo. @@ -101,22 +93,6 @@ def db_search( The paths to the .mgf data files for database search. 
fasta_path : str The path to the FASTA file for database search. - enzyme : str - The enzyme used for digestion. - digestion : str - The digestion type, full or partial. - missed_cleavages : int - The number of missed cleavages allowed. - max_mods : int - The maximum number of modifications allowed per peptide. - min_peptide_length : int - The minimum peptide length. - max_peptide_length : int - The maximum peptide length. - precursor_tolerance : float - The precursor mass tolerance in ppm. - isotope_error : str - Isotope error levels to consider, in comma-delineated string form. output : str Where should the output be saved? @@ -138,19 +114,18 @@ def db_search( self.writer.set_ms_run(test_index.ms_files) self.initialize_data_module(test_index=test_index) - self.loaders.setup(stage="test", annotated=False) - self.loaders.digest = db_utils.digest_fasta( + self.loaders.pdb = db_utils.ProteinDatabase( fasta_path, - enzyme, - digestion, - missed_cleavages, - max_mods, - min_peptide_length, - max_peptide_length, + self.config.enzyme, + self.config.digestion, + self.config.missed_cleavages, + self.config.min_peptide_len, + self.config.max_peptide_len, + self.config.max_mods, + self.config.precursor_mass_tol, + self.config.isotope_error_range, ) - self.loaders.precursor_tolerance = precursor_tolerance - self.loaders.isotope_error = isotope_error - + self.loaders.setup(stage="test", annotated=False) self.trainer.predict(self.model, self.loaders.db_dataloader()) def train( diff --git a/tests/conftest.py b/tests/conftest.py index 60afcd83..f20d7879 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -242,6 +242,11 @@ def tiny_config(tmp_path): "precursor_mass_tol": 5, "isotope_error_range": [0, 1], "min_peptide_len": 6, + "max_peptide_len": 50, + "enzyme": "trypsin", + "digestion": "full", + "missed_cleavages": 0, + "max_mods": None, "predict_batch_size": 1024, "n_beams": 1, "top_match": 1, diff --git a/tests/test_integration.py b/tests/test_integration.py index 4bd55174..61f735c3 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -24,8 +24,6 @@ def test_db_search( tiny_config, "--output", str(output_path), - "--precursor_tolerance", - str(100), str(mgf_db_search), str(tiny_fasta_file), ] diff --git a/tests/unit_tests/test_unit.py b/tests/unit_tests/test_unit.py index 419cf3ef..7a37e771 100644 --- a/tests/unit_tests/test_unit.py +++ b/tests/unit_tests/test_unit.py @@ -276,15 +276,18 @@ def test_digest_fasta_cleave(tiny_fasta_file): (0, 1, 3), (expected_normal, expected_1missedcleavage, expected_3missedcleavage), ): - peptide_list = db_utils.digest_fasta( - fasta_filename=str(tiny_fasta_file), + pdb = db_utils.ProteinDatabase( + fasta_path=str(tiny_fasta_file), enzyme="trypsin", digestion="full", missed_cleavages=missed_cleavages, + min_peptide_len=6, + max_peptide_len=50, max_mods=0, - min_peptide_length=6, - max_peptide_length=50, + precursor_tolerance=20, + isotope_error=[0], ) + peptide_list = pdb.digest peptide_list = [x[0] for x in peptide_list] assert peptide_list == expected @@ -343,16 +346,18 @@ def test_digest_fasta_mods(tiny_fasta_file): "+42.011FSGSGSGTDFTLTISSLQPEDFAVYYC+57.021QQDYNLP", "+43.006FSGSGSGTDFTLTISSLQPEDFAVYYC+57.021QQDYNLP", ] - - peptide_list = db_utils.digest_fasta( - fasta_filename=str(tiny_fasta_file), + pdb = db_utils.ProteinDatabase( + fasta_path=str(tiny_fasta_file), enzyme="trypsin", digestion="full", missed_cleavages=0, + min_peptide_len=6, + max_peptide_len=50, max_mods=1, - min_peptide_length=6, - max_peptide_length=50, + 
precursor_tolerance=20, + isotope_error=[0], ) + peptide_list = pdb.digest peptide_list = [x[0] for x in peptide_list] peptide_list = [ x @@ -375,27 +380,33 @@ def test_length_restrictions(tiny_fasta_file): # length between 6 and 8 expected_short = ["ATSIPAR", "VTLSC+57.021R"] - peptide_list = db_utils.digest_fasta( - fasta_filename=str(tiny_fasta_file), + pdb = db_utils.ProteinDatabase( + fasta_path=str(tiny_fasta_file), enzyme="trypsin", digestion="full", missed_cleavages=0, + min_peptide_len=20, + max_peptide_len=50, max_mods=0, - min_peptide_length=20, - max_peptide_length=50, + precursor_tolerance=20, + isotope_error=[0], ) + peptide_list = pdb.digest peptide_list = [x[0] for x in peptide_list] assert peptide_list == expected_long - peptide_list = db_utils.digest_fasta( - fasta_filename=str(tiny_fasta_file), + pdb = db_utils.ProteinDatabase( + fasta_path=str(tiny_fasta_file), enzyme="trypsin", digestion="full", missed_cleavages=0, + min_peptide_len=6, + max_peptide_len=8, max_mods=0, - min_peptide_length=6, - max_peptide_length=8, + precursor_tolerance=20, + isotope_error=[0], ) + peptide_list = pdb.digest peptide_list = [x[0] for x in peptide_list] assert peptide_list == expected_short @@ -415,27 +426,33 @@ def test_digest_fasta_enzyme(tiny_fasta_file): # asp-n enzyme expected_aspn = ["DFAVYYC+57.021QQ", "DFTLTISSLQPE", "MEAPAQLLFLLLLWLP"] - peptide_list = db_utils.digest_fasta( - fasta_filename=str(tiny_fasta_file), + pdb = db_utils.ProteinDatabase( + fasta_path=str(tiny_fasta_file), enzyme="arg-c", digestion="full", missed_cleavages=0, + min_peptide_len=6, + max_peptide_len=50, max_mods=0, - min_peptide_length=6, - max_peptide_length=50, + precursor_tolerance=20, + isotope_error=[0], ) + peptide_list = pdb.digest peptide_list = [x[0] for x in peptide_list] assert peptide_list == expected_argc - peptide_list = db_utils.digest_fasta( - fasta_filename=str(tiny_fasta_file), + pdb = db_utils.ProteinDatabase( + fasta_path=str(tiny_fasta_file), enzyme="asp-n", digestion="full", missed_cleavages=0, + min_peptide_len=6, + max_peptide_len=50, max_mods=0, - min_peptide_length=6, - max_peptide_length=50, + precursor_tolerance=20, + isotope_error=[0], ) + peptide_list = pdb.digest peptide_list = [x[0] for x in peptide_list] assert peptide_list == expected_aspn @@ -450,68 +467,53 @@ def test_get_candidates(tiny_fasta_file): # precursor window is 600000 expected_widewindow = ["ATSIPAR", "VTLSC+57.021R", "LLIYGASTR"] - peptide_list = db_utils.digest_fasta( - fasta_filename=str(tiny_fasta_file), + pdb = db_utils.ProteinDatabase( + fasta_path=str(tiny_fasta_file), enzyme="trypsin", digestion="full", missed_cleavages=1, + min_peptide_len=6, + max_peptide_len=50, max_mods=0, - min_peptide_length=6, - max_peptide_length=50, - ) - - candidates = db_utils.get_candidates( - precursor_mz=496.2, - charge=2, - peptide_list=peptide_list, precursor_tolerance=10000, - isotope_error="0", + isotope_error=[0], ) + candidates = pdb.get_candidates(precursor_mz=496.2, charge=2) candidates = [x[0] for x in candidates] assert expected_smallwindow == candidates - peptide_list = db_utils.digest_fasta( - fasta_filename=str(tiny_fasta_file), + pdb = db_utils.ProteinDatabase( + fasta_path=str(tiny_fasta_file), enzyme="trypsin", digestion="full", missed_cleavages=1, + min_peptide_len=6, + max_peptide_len=50, max_mods=0, - min_peptide_length=6, - max_peptide_length=50, - ) - - candidates = db_utils.get_candidates( - precursor_mz=496.2, - charge=2, - peptide_list=peptide_list, precursor_tolerance=150000, - isotope_error="0", + 
isotope_error=[0], ) + candidates = pdb.get_candidates(precursor_mz=496.2, charge=2) candidates = [x[0] for x in candidates] assert expected_midwindow == candidates - peptide_list = db_utils.digest_fasta( - fasta_filename=str(tiny_fasta_file), + pdb = db_utils.ProteinDatabase( + fasta_path=str(tiny_fasta_file), enzyme="trypsin", digestion="full", missed_cleavages=1, + min_peptide_len=6, + max_peptide_len=50, max_mods=0, - min_peptide_length=6, - max_peptide_length=50, - ) - - candidates = db_utils.get_candidates( - precursor_mz=496.2, - charge=2, - peptide_list=peptide_list, precursor_tolerance=600000, - isotope_error="0", + isotope_error=[0], ) + candidates = pdb.get_candidates(precursor_mz=496.2, charge=2) candidates = [x[0] for x in candidates] assert expected_widewindow == candidates -def test_get_candidates_isotope_error(): +def test_get_candidates_isotope_error(tiny_fasta_file): # Tide isotope error windows for 496.2, 2+: # 0: [980.481617, 1000.289326] @@ -556,53 +558,83 @@ def test_get_candidates_isotope_error(): expected_isotope3 = list("XWVUTSRQPONMLKJIHGFE") expected_isotope0123 = list("XWVUTSRQPONMLKJIHGFEDCB") - candidates = db_utils.get_candidates( - precursor_mz=496.2, - charge=2, - peptide_list=peptide_list, + pdb = db_utils.ProteinDatabase( + fasta_path=str(tiny_fasta_file), + enzyme="trypsin", + digestion="full", + missed_cleavages=0, + min_peptide_len=0, + max_peptide_len=0, + max_mods=0, precursor_tolerance=10000, - isotope_error="0", + isotope_error=[0], ) + pdb.digest = peptide_list + candidates = pdb.get_candidates(precursor_mz=496.2, charge=2) candidates = [x[0] for x in candidates] assert expected_isotope0 == candidates - candidates = db_utils.get_candidates( - precursor_mz=496.2, - charge=2, - peptide_list=peptide_list, + pdb = db_utils.ProteinDatabase( + fasta_path=str(tiny_fasta_file), + enzyme="trypsin", + digestion="full", + missed_cleavages=0, + min_peptide_len=0, + max_peptide_len=0, + max_mods=0, precursor_tolerance=10000, - isotope_error="1", + isotope_error=[1], ) + pdb.digest = peptide_list + candidates = pdb.get_candidates(precursor_mz=496.2, charge=2) candidates = [x[0] for x in candidates] assert expected_isotope1 == candidates - candidates = db_utils.get_candidates( - precursor_mz=496.2, - charge=2, - peptide_list=peptide_list, + pdb = db_utils.ProteinDatabase( + fasta_path=str(tiny_fasta_file), + enzyme="trypsin", + digestion="full", + missed_cleavages=0, + min_peptide_len=0, + max_peptide_len=0, + max_mods=0, precursor_tolerance=10000, - isotope_error="2", + isotope_error=[2], ) + pdb.digest = peptide_list + candidates = pdb.get_candidates(precursor_mz=496.2, charge=2) candidates = [x[0] for x in candidates] assert expected_isotope2 == candidates - candidates = db_utils.get_candidates( - precursor_mz=496.2, - charge=2, - peptide_list=peptide_list, + pdb = db_utils.ProteinDatabase( + fasta_path=str(tiny_fasta_file), + enzyme="trypsin", + digestion="full", + missed_cleavages=0, + min_peptide_len=0, + max_peptide_len=0, + max_mods=0, precursor_tolerance=10000, - isotope_error="3", + isotope_error=[3], ) + pdb.digest = peptide_list + candidates = pdb.get_candidates(precursor_mz=496.2, charge=2) candidates = [x[0] for x in candidates] assert expected_isotope3 == candidates - candidates = db_utils.get_candidates( - precursor_mz=496.2, - charge=2, - peptide_list=peptide_list, + pdb = db_utils.ProteinDatabase( + fasta_path=str(tiny_fasta_file), + enzyme="trypsin", + digestion="full", + missed_cleavages=0, + min_peptide_len=0, + max_peptide_len=0, + 
max_mods=0, precursor_tolerance=10000, - isotope_error="0,1,2,3", + isotope_error=[0, 1, 2, 3], ) + pdb.digest = peptide_list + candidates = pdb.get_candidates(precursor_mz=496.2, charge=2) candidates = [x[0] for x in candidates] assert expected_isotope0123 == candidates From 3d0b0b9b6f3c4efedd7034aab4ecc62de2a9a4ca Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Tue, 20 Aug 2024 02:12:46 +0000 Subject: [PATCH 30/84] Generate new screengrabs with rich-codex --- docs/images/configure-help.svg | 160 +++++++--------------- docs/images/evaluate-help.svg | 191 +++++++++++++------------- docs/images/help.svg | 223 ++++++++++++++++++------------- docs/images/sequence-help.svg | 191 +++++++++++++------------- docs/images/train-help.svg | 237 ++++++++++++++++++++------------- 5 files changed, 509 insertions(+), 493 deletions(-) diff --git a/docs/images/configure-help.svg b/docs/images/configure-help.svg index 4092bce3..b1fcce10 100644 --- a/docs/images/configure-help.svg +++ b/docs/images/configure-help.svg @@ -1,4 +1,4 @@ - + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + - + - + - - $ casanovo configure --help -Traceback (most recent call last): -  File "/opt/hostedtoolcache/Python/3.10.14/x64/bin/casanovo", line 5, in <module> -    from casanovo.casanovo import main -  File "/opt/hostedtoolcache/Python/3.10.14/x64/lib/python3.10/site-packages/casanovo/casanovo.py", line 32, in <module> -    import depthcharge -  File "/opt/hostedtoolcache/Python/3.10.14/x64/lib/python3.10/site-packages/depthcharge/__init__.py", line 3, in <module> -    from . import components -  File "/opt/hostedtoolcache/Python/3.10.14/x64/lib/python3.10/site-packages/depthcharge/components/__init__.py", line 2, in <module> -    from .transformers import SpectrumEncoder, PeptideDecoder -  File "/opt/hostedtoolcache/Python/3.10.14/x64/lib/python3.10/site-packages/depthcharge/components/transformers.py", line 8, in <module> -    from .. 
[Screengrab diffs for docs/images/configure-help.svg, evaluate-help.svg, help.svg, sequence-help.svg, and train-help.svg: each of the previous rich-codex captures recorded the same startup failure, a traceback through the depthcharge and tensorboard imports ending in `AttributeError: np.string_ was removed in the NumPy 2.0 release. Use np.bytes_ instead.` The regenerated captures show the normal help output instead: `casanovo configure` (generate a YAML configuration file, `--output`/`-o`), `casanovo evaluate` and `casanovo sequence` (ANNOTATED_PEAK_PATH / PEAK_PATH arguments with `--model`, `--output`, `--config`, and `--verbosity` options), the top-level `casanovo --help` (listing the commands configure, db-search, evaluate, sequence, train, and version, with db-search described as "Perform a database search on MS/MS data using Casanovo-DB"), and `casanovo train --help`, whose argument and option panels follow below.]
+ +╭─ Arguments ──────────────────────────────────────────────────────────────────╮ +*  TRAIN_PEAK_PATH    FILE[required] +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Options ────────────────────────────────────────────────────────────────────╮ +*--validation_peak_pa…-pFILE                    An annotated MGF file   +                                                       for validation, like    +                                                       from MassIVE-KB. Use    +                                                       this option multiple    +                                                       times to specify        +                                                       multiple files.         +[required]             +--model-mFILE                    The model weights       +                                                       (.ckpt file). If not    +                                                       provided, Casanovo      +                                                       will try to download    +                                                       the latest release.     +--output-oFILE                    The mzTab file to       +                                                       which results will be   +                                                       written.                +--config-cFILE                    The YAML configuration  +                                                       file overriding the     +                                                       default options.        +--verbosity-v[debug|info|warning|er  Set the verbosity of    +ror]  console logging         +                                                       messages. Log files     +                                                       are always set to       +                                                       'debug'.                +--help-h  Show this message and   +                                                       exit.                   +╰──────────────────────────────────────────────────────────────────────────────╯ + From 812226e396f667f2d9e628e1aabd76546f8c18a1 Mon Sep 17 00:00:00 2001 From: VarunAnanth2003 Date: Tue, 20 Aug 2024 20:21:29 -0700 Subject: [PATCH 31/84] finish proteindatabase --- casanovo/data/db_utils.py | 101 +++++++++++++++++---------------- casanovo/denovo/dataloaders.py | 6 +- tests/unit_tests/test_unit.py | 100 +++++++++++++++----------------- 3 files changed, 101 insertions(+), 106 deletions(-) diff --git a/casanovo/data/db_utils.py b/casanovo/data/db_utils.py index a7b5e850..d249e0c7 100644 --- a/casanovo/data/db_utils.py +++ b/casanovo/data/db_utils.py @@ -1,12 +1,12 @@ """Unique methods used within db-search mode""" -import bisect import logging import os -from typing import List, Tuple +from typing import List import depthcharge.masses from numba import jit +import pandas as pd from pyteomics import fasta, parser logger = logging.getLogger("casanovo") @@ -28,11 +28,29 @@ class ProteinDatabase: """ - TODO + Store digested .fasta data and return candidate peptides for a given precursor mass. Parameters ---------- - TODO + fasta_path : str + Path to the FASTA file. + enzyme : str + The enzyme to use for digestion. + See pyteomics.parser.expasy_rules for valid enzymes. + digestion : str + The type of digestion to perform. Either 'full' or 'partial'. + missed_cleavages : int + The number of missed cleavages to allow. + min_peptide_len : int + The minimum length of peptides to consider. 
+ max_peptide_len : int + The maximum length of peptides to consider. + max_mods : int + The maximum number of modifications to allow per peptide. + precursor_tolerance : float + The precursor mass tolerance in ppm. + isotope_error : List[int] + Isotopes to consider when comparing predicted and observed precursor m/z's. """ def __init__( @@ -73,27 +91,34 @@ def get_candidates( The precursor mass-to-charge ratio. charge : int The precursor charge. + + Returns + ------- + candidates : List[Tuple[str, str]] + A list of candidate peptides and associated + protein. """ - candidates = set() + candidates = [] for e in self.isotope_error: iso_shift = ISOTOPE_SPACING * e - upper_bound = ( + upper_bound = float( ProteinDatabase._to_raw_mass(precursor_mz, charge) - iso_shift ) * (1 + (self.precursor_tolerance / 1e6)) - lower_bound = ( + lower_bound = float( ProteinDatabase._to_raw_mass(precursor_mz, charge) - iso_shift ) * (1 - (self.precursor_tolerance / 1e6)) - start, end = ProteinDatabase._get_mass_indices( - [x[1] for x in self.digest], lower_bound, upper_bound - ) + window = self.digest[ + (self.digest["calc_mass"] >= lower_bound) + & (self.digest["calc_mass"] <= upper_bound) + ] + candidates.append(window[["peptide", "calc_mass", "protein"]]) - candidates.update(self.digest[start:end]) - - candidates = list(candidates) - candidates.sort(key=lambda x: x[1]) - return candidates + candidates = pd.concat(candidates) + candidates.drop_duplicates(inplace=True) + candidates.sort_values(by=["calc_mass", "peptide"], inplace=True) + return list(candidates["peptide"]), list(candidates["protein"]) def _digest_fasta( self, @@ -128,9 +153,9 @@ def _digest_fasta( Returns ------- - mod_peptide_list : List[Tuple[str, float, str]] - A list of tuples containing the peptide sequence, mass, - and associated protein. Sorted by neutral mass in ascending order. + mod_peptide_list : pd.DataFrame + A Pandas DataFrame with peptide, mass, + and protein columns. Sorted by neutral mass in ascending order. """ # Verify the existence of the file: if not os.path.isfile(fasta_filename): @@ -180,17 +205,20 @@ def _digest_fasta( map(ProteinDatabase._convert_from_modx, peptide_isoforms) ) mod_peptide_list.extend( - (mod_pep, mass_calculator.mass(mod_pep), prot) + [mod_pep, mass_calculator.mass(mod_pep), prot] for mod_pep in peptide_isoforms ) - # Sort the peptides by mass and return. - mod_peptide_list.sort(key=lambda x: x[1]) - logger.info( - "Digestion complete. %d peptides generated.", len(mod_peptide_list) + # Create a DataFrame for easy sorting and filtering + pdb_df = pd.DataFrame( + mod_peptide_list, columns=["peptide", "calc_mass", "protein"] ) - return mod_peptide_list + pdb_df.sort_values(by=["calc_mass", "peptide"], inplace=True) + + logger.info("Digestion complete. %d peptides generated.", len(pdb_df)) + return pdb_df + @jit def _to_mz(precursor_mass, charge): """ Convert precursor neutral mass to m/z value. @@ -209,6 +237,7 @@ def _to_mz(precursor_mass, charge): """ return (precursor_mass + (charge * PROTON)) / charge + @jit def _to_raw_mass(mz_mass, charge): """ Convert precursor m/z value to neutral mass. @@ -227,30 +256,6 @@ def _to_raw_mass(mz_mass, charge): """ return charge * (mz_mass - PROTON) - def _get_mass_indices(masses, m_low, m_high): - """Grabs mass indices that fall within a specified range. - - Pulls from masses, a list of mass values. - Requires that the mass values are sorted in ascending order. 
- - Parameters - ---------- - masses : List[int] - List of mass values - m_low : int - Lower bound of mass range (inclusive) - m_high : int - Upper bound of mass range (inclusive) - - Return - ------ - indices : Tuple[int, int] - Indices of mass values that fall within the specified range - """ - start = bisect.bisect_left(masses, m_low) - end = bisect.bisect_right(masses, m_high) - return start, end - def _convert_from_modx(seq: str): """Converts peptide sequence from modX format to Casanovo-acceptable modifications. diff --git a/casanovo/denovo/dataloaders.py b/casanovo/denovo/dataloaders.py index 4d5524f4..2d9e200b 100644 --- a/casanovo/denovo/dataloaders.py +++ b/casanovo/denovo/dataloaders.py @@ -284,11 +284,11 @@ def prepare_psm_batch( all_proteins = [] for idx in range(len(batch)): digest_data = pdb.get_candidates( - precursor_mzs[idx], - precursor_charges[idx], + float(precursor_mzs[idx]), + float(precursor_charges[idx]), ) try: - spec_peptides, _, pep_protein = list(zip(*digest_data)) + spec_peptides, pep_protein = digest_data all_spectra.append( spectra[idx].unsqueeze(0).repeat(len(spec_peptides), 1, 1) ) diff --git a/tests/unit_tests/test_unit.py b/tests/unit_tests/test_unit.py index 7a37e771..2473a168 100644 --- a/tests/unit_tests/test_unit.py +++ b/tests/unit_tests/test_unit.py @@ -9,6 +9,7 @@ import einops import github import numpy as np +import pandas as pd import pytest import torch @@ -287,8 +288,7 @@ def test_digest_fasta_cleave(tiny_fasta_file): precursor_tolerance=20, isotope_error=[0], ) - peptide_list = pdb.digest - peptide_list = [x[0] for x in peptide_list] + peptide_list = list(pdb.digest["peptide"]) assert peptide_list == expected @@ -357,8 +357,7 @@ def test_digest_fasta_mods(tiny_fasta_file): precursor_tolerance=20, isotope_error=[0], ) - peptide_list = pdb.digest - peptide_list = [x[0] for x in peptide_list] + peptide_list = list(pdb.digest["peptide"]) peptide_list = [ x for x in peptide_list @@ -391,8 +390,7 @@ def test_length_restrictions(tiny_fasta_file): precursor_tolerance=20, isotope_error=[0], ) - peptide_list = pdb.digest - peptide_list = [x[0] for x in peptide_list] + peptide_list = list(pdb.digest["peptide"]) assert peptide_list == expected_long pdb = db_utils.ProteinDatabase( @@ -406,8 +404,7 @@ def test_length_restrictions(tiny_fasta_file): precursor_tolerance=20, isotope_error=[0], ) - peptide_list = pdb.digest - peptide_list = [x[0] for x in peptide_list] + peptide_list = list(pdb.digest["peptide"]) assert peptide_list == expected_short @@ -437,8 +434,7 @@ def test_digest_fasta_enzyme(tiny_fasta_file): precursor_tolerance=20, isotope_error=[0], ) - peptide_list = pdb.digest - peptide_list = [x[0] for x in peptide_list] + peptide_list = list(pdb.digest["peptide"]) assert peptide_list == expected_argc pdb = db_utils.ProteinDatabase( @@ -452,8 +448,7 @@ def test_digest_fasta_enzyme(tiny_fasta_file): precursor_tolerance=20, isotope_error=[0], ) - peptide_list = pdb.digest - peptide_list = [x[0] for x in peptide_list] + peptide_list = list(pdb.digest["peptide"]) assert peptide_list == expected_aspn @@ -478,8 +473,7 @@ def test_get_candidates(tiny_fasta_file): precursor_tolerance=10000, isotope_error=[0], ) - candidates = pdb.get_candidates(precursor_mz=496.2, charge=2) - candidates = [x[0] for x in candidates] + candidates, _ = pdb.get_candidates(precursor_mz=496.2, charge=2) assert expected_smallwindow == candidates pdb = db_utils.ProteinDatabase( @@ -493,8 +487,7 @@ def test_get_candidates(tiny_fasta_file): precursor_tolerance=150000, 
isotope_error=[0], ) - candidates = pdb.get_candidates(precursor_mz=496.2, charge=2) - candidates = [x[0] for x in candidates] + candidates, _ = pdb.get_candidates(precursor_mz=496.2, charge=2) assert expected_midwindow == candidates pdb = db_utils.ProteinDatabase( @@ -508,8 +501,7 @@ def test_get_candidates(tiny_fasta_file): precursor_tolerance=600000, isotope_error=[0], ) - candidates = pdb.get_candidates(precursor_mz=496.2, charge=2) - candidates = [x[0] for x in candidates] + candidates, _ = pdb.get_candidates(precursor_mz=496.2, charge=2) assert expected_widewindow == candidates @@ -522,35 +514,38 @@ def test_get_candidates_isotope_error(tiny_fasta_file): # 3: [977.510108, 997.257787] peptide_list = [ - ("A", 1001), - ("B", 1000), - ("C", 999), - ("D", 998), - ("E", 997), - ("F", 996), - ("G", 995), - ("H", 994), - ("I", 993), - ("J", 992), - ("K", 991), - ("L", 990), - ("M", 989), - ("N", 988), - ("O", 987), - ("P", 986), - ("Q", 985), - ("R", 984), - ("S", 983), - ("T", 982), - ("U", 981), - ("V", 980), - ("W", 979), - ("X", 978), - ("Y", 977), - ("Z", 976), + ("A", 1001, "foo"), + ("B", 1000, "foo"), + ("C", 999, "foo"), + ("D", 998, "foo"), + ("E", 997, "foo"), + ("F", 996, "foo"), + ("G", 995, "foo"), + ("H", 994, "foo"), + ("I", 993, "foo"), + ("J", 992, "foo"), + ("K", 991, "foo"), + ("L", 990, "foo"), + ("M", 989, "foo"), + ("N", 988, "foo"), + ("O", 987, "foo"), + ("P", 986, "foo"), + ("Q", 985, "foo"), + ("R", 984, "foo"), + ("S", 983, "foo"), + ("T", 982, "foo"), + ("U", 981, "foo"), + ("V", 980, "foo"), + ("W", 979, "foo"), + ("X", 978, "foo"), + ("Y", 977, "foo"), + ("Z", 976, "foo"), ] - peptide_list.sort(key=lambda x: x[1]) + peptide_list = pd.DataFrame( + peptide_list, columns=["peptide", "calc_mass", "protein"] + ) + peptide_list.sort_values("calc_mass", inplace=True) expected_isotope0 = list("UTSRQPONMLKJIHGFEDCB") expected_isotope1 = list("VUTSRQPONMLKJIHGFEDC") @@ -570,8 +565,7 @@ def test_get_candidates_isotope_error(tiny_fasta_file): isotope_error=[0], ) pdb.digest = peptide_list - candidates = pdb.get_candidates(precursor_mz=496.2, charge=2) - candidates = [x[0] for x in candidates] + candidates, _ = pdb.get_candidates(precursor_mz=496.2, charge=2) assert expected_isotope0 == candidates pdb = db_utils.ProteinDatabase( @@ -586,8 +580,7 @@ def test_get_candidates_isotope_error(tiny_fasta_file): isotope_error=[1], ) pdb.digest = peptide_list - candidates = pdb.get_candidates(precursor_mz=496.2, charge=2) - candidates = [x[0] for x in candidates] + candidates, _ = pdb.get_candidates(precursor_mz=496.2, charge=2) assert expected_isotope1 == candidates pdb = db_utils.ProteinDatabase( @@ -602,8 +595,7 @@ def test_get_candidates_isotope_error(tiny_fasta_file): isotope_error=[2], ) pdb.digest = peptide_list - candidates = pdb.get_candidates(precursor_mz=496.2, charge=2) - candidates = [x[0] for x in candidates] + candidates, _ = pdb.get_candidates(precursor_mz=496.2, charge=2) assert expected_isotope2 == candidates pdb = db_utils.ProteinDatabase( @@ -618,8 +610,7 @@ def test_get_candidates_isotope_error(tiny_fasta_file): isotope_error=[3], ) pdb.digest = peptide_list - candidates = pdb.get_candidates(precursor_mz=496.2, charge=2) - candidates = [x[0] for x in candidates] + candidates, _ = pdb.get_candidates(precursor_mz=496.2, charge=2) assert expected_isotope3 == candidates pdb = db_utils.ProteinDatabase( @@ -634,8 +625,7 @@ def test_get_candidates_isotope_error(tiny_fasta_file): isotope_error=[0, 1, 2, 3], ) pdb.digest = peptide_list - candidates = 
pdb.get_candidates(precursor_mz=496.2, charge=2) - candidates = [x[0] for x in candidates] + candidates, _ = pdb.get_candidates(precursor_mz=496.2, charge=2) assert expected_isotope0123 == candidates From cfd39e80b4898077f92cacc6491a5c891c5a9454 Mon Sep 17 00:00:00 2001 From: VarunAnanth2003 Date: Fri, 23 Aug 2024 14:12:50 -0700 Subject: [PATCH 32/84] all comments addressed --- casanovo/config.yaml | 7 +++- casanovo/data/db_utils.py | 68 +++++++++++++++++++++++++++------ casanovo/denovo/model_runner.py | 1 + tests/conftest.py | 4 ++ tests/unit_tests/test_unit.py | 56 +++++++++++++++++++++++++++ 5 files changed, 123 insertions(+), 13 deletions(-) diff --git a/casanovo/config.yaml b/casanovo/config.yaml index 860cfabb..87795db8 100644 --- a/casanovo/config.yaml +++ b/casanovo/config.yaml @@ -46,7 +46,7 @@ devices: # See pyteomics.parser.expasy_rules for valid enzymes enzyme: "trypsin" # Digestion type for candidate peptide generation. -# Full: standard digestion. Semi: Include products of semi-specific cleavage +# full: standard digestion. semi: Include products of semi-specific cleavage digestion: "full" # Number of allowed missed cleavages when digesting protein missed_cleavages: 0 @@ -55,6 +55,11 @@ missed_cleavages: 0 max_mods: # Maximum peptide length to consider max_peptide_len: 50 +# Toggle allowed modifications on/off +# Permanent fixed mod (don't include): C+57.021 +# Allowed variable mods: M+15.995, N+0.984, Q+0.984, +# Allowed N-terminal mods: +42.011, +43.006, -17.027, +43.006-17.027 +allowed_mods: "M+15.995,N+0.984,Q+0.984,+42.011,+43.006,-17.027,+43.006-17.027" ### diff --git a/casanovo/data/db_utils.py b/casanovo/data/db_utils.py index d249e0c7..2bdf3828 100644 --- a/casanovo/data/db_utils.py +++ b/casanovo/data/db_utils.py @@ -15,16 +15,6 @@ PROTON = 1.00727646677 ISOTOPE_SPACING = 1.003355 -var_mods = { - "d": ["N", "Q"], - "ox": ["M"], - "ace-": True, - "carb-": True, - "nh3x-": True, - "carbnh3x-": True, -} -fixed_mods = {"carbm": ["C"]} - class ProteinDatabase: """ @@ -51,6 +41,8 @@ class ProteinDatabase: The precursor mass tolerance in ppm. isotope_error : List[int] Isotopes to consider when comparing predicted and observed precursor m/z's. + allowed_mods : List[str] + A list of allowed modifications to consider. """ def __init__( @@ -64,7 +56,11 @@ def __init__( max_mods: int, precursor_tolerance: float, isotope_error: List[int], + allowed_mods: List[str], ): + self.fixed_mods, self.var_mods = self._construct_mods_dict( + allowed_mods + ) self.digest = self._digest_fasta( fasta_path, enzyme, @@ -197,8 +193,8 @@ def _digest_fasta( for pep, prot in peptide_list: peptide_isoforms = parser.isoforms( pep, - variable_mods=var_mods, - fixed_mods=fixed_mods, + variable_mods=self.var_mods, + fixed_mods=self.fixed_mods, max_mods=max_mods, ) peptide_isoforms = list( @@ -218,6 +214,54 @@ def _digest_fasta( logger.info("Digestion complete. %d peptides generated.", len(pdb_df)) return pdb_df + def _construct_mods_dict(self, allowed_mods): + """ + Constructs dictionaries of fixed and variable modifications. + + Parameters + ---------- + allowed_mods : str + A comma-separated list of allowed modifications. + + Returns + ------- + fixed_mods : dict + A dictionary of fixed modifications. + var_mods : dict + A dictionary of variable modifications. 
+ """ + fixed_mods = {"carbm": ["C"]} + var_mods = {} + + if allowed_mods is "" or None: + return fixed_mods, var_mods + for mod in allowed_mods.split(","): + if mod == "M+15.995": + if "ox" not in var_mods: + var_mods["ox"] = [] + var_mods["ox"].append("M") + elif mod == "N+0.984": + if "d" not in var_mods: + var_mods["d"] = [] + var_mods["d"].append("N") + elif mod == "Q+0.984": + if "d" not in var_mods: + var_mods["d"] = [] + var_mods["d"].append("Q") + elif mod == "+42.011": + var_mods["ace-"] = True + elif mod == "+43.006": + var_mods["carb-"] = True + elif mod == "-17.027": + var_mods["nh3x-"] = True + elif mod == "+43.006-17.027": + var_mods["carbnh3x-"] = True + else: + logger.error("Modification %s not recognized.", mod) + raise ValueError(f"Modification {mod} not recognized.") + + return fixed_mods, var_mods + @jit def _to_mz(precursor_mass, charge): """ diff --git a/casanovo/denovo/model_runner.py b/casanovo/denovo/model_runner.py index b90f06b0..789c960b 100644 --- a/casanovo/denovo/model_runner.py +++ b/casanovo/denovo/model_runner.py @@ -124,6 +124,7 @@ def db_search( self.config.max_mods, self.config.precursor_mass_tol, self.config.isotope_error_range, + self.config.allowed_mods, ) self.loaders.setup(stage="test", annotated=False) self.trainer.predict(self.model, self.loaders.db_dataloader()) diff --git a/tests/conftest.py b/tests/conftest.py index f20d7879..452316c8 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -299,6 +299,10 @@ def tiny_config(tmp_path): "-17.027": -17.026549, "+43.006-17.027": 25.980265, }, + "allowed_mods": ( + "M+15.995,N+0.984,Q+0.984," + "+42.011,+43.006,-17.027,+43.006-17.027" + ), } cfg_file = tmp_path / "config.yml" diff --git a/tests/unit_tests/test_unit.py b/tests/unit_tests/test_unit.py index 2473a168..a31e2024 100644 --- a/tests/unit_tests/test_unit.py +++ b/tests/unit_tests/test_unit.py @@ -287,6 +287,10 @@ def test_digest_fasta_cleave(tiny_fasta_file): max_mods=0, precursor_tolerance=20, isotope_error=[0], + allowed_mods=( + "M+15.995,N+0.984,Q+0.984," + "+42.011,+43.006,-17.027,+43.006-17.027" + ), ) peptide_list = list(pdb.digest["peptide"]) assert peptide_list == expected @@ -356,6 +360,10 @@ def test_digest_fasta_mods(tiny_fasta_file): max_mods=1, precursor_tolerance=20, isotope_error=[0], + allowed_mods=( + "M+15.995,N+0.984,Q+0.984," + "+42.011,+43.006,-17.027,+43.006-17.027" + ), ) peptide_list = list(pdb.digest["peptide"]) peptide_list = [ @@ -389,6 +397,10 @@ def test_length_restrictions(tiny_fasta_file): max_mods=0, precursor_tolerance=20, isotope_error=[0], + allowed_mods=( + "M+15.995,N+0.984,Q+0.984," + "+42.011,+43.006,-17.027,+43.006-17.027" + ), ) peptide_list = list(pdb.digest["peptide"]) assert peptide_list == expected_long @@ -403,6 +415,10 @@ def test_length_restrictions(tiny_fasta_file): max_mods=0, precursor_tolerance=20, isotope_error=[0], + allowed_mods=( + "M+15.995,N+0.984,Q+0.984," + "+42.011,+43.006,-17.027,+43.006-17.027" + ), ) peptide_list = list(pdb.digest["peptide"]) assert peptide_list == expected_short @@ -433,6 +449,10 @@ def test_digest_fasta_enzyme(tiny_fasta_file): max_mods=0, precursor_tolerance=20, isotope_error=[0], + allowed_mods=( + "M+15.995,N+0.984,Q+0.984," + "+42.011,+43.006,-17.027,+43.006-17.027" + ), ) peptide_list = list(pdb.digest["peptide"]) assert peptide_list == expected_argc @@ -447,6 +467,10 @@ def test_digest_fasta_enzyme(tiny_fasta_file): max_mods=0, precursor_tolerance=20, isotope_error=[0], + allowed_mods=( + "M+15.995,N+0.984,Q+0.984," + 
"+42.011,+43.006,-17.027,+43.006-17.027" + ), ) peptide_list = list(pdb.digest["peptide"]) assert peptide_list == expected_aspn @@ -472,6 +496,10 @@ def test_get_candidates(tiny_fasta_file): max_mods=0, precursor_tolerance=10000, isotope_error=[0], + allowed_mods=( + "M+15.995,N+0.984,Q+0.984," + "+42.011,+43.006,-17.027,+43.006-17.027" + ), ) candidates, _ = pdb.get_candidates(precursor_mz=496.2, charge=2) assert expected_smallwindow == candidates @@ -486,6 +514,10 @@ def test_get_candidates(tiny_fasta_file): max_mods=0, precursor_tolerance=150000, isotope_error=[0], + allowed_mods=( + "M+15.995,N+0.984,Q+0.984," + "+42.011,+43.006,-17.027,+43.006-17.027" + ), ) candidates, _ = pdb.get_candidates(precursor_mz=496.2, charge=2) assert expected_midwindow == candidates @@ -500,6 +532,10 @@ def test_get_candidates(tiny_fasta_file): max_mods=0, precursor_tolerance=600000, isotope_error=[0], + allowed_mods=( + "M+15.995,N+0.984,Q+0.984," + "+42.011,+43.006,-17.027,+43.006-17.027" + ), ) candidates, _ = pdb.get_candidates(precursor_mz=496.2, charge=2) assert expected_widewindow == candidates @@ -563,6 +599,10 @@ def test_get_candidates_isotope_error(tiny_fasta_file): max_mods=0, precursor_tolerance=10000, isotope_error=[0], + allowed_mods=( + "M+15.995,N+0.984,Q+0.984," + "+42.011,+43.006,-17.027,+43.006-17.027" + ), ) pdb.digest = peptide_list candidates, _ = pdb.get_candidates(precursor_mz=496.2, charge=2) @@ -578,6 +618,10 @@ def test_get_candidates_isotope_error(tiny_fasta_file): max_mods=0, precursor_tolerance=10000, isotope_error=[1], + allowed_mods=( + "M+15.995,N+0.984,Q+0.984," + "+42.011,+43.006,-17.027,+43.006-17.027" + ), ) pdb.digest = peptide_list candidates, _ = pdb.get_candidates(precursor_mz=496.2, charge=2) @@ -593,6 +637,10 @@ def test_get_candidates_isotope_error(tiny_fasta_file): max_mods=0, precursor_tolerance=10000, isotope_error=[2], + allowed_mods=( + "M+15.995,N+0.984,Q+0.984," + "+42.011,+43.006,-17.027,+43.006-17.027" + ), ) pdb.digest = peptide_list candidates, _ = pdb.get_candidates(precursor_mz=496.2, charge=2) @@ -608,6 +656,10 @@ def test_get_candidates_isotope_error(tiny_fasta_file): max_mods=0, precursor_tolerance=10000, isotope_error=[3], + allowed_mods=( + "M+15.995,N+0.984,Q+0.984," + "+42.011,+43.006,-17.027,+43.006-17.027" + ), ) pdb.digest = peptide_list candidates, _ = pdb.get_candidates(precursor_mz=496.2, charge=2) @@ -623,6 +675,10 @@ def test_get_candidates_isotope_error(tiny_fasta_file): max_mods=0, precursor_tolerance=10000, isotope_error=[0, 1, 2, 3], + allowed_mods=( + "M+15.995,N+0.984,Q+0.984," + "+42.011,+43.006,-17.027,+43.006-17.027" + ), ) pdb.digest = peptide_list candidates, _ = pdb.get_candidates(precursor_mz=496.2, charge=2) From 106c4ecc524c202a7624d6fa025afc82adac1a0c Mon Sep 17 00:00:00 2001 From: VarunAnanth2003 Date: Wed, 28 Aug 2024 16:41:24 -0700 Subject: [PATCH 33/84] new comments addressed --- casanovo/config.yaml | 22 +-- casanovo/data/db_utils.py | 276 +++++++++++++++++--------------- casanovo/denovo/dataloaders.py | 22 +-- casanovo/denovo/model.py | 2 +- casanovo/denovo/model_runner.py | 2 +- tests/conftest.py | 25 ++- tests/test_integration.py | 4 +- tests/unit_tests/test_unit.py | 74 ++++----- 8 files changed, 209 insertions(+), 218 deletions(-) diff --git a/casanovo/config.yaml b/casanovo/config.yaml index 87795db8..6c9063f5 100644 --- a/casanovo/config.yaml +++ b/casanovo/config.yaml @@ -11,13 +11,13 @@ # Max absolute difference allowed with respect to observed precursor m/z. 
# denovo: Predictions outside the tolerance range are assigned a negative peptide score. -# db-search: Used to create mas windows for candidate generation. +# db-search: Select candidate peptides within the specified precursor m/z tolerance. precursor_mass_tol: 50 # ppm # Isotopes to consider when comparing predicted and observed precursor m/z's. isotope_error_range: [0, 1] -# The minimum length of predicted/scored peptides. +# The minimum length of considered peptides. min_peptide_len: 6 -# Number of spectra or psms in one inference batch. +# Number of spectra in one inference batch. predict_batch_size: 1024 @@ -43,21 +43,21 @@ devices: ### # Enzyme for in silico digestion, used to generate candidate peptides. -# See pyteomics.parser.expasy_rules for valid enzymes +# See pyteomics.parser.expasy_rules for valid enzymes. enzyme: "trypsin" # Digestion type for candidate peptide generation. -# full: standard digestion. semi: Include products of semi-specific cleavage +# full: standard digestion. semi: Include products of semi-specific cleavage. digestion: "full" -# Number of allowed missed cleavages when digesting protein +# Number of allowed missed cleavages when digesting protein. missed_cleavages: 0 -# Maximum number of amino acid modifications per peptide. +# Maximum number of amino acid modifications per peptide, # None generates all possible isoforms as candidates. -max_mods: -# Maximum peptide length to consider +max_mods: 0 +# Maximum peptide length to consider. max_peptide_len: 50 -# Toggle allowed modifications on/off +# Select which modifications from the vocabulary can be used in candidate creation. # Permanent fixed mod (don't include): C+57.021 -# Allowed variable mods: M+15.995, N+0.984, Q+0.984, +# Allowed variable mods: M+15.995, N+0.984, Q+0.984 # Allowed N-terminal mods: +42.011, +43.006, -17.027, +43.006-17.027 allowed_mods: "M+15.995,N+0.984,Q+0.984,+42.011,+43.006,-17.027,+43.006-17.027" diff --git a/casanovo/data/db_utils.py b/casanovo/data/db_utils.py index 2bdf3828..c1d5e91e 100644 --- a/casanovo/data/db_utils.py +++ b/casanovo/data/db_utils.py @@ -2,11 +2,11 @@ import logging import os -from typing import List +from typing import List, Tuple import depthcharge.masses -from numba import jit import pandas as pd +from numba import njit from pyteomics import fasta, parser logger = logging.getLogger("casanovo") @@ -39,10 +39,10 @@ class ProteinDatabase: The maximum number of modifications to allow per peptide. precursor_tolerance : float The precursor mass tolerance in ppm. - isotope_error : List[int] - Isotopes to consider when comparing predicted and observed precursor m/z's. - allowed_mods : List[str] - A list of allowed modifications to consider. + isotope_error : Tuple[int, int] + Isotope range [min, max] to consider when comparing predicted and observed precursor m/z's. + allowed_mods : str + A comma separated string of allowed modifications to consider. 
""" def __init__( @@ -55,13 +55,11 @@ def __init__( max_peptide_len: int, max_mods: int, precursor_tolerance: float, - isotope_error: List[int], - allowed_mods: List[str], + isotope_error: Tuple[int, int], + allowed_mods: str, ): - self.fixed_mods, self.var_mods = self._construct_mods_dict( - allowed_mods - ) - self.digest = self._digest_fasta( + self.fixed_mods, self.var_mods = _construct_mods_dict(allowed_mods) + self.db_peptides = self._digest_fasta( fasta_path, enzyme, digestion, @@ -77,7 +75,7 @@ def get_candidates( self, precursor_mz: float, charge: int, - ): + ) -> List[Tuple[str, str]]: """ Returns a list of candidate peptides that fall within the specified mass range. @@ -96,18 +94,18 @@ def get_candidates( """ candidates = [] - for e in self.isotope_error: + for e in range(self.isotope_error[0], self.isotope_error[1] + 1): iso_shift = ISOTOPE_SPACING * e upper_bound = float( - ProteinDatabase._to_raw_mass(precursor_mz, charge) - iso_shift + _to_raw_mass(precursor_mz, charge) - iso_shift ) * (1 + (self.precursor_tolerance / 1e6)) lower_bound = float( - ProteinDatabase._to_raw_mass(precursor_mz, charge) - iso_shift + _to_raw_mass(precursor_mz, charge) - iso_shift ) * (1 - (self.precursor_tolerance / 1e6)) - window = self.digest[ - (self.digest["calc_mass"] >= lower_bound) - & (self.digest["calc_mass"] <= upper_bound) + window = self.db_peptides[ + (self.db_peptides["calc_mass"] >= lower_bound) + & (self.db_peptides["calc_mass"] <= upper_bound) ] candidates.append(window[["peptide", "calc_mass", "protein"]]) @@ -125,7 +123,7 @@ def _digest_fasta( max_mods: int, min_peptide_length: int, max_peptide_length: int, - ): + ) -> pd.DataFrame: """ Digests a FASTA file and returns the peptides, their masses, and associated protein. @@ -158,13 +156,18 @@ def _digest_fasta( logger.error("File %s does not exist.", fasta_filename) raise FileNotFoundError(f"File {fasta_filename} does not exist.") - fasta_data = fasta.read(fasta_filename) peptide_list = [] if digestion not in ["full", "partial"]: logger.error("Digestion type %s not recognized.", digestion) raise ValueError(f"Digestion type {digestion} not recognized.") + if enzyme not in parser.expasy_rules: + logger.error( + "Enzyme %s not recognized. 
Must be in pyteomics.parser.expasy_rules", + enzyme, + ) + raise ValueError(f"Enzyme {enzyme} not recognized.") semi = digestion == "partial" - for header, seq in fasta_data: + for header, seq in fasta.read(fasta_filename): pep_set = parser.cleave( seq, rule=parser.expasy_rules[enzyme], @@ -182,136 +185,143 @@ def _digest_fasta( aa in pep for aa in "BJOUXZ" ): # Check for incorrect AA letters logger.warn( - "Skipping peptide with ambiguous amino acids: %s", pep + "Skipping peptide with unknown amino acids: %s", pep ) continue peptide_list.append((pep, protein)) # Generate modified peptides mass_calculator = depthcharge.masses.PeptideMass(residues="massivekb") - mod_peptide_list = [] - for pep, prot in peptide_list: - peptide_isoforms = parser.isoforms( - pep, - variable_mods=self.var_mods, - fixed_mods=self.fixed_mods, - max_mods=max_mods, - ) - peptide_isoforms = list( - map(ProteinDatabase._convert_from_modx, peptide_isoforms) - ) - mod_peptide_list.extend( - [mod_pep, mass_calculator.mass(mod_pep), prot] - for mod_pep in peptide_isoforms + peptide_isoforms = [ + ( + parser.isoforms( + pep, + variable_mods=self.var_mods, + fixed_mods=self.fixed_mods, + max_mods=max_mods, + ), + prot, ) - + for pep, prot in peptide_list + ] + mod_peptide_list = [ + (mod_pep, mass_calculator.mass(mod_pep), prot) + for isos, prot in peptide_isoforms + for mod_pep in map(_convert_from_modx, isos) + ] # Create a DataFrame for easy sorting and filtering - pdb_df = pd.DataFrame( + pep_table = pd.DataFrame( mod_peptide_list, columns=["peptide", "calc_mass", "protein"] ) - pdb_df.sort_values(by=["calc_mass", "peptide"], inplace=True) - - logger.info("Digestion complete. %d peptides generated.", len(pdb_df)) - return pdb_df + pep_table.sort_values(by=["calc_mass", "peptide"], inplace=True) - def _construct_mods_dict(self, allowed_mods): - """ - Constructs dictionaries of fixed and variable modifications. + logger.info( + "Digestion complete. %d peptides generated.", len(pep_table) + ) + return pep_table - Parameters - ---------- - allowed_mods : str - A comma-separated list of allowed modifications. - Returns - ------- - fixed_mods : dict - A dictionary of fixed modifications. - var_mods : dict - A dictionary of variable modifications. - """ - fixed_mods = {"carbm": ["C"]} - var_mods = {} - - if allowed_mods is "" or None: - return fixed_mods, var_mods - for mod in allowed_mods.split(","): - if mod == "M+15.995": - if "ox" not in var_mods: - var_mods["ox"] = [] - var_mods["ox"].append("M") - elif mod == "N+0.984": - if "d" not in var_mods: - var_mods["d"] = [] - var_mods["d"].append("N") - elif mod == "Q+0.984": - if "d" not in var_mods: - var_mods["d"] = [] - var_mods["d"].append("Q") - elif mod == "+42.011": - var_mods["ace-"] = True - elif mod == "+43.006": - var_mods["carb-"] = True - elif mod == "-17.027": - var_mods["nh3x-"] = True - elif mod == "+43.006-17.027": - var_mods["carbnh3x-"] = True - else: - logger.error("Modification %s not recognized.", mod) - raise ValueError(f"Modification {mod} not recognized.") +@njit +def _to_mz(precursor_mass, charge): + """ + Convert precursor neutral mass to m/z value. - return fixed_mods, var_mods + Parameters + ---------- + precursor_mass : float + The precursor neutral mass. + charge : int + The precursor charge. + + Returns + ------- + mz : float + The calculated precursor mass-to-charge ratio. + """ + return (precursor_mass + (charge * PROTON)) / charge - @jit - def _to_mz(precursor_mass, charge): - """ - Convert precursor neutral mass to m/z value. 
- Parameters - ---------- - precursor_mass : float - The precursor neutral mass. - charge : int - The precursor charge. +@njit +def _to_raw_mass(mz_mass, charge): + """ + Convert precursor m/z value to neutral mass. - Returns - ------- - mz : float - The calculated precursor mass-to-charge ratio. - """ - return (precursor_mass + (charge * PROTON)) / charge + Parameters + ---------- + mz_mass : float + The precursor mass-to-charge ratio. + charge : int + The precursor charge. + + Returns + ------- + mass : float + The calculated precursor neutral mass. + """ + return charge * (mz_mass - PROTON) - @jit - def _to_raw_mass(mz_mass, charge): - """ - Convert precursor m/z value to neutral mass. - Parameters - ---------- - mz_mass : float - The precursor mass-to-charge ratio. - charge : int - The precursor charge. +def _convert_from_modx(seq: str): + """Converts peptide sequence from modX format to Casanovo-acceptable modifications. - Returns - ------- - mass : float - The calculated precursor neutral mass. - """ - return charge * (mz_mass - PROTON) + Args: + seq (str): Peptide in modX format + """ + seq = seq.replace("carbmC", "C+57.021") # Fixed modification + seq = seq.replace("oxM", "M+15.995") + seq = seq.replace("dN", "N+0.984") + seq = seq.replace("dQ", "Q+0.984") + seq = seq.replace("ace-", "+42.011") + seq = seq.replace("carbnh3x-", "+43.006-17.027") + seq = seq.replace("carb-", "+43.006") + seq = seq.replace("nh3x-", "-17.027") + return seq + + +def _construct_mods_dict(allowed_mods): + """ + Constructs dictionaries of fixed and variable modifications. - def _convert_from_modx(seq: str): - """Converts peptide sequence from modX format to Casanovo-acceptable modifications. + Parameters + ---------- + allowed_mods : str + A comma-separated list of allowed modifications. + + Returns + ------- + fixed_mods : dict + A dictionary of fixed modifications. + var_mods : dict + A dictionary of variable modifications. 
+ """ + fixed_mods = {"carbm": ["C"]} + var_mods = {} - Args: - seq (str): Peptide in modX format - """ - seq = seq.replace("carbmC", "C+57.021") # Fixed modification - seq = seq.replace("oxM", "M+15.995") - seq = seq.replace("dN", "N+0.984") - seq = seq.replace("dQ", "Q+0.984") - seq = seq.replace("ace-", "+42.011") - seq = seq.replace("carbnh3x-", "+43.006-17.027") - seq = seq.replace("carb-", "+43.006") - seq = seq.replace("nh3x-", "-17.027") - return seq + if not allowed_mods: + return fixed_mods, var_mods + for mod in allowed_mods.split(","): + if mod == "M+15.995": + if "ox" not in var_mods: + var_mods["ox"] = [] + var_mods["ox"].append("M") + elif mod == "N+0.984": + if "d" not in var_mods: + var_mods["d"] = [] + var_mods["d"].append("N") + elif mod == "Q+0.984": + if "d" not in var_mods: + var_mods["d"] = [] + var_mods["d"].append("Q") + elif mod == "+42.011": + var_mods["ace-"] = True + elif mod == "+43.006": + var_mods["carb-"] = True + elif mod == "-17.027": + var_mods["nh3x-"] = True + elif mod == "+43.006-17.027": + var_mods["carbnh3x-"] = True + else: + logger.error("Modification %s not recognized.", mod) + raise ValueError(f"Modification {mod} not recognized.") + + return fixed_mods, var_mods diff --git a/casanovo/denovo/dataloaders.py b/casanovo/denovo/dataloaders.py index 2d9e200b..a6ab8ddc 100644 --- a/casanovo/denovo/dataloaders.py +++ b/casanovo/denovo/dataloaders.py @@ -1,14 +1,14 @@ """Data loaders for the de novo sequencing task.""" import functools -import os import logging +import os from typing import List, Optional, Tuple -from depthcharge.data import AnnotatedSpectrumIndex import lightning.pytorch as pl import numpy as np import torch +from depthcharge.data import AnnotatedSpectrumIndex from ..data import db_utils from ..data.datasets import ( @@ -89,7 +89,7 @@ def __init__( self.train_dataset = None self.valid_dataset = None self.test_dataset = None - self.pdb = None + self.protein_database = None def setup(self, stage: str = None, annotated: bool = True) -> None: """ @@ -187,7 +187,9 @@ def db_dataloader(self) -> torch.utils.data.DataLoader: return torch.utils.data.DataLoader( self.test_dataset, batch_size=self.eval_batch_size, - collate_fn=functools.partial(prepare_psm_batch, pdb=self.pdb), + collate_fn=functools.partial( + prepare_psm_batch, protein_database=self.protein_database + ), pin_memory=True, num_workers=self.n_workers, shuffle=False, @@ -235,8 +237,8 @@ def prepare_batch( def prepare_psm_batch( batch: List[Tuple[torch.Tensor, float, int, str]], - pdb: db_utils.ProteinDatabase, -): + protein_database: db_utils.ProteinDatabase, +) -> Tuple[torch.Tensor, torch.Tensor, np.ndarray, List[str], List[str]]: """ Collate MS/MS spectra into a batch for DB search. @@ -249,7 +251,7 @@ def prepare_psm_batch( A batch of data from an AnnotatedSpectrumDataset, consisting of for each spectrum (i) a tensor with the m/z and intensity peak values, (ii), the precursor m/z, (iii) the precursor charge, (iv) the spectrum identifier. - pdb : db_utils.ProteinDatabase + protein_database : db_utils.ProteinDatabase The protein database to use for candidate peptide retrieval. 
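The collate function being wired up here fans every spectrum out to one row per candidate peptide, so the scoring model sees plain (spectrum, precursor, peptide) rows. A minimal sketch of that tiling step, with a made-up peak count and candidates borrowed from the test fixtures:

    import torch

    spectrum = torch.rand(75, 2)            # 75 peaks x (m/z, intensity), dummy data
    candidates = ["LLIYGASTR", "ATSIPAR"]   # candidate peptides for this spectrum

    # Repeat the spectrum once per candidate so each PSM becomes its own row.
    tiled = spectrum.unsqueeze(0).repeat(len(candidates), 1, 1)
    assert tiled.shape == (2, 75, 2)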
Returns @@ -283,9 +285,9 @@ def prepare_psm_batch( all_peptides = [] all_proteins = [] for idx in range(len(batch)): - digest_data = pdb.get_candidates( - float(precursor_mzs[idx]), - float(precursor_charges[idx]), + digest_data = protein_database.get_candidates( + precursor_mzs[idx].type(torch.float64).item(), + precursor_charges[idx].type(torch.int64).item(), ) try: spec_peptides, pep_protein = digest_data diff --git a/casanovo/denovo/model.py b/casanovo/denovo/model.py index 79848682..b38a27c0 100644 --- a/casanovo/denovo/model.py +++ b/casanovo/denovo/model.py @@ -991,7 +991,7 @@ def configure_optimizers( class DbSpec2Pep(Spec2Pep): """ - Subclass of Spec2Pep for the use of Casanovo as an \ + Subclass of Spec2Pep for the use of Casanovo as an MS/MS database search score function. Uses teacher forcing to 'query' Casanovo for its score for each AA diff --git a/casanovo/denovo/model_runner.py b/casanovo/denovo/model_runner.py index 789c960b..6928560d 100644 --- a/casanovo/denovo/model_runner.py +++ b/casanovo/denovo/model_runner.py @@ -114,7 +114,7 @@ def db_search( self.writer.set_ms_run(test_index.ms_files) self.initialize_data_module(test_index=test_index) - self.loaders.pdb = db_utils.ProteinDatabase( + self.loaders.protein_database = db_utils.ProteinDatabase( fasta_path, self.config.enzyme, self.config.digestion, diff --git a/tests/conftest.py b/tests/conftest.py index 452316c8..90e522fe 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -28,7 +28,7 @@ def tiny_fasta_file(tmp_path): @pytest.fixture -def mgf_db_search(tmp_path): +def mgf_medium(tmp_path): """An MGF file with 7 spectra and scan numbers, C+57.021 mass modification considered""" peptides = [ "ATSIPAR", @@ -40,10 +40,10 @@ def mgf_db_search(tmp_path): "FSGSGSGTDFTLTISSLQPEDFAVYYCQQDYNLP", ] mgf_file = tmp_path / "db_search.mgf" - return _create_mgf(peptides, mgf_file, c_mod=True) + return _create_mgf(peptides, mgf_file, mod_aa_mass={"C": 160.030649}) -def _create_mgf(peptides, mgf_file, random_state=42, c_mod=False): +def _create_mgf(peptides, mgf_file, random_state=42, mod_aa_mass=None): """ Create a fake MGF file from one or more peptides. @@ -55,9 +55,9 @@ def _create_mgf(peptides, mgf_file, random_state=42, c_mod=False): The MGF file to create. random_state : int or numpy.random.Generator, optional The random seed. The charge states are chosen to be 2 or 3 randomly. - c_mod : bool, optional - Whether to use the constant carbamidomethylation - of C in mass calculations. + mod_aa_mass : dict, optional + A dictionary that specifies the modified masses of amino acids. + e.g. {"C": 160.030649} for carbamidomethylated C. Returns ------- @@ -65,7 +65,7 @@ def _create_mgf(peptides, mgf_file, random_state=42, c_mod=False): """ rng = np.random.default_rng(random_state) entries = [ - _create_mgf_entry(p, rng.choice([2, 3]), c_mod) for p in peptides + _create_mgf_entry(p, rng.choice([2, 3]), mod_aa_mass) for p in peptides ] with mgf_file.open("w+") as mgf_ref: mgf_ref.write("\n".join(entries)) @@ -73,7 +73,7 @@ def _create_mgf(peptides, mgf_file, random_state=42, c_mod=False): return mgf_file -def _create_mgf_entry(peptide, charge=2, c_mod=False): +def _create_mgf_entry(peptide, charge=2, mod_aa_mass=None): """ Create a MassIVE-KB style MGF entry for a single PSM. @@ -83,20 +83,19 @@ def _create_mgf_entry(peptide, charge=2, c_mod=False): A peptide sequence. charge : int, optional The peptide charge state. - c_mod : bool, optional - Whether to use the constant carbamidomethylation - of C in mass calculations. 
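The fixture change above replaces the boolean `c_mod` flag with an explicit `mod_aa_mass` mapping. A small sketch of how such a mapping feeds the pyteomics mass helpers used in the fixtures; 160.030649 is carbamidomethylated cysteine, and the peptide mirrors one of the fixture sequences:

    from pyteomics.mass import calculate_mass, fast_mass, std_aa_mass

    peptide, charge = "VTLSCR", 2

    # Unmodified precursor m/z.
    mz_plain = calculate_mass(peptide, charge=charge)

    # Same peptide with a fixed C modification supplied via aa_mass.
    aa_mass = dict(std_aa_mass)   # copy, then override the C mass
    aa_mass["C"] = 160.030649
    mz_mod = fast_mass(peptide, charge=charge, aa_mass=aa_mass)

    assert mz_mod > mz_plain      # +57.021 Da on C, spread over the charge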
+ mod_aa_mass : dict, optional + A dictionary that specifies the modified masses of amino acids. Returns ------- str The PSM entry in an MGF file format. """ - if not c_mod: + if mod_aa_mass is None: precursor_mz = calculate_mass(peptide, charge=int(charge)) else: aa_mass = std_aa_mass - aa_mass.update({"C": 160.030649}) # Carbamidomethylated C mass + aa_mass.update(mod_aa_mass) precursor_mz = fast_mass(peptide, charge=int(charge), aa_mass=aa_mass) mzs, intensities = _peptide_to_peaks(peptide, charge) frags = "\n".join([f"{m} {i}" for m, i in zip(mzs, intensities)]) diff --git a/tests/test_integration.py b/tests/test_integration.py index 61f735c3..4275d792 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -8,7 +8,7 @@ def test_db_search( - mgf_db_search, tiny_fasta_file, tiny_config, tmp_path, monkeypatch + mgf_medium, tiny_fasta_file, tiny_config, tmp_path, monkeypatch ): # Run a command: monkeypatch.setattr(casanovo, "__version__", "4.1.0") @@ -24,7 +24,7 @@ def test_db_search( tiny_config, "--output", str(output_path), - str(mgf_db_search), + str(mgf_medium), str(tiny_fasta_file), ] diff --git a/tests/unit_tests/test_unit.py b/tests/unit_tests/test_unit.py index a31e2024..51d9a3c9 100644 --- a/tests/unit_tests/test_unit.py +++ b/tests/unit_tests/test_unit.py @@ -286,13 +286,13 @@ def test_digest_fasta_cleave(tiny_fasta_file): max_peptide_len=50, max_mods=0, precursor_tolerance=20, - isotope_error=[0], + isotope_error=[0, 0], allowed_mods=( "M+15.995,N+0.984,Q+0.984," "+42.011,+43.006,-17.027,+43.006-17.027" ), ) - peptide_list = list(pdb.digest["peptide"]) + peptide_list = list(pdb.db_peptides["peptide"]) assert peptide_list == expected @@ -359,13 +359,13 @@ def test_digest_fasta_mods(tiny_fasta_file): max_peptide_len=50, max_mods=1, precursor_tolerance=20, - isotope_error=[0], + isotope_error=[0, 0], allowed_mods=( "M+15.995,N+0.984,Q+0.984," "+42.011,+43.006,-17.027,+43.006-17.027" ), ) - peptide_list = list(pdb.digest["peptide"]) + peptide_list = list(pdb.db_peptides["peptide"]) peptide_list = [ x for x in peptide_list @@ -396,13 +396,13 @@ def test_length_restrictions(tiny_fasta_file): max_peptide_len=50, max_mods=0, precursor_tolerance=20, - isotope_error=[0], + isotope_error=[0, 0], allowed_mods=( "M+15.995,N+0.984,Q+0.984," "+42.011,+43.006,-17.027,+43.006-17.027" ), ) - peptide_list = list(pdb.digest["peptide"]) + peptide_list = list(pdb.db_peptides["peptide"]) assert peptide_list == expected_long pdb = db_utils.ProteinDatabase( @@ -414,13 +414,13 @@ def test_length_restrictions(tiny_fasta_file): max_peptide_len=8, max_mods=0, precursor_tolerance=20, - isotope_error=[0], + isotope_error=[0, 0], allowed_mods=( "M+15.995,N+0.984,Q+0.984," "+42.011,+43.006,-17.027,+43.006-17.027" ), ) - peptide_list = list(pdb.digest["peptide"]) + peptide_list = list(pdb.db_peptides["peptide"]) assert peptide_list == expected_short @@ -448,13 +448,13 @@ def test_digest_fasta_enzyme(tiny_fasta_file): max_peptide_len=50, max_mods=0, precursor_tolerance=20, - isotope_error=[0], + isotope_error=[0, 0], allowed_mods=( "M+15.995,N+0.984,Q+0.984," "+42.011,+43.006,-17.027,+43.006-17.027" ), ) - peptide_list = list(pdb.digest["peptide"]) + peptide_list = list(pdb.db_peptides["peptide"]) assert peptide_list == expected_argc pdb = db_utils.ProteinDatabase( @@ -466,13 +466,13 @@ def test_digest_fasta_enzyme(tiny_fasta_file): max_peptide_len=50, max_mods=0, precursor_tolerance=20, - isotope_error=[0], + isotope_error=[0, 0], allowed_mods=( "M+15.995,N+0.984,Q+0.984," 
"+42.011,+43.006,-17.027,+43.006-17.027" ), ) - peptide_list = list(pdb.digest["peptide"]) + peptide_list = list(pdb.db_peptides["peptide"]) assert peptide_list == expected_aspn @@ -495,7 +495,7 @@ def test_get_candidates(tiny_fasta_file): max_peptide_len=50, max_mods=0, precursor_tolerance=10000, - isotope_error=[0], + isotope_error=[0, 0], allowed_mods=( "M+15.995,N+0.984,Q+0.984," "+42.011,+43.006,-17.027,+43.006-17.027" @@ -513,7 +513,7 @@ def test_get_candidates(tiny_fasta_file): max_peptide_len=50, max_mods=0, precursor_tolerance=150000, - isotope_error=[0], + isotope_error=[0, 0], allowed_mods=( "M+15.995,N+0.984,Q+0.984," "+42.011,+43.006,-17.027,+43.006-17.027" @@ -531,7 +531,7 @@ def test_get_candidates(tiny_fasta_file): max_peptide_len=50, max_mods=0, precursor_tolerance=600000, - isotope_error=[0], + isotope_error=[0, 0], allowed_mods=( "M+15.995,N+0.984,Q+0.984," "+42.011,+43.006,-17.027,+43.006-17.027" @@ -584,9 +584,8 @@ def test_get_candidates_isotope_error(tiny_fasta_file): peptide_list.sort_values("calc_mass", inplace=True) expected_isotope0 = list("UTSRQPONMLKJIHGFEDCB") - expected_isotope1 = list("VUTSRQPONMLKJIHGFEDC") - expected_isotope2 = list("WVUTSRQPONMLKJIHGFED") - expected_isotope3 = list("XWVUTSRQPONMLKJIHGFE") + expected_isotope01 = list("VUTSRQPONMLKJIHGFEDCB") + expected_isotope012 = list("WVUTSRQPONMLKJIHGFEDCB") expected_isotope0123 = list("XWVUTSRQPONMLKJIHGFEDCB") pdb = db_utils.ProteinDatabase( @@ -598,13 +597,13 @@ def test_get_candidates_isotope_error(tiny_fasta_file): max_peptide_len=0, max_mods=0, precursor_tolerance=10000, - isotope_error=[0], + isotope_error=[0, 0], allowed_mods=( "M+15.995,N+0.984,Q+0.984," "+42.011,+43.006,-17.027,+43.006-17.027" ), ) - pdb.digest = peptide_list + pdb.db_peptides = peptide_list candidates, _ = pdb.get_candidates(precursor_mz=496.2, charge=2) assert expected_isotope0 == candidates @@ -617,15 +616,15 @@ def test_get_candidates_isotope_error(tiny_fasta_file): max_peptide_len=0, max_mods=0, precursor_tolerance=10000, - isotope_error=[1], + isotope_error=[0, 1], allowed_mods=( "M+15.995,N+0.984,Q+0.984," "+42.011,+43.006,-17.027,+43.006-17.027" ), ) - pdb.digest = peptide_list + pdb.db_peptides = peptide_list candidates, _ = pdb.get_candidates(precursor_mz=496.2, charge=2) - assert expected_isotope1 == candidates + assert expected_isotope01 == candidates pdb = db_utils.ProteinDatabase( fasta_path=str(tiny_fasta_file), @@ -636,15 +635,15 @@ def test_get_candidates_isotope_error(tiny_fasta_file): max_peptide_len=0, max_mods=0, precursor_tolerance=10000, - isotope_error=[2], + isotope_error=[0, 2], allowed_mods=( "M+15.995,N+0.984,Q+0.984," "+42.011,+43.006,-17.027,+43.006-17.027" ), ) - pdb.digest = peptide_list + pdb.db_peptides = peptide_list candidates, _ = pdb.get_candidates(precursor_mz=496.2, charge=2) - assert expected_isotope2 == candidates + assert expected_isotope012 == candidates pdb = db_utils.ProteinDatabase( fasta_path=str(tiny_fasta_file), @@ -655,32 +654,13 @@ def test_get_candidates_isotope_error(tiny_fasta_file): max_peptide_len=0, max_mods=0, precursor_tolerance=10000, - isotope_error=[3], + isotope_error=[0, 3], allowed_mods=( "M+15.995,N+0.984,Q+0.984," "+42.011,+43.006,-17.027,+43.006-17.027" ), ) - pdb.digest = peptide_list - candidates, _ = pdb.get_candidates(precursor_mz=496.2, charge=2) - assert expected_isotope3 == candidates - - pdb = db_utils.ProteinDatabase( - fasta_path=str(tiny_fasta_file), - enzyme="trypsin", - digestion="full", - missed_cleavages=0, - min_peptide_len=0, - 
max_peptide_len=0, - max_mods=0, - precursor_tolerance=10000, - isotope_error=[0, 1, 2, 3], - allowed_mods=( - "M+15.995,N+0.984,Q+0.984," - "+42.011,+43.006,-17.027,+43.006-17.027" - ), - ) - pdb.digest = peptide_list + pdb.db_peptides = peptide_list candidates, _ = pdb.get_candidates(precursor_mz=496.2, charge=2) assert expected_isotope0123 == candidates From 0dfdb2cb89514a0189e20cf19c231363567a7c72 Mon Sep 17 00:00:00 2001 From: VarunAnanth2003 Date: Mon, 2 Sep 2024 17:48:31 -0700 Subject: [PATCH 34/84] final adjustments added --- casanovo/config.yaml | 19 ++-- casanovo/data/db_utils.py | 158 +++++++++++++++++++------------- casanovo/denovo/dataloaders.py | 7 +- casanovo/denovo/model.py | 4 +- casanovo/denovo/model_runner.py | 17 ++-- tests/conftest.py | 43 ++++++++- tests/unit_tests/test_unit.py | 151 +++++++++++++++++++----------- 7 files changed, 254 insertions(+), 145 deletions(-) diff --git a/casanovo/config.yaml b/casanovo/config.yaml index 6c9063f5..af2f79d1 100644 --- a/casanovo/config.yaml +++ b/casanovo/config.yaml @@ -17,6 +17,8 @@ precursor_mass_tol: 50 # ppm isotope_error_range: [0, 1] # The minimum length of considered peptides. min_peptide_len: 6 +# The maximum length of considered peptides. +max_length: 100 # Number of spectra in one inference batch. predict_batch_size: 1024 @@ -47,19 +49,20 @@ devices: enzyme: "trypsin" # Digestion type for candidate peptide generation. # full: standard digestion. semi: Include products of semi-specific cleavage. +# Can also take a regex expression to specify custom digestion rules. digestion: "full" # Number of allowed missed cleavages when digesting protein. missed_cleavages: 0 # Maximum number of amino acid modifications per peptide, # None generates all possible isoforms as candidates. -max_mods: 0 -# Maximum peptide length to consider. -max_peptide_len: 50 +max_mods: 1 # Select which modifications from the vocabulary can be used in candidate creation. -# Permanent fixed mod (don't include): C+57.021 -# Allowed variable mods: M+15.995, N+0.984, Q+0.984 -# Allowed N-terminal mods: +42.011, +43.006, -17.027, +43.006-17.027 -allowed_mods: "M+15.995,N+0.984,Q+0.984,+42.011,+43.006,-17.027,+43.006-17.027" +# Format: Comma-separated list of "aa:mod_residue", +# where aa is a standard amino acid or "X" for an N-terminal mod +# and mod_residue is a key from the "residues" dictionary. +# Example: "M:M+15.995,X:+43.006-17.027" +allowed_fixed_mods: "C:C+57.021" +allowed_var_mods: "M:M+15.995,N:N+0.984,Q:Q+0.984,X:+42.011,X:+43.006,X:-17.027,X:+43.006-17.027" ### @@ -111,8 +114,6 @@ dropout: 0.0 # Number of dimensions to use for encoding peak intensity. # Projected up to `dim_model` by default and summed with the peak m/z encoding. dim_intensity: -# Max decoded peptide length. -max_length: 100 # The number of iterations for the linear warm-up of the learning rate. warmup_iters: 100_000 # The number of iterations for the cosine half period of the learning rate. 
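The isotope windows quoted in the test comments above follow directly from `precursor_mass_tol` and `isotope_error_range`: the observed m/z is turned into a neutral mass, shifted by whole isotope spacings, and widened by the ppm tolerance. A rough worked example for m/z 496.2 at charge 2 with a 10,000 ppm tolerance; the two constants are assumed values of the module-level PROTON and ISOTOPE_SPACING:

    PROTON = 1.00727646677
    ISOTOPE_SPACING = 1.003355

    mz, charge, tol_ppm = 496.2, 2, 10000
    neutral = charge * (mz - PROTON)            # ~990.385 Da

    for isotope in range(0, 2):                 # isotope_error_range: [0, 1]
        shifted = neutral - isotope * ISOTOPE_SPACING
        lo = shifted * (1 - tol_ppm / 1e6)
        hi = shifted * (1 + tol_ppm / 1e6)
        print(isotope, round(lo, 3), round(hi, 3))
    # isotope 0 gives roughly [980.48, 1000.29], matching the first window in the tests.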
diff --git a/casanovo/data/db_utils.py b/casanovo/data/db_utils.py index c1d5e91e..c9201538 100644 --- a/casanovo/data/db_utils.py +++ b/casanovo/data/db_utils.py @@ -1,13 +1,17 @@ """Unique methods used within db-search mode""" +import functools import logging import os +import re +import string from typing import List, Tuple import depthcharge.masses import pandas as pd +import pyteomics.fasta as fasta +import pyteomics.parser as parser from numba import njit -from pyteomics import fasta, parser logger = logging.getLogger("casanovo") @@ -41,8 +45,12 @@ class ProteinDatabase: The precursor mass tolerance in ppm. isotope_error : Tuple[int, int] Isotope range [min, max] to consider when comparing predicted and observed precursor m/z's. - allowed_mods : str - A comma separated string of allowed modifications to consider. + allowed_fixed_mods : str + A comma separated string of fixed modifications to consider. + allowed_var_mods : str + A comma separated string of variable modifications to consider. + residues : dict + A dictionary of amino acid masses. """ def __init__( @@ -56,9 +64,14 @@ def __init__( max_mods: int, precursor_tolerance: float, isotope_error: Tuple[int, int], - allowed_mods: str, + allowed_fixed_mods: str, + allowed_var_mods: str, + residues: dict, ): - self.fixed_mods, self.var_mods = _construct_mods_dict(allowed_mods) + self.residues = residues + self.fixed_mods, self.var_mods, self.swap_map = _construct_mods_dict( + allowed_fixed_mods, allowed_var_mods + ) self.db_peptides = self._digest_fasta( fasta_path, enzyme, @@ -88,20 +101,22 @@ def get_candidates( Returns ------- - candidates : List[Tuple[str, str]] - A list of candidate peptides and associated - protein. + candidates : pd.Series + A series of candidate peptides. """ candidates = [] for e in range(self.isotope_error[0], self.isotope_error[1] + 1): iso_shift = ISOTOPE_SPACING * e - upper_bound = float( - _to_raw_mass(precursor_mz, charge) - iso_shift - ) * (1 + (self.precursor_tolerance / 1e6)) - lower_bound = float( + shift_raw_mass = float( _to_raw_mass(precursor_mz, charge) - iso_shift - ) * (1 - (self.precursor_tolerance / 1e6)) + ) + upper_bound = shift_raw_mass * ( + 1 + (self.precursor_tolerance / 1e6) + ) + lower_bound = shift_raw_mass * ( + 1 - (self.precursor_tolerance / 1e6) + ) window = self.db_peptides[ (self.db_peptides["calc_mass"] >= lower_bound) @@ -112,7 +127,25 @@ def get_candidates( candidates = pd.concat(candidates) candidates.drop_duplicates(inplace=True) candidates.sort_values(by=["calc_mass", "peptide"], inplace=True) - return list(candidates["peptide"]), list(candidates["protein"]) + return candidates["peptide"], candidates["protein"] + + def get_associated_protein(self, peptide: str) -> str: + """ + Returns the associated protein for a given peptide. + + Parameters + ---------- + peptide : str + The peptide sequence. + + Returns + ------- + protein : str + The associated protein. + """ + return self.db_peptides[self.db_peptides["peptide"] == peptide][ + "protein" + ].values[0] def _digest_fasta( self, @@ -161,16 +194,18 @@ def _digest_fasta( logger.error("Digestion type %s not recognized.", digestion) raise ValueError(f"Digestion type {digestion} not recognized.") if enzyme not in parser.expasy_rules: - logger.error( - "Enzyme %s not recognized. Must be in pyteomics.parser.expasy_rules", + logger.info( + "Enzyme %s not recognized. 
Interpreting as cleavage rule.", enzyme, ) - raise ValueError(f"Enzyme {enzyme} not recognized.") semi = digestion == "partial" + valid_aa = set( + [re.sub(r"[^A-Z]+", "", res) for res in self.residues.keys()] + ) for header, seq in fasta.read(fasta_filename): pep_set = parser.cleave( seq, - rule=parser.expasy_rules[enzyme], + rule=enzyme, missed_cleavages=missed_cleavages, semi=semi, ) @@ -181,9 +216,8 @@ def _digest_fasta( or len(pep) > max_peptide_length ): continue - if any( - aa in pep for aa in "BJOUXZ" - ): # Check for incorrect AA letters + + if any(aa not in valid_aa for aa in pep): logger.warn( "Skipping peptide with unknown amino acids: %s", pep ) @@ -207,7 +241,10 @@ def _digest_fasta( mod_peptide_list = [ (mod_pep, mass_calculator.mass(mod_pep), prot) for isos, prot in peptide_isoforms - for mod_pep in map(_convert_from_modx, isos) + for mod_pep in map( + functools.partial(_convert_from_modx, swap_map=self.swap_map), + isos, + ) ] # Create a DataFrame for easy sorting and filtering pep_table = pd.DataFrame( @@ -261,31 +298,29 @@ def _to_raw_mass(mz_mass, charge): return charge * (mz_mass - PROTON) -def _convert_from_modx(seq: str): +def _convert_from_modx(seq: str, swap_map: dict) -> str: """Converts peptide sequence from modX format to Casanovo-acceptable modifications. Args: - seq (str): Peptide in modX format + seq : str + Peptide in modX format + swap_map : dict + Dictionary that allows for swapping of modX to Casanovo-acceptable modifications. """ - seq = seq.replace("carbmC", "C+57.021") # Fixed modification - seq = seq.replace("oxM", "M+15.995") - seq = seq.replace("dN", "N+0.984") - seq = seq.replace("dQ", "Q+0.984") - seq = seq.replace("ace-", "+42.011") - seq = seq.replace("carbnh3x-", "+43.006-17.027") - seq = seq.replace("carb-", "+43.006") - seq = seq.replace("nh3x-", "-17.027") - return seq - - -def _construct_mods_dict(allowed_mods): + regex = re.compile("(%s)" % "|".join(map(re.escape, swap_map.keys()))) + return regex.sub(lambda x: swap_map[x.group()], seq) + + +def _construct_mods_dict(allowed_fixed_mods, allowed_var_mods): """ Constructs dictionaries of fixed and variable modifications. Parameters ---------- - allowed_mods : str - A comma-separated list of allowed modifications. + allowed_fixed_mods : str + A comma separated string of fixed modifications to consider. + allowed_var_mods : str + A comma separated string of variable modifications to consider. Returns ------- @@ -293,35 +328,26 @@ def _construct_mods_dict(allowed_mods): A dictionary of fixed modifications. var_mods : dict A dictionary of variable modifications. + swap_map : dict + A dictionary that allows for swapping of modX to Casanovo-acceptable modifications. 
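With the change above, an enzyme string that is not in `pyteomics.parser.expasy_rules` is no longer an error; it is handed to `parser.cleave` as the cleavage rule, so both named enzymes and custom regex rules work. A small sketch, using two tryptic peptides from the tests concatenated into one toy sequence:

    from pyteomics import parser

    seq = "MEAPAQLLFLLLLWLPDTTREIVMTQSPPTLSLSPGER"

    # Named enzyme, looked up explicitly here via the expasy table.
    tryptic = parser.cleave(seq, parser.expasy_rules["trypsin"], missed_cleavages=0)

    # Custom rule: the string is treated as a regex of cleavage sites,
    # e.g. "R" cuts after every arginine (arg-c-like behaviour).
    argc_like = parser.cleave(seq, "R", missed_cleavages=0)

    print(sorted(tryptic), sorted(argc_like))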
""" - fixed_mods = {"carbm": ["C"]} - var_mods = {} + swap_map = {} + fixed_mods = {} + for idx, mod in enumerate(allowed_fixed_mods.split(",")): + aa, mod_aa = mod.split(":") + mod_id = string.ascii_lowercase[idx] + fixed_mods[mod_id] = [aa] + swap_map[f"{mod_id}{aa}"] = f"{mod_aa}" - if not allowed_mods: - return fixed_mods, var_mods - for mod in allowed_mods.split(","): - if mod == "M+15.995": - if "ox" not in var_mods: - var_mods["ox"] = [] - var_mods["ox"].append("M") - elif mod == "N+0.984": - if "d" not in var_mods: - var_mods["d"] = [] - var_mods["d"].append("N") - elif mod == "Q+0.984": - if "d" not in var_mods: - var_mods["d"] = [] - var_mods["d"].append("Q") - elif mod == "+42.011": - var_mods["ace-"] = True - elif mod == "+43.006": - var_mods["carb-"] = True - elif mod == "-17.027": - var_mods["nh3x-"] = True - elif mod == "+43.006-17.027": - var_mods["carbnh3x-"] = True + var_mods = {} + for idx, mod in enumerate(allowed_var_mods.split(",")): + aa, mod_aa = mod.split(":") + mod_id = string.ascii_lowercase[idx] + if aa == "X": + var_mods[f"{mod_id}-"] = True + swap_map[f"{mod_id}-"] = f"{mod_aa}" else: - logger.error("Modification %s not recognized.", mod) - raise ValueError(f"Modification {mod} not recognized.") + var_mods[mod_id] = [aa] + swap_map[f"{mod_id}{aa}"] = f"{mod_aa}" - return fixed_mods, var_mods + return fixed_mods, var_mods, swap_map diff --git a/casanovo/denovo/dataloaders.py b/casanovo/denovo/dataloaders.py index a6ab8ddc..6e8c93b3 100644 --- a/casanovo/denovo/dataloaders.py +++ b/casanovo/denovo/dataloaders.py @@ -267,7 +267,7 @@ def prepare_psm_batch( all_peptides : List[str] The candidate peptides for each spectrum. all_proteins : List[str] - The associated proteins for each candidate peptide. + The proteins associated with each candidate peptide. """ spectra, precursor_mzs, precursor_charges, spectrum_ids = list(zip(*batch)) spectra = torch.nn.utils.rnn.pad_sequence(spectra, batch_first=True) @@ -285,12 +285,11 @@ def prepare_psm_batch( all_peptides = [] all_proteins = [] for idx in range(len(batch)): - digest_data = protein_database.get_candidates( + spec_peptides, spec_proteins = protein_database.get_candidates( precursor_mzs[idx].type(torch.float64).item(), precursor_charges[idx].type(torch.int64).item(), ) try: - spec_peptides, pep_protein = digest_data all_spectra.append( spectra[idx].unsqueeze(0).repeat(len(spec_peptides), 1, 1) ) @@ -299,7 +298,7 @@ def prepare_psm_batch( ) all_spectrum_ids.extend([spectrum_ids[idx]] * len(spec_peptides)) all_peptides.extend(spec_peptides) - all_proteins.extend(pep_protein) + all_proteins.extend(spec_proteins) except ValueError: logger.warning( "No candidates found for spectrum %s", spectrum_ids[idx] diff --git a/casanovo/denovo/model.py b/casanovo/denovo/model.py index b38a27c0..dc7e5f7b 100644 --- a/casanovo/denovo/model.py +++ b/casanovo/denovo/model.py @@ -1017,9 +1017,9 @@ def predict_step(self, batch, *args): Parameters ---------- - batch : Tuple[torch.Tensor, torch.Tensor, np.array, List[str], List[str]] + batch : Tuple[torch.Tensor, torch.Tensor, np.array, List[str]] A batch of (i) MS/MS spectra, (ii) precursor information, (iii) - spectrum identifiers, (iv) candidate peptides, (v) associated proteins. + spectrum identifiers, (iv) candidate peptides, (v) associated protein. 
Returns ------- diff --git a/casanovo/denovo/model_runner.py b/casanovo/denovo/model_runner.py index 6928560d..395320e5 100644 --- a/casanovo/denovo/model_runner.py +++ b/casanovo/denovo/model_runner.py @@ -110,22 +110,25 @@ def db_search( self.initialize_model(train=False, db_search=True) self.model.out_writer = self.writer self.model.psm_batch_size = self.config.predict_batch_size - test_index = self._get_index(peak_path, False, "db search") - self.writer.set_ms_run(test_index.ms_files) - - self.initialize_data_module(test_index=test_index) - self.loaders.protein_database = db_utils.ProteinDatabase( + self.model.protein_database = db_utils.ProteinDatabase( fasta_path, self.config.enzyme, self.config.digestion, self.config.missed_cleavages, self.config.min_peptide_len, - self.config.max_peptide_len, + self.config.max_length, self.config.max_mods, self.config.precursor_mass_tol, self.config.isotope_error_range, - self.config.allowed_mods, + self.config.allowed_fixed_mods, + self.config.allowed_var_mods, + self.config.residues, ) + test_index = self._get_index(peak_path, False, "db search") + self.writer.set_ms_run(test_index.ms_files) + + self.initialize_data_module(test_index=test_index) + self.loaders.protein_database = self.model.protein_database self.loaders.setup(stage="test", annotated=False) self.trainer.predict(self.model, self.loaders.db_dataloader()) diff --git a/tests/conftest.py b/tests/conftest.py index 90e522fe..3b94896a 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -241,7 +241,7 @@ def tiny_config(tmp_path): "precursor_mass_tol": 5, "isotope_error_range": [0, 1], "min_peptide_len": 6, - "max_peptide_len": 50, + "max_length": 100, "enzyme": "trypsin", "digestion": "full", "missed_cleavages": 0, @@ -263,7 +263,6 @@ def tiny_config(tmp_path): "dim_model": 512, "dropout": 0.0, "dim_intensity": None, - "max_length": 100, "learning_rate": 5e-4, "weight_decay": 1e-5, "train_batch_size": 32, @@ -298,9 +297,10 @@ def tiny_config(tmp_path): "-17.027": -17.026549, "+43.006-17.027": 25.980265, }, - "allowed_mods": ( - "M+15.995,N+0.984,Q+0.984," - "+42.011,+43.006,-17.027,+43.006-17.027" + "allowed_fixed_mods": "C:C+57.021", + "allowed_var_mods": ( + "M:M+15.995,N:N+0.984,Q:Q+0.984," + "X:+42.011,X:+43.006,X:-17.027,X:+43.006-17.027" ), } @@ -311,6 +311,39 @@ def tiny_config(tmp_path): return cfg_file +@pytest.fixture +def residues_dict(): + return { + "G": 57.021464, + "A": 71.037114, + "S": 87.032028, + "P": 97.052764, + "V": 99.068414, + "T": 101.047670, + "C+57.021": 160.030649, + "L": 113.084064, + "I": 113.084064, + "N": 114.042927, + "D": 115.026943, + "Q": 128.058578, + "K": 128.094963, + "E": 129.042593, + "M": 131.040485, + "H": 137.058912, + "F": 147.068414, + "R": 156.101111, + "Y": 163.063329, + "W": 186.079313, + "M+15.995": 147.035400, + "N+0.984": 115.026943, + "Q+0.984": 129.042594, + "+42.011": 42.010565, + "+43.006": 43.005814, + "-17.027": -17.026549, + "+43.006-17.027": 25.980265, + } + + @pytest.fixture def tide_dir_small(tmp_path): """A directory with a very small TIDE search result.""" diff --git a/tests/unit_tests/test_unit.py b/tests/unit_tests/test_unit.py index 51d9a3c9..c06ec788 100644 --- a/tests/unit_tests/test_unit.py +++ b/tests/unit_tests/test_unit.py @@ -221,7 +221,7 @@ def test_calc_match_score(): assert np.sum(masked_per_aa_scores.numpy()[3]) == 3 -def test_digest_fasta_cleave(tiny_fasta_file): +def test_digest_fasta_cleave(tiny_fasta_file, residues_dict): # No missed cleavages expected_normal = [ @@ -287,16 +287,18 @@ def 
test_digest_fasta_cleave(tiny_fasta_file): max_mods=0, precursor_tolerance=20, isotope_error=[0, 0], - allowed_mods=( - "M+15.995,N+0.984,Q+0.984," - "+42.011,+43.006,-17.027,+43.006-17.027" + allowed_fixed_mods="C:C+57.021", + allowed_var_mods=( + "M:M+15.995,N:N+0.984,Q:Q+0.984," + "X:+42.011,X:+43.006,X:-17.027,X:+43.006-17.027" ), + residues=residues_dict, ) peptide_list = list(pdb.db_peptides["peptide"]) assert peptide_list == expected -def test_digest_fasta_mods(tiny_fasta_file): +def test_digest_fasta_mods(tiny_fasta_file, residues_dict): # 1 modification allowed # fixed: C+57.02146 # variable: 1M+15.994915,1N+0.984016,1Q+0.984016 @@ -360,10 +362,12 @@ def test_digest_fasta_mods(tiny_fasta_file): max_mods=1, precursor_tolerance=20, isotope_error=[0, 0], - allowed_mods=( - "M+15.995,N+0.984,Q+0.984," - "+42.011,+43.006,-17.027,+43.006-17.027" + allowed_fixed_mods="C:C+57.021", + allowed_var_mods=( + "M:M+15.995,N:N+0.984,Q:Q+0.984," + "X:+42.011,X:+43.006,X:-17.027,X:+43.006-17.027" ), + residues=residues_dict, ) peptide_list = list(pdb.db_peptides["peptide"]) peptide_list = [ @@ -376,7 +380,7 @@ def test_digest_fasta_mods(tiny_fasta_file): assert peptide_list == expected_1mod -def test_length_restrictions(tiny_fasta_file): +def test_length_restrictions(tiny_fasta_file, residues_dict): # length between 20 and 50 expected_long = [ "MEAPAQLLFLLLLWLPDTTR", @@ -397,10 +401,12 @@ def test_length_restrictions(tiny_fasta_file): max_mods=0, precursor_tolerance=20, isotope_error=[0, 0], - allowed_mods=( - "M+15.995,N+0.984,Q+0.984," - "+42.011,+43.006,-17.027,+43.006-17.027" + allowed_fixed_mods="C:C+57.021", + allowed_var_mods=( + "M:M+15.995,N:N+0.984,Q:Q+0.984," + "X:+42.011,X:+43.006,X:-17.027,X:+43.006-17.027" ), + residues=residues_dict, ) peptide_list = list(pdb.db_peptides["peptide"]) assert peptide_list == expected_long @@ -415,16 +421,18 @@ def test_length_restrictions(tiny_fasta_file): max_mods=0, precursor_tolerance=20, isotope_error=[0, 0], - allowed_mods=( - "M+15.995,N+0.984,Q+0.984," - "+42.011,+43.006,-17.027,+43.006-17.027" + allowed_fixed_mods="C:C+57.021", + allowed_var_mods=( + "M:M+15.995,N:N+0.984,Q:Q+0.984," + "X:+42.011,X:+43.006,X:-17.027,X:+43.006-17.027" ), + residues=residues_dict, ) peptide_list = list(pdb.db_peptides["peptide"]) assert peptide_list == expected_short -def test_digest_fasta_enzyme(tiny_fasta_file): +def test_digest_fasta_enzyme(tiny_fasta_file, residues_dict): # arg-c enzyme expected_argc = [ "ATSIPAR", @@ -449,10 +457,12 @@ def test_digest_fasta_enzyme(tiny_fasta_file): max_mods=0, precursor_tolerance=20, isotope_error=[0, 0], - allowed_mods=( - "M+15.995,N+0.984,Q+0.984," - "+42.011,+43.006,-17.027,+43.006-17.027" + allowed_fixed_mods="C:C+57.021", + allowed_var_mods=( + "M:M+15.995,N:N+0.984,Q:Q+0.984," + "X:+42.011,X:+43.006,X:-17.027,X:+43.006-17.027" ), + residues=residues_dict, ) peptide_list = list(pdb.db_peptides["peptide"]) assert peptide_list == expected_argc @@ -467,16 +477,39 @@ def test_digest_fasta_enzyme(tiny_fasta_file): max_mods=0, precursor_tolerance=20, isotope_error=[0, 0], - allowed_mods=( - "M+15.995,N+0.984,Q+0.984," - "+42.011,+43.006,-17.027,+43.006-17.027" + allowed_fixed_mods="C:C+57.021", + allowed_var_mods=( + "M:M+15.995,N:N+0.984,Q:Q+0.984," + "X:+42.011,X:+43.006,X:-17.027,X:+43.006-17.027" ), + residues=residues_dict, ) peptide_list = list(pdb.db_peptides["peptide"]) assert peptide_list == expected_aspn + # Tesr regex rule instead of named enzyme + pdb = db_utils.ProteinDatabase( + fasta_path=str(tiny_fasta_file), 
+ enzyme="R", + digestion="full", + missed_cleavages=0, + min_peptide_len=6, + max_peptide_len=50, + max_mods=0, + precursor_tolerance=20, + isotope_error=[0, 0], + allowed_fixed_mods="C:C+57.021", + allowed_var_mods=( + "M:M+15.995,N:N+0.984,Q:Q+0.984," + "X:+42.011,X:+43.006,X:-17.027,X:+43.006-17.027" + ), + residues=residues_dict, + ) + peptide_list = list(pdb.db_peptides["peptide"]) + assert peptide_list == expected_argc + -def test_get_candidates(tiny_fasta_file): +def test_get_candidates(tiny_fasta_file, residues_dict): # precursor_window is 10000 expected_smallwindow = ["LLIYGASTR"] @@ -496,13 +529,15 @@ def test_get_candidates(tiny_fasta_file): max_mods=0, precursor_tolerance=10000, isotope_error=[0, 0], - allowed_mods=( - "M+15.995,N+0.984,Q+0.984," - "+42.011,+43.006,-17.027,+43.006-17.027" + allowed_fixed_mods="C:C+57.021", + allowed_var_mods=( + "M:M+15.995,N:N+0.984,Q:Q+0.984," + "X:+42.011,X:+43.006,X:-17.027,X:+43.006-17.027" ), + residues=residues_dict, ) candidates, _ = pdb.get_candidates(precursor_mz=496.2, charge=2) - assert expected_smallwindow == candidates + assert expected_smallwindow == list(candidates) pdb = db_utils.ProteinDatabase( fasta_path=str(tiny_fasta_file), @@ -514,13 +549,15 @@ def test_get_candidates(tiny_fasta_file): max_mods=0, precursor_tolerance=150000, isotope_error=[0, 0], - allowed_mods=( - "M+15.995,N+0.984,Q+0.984," - "+42.011,+43.006,-17.027,+43.006-17.027" + allowed_fixed_mods="C:C+57.021", + allowed_var_mods=( + "M:M+15.995,N:N+0.984,Q:Q+0.984," + "X:+42.011,X:+43.006,X:-17.027,X:+43.006-17.027" ), + residues=residues_dict, ) candidates, _ = pdb.get_candidates(precursor_mz=496.2, charge=2) - assert expected_midwindow == candidates + assert expected_midwindow == list(candidates) pdb = db_utils.ProteinDatabase( fasta_path=str(tiny_fasta_file), @@ -532,16 +569,18 @@ def test_get_candidates(tiny_fasta_file): max_mods=0, precursor_tolerance=600000, isotope_error=[0, 0], - allowed_mods=( - "M+15.995,N+0.984,Q+0.984," - "+42.011,+43.006,-17.027,+43.006-17.027" + allowed_fixed_mods="C:C+57.021", + allowed_var_mods=( + "M:M+15.995,N:N+0.984,Q:Q+0.984," + "X:+42.011,X:+43.006,X:-17.027,X:+43.006-17.027" ), + residues=residues_dict, ) candidates, _ = pdb.get_candidates(precursor_mz=496.2, charge=2) - assert expected_widewindow == candidates + assert expected_widewindow == list(candidates) -def test_get_candidates_isotope_error(tiny_fasta_file): +def test_get_candidates_isotope_error(tiny_fasta_file, residues_dict): # Tide isotope error windows for 496.2, 2+: # 0: [980.481617, 1000.289326] @@ -598,14 +637,16 @@ def test_get_candidates_isotope_error(tiny_fasta_file): max_mods=0, precursor_tolerance=10000, isotope_error=[0, 0], - allowed_mods=( - "M+15.995,N+0.984,Q+0.984," - "+42.011,+43.006,-17.027,+43.006-17.027" + allowed_fixed_mods="C:C+57.021", + allowed_var_mods=( + "M:M+15.995,N:N+0.984,Q:Q+0.984," + "X:+42.011,X:+43.006,X:-17.027,X:+43.006-17.027" ), + residues=residues_dict, ) pdb.db_peptides = peptide_list candidates, _ = pdb.get_candidates(precursor_mz=496.2, charge=2) - assert expected_isotope0 == candidates + assert expected_isotope0 == list(candidates) pdb = db_utils.ProteinDatabase( fasta_path=str(tiny_fasta_file), @@ -617,14 +658,16 @@ def test_get_candidates_isotope_error(tiny_fasta_file): max_mods=0, precursor_tolerance=10000, isotope_error=[0, 1], - allowed_mods=( - "M+15.995,N+0.984,Q+0.984," - "+42.011,+43.006,-17.027,+43.006-17.027" + allowed_fixed_mods="C:C+57.021", + allowed_var_mods=( + "M:M+15.995,N:N+0.984,Q:Q+0.984," + 
"X:+42.011,X:+43.006,X:-17.027,X:+43.006-17.027" ), + residues=residues_dict, ) pdb.db_peptides = peptide_list candidates, _ = pdb.get_candidates(precursor_mz=496.2, charge=2) - assert expected_isotope01 == candidates + assert expected_isotope01 == list(candidates) pdb = db_utils.ProteinDatabase( fasta_path=str(tiny_fasta_file), @@ -636,14 +679,16 @@ def test_get_candidates_isotope_error(tiny_fasta_file): max_mods=0, precursor_tolerance=10000, isotope_error=[0, 2], - allowed_mods=( - "M+15.995,N+0.984,Q+0.984," - "+42.011,+43.006,-17.027,+43.006-17.027" + allowed_fixed_mods="C:C+57.021", + allowed_var_mods=( + "M:M+15.995,N:N+0.984,Q:Q+0.984," + "X:+42.011,X:+43.006,X:-17.027,X:+43.006-17.027" ), + residues=residues_dict, ) pdb.db_peptides = peptide_list candidates, _ = pdb.get_candidates(precursor_mz=496.2, charge=2) - assert expected_isotope012 == candidates + assert expected_isotope012 == list(candidates) pdb = db_utils.ProteinDatabase( fasta_path=str(tiny_fasta_file), @@ -655,14 +700,16 @@ def test_get_candidates_isotope_error(tiny_fasta_file): max_mods=0, precursor_tolerance=10000, isotope_error=[0, 3], - allowed_mods=( - "M+15.995,N+0.984,Q+0.984," - "+42.011,+43.006,-17.027,+43.006-17.027" + allowed_fixed_mods="C:C+57.021", + allowed_var_mods=( + "M:M+15.995,N:N+0.984,Q:Q+0.984," + "X:+42.011,X:+43.006,X:-17.027,X:+43.006-17.027" ), + residues=residues_dict, ) pdb.db_peptides = peptide_list candidates, _ = pdb.get_candidates(precursor_mz=496.2, charge=2) - assert expected_isotope0123 == candidates + assert expected_isotope0123 == list(candidates) def test_beam_search_decode(): From 4a5b238133aaa1db27f584f52d9328b2f90c35f4 Mon Sep 17 00:00:00 2001 From: VarunAnanth2003 Date: Tue, 3 Sep 2024 10:29:23 -0700 Subject: [PATCH 35/84] minor changes regarding formatting and small efficiency boosts --- casanovo/config.yaml | 8 +++--- casanovo/data/db_utils.py | 52 ++++++++++++++++++++-------------- casanovo/denovo/dataloaders.py | 13 ++++----- casanovo/denovo/model.py | 2 +- 4 files changed, 42 insertions(+), 33 deletions(-) diff --git a/casanovo/config.yaml b/casanovo/config.yaml index af2f79d1..17cba6a4 100644 --- a/casanovo/config.yaml +++ b/casanovo/config.yaml @@ -46,23 +46,23 @@ devices: # Enzyme for in silico digestion, used to generate candidate peptides. # See pyteomics.parser.expasy_rules for valid enzymes. +# Can also take a regex expression to specify custom digestion rules. enzyme: "trypsin" # Digestion type for candidate peptide generation. # full: standard digestion. semi: Include products of semi-specific cleavage. -# Can also take a regex expression to specify custom digestion rules. digestion: "full" # Number of allowed missed cleavages when digesting protein. missed_cleavages: 0 -# Maximum number of amino acid modifications per peptide, +# Maximum number of variable amino acid modifications per peptide, # None generates all possible isoforms as candidates. max_mods: 1 # Select which modifications from the vocabulary can be used in candidate creation. # Format: Comma-separated list of "aa:mod_residue", -# where aa is a standard amino acid or "X" for an N-terminal mod +# where aa is a standard amino acid or "nterm" for an N-terminal mod # and mod_residue is a key from the "residues" dictionary. 
# Example: "M:M+15.995,X:+43.006-17.027" allowed_fixed_mods: "C:C+57.021" -allowed_var_mods: "M:M+15.995,N:N+0.984,Q:Q+0.984,X:+42.011,X:+43.006,X:-17.027,X:+43.006-17.027" +allowed_var_mods: "M:M+15.995,N:N+0.984,Q:Q+0.984,nterm:+42.011,nterm:+43.006,nterm:-17.027,nterm:+43.006-17.027" ### diff --git a/casanovo/data/db_utils.py b/casanovo/data/db_utils.py index c9201538..86c2112d 100644 --- a/casanovo/data/db_utils.py +++ b/casanovo/data/db_utils.py @@ -5,6 +5,7 @@ import os import re import string +from collections import defaultdict from typing import List, Tuple import depthcharge.masses @@ -13,6 +14,7 @@ import pyteomics.parser as parser from numba import njit + logger = logging.getLogger("casanovo") # CONSTANTS @@ -72,6 +74,9 @@ def __init__( self.fixed_mods, self.var_mods, self.swap_map = _construct_mods_dict( allowed_fixed_mods, allowed_var_mods ) + self.swap_regex = re.compile( + "(%s)" % "|".join(map(re.escape, self.swap_map.keys())) + ) self.db_peptides = self._digest_fasta( fasta_path, enzyme, @@ -167,6 +172,7 @@ def _digest_fasta( enzyme : str The enzyme to use for digestion. See pyteomics.parser.expasy_rules for valid enzymes. + Can also be a regex pattern. digestion : str The type of digestion to perform. Either 'full' or 'partial'. missed_cleavages : int @@ -199,9 +205,7 @@ def _digest_fasta( enzyme, ) semi = digestion == "partial" - valid_aa = set( - [re.sub(r"[^A-Z]+", "", res) for res in self.residues.keys()] - ) + valid_aa = set(list(self.residues.keys()) + ["C"]) for header, seq in fasta.read(fasta_filename): pep_set = parser.cleave( seq, @@ -212,17 +216,16 @@ def _digest_fasta( protein = header.split()[0] for pep in pep_set: if ( - len(pep) < min_peptide_length - or len(pep) > max_peptide_length + len(pep) >= min_peptide_length + or len(pep) <= max_peptide_length ): - continue - - if any(aa not in valid_aa for aa in pep): - logger.warn( - "Skipping peptide with unknown amino acids: %s", pep - ) - continue - peptide_list.append((pep, protein)) + if any(aa not in valid_aa for aa in pep): + logger.warn( + "Skipping peptide with unknown amino acids: %s", + pep, + ) + else: + peptide_list.append((pep, protein)) # Generate modified peptides mass_calculator = depthcharge.masses.PeptideMass(residues="massivekb") @@ -242,7 +245,11 @@ def _digest_fasta( (mod_pep, mass_calculator.mass(mod_pep), prot) for isos, prot in peptide_isoforms for mod_pep in map( - functools.partial(_convert_from_modx, swap_map=self.swap_map), + functools.partial( + _convert_from_modx, + swap_map=self.swap_map, + swap_regex=self.swap_regex, + ), isos, ) ] @@ -259,7 +266,7 @@ def _digest_fasta( @njit -def _to_mz(precursor_mass, charge): +def _to_mz(precursor_mass: float, charge: int) -> float: """ Convert precursor neutral mass to m/z value. @@ -279,7 +286,7 @@ def _to_mz(precursor_mass, charge): @njit -def _to_raw_mass(mz_mass, charge): +def _to_raw_mass(mz_mass: float, charge: int) -> float: """ Convert precursor m/z value to neutral mass. @@ -298,7 +305,7 @@ def _to_raw_mass(mz_mass, charge): return charge * (mz_mass - PROTON) -def _convert_from_modx(seq: str, swap_map: dict) -> str: +def _convert_from_modx(seq: str, swap_map: dict, swap_regex: str) -> str: """Converts peptide sequence from modX format to Casanovo-acceptable modifications. Args: @@ -306,12 +313,15 @@ def _convert_from_modx(seq: str, swap_map: dict) -> str: Peptide in modX format swap_map : dict Dictionary that allows for swapping of modX to Casanovo-acceptable modifications. + swap_regex : str + Regular expression to match modX format. 
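A compact illustration of the substitution this function performs: every modX label in `swap_map` is escaped into a single alternation pattern, and one `re.sub` pass rewrites all labels at once. The swap map below is hand-written for the sketch; the real one comes from `_construct_mods_dict`.

    import re

    swap_map = {"aC": "C+57.021", "bM": "M+15.995", "c-": "+42.011"}
    swap_regex = re.compile("(%s)" % "|".join(map(re.escape, swap_map.keys())))

    modx = "c-bMTaCR"   # N-terminal +42.011, oxidised M, carbamidomethylated C
    converted = swap_regex.sub(lambda m: swap_map[m.group()], modx)
    assert converted == "+42.011M+15.995TC+57.021R"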
""" - regex = re.compile("(%s)" % "|".join(map(re.escape, swap_map.keys()))) - return regex.sub(lambda x: swap_map[x.group()], seq) + return swap_regex.sub(lambda x: swap_map[x.group()], seq) -def _construct_mods_dict(allowed_fixed_mods, allowed_var_mods): +def _construct_mods_dict( + allowed_fixed_mods: str, allowed_var_mods: str +) -> Tuple[dict, dict, dict]: """ Constructs dictionaries of fixed and variable modifications. @@ -343,7 +353,7 @@ def _construct_mods_dict(allowed_fixed_mods, allowed_var_mods): for idx, mod in enumerate(allowed_var_mods.split(",")): aa, mod_aa = mod.split(":") mod_id = string.ascii_lowercase[idx] - if aa == "X": + if aa == "nterm": var_mods[f"{mod_id}-"] = True swap_map[f"{mod_id}-"] = f"{mod_aa}" else: diff --git a/casanovo/denovo/dataloaders.py b/casanovo/denovo/dataloaders.py index 6e8c93b3..4793e2f3 100644 --- a/casanovo/denovo/dataloaders.py +++ b/casanovo/denovo/dataloaders.py @@ -272,11 +272,11 @@ def prepare_psm_batch( spectra, precursor_mzs, precursor_charges, spectrum_ids = list(zip(*batch)) spectra = torch.nn.utils.rnn.pad_sequence(spectra, batch_first=True) - precursor_mzs = torch.tensor(precursor_mzs) - precursor_charges = torch.tensor(precursor_charges) - precursor_masses = (precursor_mzs - 1.007276) * precursor_charges + precursor_mzs_t = torch.tensor(precursor_mzs) + precursor_charges_t = torch.tensor(precursor_charges) + precursor_masses_t = (precursor_mzs_t - 1.007276) * precursor_charges_t precursors = torch.vstack( - [precursor_masses, precursor_charges, precursor_mzs] + [precursor_masses_t, precursor_charges_t, precursor_mzs_t] ).T.float() all_spectra = [] @@ -286,8 +286,8 @@ def prepare_psm_batch( all_proteins = [] for idx in range(len(batch)): spec_peptides, spec_proteins = protein_database.get_candidates( - precursor_mzs[idx].type(torch.float64).item(), - precursor_charges[idx].type(torch.int64).item(), + precursor_mzs[idx], + precursor_charges[idx], ) try: all_spectra.append( @@ -303,7 +303,6 @@ def prepare_psm_batch( logger.warning( "No candidates found for spectrum %s", spectrum_ids[idx] ) - continue return ( torch.cat(all_spectra, dim=0), diff --git a/casanovo/denovo/model.py b/casanovo/denovo/model.py index dc7e5f7b..31757d81 100644 --- a/casanovo/denovo/model.py +++ b/casanovo/denovo/model.py @@ -1009,7 +1009,7 @@ class DbSpec2Pep(Spec2Pep): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - self.psm_batch_size = 1024 + self.psm_batch_size = None def predict_step(self, batch, *args): """ From 4352bbdfb41aeeb61675c9a290f7bc83eae2f717 Mon Sep 17 00:00:00 2001 From: VarunAnanth2003 Date: Tue, 3 Sep 2024 11:24:18 -0700 Subject: [PATCH 36/84] changes before reformatting config --- casanovo/data/db_utils.py | 21 +++++++++++++-------- tests/conftest.py | 2 +- tests/unit_tests/test_unit.py | 28 ++++++++++++++-------------- 3 files changed, 28 insertions(+), 23 deletions(-) diff --git a/casanovo/data/db_utils.py b/casanovo/data/db_utils.py index 86c2112d..26f7152c 100644 --- a/casanovo/data/db_utils.py +++ b/casanovo/data/db_utils.py @@ -77,7 +77,7 @@ def __init__( self.swap_regex = re.compile( "(%s)" % "|".join(map(re.escape, self.swap_map.keys())) ) - self.db_peptides = self._digest_fasta( + self.db_peptides, self.prot_map = self._digest_fasta( fasta_path, enzyme, digestion, @@ -146,11 +146,9 @@ def get_associated_protein(self, peptide: str) -> str: Returns ------- protein : str - The associated protein. + The associated protein(s). 
""" - return self.db_peptides[self.db_peptides["peptide"] == peptide][ - "protein" - ].values[0] + return ",".join(self.prot_map[peptide]) def _digest_fasta( self, @@ -186,9 +184,11 @@ def _digest_fasta( Returns ------- - mod_peptide_list : pd.DataFrame + pep_table : pd.DataFrame A Pandas DataFrame with peptide, mass, and protein columns. Sorted by neutral mass in ascending order. + prot_map : dict + A dictionary mapping peptides to associated proteins. """ # Verify the existence of the file: if not os.path.isfile(fasta_filename): @@ -217,7 +217,7 @@ def _digest_fasta( for pep in pep_set: if ( len(pep) >= min_peptide_length - or len(pep) <= max_peptide_length + and len(pep) <= max_peptide_length ): if any(aa not in valid_aa for aa in pep): logger.warn( @@ -259,10 +259,15 @@ def _digest_fasta( ) pep_table.sort_values(by=["calc_mass", "peptide"], inplace=True) + # Create a dictionary mapping for easy accession of associated proteins + prot_map = defaultdict(list) + for pep, _, prot in mod_peptide_list: + prot_map[pep].append(prot) + logger.info( "Digestion complete. %d peptides generated.", len(pep_table) ) - return pep_table + return pep_table, prot_map @njit diff --git a/tests/conftest.py b/tests/conftest.py index 3b94896a..bf02a3ab 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -300,7 +300,7 @@ def tiny_config(tmp_path): "allowed_fixed_mods": "C:C+57.021", "allowed_var_mods": ( "M:M+15.995,N:N+0.984,Q:Q+0.984," - "X:+42.011,X:+43.006,X:-17.027,X:+43.006-17.027" + "nterm:+42.011,nterm:+43.006,nterm:-17.027,nterm:+43.006-17.027" ), } diff --git a/tests/unit_tests/test_unit.py b/tests/unit_tests/test_unit.py index c06ec788..d03d6f7f 100644 --- a/tests/unit_tests/test_unit.py +++ b/tests/unit_tests/test_unit.py @@ -290,7 +290,7 @@ def test_digest_fasta_cleave(tiny_fasta_file, residues_dict): allowed_fixed_mods="C:C+57.021", allowed_var_mods=( "M:M+15.995,N:N+0.984,Q:Q+0.984," - "X:+42.011,X:+43.006,X:-17.027,X:+43.006-17.027" + "nterm:+42.011,nterm:+43.006,nterm:-17.027,nterm:+43.006-17.027" ), residues=residues_dict, ) @@ -365,7 +365,7 @@ def test_digest_fasta_mods(tiny_fasta_file, residues_dict): allowed_fixed_mods="C:C+57.021", allowed_var_mods=( "M:M+15.995,N:N+0.984,Q:Q+0.984," - "X:+42.011,X:+43.006,X:-17.027,X:+43.006-17.027" + "nterm:+42.011,nterm:+43.006,nterm:-17.027,nterm:+43.006-17.027" ), residues=residues_dict, ) @@ -404,7 +404,7 @@ def test_length_restrictions(tiny_fasta_file, residues_dict): allowed_fixed_mods="C:C+57.021", allowed_var_mods=( "M:M+15.995,N:N+0.984,Q:Q+0.984," - "X:+42.011,X:+43.006,X:-17.027,X:+43.006-17.027" + "nterm:+42.011,nterm:+43.006,nterm:-17.027,nterm:+43.006-17.027" ), residues=residues_dict, ) @@ -424,7 +424,7 @@ def test_length_restrictions(tiny_fasta_file, residues_dict): allowed_fixed_mods="C:C+57.021", allowed_var_mods=( "M:M+15.995,N:N+0.984,Q:Q+0.984," - "X:+42.011,X:+43.006,X:-17.027,X:+43.006-17.027" + "nterm:+42.011,nterm:+43.006,nterm:-17.027,nterm:+43.006-17.027" ), residues=residues_dict, ) @@ -460,7 +460,7 @@ def test_digest_fasta_enzyme(tiny_fasta_file, residues_dict): allowed_fixed_mods="C:C+57.021", allowed_var_mods=( "M:M+15.995,N:N+0.984,Q:Q+0.984," - "X:+42.011,X:+43.006,X:-17.027,X:+43.006-17.027" + "nterm:+42.011,nterm:+43.006,nterm:-17.027,nterm:+43.006-17.027" ), residues=residues_dict, ) @@ -480,7 +480,7 @@ def test_digest_fasta_enzyme(tiny_fasta_file, residues_dict): allowed_fixed_mods="C:C+57.021", allowed_var_mods=( "M:M+15.995,N:N+0.984,Q:Q+0.984," - "X:+42.011,X:+43.006,X:-17.027,X:+43.006-17.027" + 
"nterm:+42.011,nterm:+43.006,nterm:-17.027,nterm:+43.006-17.027" ), residues=residues_dict, ) @@ -501,7 +501,7 @@ def test_digest_fasta_enzyme(tiny_fasta_file, residues_dict): allowed_fixed_mods="C:C+57.021", allowed_var_mods=( "M:M+15.995,N:N+0.984,Q:Q+0.984," - "X:+42.011,X:+43.006,X:-17.027,X:+43.006-17.027" + "nterm:+42.011,nterm:+43.006,nterm:-17.027,nterm:+43.006-17.027" ), residues=residues_dict, ) @@ -532,7 +532,7 @@ def test_get_candidates(tiny_fasta_file, residues_dict): allowed_fixed_mods="C:C+57.021", allowed_var_mods=( "M:M+15.995,N:N+0.984,Q:Q+0.984," - "X:+42.011,X:+43.006,X:-17.027,X:+43.006-17.027" + "nterm:+42.011,nterm:+43.006,nterm:-17.027,nterm:+43.006-17.027" ), residues=residues_dict, ) @@ -552,7 +552,7 @@ def test_get_candidates(tiny_fasta_file, residues_dict): allowed_fixed_mods="C:C+57.021", allowed_var_mods=( "M:M+15.995,N:N+0.984,Q:Q+0.984," - "X:+42.011,X:+43.006,X:-17.027,X:+43.006-17.027" + "nterm:+42.011,nterm:+43.006,nterm:-17.027,nterm:+43.006-17.027" ), residues=residues_dict, ) @@ -572,7 +572,7 @@ def test_get_candidates(tiny_fasta_file, residues_dict): allowed_fixed_mods="C:C+57.021", allowed_var_mods=( "M:M+15.995,N:N+0.984,Q:Q+0.984," - "X:+42.011,X:+43.006,X:-17.027,X:+43.006-17.027" + "nterm:+42.011,nterm:+43.006,nterm:-17.027,nterm:+43.006-17.027" ), residues=residues_dict, ) @@ -640,7 +640,7 @@ def test_get_candidates_isotope_error(tiny_fasta_file, residues_dict): allowed_fixed_mods="C:C+57.021", allowed_var_mods=( "M:M+15.995,N:N+0.984,Q:Q+0.984," - "X:+42.011,X:+43.006,X:-17.027,X:+43.006-17.027" + "nterm:+42.011,nterm:+43.006,nterm:-17.027,nterm:+43.006-17.027" ), residues=residues_dict, ) @@ -661,7 +661,7 @@ def test_get_candidates_isotope_error(tiny_fasta_file, residues_dict): allowed_fixed_mods="C:C+57.021", allowed_var_mods=( "M:M+15.995,N:N+0.984,Q:Q+0.984," - "X:+42.011,X:+43.006,X:-17.027,X:+43.006-17.027" + "nterm:+42.011,nterm:+43.006,nterm:-17.027,nterm:+43.006-17.027" ), residues=residues_dict, ) @@ -682,7 +682,7 @@ def test_get_candidates_isotope_error(tiny_fasta_file, residues_dict): allowed_fixed_mods="C:C+57.021", allowed_var_mods=( "M:M+15.995,N:N+0.984,Q:Q+0.984," - "X:+42.011,X:+43.006,X:-17.027,X:+43.006-17.027" + "nterm:+42.011,nterm:+43.006,nterm:-17.027,nterm:+43.006-17.027" ), residues=residues_dict, ) @@ -703,7 +703,7 @@ def test_get_candidates_isotope_error(tiny_fasta_file, residues_dict): allowed_fixed_mods="C:C+57.021", allowed_var_mods=( "M:M+15.995,N:N+0.984,Q:Q+0.984," - "X:+42.011,X:+43.006,X:-17.027,X:+43.006-17.027" + "nterm:+42.011,nterm:+43.006,nterm:-17.027,nterm:+43.006-17.027" ), residues=residues_dict, ) From ddff67fb03b06d3b27f73ff58dfdd478cd8a826b Mon Sep 17 00:00:00 2001 From: VarunAnanth2003 Date: Tue, 3 Sep 2024 12:00:28 -0700 Subject: [PATCH 37/84] replace all occurences of "max_length" with "max_peptide_len" --- casanovo/config.py | 2 +- casanovo/config.yaml | 2 +- casanovo/denovo/model.py | 26 +++++++++++++------------- casanovo/denovo/model_runner.py | 6 +++--- tests/conftest.py | 2 +- tests/unit_tests/test_unit.py | 10 +++++----- 6 files changed, 24 insertions(+), 24 deletions(-) diff --git a/casanovo/config.py b/casanovo/config.py index 792da35a..8577d087 100644 --- a/casanovo/config.py +++ b/casanovo/config.py @@ -59,7 +59,7 @@ class Config: n_layers=int, dropout=float, dim_intensity=int, - max_length=int, + max_peptide_len=int, residues=dict, n_log=int, tb_summarywriter=str, diff --git a/casanovo/config.yaml b/casanovo/config.yaml index 17cba6a4..e8732b20 100644 --- a/casanovo/config.yaml +++ 
b/casanovo/config.yaml @@ -18,7 +18,7 @@ isotope_error_range: [0, 1] # The minimum length of considered peptides. min_peptide_len: 6 # The maximum length of considered peptides. -max_length: 100 +max_peptide_len: 100 # Number of spectra in one inference batch. predict_batch_size: 1024 diff --git a/casanovo/denovo/model.py b/casanovo/denovo/model.py index 31757d81..6fe34bfa 100644 --- a/casanovo/denovo/model.py +++ b/casanovo/denovo/model.py @@ -46,7 +46,7 @@ class Spec2Pep(pl.LightningModule, ModelMixin): (``dim_model - dim_intensity``) are reserved for encoding the m/z value. If ``None``, the intensity will be projected up to ``dim_model`` using a linear layer, then summed with the m/z encoding for each peak. - max_length : int + max_peptide_len : int The maximum peptide length to decode. residues : Union[Dict[str, float], str] The amino acid dictionary and their masses. By default ("canonical) this @@ -99,7 +99,7 @@ def __init__( n_layers: int = 9, dropout: float = 0.0, dim_intensity: Optional[int] = None, - max_length: int = 100, + max_peptide_len: int = 100, residues: Union[Dict[str, float], str] = "canonical", max_charge: int = 5, precursor_mass_tol: float = 50, @@ -158,7 +158,7 @@ def __init__( self.opt_kwargs = kwargs # Data properties. - self.max_length = max_length + self.max_peptide_len = max_peptide_len self.residues = residues self.precursor_mass_tol = precursor_mass_tol self.isotope_error_range = isotope_error_range @@ -241,7 +241,7 @@ def beam_search_decode( # Sizes. batch = spectra.shape[0] # B - length = self.max_length + 1 # L + length = self.max_peptide_len + 1 # L vocab = self.decoder.vocab_size + 1 # V beam = self.n_beams # S @@ -269,7 +269,7 @@ def beam_search_decode( scores = einops.rearrange(scores, "B L V S -> (B S) L V") # The main decoding loop. - for step in range(0, self.max_length): + for step in range(0, self.max_peptide_len): # Terminate beams exceeding the precursor m/z tolerance and track # all finished beams (either terminated or stop token predicted). ( @@ -323,10 +323,10 @@ def _finish_beams( Parameters ---------- - tokens : torch.Tensor of shape (n_spectra * n_beams, max_length) + tokens : torch.Tensor of shape (n_spectra * n_beams, max_peptide_len) Predicted amino acid tokens for all beams and all spectra. scores : torch.Tensor of shape - (n_spectra * n_beams, max_length, n_amino_acids) + (n_spectra * n_beams, max_peptide_len, n_amino_acids) Scores for the predicted amino acid tokens for all beams and all spectra. step : int @@ -491,10 +491,10 @@ def _cache_finished_beams( Parameters ---------- - tokens : torch.Tensor of shape (n_spectra * n_beams, max_length) + tokens : torch.Tensor of shape (n_spectra * n_beams, max_peptide_len) Predicted amino acid tokens for all beams and all spectra. scores : torch.Tensor of shape - (n_spectra * n_beams, max_length, n_amino_acids) + (n_spectra * n_beams, max_peptide_len, n_amino_acids) Scores for the predicted amino acid tokens for all beams and all spectra. step : int @@ -576,10 +576,10 @@ def _get_topk_beams( Parameters ---------- - tokens : torch.Tensor of shape (n_spectra * n_beams, max_length) + tokens : torch.Tensor of shape (n_spectra * n_beams, max_peptide_len) Predicted amino acid tokens for all beams and all spectra. scores : torch.Tensor of shape - (n_spectra * n_beams, max_length, n_amino_acids) + (n_spectra * n_beams, max_peptide_len, n_amino_acids) Scores for the predicted amino acid tokens for all beams and all spectra. 
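The `(n_spectra * n_beams, ...)` shapes in these docstrings come from folding the beam axis into the batch axis, as in the `einops.rearrange` call shown above. A quick shape check with dummy sizes:

    import torch
    import einops

    B, L, V, S = 2, 5, 7, 3   # dummy batch, peptide length, vocabulary and beam sizes
    scores = torch.zeros(B, L, V, S)

    flat = einops.rearrange(scores, "B L V S -> (B S) L V")
    assert flat.shape == (B * S, L, V)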
finished_beams : torch.Tensor of shape (n_spectra * n_beams) @@ -592,10 +592,10 @@ def _get_topk_beams( Returns ------- - tokens : torch.Tensor of shape (n_spectra * n_beams, max_length) + tokens : torch.Tensor of shape (n_spectra * n_beams, max_peptide_len) Predicted amino acid tokens for all beams and all spectra. scores : torch.Tensor of shape - (n_spectra * n_beams, max_length, n_amino_acids) + (n_spectra * n_beams, max_peptide_len, n_amino_acids) Scores for the predicted amino acid tokens for all beams and all spectra. """ diff --git a/casanovo/denovo/model_runner.py b/casanovo/denovo/model_runner.py index 395320e5..efb380cb 100644 --- a/casanovo/denovo/model_runner.py +++ b/casanovo/denovo/model_runner.py @@ -116,7 +116,7 @@ def db_search( self.config.digestion, self.config.missed_cleavages, self.config.min_peptide_len, - self.config.max_length, + self.config.max_peptide_len, self.config.max_mods, self.config.precursor_mass_tol, self.config.isotope_error_range, @@ -271,7 +271,7 @@ def initialize_model( n_layers=self.config.n_layers, dropout=self.config.dropout, dim_intensity=self.config.dim_intensity, - max_length=self.config.max_length, + max_peptide_len=self.config.max_peptide_len, residues=self.config.residues, max_charge=self.config.max_charge, precursor_mass_tol=self.config.precursor_mass_tol, @@ -292,7 +292,7 @@ def initialize_model( # Reconfigurable non-architecture related parameters for a loaded model. loaded_model_params = dict( - max_length=self.config.max_length, + max_peptide_len=self.config.max_peptide_len, precursor_mass_tol=self.config.precursor_mass_tol, isotope_error_range=self.config.isotope_error_range, n_beams=self.config.n_beams, diff --git a/tests/conftest.py b/tests/conftest.py index bf02a3ab..95ef2d02 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -241,7 +241,7 @@ def tiny_config(tmp_path): "precursor_mass_tol": 5, "isotope_error_range": [0, 1], "min_peptide_len": 6, - "max_length": 100, + "max_peptide_len": 100, "enzyme": "trypsin", "digestion": "full", "missed_cleavages": 0, diff --git a/tests/unit_tests/test_unit.py b/tests/unit_tests/test_unit.py index d03d6f7f..63d492f8 100644 --- a/tests/unit_tests/test_unit.py +++ b/tests/unit_tests/test_unit.py @@ -722,7 +722,7 @@ def test_beam_search_decode(): # Sizes. batch = 1 # B - length = model.max_length + 1 # L + length = model.max_peptide_len + 1 # L vocab = model.decoder.vocab_size + 1 # V beam = model.n_beams # S step = 3 @@ -839,12 +839,12 @@ def test_beam_search_decode(): assert torch.equal(new_scores[:, step, :], expected_scores) # Test output if decoding loop isn't stopped with termination of all beams. - model.max_length = 0 + model.max_peptide_len = 0 # 1 spectrum with 5 peaks (2 values: m/z and intensity). spectra = torch.zeros(1, 5, 2) precursors = torch.tensor([[469.25364, 2.0, 235.63410]]) assert len(list(model.beam_search_decode(spectra, precursors))[0]) == 0 - model.max_length = 100 + model.max_peptide_len = 100 # Re-initialize scores and tokens to further test caching functionality. 
scores = torch.full( @@ -1004,7 +1004,7 @@ def test_beam_search_decode(): batch = 2 # B beam = model.n_beams # S model.decoder.reverse = True - length = model.max_length + 1 # L + length = model.max_peptide_len + 1 # L vocab = model.decoder.vocab_size + 1 # V step = 4 @@ -1045,7 +1045,7 @@ def test_beam_search_decode(): batch = 2 # B beam = model.n_beams # S model.decoder.reverse = True - length = model.max_length + 1 # L + length = model.max_peptide_len + 1 # L vocab = model.decoder.vocab_size + 1 # V step = 4 From a3548d00124c1242350a62fdbcb2f719484254fe Mon Sep 17 00:00:00 2001 From: VarunAnanth2003 Date: Tue, 3 Sep 2024 13:37:46 -0700 Subject: [PATCH 38/84] added nonspecific digestion --- casanovo/config.py | 1 + casanovo/config.yaml | 4 +- casanovo/data/db_utils.py | 67 +++++++----- tests/unit_tests/test_unit.py | 185 ++++++++++++++++++++++++++++++++-- 4 files changed, 225 insertions(+), 32 deletions(-) diff --git a/casanovo/config.py b/casanovo/config.py index 8577d087..dc2a3d2c 100644 --- a/casanovo/config.py +++ b/casanovo/config.py @@ -18,6 +18,7 @@ _config_deprecated = dict( every_n_train_steps="val_check_interval", max_iters="cosine_schedule_period_iters", + max_length="max_peptide_len", ) diff --git a/casanovo/config.yaml b/casanovo/config.yaml index e8732b20..df6fa8bb 100644 --- a/casanovo/config.yaml +++ b/casanovo/config.yaml @@ -49,7 +49,9 @@ devices: # Can also take a regex expression to specify custom digestion rules. enzyme: "trypsin" # Digestion type for candidate peptide generation. -# full: standard digestion. semi: Include products of semi-specific cleavage. +# full: standard digestion. +# semi: Include products of semi-specific cleavage. +# non-specific: Include products of non-specific cleavage. digestion: "full" # Number of allowed missed cleavages when digesting protein. missed_cleavages: 0 diff --git a/casanovo/data/db_utils.py b/casanovo/data/db_utils.py index 26f7152c..f9c669ed 100644 --- a/casanovo/data/db_utils.py +++ b/casanovo/data/db_utils.py @@ -172,7 +172,7 @@ def _digest_fasta( See pyteomics.parser.expasy_rules for valid enzymes. Can also be a regex pattern. digestion : str - The type of digestion to perform. Either 'full' or 'partial'. + The type of digestion to perform. Either 'full', 'partial' or 'non-specific'. missed_cleavages : int The number of missed cleavages to allow. max_mods : int @@ -196,7 +196,7 @@ def _digest_fasta( raise FileNotFoundError(f"File {fasta_filename} does not exist.") peptide_list = [] - if digestion not in ["full", "partial"]: + if digestion not in ["full", "partial", "non-specific"]: logger.error("Digestion type %s not recognized.", digestion) raise ValueError(f"Digestion type {digestion} not recognized.") if enzyme not in parser.expasy_rules: @@ -204,28 +204,49 @@ def _digest_fasta( "Enzyme %s not recognized. 
Interpreting as cleavage rule.", enzyme, ) - semi = digestion == "partial" valid_aa = set(list(self.residues.keys()) + ["C"]) - for header, seq in fasta.read(fasta_filename): - pep_set = parser.cleave( - seq, - rule=enzyme, - missed_cleavages=missed_cleavages, - semi=semi, - ) - protein = header.split()[0] - for pep in pep_set: - if ( - len(pep) >= min_peptide_length - and len(pep) <= max_peptide_length - ): - if any(aa not in valid_aa for aa in pep): - logger.warn( - "Skipping peptide with unknown amino acids: %s", - pep, - ) - else: - peptide_list.append((pep, protein)) + if digestion == "non-specific": + for header, seq in fasta.read(fasta_filename): + pep_set = [] + # Generate all possible peptides + for i in range(len(seq)): + for j in range(i + 1, len(seq) + 1): + pep_set.append(seq[i:j]) + protein = header.split()[0] + for pep in pep_set: + if ( + len(pep) >= min_peptide_length + and len(pep) <= max_peptide_length + ): + if any(aa not in valid_aa for aa in pep): + logger.warn( + "Skipping peptide with unknown amino acids: %s", + pep, + ) + else: + peptide_list.append((pep, protein)) + else: + semi = digestion == "partial" + for header, seq in fasta.read(fasta_filename): + pep_set = parser.cleave( + seq, + rule=enzyme, + missed_cleavages=missed_cleavages, + semi=semi, + ) + protein = header.split()[0] + for pep in pep_set: + if ( + len(pep) >= min_peptide_length + and len(pep) <= max_peptide_length + ): + if any(aa not in valid_aa for aa in pep): + logger.warn( + "Skipping peptide with unknown amino acids: %s", + pep, + ) + else: + peptide_list.append((pep, protein)) # Generate modified peptides mass_calculator = depthcharge.masses.PeptideMass(residues="massivekb") diff --git a/tests/unit_tests/test_unit.py b/tests/unit_tests/test_unit.py index 63d492f8..594552af 100644 --- a/tests/unit_tests/test_unit.py +++ b/tests/unit_tests/test_unit.py @@ -327,12 +327,16 @@ def test_digest_fasta_mods(tiny_fasta_file, residues_dict): "+42.011EIVMTQSPPTLSLSPGER", "+43.006EIVMTQSPPTLSLSPGER", "-17.027MEAPAQLLFLLLLWLPDTTR", + "-17.027M+15.995EAPAQLLFLLLLWLPDTTR", # "MEAPAQLLFLLLLWLPDTTR", "MEAPAQ+0.984LLFLLLLWLPDTTR", "M+15.995EAPAQLLFLLLLWLPDTTR", "+43.006-17.027MEAPAQLLFLLLLWLPDTTR", + "+43.006-17.027M+15.995EAPAQLLFLLLLWLPDTTR", # "+42.011MEAPAQLLFLLLLWLPDTTR", "+43.006MEAPAQLLFLLLLWLPDTTR", + "+42.011M+15.995EAPAQLLFLLLLWLPDTTR", # + "+43.006M+15.995EAPAQLLFLLLLWLPDTTR", # "-17.027ASQSVSSSYLTWYQQKPGQAPR", "ASQSVSSSYLTWYQQKPGQAPR", "ASQ+0.984SVSSSYLTWYQQKPGQAPR", @@ -370,13 +374,6 @@ def test_digest_fasta_mods(tiny_fasta_file, residues_dict): residues=residues_dict, ) peptide_list = list(pdb.db_peptides["peptide"]) - peptide_list = [ - x - for x in peptide_list - if not re.search( - r"(\+42\.011|\+43\.006|\-17\.027|\+43\.006\-17\.027)+[A-Z]\+", x - ) - ] assert peptide_list == expected_1mod @@ -447,6 +444,136 @@ def test_digest_fasta_enzyme(tiny_fasta_file, residues_dict): # asp-n enzyme expected_aspn = ["DFAVYYC+57.021QQ", "DFTLTISSLQPE", "MEAPAQLLFLLLLWLP"] + expected_semispecific = [ + "FSGSGS", + "ATSIPA", + "ASQSVS", + "PGQAPR", + "TSIPAR", + "MEAPAQ", + "LLIYGA", + "YGASTR", + "LSPGER", + "LPDTTR", + "EIVMTQ", + "VTLSC+57.021R", + "QDYNLP", + ] + + expected_nonspecific = [ + "SGSGSG", + "GSGSGT", + "SGSGTD", + "FSGSGS", + "ATSIPA", + "GASTRA", + "LSLSPG", + "ASQSVS", + "GSGTDF", + "SLSPGE", + "QSVSSS", + "SQSVSS", + "KPGQAP", + "SPPTLS", + "ASTRAT", + "RFSGSG", + "IYGAST", + "APAQLL", + "PTLSLS", + "TLSLSP", + "TLTISS", + "STRATS", + "LIYGAS", + "ARFSGS", + "PGQAPR", + "SGTDFT", + "PPTLSL", 
+ "EAPAQL", + "QKPGQA", + "SVSSSY", + "TQSPPT", + "LTISSL", + "PARFSG", + "GQAPRL", + "QSPPTL", + "SPGERV", + "ISSLQP", + "RATSIP", + "TSIPAR", + "MEAPAQ", + "RASQSV", + "TISSLQ", + "TRATSI", + "LLIYGA", + "GTDFTL", + "YGASTR", + "VSSSYL", + "SSSYLT", + "LSPGER", + "PGERVT", + "MTQSPP", + "SSLQPE", + "VMTQSP", + "GERVTL", + "PEDFAV", + "IVMTQS", + "FTLTIS", + "APRLLI", + "QQKPGQ", + "SLQPED", + "PAQLLF", + "IPARFS", + "SIPARF", + "LSC+57.021RAS", + "TDFTLT", + "QAPRLL", + "LPDTTR", + "ERVTLS", + "AQLLFL", + "QPEDFA", + "TLSC+57.021RA", + "C+57.021RASQS", + "SC+57.021RASQ", + "DFTLTI", + "PDTTRE", + "TTREIV", + "EIVMTQ", + "YQQKPG", + "LFLLLL", + "LLFLLL", + "WLPDTT", + "DTTREI", + "RLLIYG", + "RVTLSC+57.021", + "VTLSC+57.021R", + "EDFAVY", + "LWLPDT", + "QLLFLL", + "LQPEDF", + "REIVMT", + "TREIVM", + "QDYNLP", + "LLLWLP", + "SSYLTW", + "LLWLPD", + "LLLLWL", + "PRLLIY", + "DFAVYY", + "QQDYNL", + "AVYYC+57.021Q", + "FLLLLW", + "FAVYYC+57.021", + "C+57.021QQDYN", + "SYLTWY", + "LTWYQQ", + "WYQQKP", + "TWYQQK", + "VYYC+57.021QQ", + "YLTWYQ", + "YC+57.021QQDY", + "YYC+57.021QQD", + ] + pdb = db_utils.ProteinDatabase( fasta_path=str(tiny_fasta_file), enzyme="arg-c", @@ -487,7 +614,7 @@ def test_digest_fasta_enzyme(tiny_fasta_file, residues_dict): peptide_list = list(pdb.db_peptides["peptide"]) assert peptide_list == expected_aspn - # Tesr regex rule instead of named enzyme + # Test regex rule instead of named enzyme pdb = db_utils.ProteinDatabase( fasta_path=str(tiny_fasta_file), enzyme="R", @@ -508,6 +635,48 @@ def test_digest_fasta_enzyme(tiny_fasta_file, residues_dict): peptide_list = list(pdb.db_peptides["peptide"]) assert peptide_list == expected_argc + # Test semispecific digest + pdb = db_utils.ProteinDatabase( + fasta_path=str(tiny_fasta_file), + enzyme="trypsin", + digestion="partial", + missed_cleavages=0, + min_peptide_len=6, + max_peptide_len=6, + max_mods=0, + precursor_tolerance=10000, + isotope_error=[0, 0], + allowed_fixed_mods="C:C+57.021", + allowed_var_mods=( + "M:M+15.995,N:N+0.984,Q:Q+0.984," + "nterm:+42.011,nterm:+43.006,nterm:-17.027,nterm:+43.006-17.027" + ), + residues=residues_dict, + ) + peptide_list = list(pdb.db_peptides["peptide"]) + assert peptide_list == expected_semispecific + + # Test nonspecific digest + pdb = db_utils.ProteinDatabase( + fasta_path=str(tiny_fasta_file), + enzyme="trypsin", + digestion="non-specific", + missed_cleavages=0, + min_peptide_len=6, + max_peptide_len=6, + max_mods=0, + precursor_tolerance=10000, + isotope_error=[0, 0], + allowed_fixed_mods="C:C+57.021", + allowed_var_mods=( + "M:M+15.995,N:N+0.984,Q:Q+0.984," + "nterm:+42.011,nterm:+43.006,nterm:-17.027,nterm:+43.006-17.027" + ), + residues=residues_dict, + ) + peptide_list = list(pdb.db_peptides["peptide"]) + assert peptide_list == expected_nonspecific + def test_get_candidates(tiny_fasta_file, residues_dict): # precursor_window is 10000 From e8d4682241b9b4d10384e9dfd92fd04258103e3e Mon Sep 17 00:00:00 2001 From: VarunAnanth2003 Date: Fri, 13 Sep 2024 12:06:31 -0700 Subject: [PATCH 39/84] minor comments --- casanovo/data/db_utils.py | 35 +++++++++++++++++----------------- casanovo/denovo/dataloaders.py | 7 +------ casanovo/denovo/model.py | 6 ++---- 3 files changed, 20 insertions(+), 28 deletions(-) diff --git a/casanovo/data/db_utils.py b/casanovo/data/db_utils.py index f9c669ed..19b312e2 100644 --- a/casanovo/data/db_utils.py +++ b/casanovo/data/db_utils.py @@ -127,12 +127,12 @@ def get_candidates( (self.db_peptides["calc_mass"] >= lower_bound) & (self.db_peptides["calc_mass"] <= 
upper_bound) ] - candidates.append(window[["peptide", "calc_mass", "protein"]]) + candidates.append(window[["peptide", "calc_mass"]]) candidates = pd.concat(candidates) candidates.drop_duplicates(inplace=True) candidates.sort_values(by=["calc_mass", "peptide"], inplace=True) - return candidates["peptide"], candidates["protein"] + return candidates["peptide"] def get_associated_protein(self, peptide: str) -> str: """ @@ -159,7 +159,7 @@ def _digest_fasta( max_mods: int, min_peptide_length: int, max_peptide_length: int, - ) -> pd.DataFrame: + ) -> Tuple[pd.DataFrame, dict]: """ Digests a FASTA file and returns the peptides, their masses, and associated protein. @@ -185,8 +185,8 @@ def _digest_fasta( Returns ------- pep_table : pd.DataFrame - A Pandas DataFrame with peptide, mass, - and protein columns. Sorted by neutral mass in ascending order. + A Pandas DataFrame with peptide and mass columns. + Sorted by neutral mass in ascending order. prot_map : dict A dictionary mapping peptides to associated proteins. """ @@ -207,17 +207,14 @@ def _digest_fasta( valid_aa = set(list(self.residues.keys()) + ["C"]) if digestion == "non-specific": for header, seq in fasta.read(fasta_filename): - pep_set = [] + protein = header.split()[0] # Generate all possible peptides for i in range(len(seq)): - for j in range(i + 1, len(seq) + 1): - pep_set.append(seq[i:j]) - protein = header.split()[0] - for pep in pep_set: - if ( - len(pep) >= min_peptide_length - and len(pep) <= max_peptide_length + for j in range( + i + min_peptide_length, + min(i + max_peptide_length + 1, len(seq) + 1), ): + pep = seq[i:j] if any(aa not in valid_aa for aa in pep): logger.warn( "Skipping peptide with unknown amino acids: %s", @@ -274,17 +271,19 @@ def _digest_fasta( isos, ) ] - # Create a DataFrame for easy sorting and filtering - pep_table = pd.DataFrame( - mod_peptide_list, columns=["peptide", "calc_mass", "protein"] - ) - pep_table.sort_values(by=["calc_mass", "peptide"], inplace=True) # Create a dictionary mapping for easy accession of associated proteins prot_map = defaultdict(list) for pep, _, prot in mod_peptide_list: prot_map[pep].append(prot) + # Create a DataFrame for easy sorting and filtering + pep_table = pd.DataFrame( + [(pep, mass) for pep, mass, _ in mod_peptide_list], + columns=["peptide", "calc_mass"], + ) + pep_table.sort_values(by=["calc_mass", "peptide"], inplace=True) + logger.info( "Digestion complete. %d peptides generated.", len(pep_table) ) diff --git a/casanovo/denovo/dataloaders.py b/casanovo/denovo/dataloaders.py index 4793e2f3..2646329d 100644 --- a/casanovo/denovo/dataloaders.py +++ b/casanovo/denovo/dataloaders.py @@ -266,8 +266,6 @@ def prepare_psm_batch( The spectrum identifiers. all_peptides : List[str] The candidate peptides for each spectrum. - all_proteins : List[str] - The proteins associated with each candidate peptide. 
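Note on the candidate windowing in get_candidates above: the observed precursor m/z is converted to a neutral mass, each allowed isotope shift is applied, and database peptides whose calculated mass falls within the ppm tolerance are kept. A minimal standalone sketch of that arithmetic (the proton mass and isotope-spacing constants below are assumed values, not taken from this patch; the real conversion lives in the module's m/z-to-neutral-mass helper):

    PROTON = 1.00727646          # assumed constant
    ISOTOPE_SPACING = 1.003355   # assumed constant

    def neutral_mass(precursor_mz: float, charge: int) -> float:
        # Convert an observed precursor m/z back to a neutral peptide mass.
        return charge * (precursor_mz - PROTON)

    def mass_windows(precursor_mz, charge, tol_ppm, isotope_error):
        # Yield one (lower, upper) neutral-mass window per allowed isotope
        # error, mirroring the loop over isotope_error in get_candidates.
        for e in range(isotope_error[0], isotope_error[1] + 1):
            shifted = neutral_mass(precursor_mz, charge) - ISOTOPE_SPACING * e
            yield shifted * (1 - tol_ppm / 1e6), shifted * (1 + tol_ppm / 1e6)

For example, mass_windows(496.2, 2, 10, (0, 1)) yields two windows: one around the monoisotopic neutral mass and one shifted down by a single isotope spacing.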
""" spectra, precursor_mzs, precursor_charges, spectrum_ids = list(zip(*batch)) spectra = torch.nn.utils.rnn.pad_sequence(spectra, batch_first=True) @@ -283,9 +281,8 @@ def prepare_psm_batch( all_precursors = [] all_spectrum_ids = [] all_peptides = [] - all_proteins = [] for idx in range(len(batch)): - spec_peptides, spec_proteins = protein_database.get_candidates( + spec_peptides = protein_database.get_candidates( precursor_mzs[idx], precursor_charges[idx], ) @@ -298,7 +295,6 @@ def prepare_psm_batch( ) all_spectrum_ids.extend([spectrum_ids[idx]] * len(spec_peptides)) all_peptides.extend(spec_peptides) - all_proteins.extend(spec_proteins) except ValueError: logger.warning( "No candidates found for spectrum %s", spectrum_ids[idx] @@ -309,5 +305,4 @@ def prepare_psm_batch( torch.cat(all_precursors, dim=0), all_spectrum_ids, all_peptides, - all_proteins, ) diff --git a/casanovo/denovo/model.py b/casanovo/denovo/model.py index 6fe34bfa..ca5557fc 100644 --- a/casanovo/denovo/model.py +++ b/casanovo/denovo/model.py @@ -1019,7 +1019,7 @@ def predict_step(self, batch, *args): ---------- batch : Tuple[torch.Tensor, torch.Tensor, np.array, List[str]] A batch of (i) MS/MS spectra, (ii) precursor information, (iii) - spectrum identifiers, (iv) candidate peptides, (v) associated protein. + spectrum identifiers, (iv) candidate peptides Returns ------- @@ -1049,7 +1049,6 @@ def predict_step(self, batch, *args): peptide_score, aa_scores, peptide, - protein, ) in zip( current_batch[1][:, 1].cpu().detach().numpy(), current_batch[1][:, 2].cpu().detach().numpy(), @@ -1057,7 +1056,6 @@ def predict_step(self, batch, *args): all_scores.cpu().detach().numpy(), per_aa_scores.cpu().detach().numpy(), current_batch[3], - current_batch[4], ): predictions.append( ( @@ -1067,7 +1065,7 @@ def predict_step(self, batch, *args): peptide, peptide_score, aa_scores, - protein, + self.protein_database.get_associated_protein(peptide), ) ) return predictions From 68b6926032814dcc4a6b650e1736c8ff92edf7cb Mon Sep 17 00:00:00 2001 From: VarunAnanth2003 Date: Fri, 13 Sep 2024 13:41:39 -0700 Subject: [PATCH 40/84] full branch comments addressed --- casanovo/data/db_utils.py | 197 ++++++++++++++++++++-------------- tests/unit_tests/test_unit.py | 14 +-- 2 files changed, 123 insertions(+), 88 deletions(-) diff --git a/casanovo/data/db_utils.py b/casanovo/data/db_utils.py index 19b312e2..34671eb1 100644 --- a/casanovo/data/db_utils.py +++ b/casanovo/data/db_utils.py @@ -6,7 +6,7 @@ import re import string from collections import defaultdict -from typing import List, Tuple +from typing import List, Tuple, Iterator import depthcharge.masses import pandas as pd @@ -70,22 +70,23 @@ def __init__( allowed_var_mods: str, residues: dict, ): - self.residues = residues self.fixed_mods, self.var_mods, self.swap_map = _construct_mods_dict( allowed_fixed_mods, allowed_var_mods ) + self.max_mods = max_mods self.swap_regex = re.compile( "(%s)" % "|".join(map(re.escape, self.swap_map.keys())) ) - self.db_peptides, self.prot_map = self._digest_fasta( + peptide_generator = _peptide_generator( fasta_path, enzyme, digestion, missed_cleavages, - max_mods, min_peptide_len, max_peptide_len, + set(list(residues.keys()) + ["C"]), ) + self.db_peptides, self.prot_map = self._digest_fasta(peptide_generator) self.precursor_tolerance = precursor_tolerance self.isotope_error = isotope_error @@ -152,35 +153,15 @@ def get_associated_protein(self, peptide: str) -> str: def _digest_fasta( self, - fasta_filename: str, - enzyme: str, - digestion: str, - missed_cleavages: 
int, - max_mods: int, - min_peptide_length: int, - max_peptide_length: int, + peptide_generator: Iterator[Tuple[str, str]], ) -> Tuple[pd.DataFrame, dict]: """ Digests a FASTA file and returns the peptides, their masses, and associated protein. Parameters ---------- - fasta_filename : str - Path to the FASTA file. - enzyme : str - The enzyme to use for digestion. - See pyteomics.parser.expasy_rules for valid enzymes. - Can also be a regex pattern. - digestion : str - The type of digestion to perform. Either 'full', 'partial' or 'non-specific'. - missed_cleavages : int - The number of missed cleavages to allow. - max_mods : int - The maximum number of modifications to allow per peptide. - min_peptide_length : int - The minimum length of peptides to consider. - max_peptide_length : int - The maximum length of peptides to consider. + peptide_generator : Iterator[Tuple[str, str]] + An iterator that yields peptides and associated proteins. Returns ------- @@ -190,60 +171,9 @@ def _digest_fasta( prot_map : dict A dictionary mapping peptides to associated proteins. """ - # Verify the existence of the file: - if not os.path.isfile(fasta_filename): - logger.error("File %s does not exist.", fasta_filename) - raise FileNotFoundError(f"File {fasta_filename} does not exist.") - peptide_list = [] - if digestion not in ["full", "partial", "non-specific"]: - logger.error("Digestion type %s not recognized.", digestion) - raise ValueError(f"Digestion type {digestion} not recognized.") - if enzyme not in parser.expasy_rules: - logger.info( - "Enzyme %s not recognized. Interpreting as cleavage rule.", - enzyme, - ) - valid_aa = set(list(self.residues.keys()) + ["C"]) - if digestion == "non-specific": - for header, seq in fasta.read(fasta_filename): - protein = header.split()[0] - # Generate all possible peptides - for i in range(len(seq)): - for j in range( - i + min_peptide_length, - min(i + max_peptide_length + 1, len(seq) + 1), - ): - pep = seq[i:j] - if any(aa not in valid_aa for aa in pep): - logger.warn( - "Skipping peptide with unknown amino acids: %s", - pep, - ) - else: - peptide_list.append((pep, protein)) - else: - semi = digestion == "partial" - for header, seq in fasta.read(fasta_filename): - pep_set = parser.cleave( - seq, - rule=enzyme, - missed_cleavages=missed_cleavages, - semi=semi, - ) - protein = header.split()[0] - for pep in pep_set: - if ( - len(pep) >= min_peptide_length - and len(pep) <= max_peptide_length - ): - if any(aa not in valid_aa for aa in pep): - logger.warn( - "Skipping peptide with unknown amino acids: %s", - pep, - ) - else: - peptide_list.append((pep, protein)) + for pep, prot in peptide_generator: + peptide_list.append((pep, prot)) # Generate modified peptides mass_calculator = depthcharge.masses.PeptideMass(residues="massivekb") @@ -253,7 +183,7 @@ def _digest_fasta( pep, variable_mods=self.var_mods, fixed_mods=self.fixed_mods, - max_mods=max_mods, + max_mods=self.max_mods, ), prot, ) @@ -290,6 +220,111 @@ def _digest_fasta( return pep_table, prot_map +def _peptide_generator( + fasta_filename: str, + enzyme: str, + digestion: str, + missed_cleavages: int, + min_peptide_length: int, + max_peptide_length: int, + valid_aa: set[str], +) -> Iterator[str]: + """ + Create a generator the yields peptides from a FASTA file + depending on the type of digestion specified. + + Parameters + ---------- + fasta_filename : str + Path to the FASTA file. + enzyme : str + The enzyme to use for digestion. + See pyteomics.parser.expasy_rules for valid enzymes. 
+ Can also be a regex pattern. + digestion : str + The type of digestion to perform. Either 'full', 'partial' or 'non-specific'. + missed_cleavages : int + The number of missed cleavages to allow. + min_peptide_length : int + The minimum length of peptides to consider. + max_peptide_length : int + The maximum length of peptides to consider. + valid_aa : set[str] + A set of valid amino acids. + + Yields + ------ + pep : str + A peptide sequence, unmodified. + protein : str + The associated protein. + """ + # Verify the existence of the file: + if not os.path.isfile(fasta_filename): + logger.error("File %s does not exist.", fasta_filename) + raise FileNotFoundError(f"File {fasta_filename} does not exist.") + if digestion not in ["full", "partial", "non-specific"]: + logger.error("Digestion type %s not recognized.", digestion) + raise ValueError(f"Digestion type {digestion} not recognized.") + if enzyme not in parser.expasy_rules: + logger.info( + "Enzyme %s not recognized. Interpreting as cleavage rule.", + enzyme, + ) + + # Verify the existence of the file: + if not os.path.isfile(fasta_filename): + logger.error("File %s does not exist.", fasta_filename) + raise FileNotFoundError(f"File {fasta_filename} does not exist.") + if digestion not in ["full", "partial", "non-specific"]: + logger.error("Digestion type %s not recognized.", digestion) + raise ValueError(f"Digestion type {digestion} not recognized.") + if enzyme not in parser.expasy_rules: + logger.info( + "Enzyme %s not recognized. Interpreting as cleavage rule.", + enzyme, + ) + if digestion == "non-specific": + for header, seq in fasta.read(fasta_filename): + protein = header.split()[0] + # Generate all possible peptides + for i in range(len(seq)): + for j in range( + i + min_peptide_length, + min(i + max_peptide_length + 1, len(seq) + 1), + ): + pep = seq[i:j] + if any(aa not in valid_aa for aa in pep): + logger.warn( + "Skipping peptide with unknown amino acids: %s", + pep, + ) + else: + yield pep, protein + else: + semi = digestion == "partial" + for header, seq in fasta.read(fasta_filename): + pep_set = parser.cleave( + seq, + rule=enzyme, + missed_cleavages=missed_cleavages, + semi=semi, + ) + protein = header.split()[0] + for pep in pep_set: + if ( + len(pep) >= min_peptide_length + and len(pep) <= max_peptide_length + ): + if any(aa not in valid_aa for aa in pep): + logger.warn( + "Skipping peptide with unknown amino acids: %s", + pep, + ) + else: + yield pep, protein + + @njit def _to_mz(precursor_mass: float, charge: int) -> float: """ diff --git a/tests/unit_tests/test_unit.py b/tests/unit_tests/test_unit.py index 594552af..a0b0935d 100644 --- a/tests/unit_tests/test_unit.py +++ b/tests/unit_tests/test_unit.py @@ -705,7 +705,7 @@ def test_get_candidates(tiny_fasta_file, residues_dict): ), residues=residues_dict, ) - candidates, _ = pdb.get_candidates(precursor_mz=496.2, charge=2) + candidates = pdb.get_candidates(precursor_mz=496.2, charge=2) assert expected_smallwindow == list(candidates) pdb = db_utils.ProteinDatabase( @@ -725,7 +725,7 @@ def test_get_candidates(tiny_fasta_file, residues_dict): ), residues=residues_dict, ) - candidates, _ = pdb.get_candidates(precursor_mz=496.2, charge=2) + candidates = pdb.get_candidates(precursor_mz=496.2, charge=2) assert expected_midwindow == list(candidates) pdb = db_utils.ProteinDatabase( @@ -745,7 +745,7 @@ def test_get_candidates(tiny_fasta_file, residues_dict): ), residues=residues_dict, ) - candidates, _ = pdb.get_candidates(precursor_mz=496.2, charge=2) + candidates = 
pdb.get_candidates(precursor_mz=496.2, charge=2) assert expected_widewindow == list(candidates) @@ -814,7 +814,7 @@ def test_get_candidates_isotope_error(tiny_fasta_file, residues_dict): residues=residues_dict, ) pdb.db_peptides = peptide_list - candidates, _ = pdb.get_candidates(precursor_mz=496.2, charge=2) + candidates = pdb.get_candidates(precursor_mz=496.2, charge=2) assert expected_isotope0 == list(candidates) pdb = db_utils.ProteinDatabase( @@ -835,7 +835,7 @@ def test_get_candidates_isotope_error(tiny_fasta_file, residues_dict): residues=residues_dict, ) pdb.db_peptides = peptide_list - candidates, _ = pdb.get_candidates(precursor_mz=496.2, charge=2) + candidates = pdb.get_candidates(precursor_mz=496.2, charge=2) assert expected_isotope01 == list(candidates) pdb = db_utils.ProteinDatabase( @@ -856,7 +856,7 @@ def test_get_candidates_isotope_error(tiny_fasta_file, residues_dict): residues=residues_dict, ) pdb.db_peptides = peptide_list - candidates, _ = pdb.get_candidates(precursor_mz=496.2, charge=2) + candidates = pdb.get_candidates(precursor_mz=496.2, charge=2) assert expected_isotope012 == list(candidates) pdb = db_utils.ProteinDatabase( @@ -877,7 +877,7 @@ def test_get_candidates_isotope_error(tiny_fasta_file, residues_dict): residues=residues_dict, ) pdb.db_peptides = peptide_list - candidates, _ = pdb.get_candidates(precursor_mz=496.2, charge=2) + candidates = pdb.get_candidates(precursor_mz=496.2, charge=2) assert expected_isotope0123 == list(candidates) From e8c9c7d3aba05f7466ac3e94ecdf7e5a2156fd7f Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Fri, 13 Sep 2024 21:21:25 +0000 Subject: [PATCH 41/84] Generate new screengrabs with rich-codex --- docs/images/evaluate-help.svg | 2 +- docs/images/help.svg | 169 +++++++++++++++++----------------- docs/images/sequence-help.svg | 2 +- docs/images/train-help.svg | 2 +- 4 files changed, 89 insertions(+), 86 deletions(-) diff --git a/docs/images/evaluate-help.svg b/docs/images/evaluate-help.svg index ec9c23a3..661f0efe 100644 --- a/docs/images/evaluate-help.svg +++ b/docs/images/evaluate-help.svg @@ -31,7 +31,7 @@ font-weight: bold; font-family: arial; } - + .terminal-1819499677-r1 { fill: #c5c8c6 } .terminal-1819499677-r2 { fill: #d0b344 } .terminal-1819499677-r3 { fill: #c5c8c6;font-weight: bold } diff --git a/docs/images/help.svg b/docs/images/help.svg index bf0fbef8..5418b95a 100644 --- a/docs/images/help.svg +++ b/docs/images/help.svg @@ -1,4 +1,4 @@ - + - - + + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + + + + - + - + - - $ casanovo --help - -Usage:casanovo [OPTIONSCOMMAND [ARGS]...                                     - - ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓  - ┃                                  Casanovo                                  ┃  - ┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛  - Casanovo de novo sequences peptides from tandem mass spectra using a            - Transformer model. Casanovo currently supports mzML, mzXML, and MGF files for   - de novo sequencing and annotated MGF files, such as those from MassIVE-KB, for  - training new models.                                                            
- - Links:                                                                          - - • Documentation: https://casanovo.readthedocs.io                               - • Official code repository: https://github.com/Noble-Lab/casanovo              - - If you use Casanovo in your work, please cite:                                  - - • Yilmaz, M., Fondrie, W. E., Bittremieux, W., Oh, S. & Noble, W. S. De novo   -mass spectrometry peptide sequencing with a transformer model. Proceedings   -of the 39th International Conference on Machine Learning - ICML '22 (2022)   -doi:10.1101/2022.02.07.479481.                                               - -╭─ Options ────────────────────────────────────────────────────────────────────╮ ---help-h    Show this message and exit.                                     -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Commands ───────────────────────────────────────────────────────────────────╮ -configure Generate a Casanovo configuration file to customize.               -sequence  De novo sequence peptides from tandem mass spectra.                -train     Train a Casanovo model on your own data.                           -version   Get the Casanovo version information                               -╰──────────────────────────────────────────────────────────────────────────────╯ - + + $ casanovo --help + +Usage:casanovo [OPTIONSCOMMAND [ARGS]...                                     + + ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓  + ┃                                  Casanovo                                  ┃  + ┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛  + Casanovo de novo sequences peptides from tandem mass spectra using a            + Transformer model. Casanovo currently supports mzML, mzXML, and MGF files for   + de novo sequencing and annotated MGF files, such as those from MassIVE-KB, for  + training new models.                                                            + + Links:                                                                          + + • Documentation: https://casanovo.readthedocs.io                               + • Official code repository: https://github.com/Noble-Lab/casanovo              + + If you use Casanovo in your work, please cite:                                  + + • Yilmaz, M., Fondrie, W. E., Bittremieux, W., Oh, S. & Noble, W. S. De novo   +mass spectrometry peptide sequencing with a transformer model. Proceedings   +of the 39th International Conference on Machine Learning - ICML '22 (2022)   +doi:10.1101/2022.02.07.479481.                                               + +╭─ Options ────────────────────────────────────────────────────────────────────╮ +--help-h    Show this message and exit.                                     +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Commands ───────────────────────────────────────────────────────────────────╮ +configure Generate a Casanovo configuration file to customize.               +db-search Perform a database search on MS/MS data using Casanovo-DB.         +sequence  De novo sequence peptides from tandem mass spectra.                +train     Train a Casanovo model on your own data.                           
+version   Get the Casanovo version information                               +╰──────────────────────────────────────────────────────────────────────────────╯ + diff --git a/docs/images/sequence-help.svg b/docs/images/sequence-help.svg index 3c3d5c54..70570e2a 100644 --- a/docs/images/sequence-help.svg +++ b/docs/images/sequence-help.svg @@ -32,7 +32,7 @@ font-family: arial; } -.terminal-3834786767-r1 { fill: #c5c8c6 } + .terminal-3834786767-r1 { fill: #c5c8c6 } .terminal-3834786767-r2 { fill: #d0b344 } .terminal-3834786767-r3 { fill: #c5c8c6;font-weight: bold } .terminal-3834786767-r4 { fill: #68a0b3;font-weight: bold } diff --git a/docs/images/train-help.svg b/docs/images/train-help.svg index 8875b1c4..e27717e1 100644 --- a/docs/images/train-help.svg +++ b/docs/images/train-help.svg @@ -32,7 +32,7 @@ font-family: arial; } -.terminal-956334679-r1 { fill: #c5c8c6 } + .terminal-956334679-r1 { fill: #c5c8c6 } .terminal-956334679-r2 { fill: #d0b344 } .terminal-956334679-r3 { fill: #c5c8c6;font-weight: bold } .terminal-956334679-r4 { fill: #68a0b3;font-weight: bold } From e474eeec619728df9b076b49b4360264c5d5ad6d Mon Sep 17 00:00:00 2001 From: VarunAnanth2003 Date: Fri, 13 Sep 2024 16:58:37 -0700 Subject: [PATCH 42/84] updated and fixed failed tests --- casanovo/data/ms_io.py | 6 +++++- casanovo/denovo/model.py | 20 ++++++++++---------- tests/conftest.py | 10 ++++++++-- 3 files changed, 23 insertions(+), 13 deletions(-) diff --git a/casanovo/data/ms_io.py b/casanovo/data/ms_io.py index 79143681..86f894ea 100644 --- a/casanovo/data/ms_io.py +++ b/casanovo/data/ms_io.py @@ -41,6 +41,9 @@ class PepSpecMatch: aa_scores : Iterable[float] A list of scores for individual amino acids in the peptide sequence, where len(aa_scores) == len(sequence) + protein : str + For DB-search mode, the protein from which the peptide + in the PSM was derived. Default value is "null". 
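For reference, a PSM carrying the new protein field can be constructed as follows (a usage sketch only; all values are invented, and the import assumes the dataclass still lives in casanovo.data.ms_io at this point in the series):

    from casanovo.data import ms_io

    psm = ms_io.PepSpecMatch(
        sequence="LESLIEK",
        spectrum_id=("sample.mgf", "scan=1"),   # hypothetical identifier
        peptide_score=0.87,
        charge=2,
        calc_mz=408.25,
        exp_mz=408.25,
        aa_scores=[0.9] * 7,
        protein="sp|P0XXXX|EXAMPLE",            # hypothetical accession
    )

Omitting protein keeps the default "null", which is what the mzTab writer now emits in the accession column for de novo results.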
""" sequence: str @@ -50,6 +53,7 @@ class PepSpecMatch: calc_mz: float exp_mz: float aa_scores: Iterable[float] + protein: str = "null" class MztabWriter: @@ -228,7 +232,7 @@ def save(self) -> None: "PSM", psm.sequence, # sequence i, # PSM_ID - "null" if len(psm) < 8 else psm[7], # accession + psm.protein, # accession "null", # unique "null", # database "null", # database_version diff --git a/casanovo/denovo/model.py b/casanovo/denovo/model.py index 61064e09..245514fe 100644 --- a/casanovo/denovo/model.py +++ b/casanovo/denovo/model.py @@ -1088,16 +1088,16 @@ def on_predict_batch_end( protein, ) in outputs: self.out_writer.psms.append( - ( - peptide, - tuple(spectrum_i), - peptide_score, - charge, - precursor_mz, - self.peptide_mass_calculator.mass(peptide, charge), - ",".join(list(map("{:.5f}".format, aa_scores))), - protein, - ), + ms_io.PepSpecMatch( + sequence=peptide, + spectrum_id=tuple(spectrum_i), + peptide_score=peptide_score, + charge=int(charge), + calc_mz=precursor_mz, + exp_mz=self.peptide_mass_calculator.mass(peptide, charge), + aa_scores=aa_scores, + protein=protein, + ) ) diff --git a/tests/conftest.py b/tests/conftest.py index 3c286f7c..2674c4ae 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -42,6 +42,7 @@ def mgf_medium(tmp_path): return _create_mgf(peptides, mgf_file, mod_aa_mass={"C": 160.030649}) +@pytest.fixture def mgf_small_unannotated(tmp_path): """An MGF file with 2 unannotated spectra.""" peptides = ["LESLIEK", "PEPTIDEK"] @@ -49,7 +50,9 @@ def mgf_small_unannotated(tmp_path): return _create_mgf(peptides, mgf_file, annotate=False) -def _create_mgf(peptides, mgf_file, random_state=42, mod_aa_mass=None, annotate=True): +def _create_mgf( + peptides, mgf_file, random_state=42, mod_aa_mass=None, annotate=True +): """ Create a fake MGF file from one or more peptides. 
@@ -73,7 +76,10 @@ def _create_mgf(peptides, mgf_file, random_state=42, mod_aa_mass=None, annotate= """ rng = np.random.default_rng(random_state) entries = [ - _create_mgf_entry(p, rng.choice([2, 3]), mod_aa_mass=mod_aa_mass, annotate=annotate) for p in peptides + _create_mgf_entry( + p, rng.choice([2, 3]), mod_aa_mass=mod_aa_mass, annotate=annotate + ) + for p in peptides ] with mgf_file.open("w+") as mgf_ref: mgf_ref.write("\n".join(entries)) From 4e696b45f92bb270155802b62fc1a3c09dfb7ee1 Mon Sep 17 00:00:00 2001 From: VarunAnanth2003 Date: Fri, 13 Sep 2024 17:58:18 -0700 Subject: [PATCH 43/84] add mztab validation to dbsearch test --- tests/test_integration.py | 34 +++++++++++++++++++++++++++++----- 1 file changed, 29 insertions(+), 5 deletions(-) diff --git a/tests/test_integration.py b/tests/test_integration.py index b0034a12..3b7ae580 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -20,14 +20,14 @@ def test_db_search( CliRunner().invoke, casanovo.main, catch_exceptions=False ) - output_path = tmp_path / "db_search.mztab" + output_filename = tmp_path / "db_search.mztab" search_args = [ "db-search", "--config", tiny_config, "--output", - str(output_path), + str(output_filename), str(mgf_medium), str(tiny_fasta_file), ] @@ -35,10 +35,10 @@ def test_db_search( result = run(search_args) assert result.exit_code == 0 - assert output_path.exists() - assert output_path.is_file() + assert output_filename.exists() + assert output_filename.is_file() - mztab = pyteomics.mztab.MzTab(str(output_path)) + mztab = pyteomics.mztab.MzTab(str(output_filename)) psms = mztab.spectrum_match_table assert list(psms.sequence) == [ @@ -51,6 +51,30 @@ def test_db_search( "FSGSGSGTDFTLTISSLQPEDFAVYYC+57.021QQDYNLP", ] + # Validate mztab output + validate_args = [ + "java", + "-jar", + f"{TEST_DIR}/jmzTabValidator.jar", + "--check", + f"inFile={output_filename}", + ] + + validate_result = subprocess.run( + validate_args, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ) + + assert validate_result.returncode == 0 + assert not any( + [ + line.startswith("[Error-") + for line in validate_result.stdout.splitlines() + ] + ) + def test_train_and_run( mgf_small, mzml_small, tiny_config, tmp_path, monkeypatch From 4655452d94260fa6bdbae4c92506db2a4fc92985 Mon Sep 17 00:00:00 2001 From: VarunAnanth2003 Date: Tue, 17 Sep 2024 09:10:35 -0700 Subject: [PATCH 44/84] lint fix --- casanovo/denovo/model_runner.py | 1 - 1 file changed, 1 deletion(-) diff --git a/casanovo/denovo/model_runner.py b/casanovo/denovo/model_runner.py index 5ed8f0ec..4a122e9f 100644 --- a/casanovo/denovo/model_runner.py +++ b/casanovo/denovo/model_runner.py @@ -27,7 +27,6 @@ from ..denovo.model import Spec2Pep, DbSpec2Pep - logger = logging.getLogger("casanovo") From 5e1b9d7ee1f80bf92c9aa3680d4ff2b40d8d693a Mon Sep 17 00:00:00 2001 From: VarunAnanth2003 Date: Tue, 17 Sep 2024 11:37:23 -0700 Subject: [PATCH 45/84] fix integration test --- casanovo/casanovo.py | 23 ++++++++++++++++++----- tests/test_integration.py | 10 ++++++---- tests/unit_tests/test_unit.py | 4 ++-- 3 files changed, 26 insertions(+), 11 deletions(-) diff --git a/casanovo/casanovo.py b/casanovo/casanovo.py index 4b3fdd3c..5550a6dd 100644 --- a/casanovo/casanovo.py +++ b/casanovo/casanovo.py @@ -219,17 +219,30 @@ def db_search( fasta_path: str, model: Optional[str], config: Optional[str], - output: Optional[str], + output_dir: Optional[str], + output_root: Optional[str], verbosity: str, + force_overwrite: bool, ) -> None: """Perform a database 
search on MS/MS data using Casanovo-DB. PEAK_PATH must be one or more mzML, mzXML, or MGF files. FASTA_PATH must be one FASTA file. """ - output = setup_logging(output, verbosity) - config, model = setup_model(model, config, output, False) - with ModelRunner(config, model) as runner: + output_path, output_root_name = _setup_output( + output_dir, output_root, force_overwrite, verbosity + ) + utils.check_dir_file_exists(output_path, f"{output_root}.mztab") + config, model = setup_model( + model, config, output_path, output_root_name, False + ) + with ModelRunner( + config, + model, + output_path, + output_root_name if output_root is not None else None, + False, + ) as runner: logger.info("Performing database search on:") for peak_file in peak_path: logger.info(" %s", peak_file) @@ -239,7 +252,7 @@ def db_search( runner.db_search( peak_path, fasta_path, - output, + str((output_path / output_root).with_suffix(".mztab")), ) logger.info("DONE!") diff --git a/tests/test_integration.py b/tests/test_integration.py index 59ea1e3a..eeeb498f 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -20,14 +20,17 @@ def test_db_search( CliRunner().invoke, casanovo.main, catch_exceptions=False ) - output_filename = tmp_path / "db_search.mztab" + output_rootname = "db" + output_filename = (tmp_path / output_rootname).with_suffix(".mztab") search_args = [ "db-search", "--config", tiny_config, - "--output", - str(output_filename), + "--output_dir", + str(tmp_path), + "--output_root", + output_rootname, str(mgf_medium), str(tiny_fasta_file), ] @@ -36,7 +39,6 @@ def test_db_search( assert result.exit_code == 0 assert output_filename.exists() - assert output_filename.is_file() mztab = pyteomics.mztab.MzTab(str(output_filename)) diff --git a/tests/unit_tests/test_unit.py b/tests/unit_tests/test_unit.py index 3f0699ab..59e29b34 100644 --- a/tests/unit_tests/test_unit.py +++ b/tests/unit_tests/test_unit.py @@ -654,10 +654,10 @@ def test_digest_fasta_mods(tiny_fasta_file, residues_dict): "+43.006ASQSVSSSYLTWYQQKPGQAPR", "-17.027FSGSGSGTDFTLTISSLQPEDFAVYYC+57.021QQDYNLP", "FSGSGSGTDFTLTISSLQPEDFAVYYC+57.021QQDYNLP", - "FSGSGSGTDFTLTISSLQ+0.984PEDFAVYYC+57.021QQDYNLP", "FSGSGSGTDFTLTISSLQPEDFAVYYC+57.021Q+0.984QDYNLP", "FSGSGSGTDFTLTISSLQPEDFAVYYC+57.021QQ+0.984DYNLP", "FSGSGSGTDFTLTISSLQPEDFAVYYC+57.021QQDYN+0.984LP", + "FSGSGSGTDFTLTISSLQ+0.984PEDFAVYYC+57.021QQDYNLP", "+43.006-17.027FSGSGSGTDFTLTISSLQPEDFAVYYC+57.021QQDYNLP", "+42.011FSGSGSGTDFTLTISSLQPEDFAVYYC+57.021QQDYNLP", "+43.006FSGSGSGTDFTLTISSLQPEDFAVYYC+57.021QQDYNLP", @@ -828,8 +828,8 @@ def test_digest_fasta_enzyme(tiny_fasta_file, residues_dict): "QQKPGQ", "SLQPED", "PAQLLF", - "IPARFS", "SIPARF", + "IPARFS", "LSC+57.021RAS", "TDFTLT", "QAPRLL", From 4d6b726dca7be84fcc74537a4dcf3229c93c6d8c Mon Sep 17 00:00:00 2001 From: VarunAnanth2003 Date: Tue, 17 Sep 2024 11:42:17 -0700 Subject: [PATCH 46/84] fix unit tests --- tests/unit_tests/test_unit.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/unit_tests/test_unit.py b/tests/unit_tests/test_unit.py index 59e29b34..3f0699ab 100644 --- a/tests/unit_tests/test_unit.py +++ b/tests/unit_tests/test_unit.py @@ -654,10 +654,10 @@ def test_digest_fasta_mods(tiny_fasta_file, residues_dict): "+43.006ASQSVSSSYLTWYQQKPGQAPR", "-17.027FSGSGSGTDFTLTISSLQPEDFAVYYC+57.021QQDYNLP", "FSGSGSGTDFTLTISSLQPEDFAVYYC+57.021QQDYNLP", + "FSGSGSGTDFTLTISSLQ+0.984PEDFAVYYC+57.021QQDYNLP", "FSGSGSGTDFTLTISSLQPEDFAVYYC+57.021Q+0.984QDYNLP", 
"FSGSGSGTDFTLTISSLQPEDFAVYYC+57.021QQ+0.984DYNLP", "FSGSGSGTDFTLTISSLQPEDFAVYYC+57.021QQDYN+0.984LP", - "FSGSGSGTDFTLTISSLQ+0.984PEDFAVYYC+57.021QQDYNLP", "+43.006-17.027FSGSGSGTDFTLTISSLQPEDFAVYYC+57.021QQDYNLP", "+42.011FSGSGSGTDFTLTISSLQPEDFAVYYC+57.021QQDYNLP", "+43.006FSGSGSGTDFTLTISSLQPEDFAVYYC+57.021QQDYNLP", @@ -828,8 +828,8 @@ def test_digest_fasta_enzyme(tiny_fasta_file, residues_dict): "QQKPGQ", "SLQPED", "PAQLLF", - "SIPARF", "IPARFS", + "SIPARF", "LSC+57.021RAS", "TDFTLT", "QAPRLL", From e7f0fdca13b10fd2401b34c21a574774cbbd7de4 Mon Sep 17 00:00:00 2001 From: VarunAnanth2003 Date: Fri, 20 Sep 2024 17:41:54 -0700 Subject: [PATCH 47/84] force fix test --- casanovo/data/db_utils.py | 4 +++- tests/unit_tests/test_unit.py | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/casanovo/data/db_utils.py b/casanovo/data/db_utils.py index 34671eb1..c68d208c 100644 --- a/casanovo/data/db_utils.py +++ b/casanovo/data/db_utils.py @@ -212,7 +212,9 @@ def _digest_fasta( [(pep, mass) for pep, mass, _ in mod_peptide_list], columns=["peptide", "calc_mass"], ) - pep_table.sort_values(by=["calc_mass", "peptide"], inplace=True) + pep_table.sort_values( + by=["calc_mass", "peptide"], ascending=True, inplace=True + ) logger.info( "Digestion complete. %d peptides generated.", len(pep_table) diff --git a/tests/unit_tests/test_unit.py b/tests/unit_tests/test_unit.py index 3f0699ab..e8562f49 100644 --- a/tests/unit_tests/test_unit.py +++ b/tests/unit_tests/test_unit.py @@ -680,7 +680,7 @@ def test_digest_fasta_mods(tiny_fasta_file, residues_dict): residues=residues_dict, ) peptide_list = list(pdb.db_peptides["peptide"]) - assert peptide_list == expected_1mod + assert set(peptide_list) == set(expected_1mod) def test_length_restrictions(tiny_fasta_file, residues_dict): @@ -981,7 +981,7 @@ def test_digest_fasta_enzyme(tiny_fasta_file, residues_dict): residues=residues_dict, ) peptide_list = list(pdb.db_peptides["peptide"]) - assert peptide_list == expected_nonspecific + assert set(peptide_list) == set(expected_nonspecific) def test_get_candidates(tiny_fasta_file, residues_dict): From 813fac0fab5bdad0a2f8b1187c1a9f61299407d5 Mon Sep 17 00:00:00 2001 From: VarunAnanth2003 Date: Fri, 20 Sep 2024 17:52:15 -0700 Subject: [PATCH 48/84] clean up test_digest_fasta_enzyme --- tests/unit_tests/test_unit.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/unit_tests/test_unit.py b/tests/unit_tests/test_unit.py index e8562f49..8564ffcd 100644 --- a/tests/unit_tests/test_unit.py +++ b/tests/unit_tests/test_unit.py @@ -828,8 +828,8 @@ def test_digest_fasta_enzyme(tiny_fasta_file, residues_dict): "QQKPGQ", "SLQPED", "PAQLLF", - "IPARFS", "SIPARF", + "IPARFS", "LSC+57.021RAS", "TDFTLT", "QAPRLL", @@ -981,7 +981,7 @@ def test_digest_fasta_enzyme(tiny_fasta_file, residues_dict): residues=residues_dict, ) peptide_list = list(pdb.db_peptides["peptide"]) - assert set(peptide_list) == set(expected_nonspecific) + assert peptide_list == expected_nonspecific def test_get_candidates(tiny_fasta_file, residues_dict): From 310c3fda82d778a2f69077a7f8a1373ead3a1fd8 Mon Sep 17 00:00:00 2001 From: VarunAnanth2003 Date: Fri, 20 Sep 2024 18:01:50 -0700 Subject: [PATCH 49/84] adjust test_digest_fasta_mods --- tests/unit_tests/test_unit.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/unit_tests/test_unit.py b/tests/unit_tests/test_unit.py index 8564ffcd..59e29b34 100644 --- a/tests/unit_tests/test_unit.py +++ b/tests/unit_tests/test_unit.py @@ -654,10 +654,10 @@ def 
test_digest_fasta_mods(tiny_fasta_file, residues_dict): "+43.006ASQSVSSSYLTWYQQKPGQAPR", "-17.027FSGSGSGTDFTLTISSLQPEDFAVYYC+57.021QQDYNLP", "FSGSGSGTDFTLTISSLQPEDFAVYYC+57.021QQDYNLP", - "FSGSGSGTDFTLTISSLQ+0.984PEDFAVYYC+57.021QQDYNLP", "FSGSGSGTDFTLTISSLQPEDFAVYYC+57.021Q+0.984QDYNLP", "FSGSGSGTDFTLTISSLQPEDFAVYYC+57.021QQ+0.984DYNLP", "FSGSGSGTDFTLTISSLQPEDFAVYYC+57.021QQDYN+0.984LP", + "FSGSGSGTDFTLTISSLQ+0.984PEDFAVYYC+57.021QQDYNLP", "+43.006-17.027FSGSGSGTDFTLTISSLQPEDFAVYYC+57.021QQDYNLP", "+42.011FSGSGSGTDFTLTISSLQPEDFAVYYC+57.021QQDYNLP", "+43.006FSGSGSGTDFTLTISSLQPEDFAVYYC+57.021QQDYNLP", @@ -680,7 +680,7 @@ def test_digest_fasta_mods(tiny_fasta_file, residues_dict): residues=residues_dict, ) peptide_list = list(pdb.db_peptides["peptide"]) - assert set(peptide_list) == set(expected_1mod) + assert peptide_list == expected_1mod def test_length_restrictions(tiny_fasta_file, residues_dict): From 775def7435580f1b3f5cc73e717d9706892388da Mon Sep 17 00:00:00 2001 From: VarunAnanth2003 Date: Wed, 2 Oct 2024 08:43:00 -0700 Subject: [PATCH 50/84] allows top_match filtering for casanovo-db --- casanovo/config.yaml | 4 ++-- casanovo/data/psm.py | 3 +++ casanovo/denovo/model.py | 22 +++++++++++++++++----- 3 files changed, 22 insertions(+), 7 deletions(-) diff --git a/casanovo/config.yaml b/casanovo/config.yaml index 98c8290f..6df7d094 100644 --- a/casanovo/config.yaml +++ b/casanovo/config.yaml @@ -21,6 +21,8 @@ min_peptide_len: 6 max_peptide_len: 100 # Number of spectra in one inference batch. predict_batch_size: 1024 +# Number of PSMs for each spectrum. +top_match: 1 ### @@ -29,8 +31,6 @@ predict_batch_size: 1024 # Number of beams used in beam search. n_beams: 1 -# Number of PSMs for each spectrum. -top_match: 1 # The hardware accelerator to use. Must be one of: # "cpu", "gpu", "tpu", "ipu", "hpu", "mps", or "auto". accelerator: "auto" diff --git a/casanovo/data/psm.py b/casanovo/data/psm.py index 0dc3c48b..3c33b4df 100644 --- a/casanovo/data/psm.py +++ b/casanovo/data/psm.py @@ -30,6 +30,8 @@ class PepSpecMatch: aa_scores : Iterable[float] A list of scores for individual amino acids in the peptide sequence, where len(aa_scores) == len(sequence) + protein : str + Protein associated with the peptide sequence (for db mode) """ sequence: str @@ -39,3 +41,4 @@ class PepSpecMatch: calc_mz: float exp_mz: float aa_scores: Iterable[float] + protein: str = None diff --git a/casanovo/denovo/model.py b/casanovo/denovo/model.py index 0ac649ac..67d561bc 100644 --- a/casanovo/denovo/model.py +++ b/casanovo/denovo/model.py @@ -1023,12 +1023,13 @@ def predict_step(self, batch, *args): Returns ------- - predictions: List[Tuple[int, int, float, str, np.ndarray, np.ndarray, str]] + predictions: List[Tuple[List[str], int, float, str, np.ndarray, np.ndarray, str]] Model predictions for the given batch of spectra containing spectrum ids, precursor charge and m/z, candidate peptide sequences, peptide - scores, amino acid-level scores, and associated proteins. + scores, amino acid-level scores, and associated proteins. Stored separately by + spectrum id. 
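The per-spectrum bookkeeping described here reduces to grouping PSMs by spectrum id and keeping the top_match highest-scoring ones. A minimal sketch of that selection, using the same tuple layout as the diff (peptide score at index 4):

    import collections

    def keep_top_matches(predictions, top_match=1):
        # predictions: (spectrum_id, charge, mz, peptide, score, aa_scores, protein)
        by_spectrum = collections.defaultdict(list)
        for pred in predictions:
            by_spectrum[str(pred[0])].append(pred)
        kept = []
        for preds in by_spectrum.values():
            kept.extend(sorted(preds, key=lambda p: p[4], reverse=True)[:top_match])
        return kept

This mirrors the store_dict / sorted(...)[: self.top_match] logic added below; top_match itself is the config option that this commit moves out of the de novo-only section of config.yaml.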
""" - predictions = [] + store_dict = collections.defaultdict(list) for start_idx in range(0, len(batch[0]), self.psm_batch_size): current_batch = [ b[start_idx : start_idx + self.psm_batch_size] for b in batch @@ -1057,7 +1058,7 @@ def predict_step(self, batch, *args): per_aa_scores.cpu().detach().numpy(), current_batch[3], ): - predictions.append( + store_dict[str(spectrum_i)].append( ( spectrum_i, precursor_charge, @@ -1068,11 +1069,22 @@ def predict_step(self, batch, *args): self.protein_database.get_associated_protein(peptide), ) ) + predictions = [] + for spectrum_i in store_dict: + predictions.extend( + sorted( + store_dict[str(spectrum_i)], + key=lambda x: x[4], + reverse=True, + )[: self.top_match] + ) return predictions def on_predict_batch_end( self, - outputs: List[Tuple[np.ndarray, List[str], torch.Tensor]], + outputs: List[ + Tuple[List[str], int, float, str, np.ndarray, np.ndarray, str] + ], *args, ) -> None: """ From e35c60dcbb7068a1aa5d09f506aacd4ac31ac36c Mon Sep 17 00:00:00 2001 From: VarunAnanth2003 Date: Wed, 2 Oct 2024 09:08:46 -0700 Subject: [PATCH 51/84] change default value for protein value in PepSpecMatch --- casanovo/data/psm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/casanovo/data/psm.py b/casanovo/data/psm.py index 3c33b4df..e4ef3af7 100644 --- a/casanovo/data/psm.py +++ b/casanovo/data/psm.py @@ -41,4 +41,4 @@ class PepSpecMatch: calc_mz: float exp_mz: float aa_scores: Iterable[float] - protein: str = None + protein: str = "null" From 79cba590948544e2fdb89a7fb8e00207fa4ed93f Mon Sep 17 00:00:00 2001 From: VarunAnanth2003 Date: Wed, 2 Oct 2024 10:30:26 -0700 Subject: [PATCH 52/84] reverse issues with decoder --- casanovo/denovo/model.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/casanovo/denovo/model.py b/casanovo/denovo/model.py index 67d561bc..8850c3d7 100644 --- a/casanovo/denovo/model.py +++ b/casanovo/denovo/model.py @@ -1148,7 +1148,7 @@ def _calc_match_score( (for an entire batch) """ # Remove trailing tokens from predictions based on decoder reversal - if decoder_reverse: + if not decoder_reverse: batch_all_aa_scores = batch_all_aa_scores[:, 1:] else: batch_all_aa_scores = batch_all_aa_scores[:, :-1] @@ -1163,6 +1163,8 @@ def _calc_match_score( per_aa_scores = batch_all_aa_scores[rows, cols, truth_aa_indices] + logging.debug("$$$$$$$$$$$$$||%s||$$$$$$$$$$$$$$", per_aa_scores) + per_aa_scores[per_aa_scores == 0] += 1e-10 score_mask = truth_aa_indices != 0 per_aa_scores[~score_mask] = 0 From c9eb8b70e4724ff3f4639315129a72ef4adea4bd Mon Sep 17 00:00:00 2001 From: VarunAnanth2003 Date: Wed, 2 Oct 2024 10:50:40 -0700 Subject: [PATCH 53/84] update test and remove logging statement --- casanovo/denovo/model.py | 2 -- tests/unit_tests/test_unit.py | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/casanovo/denovo/model.py b/casanovo/denovo/model.py index 8850c3d7..23b777cc 100644 --- a/casanovo/denovo/model.py +++ b/casanovo/denovo/model.py @@ -1163,8 +1163,6 @@ def _calc_match_score( per_aa_scores = batch_all_aa_scores[rows, cols, truth_aa_indices] - logging.debug("$$$$$$$$$$$$$||%s||$$$$$$$$$$$$$$", per_aa_scores) - per_aa_scores[per_aa_scores == 0] += 1e-10 score_mask = truth_aa_indices != 0 per_aa_scores[~score_mask] = 0 diff --git a/tests/unit_tests/test_unit.py b/tests/unit_tests/test_unit.py index e5ac2253..5eee0e4e 100644 --- a/tests/unit_tests/test_unit.py +++ b/tests/unit_tests/test_unit.py @@ -510,7 +510,7 @@ def test_calc_match_score(): ) all_scores, masked_per_aa_scores = 
_calc_match_score( - batch_all_aa_scores, truth_aa_indices + batch_all_aa_scores, truth_aa_indices, True ) assert all_scores.numpy()[0] == 0 From 68e67e833607aeb8e5856181f1e74e357e993235 Mon Sep 17 00:00:00 2001 From: VarunAnanth2003 Date: Sun, 3 Nov 2024 06:08:00 -0800 Subject: [PATCH 54/84] db_utils fixes --- casanovo/casanovo.py | 5 +- casanovo/config.yaml | 20 +++--- casanovo/data/datasets.py | 2 +- casanovo/data/db_utils.py | 148 ++++++++++++++++++-------------------- 4 files changed, 85 insertions(+), 90 deletions(-) diff --git a/casanovo/casanovo.py b/casanovo/casanovo.py index 4feff0cb..5547a807 100644 --- a/casanovo/casanovo.py +++ b/casanovo/casanovo.py @@ -236,6 +236,7 @@ def db_search( config, model = setup_model( model, config, output_path, output_root_name, False ) + start_time = time.time() with ModelRunner( config, model, @@ -246,6 +247,7 @@ def db_search( logger.info("Performing database search on:") for peak_file in peak_path: logger.info(" %s", peak_file) + logger.info("Using the following FASTA file:") logger.info(" %s", fasta_path) @@ -254,8 +256,7 @@ def db_search( fasta_path, str((output_path / output_root).with_suffix(".mztab")), ) - - logger.info("DONE!") + utils.log_run_report(start_time=start_time, end_time=time.time()) @main.command(cls=_SharedParams) diff --git a/casanovo/config.yaml b/casanovo/config.yaml index 6df7d094..014f02ee 100644 --- a/casanovo/config.yaml +++ b/casanovo/config.yaml @@ -23,6 +23,13 @@ max_peptide_len: 100 predict_batch_size: 1024 # Number of PSMs for each spectrum. top_match: 1 +# The hardware accelerator to use. Must be one of: +# "cpu", "gpu", "tpu", "ipu", "hpu", "mps", or "auto". +accelerator: "auto" +# The devices to use. Can be set to a positive number int, or the value -1 to +# indicate all available devices should be used. If left empty, the appropriate +# number will be automatically selected for based on the chosen accelerator. +devices: ### @@ -31,13 +38,6 @@ top_match: 1 # Number of beams used in beam search. n_beams: 1 -# The hardware accelerator to use. Must be one of: -# "cpu", "gpu", "tpu", "ipu", "hpu", "mps", or "auto". -accelerator: "auto" -# The devices to use. Can be set to a positive number int, or the value -1 to -# indicate all available devices should be used. If left empty, the appropriate -# number will be automatically selected for based on the chosen accelerator. -devices: ### @@ -46,7 +46,7 @@ devices: # Enzyme for in silico digestion, used to generate candidate peptides. # See pyteomics.parser.expasy_rules for valid enzymes. -# Can also take a regex expression to specify custom digestion rules. +# Can also take a regex to specify custom digestion rules. enzyme: "trypsin" # Digestion type for candidate peptide generation. # full: standard digestion. @@ -60,9 +60,9 @@ missed_cleavages: 0 max_mods: 1 # Select which modifications from the vocabulary can be used in candidate creation. # Format: Comma-separated list of "aa:mod_residue", -# where aa is a standard amino acid or "nterm" for an N-terminal mod +# where aa is a standard amino acid (or "nterm" for an N-terminal mod) # and mod_residue is a key from the "residues" dictionary. 
-# Example: "M:M+15.995,X:+43.006-17.027" +# Example: "M:M+15.995,nterm:+43.006" allowed_fixed_mods: "C:C+57.021" allowed_var_mods: "M:M+15.995,N:N+0.984,Q:Q+0.984,nterm:+42.011,nterm:+43.006,nterm:-17.027,nterm:+43.006-17.027" diff --git a/casanovo/data/datasets.py b/casanovo/data/datasets.py index 33d84e49..3917a2c8 100644 --- a/casanovo/data/datasets.py +++ b/casanovo/data/datasets.py @@ -1,6 +1,6 @@ """A PyTorch Dataset class for annotated spectra.""" -from typing import List, Optional, Tuple +from typing import Optional, Tuple import depthcharge import numpy as np diff --git a/casanovo/data/db_utils.py b/casanovo/data/db_utils.py index c68d208c..8d141117 100644 --- a/casanovo/data/db_utils.py +++ b/casanovo/data/db_utils.py @@ -24,7 +24,8 @@ class ProteinDatabase: """ - Store digested .fasta data and return candidate peptides for a given precursor mass. + Store digested .fasta data and return candidate peptides + for a given precursor mass. Parameters ---------- @@ -34,7 +35,8 @@ class ProteinDatabase: The enzyme to use for digestion. See pyteomics.parser.expasy_rules for valid enzymes. digestion : str - The type of digestion to perform. Either 'full' or 'partial'. + The type of digestion to perform. + Either 'full', 'partial' or 'non-specific'. missed_cleavages : int The number of missed cleavages to allow. min_peptide_len : int @@ -46,12 +48,13 @@ class ProteinDatabase: precursor_tolerance : float The precursor mass tolerance in ppm. isotope_error : Tuple[int, int] - Isotope range [min, max] to consider when comparing predicted and observed precursor m/z's. + Isotope range [min, max] to consider when comparing predicted + and observed precursor m/z's. allowed_fixed_mods : str A comma separated string of fixed modifications to consider. allowed_var_mods : str A comma separated string of variable modifications to consider. - residues : dict + residues : dict[str, float] A dictionary of amino acid masses. """ @@ -68,7 +71,7 @@ def __init__( isotope_error: Tuple[int, int], allowed_fixed_mods: str, allowed_var_mods: str, - residues: dict, + residues: dict[str, float], ): self.fixed_mods, self.var_mods, self.swap_map = _construct_mods_dict( allowed_fixed_mods, allowed_var_mods @@ -84,7 +87,7 @@ def __init__( missed_cleavages, min_peptide_len, max_peptide_len, - set(list(residues.keys()) + ["C"]), + set([aa[0] for aa in residues.keys() if aa[0].isalpha()]), ) self.db_peptides, self.prot_map = self._digest_fasta(peptide_generator) self.precursor_tolerance = precursor_tolerance @@ -94,9 +97,10 @@ def get_candidates( self, precursor_mz: float, charge: int, - ) -> List[Tuple[str, str]]: + ) -> pd.Series: """ - Returns a list of candidate peptides that fall within the specified mass range. + Returns a list of candidate peptides that fall within the + specified mass range. Parameters ---------- @@ -115,7 +119,7 @@ def get_candidates( for e in range(self.isotope_error[0], self.isotope_error[1] + 1): iso_shift = ISOTOPE_SPACING * e shift_raw_mass = float( - _to_raw_mass(precursor_mz, charge) - iso_shift + _to_neutral_mass(precursor_mz, charge) - iso_shift ) upper_bound = shift_raw_mass * ( 1 + (self.precursor_tolerance / 1e6) @@ -154,9 +158,10 @@ def get_associated_protein(self, peptide: str) -> str: def _digest_fasta( self, peptide_generator: Iterator[Tuple[str, str]], - ) -> Tuple[pd.DataFrame, dict]: + ) -> Tuple[pd.DataFrame, dict[str, str]]: """ - Digests a FASTA file and returns the peptides, their masses, and associated protein. 
+ Digests a FASTA file and returns the peptides, their masses, + and associated protein. Parameters ---------- @@ -168,13 +173,9 @@ def _digest_fasta( pep_table : pd.DataFrame A Pandas DataFrame with peptide and mass columns. Sorted by neutral mass in ascending order. - prot_map : dict + prot_map : dict[str, str] A dictionary mapping peptides to associated proteins. """ - peptide_list = [] - for pep, prot in peptide_generator: - peptide_list.append((pep, prot)) - # Generate modified peptides mass_calculator = depthcharge.masses.PeptideMass(residues="massivekb") peptide_isoforms = [ @@ -187,7 +188,7 @@ def _digest_fasta( ), prot, ) - for pep, prot in peptide_list + for pep, prot in peptide_generator ] mod_peptide_list = [ (mod_pep, mass_calculator.mass(mod_pep), prot) @@ -203,9 +204,9 @@ def _digest_fasta( ] # Create a dictionary mapping for easy accession of associated proteins - prot_map = defaultdict(list) + prot_map = defaultdict(set) for pep, _, prot in mod_peptide_list: - prot_map[pep].append(prot) + prot_map[pep].add(prot) # Create a DataFrame for easy sorting and filtering pep_table = pd.DataFrame( @@ -227,8 +228,8 @@ def _peptide_generator( enzyme: str, digestion: str, missed_cleavages: int, - min_peptide_length: int, - max_peptide_length: int, + min_peptide_len: int, + max_peptide_len: int, valid_aa: set[str], ) -> Iterator[str]: """ @@ -242,14 +243,15 @@ def _peptide_generator( enzyme : str The enzyme to use for digestion. See pyteomics.parser.expasy_rules for valid enzymes. - Can also be a regex pattern. + Can also be a regex. digestion : str - The type of digestion to perform. Either 'full', 'partial' or 'non-specific'. + The type of digestion to perform. + Either 'full', 'partial' or 'non-specific'. missed_cleavages : int The number of missed cleavages to allow. - min_peptide_length : int + min_peptide_len : int The minimum length of peptides to consider. - max_peptide_length : int + max_peptide_len : int The maximum length of peptides to consider. valid_aa : set[str] A set of valid amino acids. @@ -261,19 +263,6 @@ def _peptide_generator( protein : str The associated protein. """ - # Verify the existence of the file: - if not os.path.isfile(fasta_filename): - logger.error("File %s does not exist.", fasta_filename) - raise FileNotFoundError(f"File {fasta_filename} does not exist.") - if digestion not in ["full", "partial", "non-specific"]: - logger.error("Digestion type %s not recognized.", digestion) - raise ValueError(f"Digestion type {digestion} not recognized.") - if enzyme not in parser.expasy_rules: - logger.info( - "Enzyme %s not recognized. 
Interpreting as cleavage rule.", - enzyme, - ) - # Verify the existence of the file: if not os.path.isfile(fasta_filename): logger.error("File %s does not exist.", fasta_filename) @@ -292,12 +281,12 @@ def _peptide_generator( # Generate all possible peptides for i in range(len(seq)): for j in range( - i + min_peptide_length, - min(i + max_peptide_length + 1, len(seq) + 1), + i + min_peptide_len, + min(i + max_peptide_len + 1, len(seq) + 1), ): pep = seq[i:j] if any(aa not in valid_aa for aa in pep): - logger.warn( + logger.warning( "Skipping peptide with unknown amino acids: %s", pep, ) @@ -314,12 +303,9 @@ def _peptide_generator( ) protein = header.split()[0] for pep in pep_set: - if ( - len(pep) >= min_peptide_length - and len(pep) <= max_peptide_length - ): + if len(pep) >= min_peptide_len and len(pep) <= max_peptide_len: if any(aa not in valid_aa for aa in pep): - logger.warn( + logger.warning( "Skipping peptide with unknown amino acids: %s", pep, ) @@ -348,7 +334,7 @@ def _to_mz(precursor_mass: float, charge: int) -> float: @njit -def _to_raw_mass(mz_mass: float, charge: int) -> float: +def _to_neutral_mass(mz_mass: float, charge: int) -> float: """ Convert precursor m/z value to neutral mass. @@ -367,23 +353,33 @@ def _to_raw_mass(mz_mass: float, charge: int) -> float: return charge * (mz_mass - PROTON) -def _convert_from_modx(seq: str, swap_map: dict, swap_regex: str) -> str: - """Converts peptide sequence from modX format to Casanovo-acceptable modifications. - - Args: - seq : str - Peptide in modX format - swap_map : dict - Dictionary that allows for swapping of modX to Casanovo-acceptable modifications. - swap_regex : str - Regular expression to match modX format. +def _convert_from_modx( + seq: str, swap_map: dict[str, str], swap_regex: str +) -> str: + """ + Converts peptide sequence from modX format to + Casanovo-acceptable modifications. + + Parameters: + ----------- + seq : str + Peptide in modX format + swap_map : dict[str, str] + Dictionary that allows for swapping of modX to Casanovo-acceptable modifications. + swap_regex : str + Regular expression to match modX format. + + Returns: + -------- + swap_regex : str + Peptide in Casanovo-acceptable modifications. """ return swap_regex.sub(lambda x: swap_map[x.group()], seq) def _construct_mods_dict( allowed_fixed_mods: str, allowed_var_mods: str -) -> Tuple[dict, dict, dict]: +) -> Tuple[dict[str, str], dict[str, str], dict[str, str]]: """ Constructs dictionaries of fixed and variable modifications. @@ -396,30 +392,28 @@ def _construct_mods_dict( Returns ------- - fixed_mods : dict + fixed_mods : dict[str, str] A dictionary of fixed modifications. - var_mods : dict + var_mods : dict[str, str] A dictionary of variable modifications. - swap_map : dict - A dictionary that allows for swapping of modX to Casanovo-acceptable modifications. + swap_map : dict[str, str] + A dictionary that allows for swapping of modX to + Casanovo-acceptable modifications. 
""" swap_map = {} fixed_mods = {} - for idx, mod in enumerate(allowed_fixed_mods.split(",")): - aa, mod_aa = mod.split(":") - mod_id = string.ascii_lowercase[idx] - fixed_mods[mod_id] = [aa] - swap_map[f"{mod_id}{aa}"] = f"{mod_aa}" - var_mods = {} - for idx, mod in enumerate(allowed_var_mods.split(",")): - aa, mod_aa = mod.split(":") - mod_id = string.ascii_lowercase[idx] - if aa == "nterm": - var_mods[f"{mod_id}-"] = True - swap_map[f"{mod_id}-"] = f"{mod_aa}" - else: - var_mods[mod_id] = [aa] - swap_map[f"{mod_id}{aa}"] = f"{mod_aa}" + for mod_map, allowed_mods in zip( + [fixed_mods, var_mods], [allowed_fixed_mods, allowed_var_mods] + ): + for idx, mod in enumerate(allowed_mods.split(",")): + aa, mod_aa = mod.split(":") + mod_id = string.ascii_lowercase[idx] + if aa == "nterm": + mod_map[f"{mod_id}-"] = True + swap_map[f"{mod_id}-"] = f"{mod_aa}" + else: + mod_map[mod_id] = [aa] + swap_map[f"{mod_id}{aa}"] = f"{mod_aa}" return fixed_mods, var_mods, swap_map From d01dd7fda222acff904575e7df528df05f11abbd Mon Sep 17 00:00:00 2001 From: VarunAnanth2003 Date: Sun, 3 Nov 2024 11:39:35 -0800 Subject: [PATCH 55/84] updates to dataloaders, model_runner, and model.py --- casanovo/denovo/dataloaders.py | 12 ++-- casanovo/denovo/model.py | 108 +++++++++++++------------------- casanovo/denovo/model_runner.py | 51 ++++++--------- tests/conftest.py | 102 ++---------------------------- 4 files changed, 72 insertions(+), 201 deletions(-) diff --git a/casanovo/denovo/dataloaders.py b/casanovo/denovo/dataloaders.py index 2646329d..4eb4d2e2 100644 --- a/casanovo/denovo/dataloaders.py +++ b/casanovo/denovo/dataloaders.py @@ -137,6 +137,7 @@ def _make_loader( dataset: torch.utils.data.Dataset, batch_size: int, shuffle: bool = False, + collate_fn: Optional[callable] = None, ) -> torch.utils.data.DataLoader: """ Create a PyTorch DataLoader. @@ -149,6 +150,8 @@ def _make_loader( The batch size to use. shuffle : bool Option to shuffle the batches. + collate_fn : Optional[callable] + A function to collate the data into a batch. Returns ------- @@ -158,7 +161,7 @@ def _make_loader( return torch.utils.data.DataLoader( dataset, batch_size=batch_size, - collate_fn=prepare_batch, + collate_fn=prepare_batch if collate_fn is None else collate_fn, pin_memory=True, num_workers=self.n_workers, shuffle=shuffle, @@ -184,15 +187,12 @@ def predict_dataloader(self) -> torch.utils.data.DataLoader: def db_dataloader(self) -> torch.utils.data.DataLoader: """Get a special dataloader for DB search""" - return torch.utils.data.DataLoader( + return self._make_loader( self.test_dataset, - batch_size=self.eval_batch_size, + self.eval_batch_size, collate_fn=functools.partial( prepare_psm_batch, protein_database=self.protein_database ), - pin_memory=True, - num_workers=self.n_workers, - shuffle=False, ) diff --git a/casanovo/denovo/model.py b/casanovo/denovo/model.py index 40328701..40d0cc3d 100644 --- a/casanovo/denovo/model.py +++ b/casanovo/denovo/model.py @@ -994,17 +994,12 @@ class DbSpec2Pep(Spec2Pep): Subclass of Spec2Pep for the use of Casanovo as an MS/MS database search score function. - Uses teacher forcing to 'query' Casanovo for its score for each AA - within a candidate peptide, and takes the geometric average of these scores - and reports this as the score for the spectrum-peptide pair. Note that the - geometric mean of the AA scores is actually calculated by a - summation and average of the log of the scores, to preserve numerical - stability. This does not affect PSM ranking. 
+ Uses teacher forcing to 'query' Casanovo to score a peptide-spectrum + pair. Higher scores indicate a better match between the peptide and + spectrum. The amino acid-level scores are also returned. Also note that although teacher-forcing is used within this method, there is *no training* involved. This is a prediction-only method. - - Output is provided in .mztab format. """ def __init__(self, *args, **kwargs): @@ -1034,17 +1029,15 @@ def predict_step(self, batch, *args): current_batch = [ b[start_idx : start_idx + self.psm_batch_size] for b in batch ] - pred, truth = self.decoder( - current_batch[3], - current_batch[1], - *self.encoder(current_batch[0]), + pred, truth = self._forward_step( + current_batch[0], current_batch[1], current_batch[3] ) pred = self.softmax(pred) - all_scores, per_aa_scores = _calc_match_score( + all_peptide_scores, all_aa_scores = _calc_match_score( pred, truth, self.decoder.reverse ) for ( - precursor_charge, + charge, precursor_mz, spectrum_i, peptide_score, @@ -1054,27 +1047,32 @@ def predict_step(self, batch, *args): current_batch[1][:, 1].cpu().detach().numpy(), current_batch[1][:, 2].cpu().detach().numpy(), current_batch[2], - all_scores.cpu().detach().numpy(), - per_aa_scores.cpu().detach().numpy(), + all_peptide_scores, + all_aa_scores, current_batch[3], ): - store_dict[str(spectrum_i)].append( - ( - spectrum_i, - precursor_charge, - precursor_mz, - peptide, - peptide_score, - aa_scores, - self.protein_database.get_associated_protein(peptide), + store_dict[spectrum_i].append( + ms_io.PepSpecMatch( + sequence=peptide, + spectrum_id=tuple(spectrum_i), + peptide_score=peptide_score, + charge=int(charge), + calc_mz=precursor_mz, + exp_mz=self.peptide_mass_calculator.mass( + peptide, charge + ), + aa_scores=aa_scores, + protein=self.protein_database.get_associated_protein( + peptide + ), ) ) predictions = [] for spectrum_i in store_dict: predictions.extend( sorted( - store_dict[str(spectrum_i)], - key=lambda x: x[4], + store_dict[spectrum_i], + key=lambda x: x.peptide_score, reverse=True, )[: self.top_match] ) @@ -1090,27 +1088,7 @@ def on_predict_batch_end( """ Write the database search results to the output file. """ - for ( - spectrum_i, - charge, - precursor_mz, - peptide, - peptide_score, - aa_scores, - protein, - ) in outputs: - self.out_writer.psms.append( - ms_io.PepSpecMatch( - sequence=peptide, - spectrum_id=tuple(spectrum_i), - peptide_score=peptide_score, - charge=int(charge), - calc_mz=precursor_mz, - exp_mz=self.peptide_mass_calculator.mass(peptide, charge), - aa_scores=aa_scores, - protein=protein, - ) - ) + self.out_writer.psms.extend(outputs) def _calc_match_score( @@ -1124,8 +1102,7 @@ def _calc_match_score( Take in teacher-forced scoring of amino acids of the peptides (in a batch) and use the truth labels to calculate a score between the input spectra and - associated peptide. The score is the geometric - mean of the AA probabilities + associated peptide. Parameters ---------- @@ -1134,18 +1111,19 @@ def _calc_match_score( the vocabulary for every prediction made to generate the associated peptide (for an entire batch) truth_aa_indices : torch.Tensor - Indicies of the score for each actual amino acid + Indices of the score for each actual amino acid in the peptide (for an entire batch) decoder_reverse : bool Whether the decoder is reversed. 
Returns ------- - (all_scores, per_aa_scores) : Tuple[torch.Tensor, torch.Tensor] + all_peptide_scores: List[float] The score between the input spectra and associated peptide - (for an entire batch) - a list of lists of per amino acid scores - (for an entire batch) + for each PSM in the batch. + all_aa_scores : List[List[float]] + A list of lists of per amino acid scores + for each PSM in the batch. """ # Remove trailing tokens from predictions based on decoder reversal if not decoder_reverse: @@ -1162,19 +1140,19 @@ def _calc_match_score( cols = torch.arange(0, batch_all_aa_scores.shape[1]).expand_as(rows) per_aa_scores = batch_all_aa_scores[rows, cols, truth_aa_indices] - + per_aa_scores = per_aa_scores.cpu().detach().numpy() per_aa_scores[per_aa_scores == 0] += 1e-10 score_mask = truth_aa_indices != 0 per_aa_scores[~score_mask] = 0 - log_per_aa_scores = torch.log(per_aa_scores) - all_scores = torch.where( - log_per_aa_scores == float("-inf"), - torch.tensor(0.0), - log_per_aa_scores, - ).sum(dim=1) / score_mask.sum( - dim=1 - ) # Calculates geometric score - return all_scores, per_aa_scores + all_peptide_scores = [] + all_aa_scores = [] + for psm_score in per_aa_scores: + psm_score = np.trim_zeros(psm_score) + aa_scores, peptide_score = _aa_pep_score(psm_score, True) + all_peptide_scores.append(peptide_score) + all_aa_scores.append(aa_scores) + + return all_peptide_scores, all_aa_scores class CosineWarmupScheduler(torch.optim.lr_scheduler._LRScheduler): diff --git a/casanovo/denovo/model_runner.py b/casanovo/denovo/model_runner.py index b1b1046f..b097f6d5 100644 --- a/casanovo/denovo/model_runner.py +++ b/casanovo/denovo/model_runner.py @@ -127,24 +127,24 @@ def db_search( self, peak_path: Iterable[str], fasta_path: str, - output: str, + results_path: str, ) -> None: """Perform database search with Casanovo. Parameters ---------- peak_path : Iterable[str] - The paths to the .mgf data files for database search. + The path with the MS data files for database search. fasta_path : str - The path to the FASTA file for database search. - output : str - Where should the output be saved? + The path with the FASTA file for database search. + results_path : str + Sequencing results file path Returns ------- self """ - self.writer = ms_io.MztabWriter(Path(output).with_suffix(".mztab")) + self.writer = ms_io.MztabWriter(results_path) self.writer.set_metadata( self.config, model=str(self.model_filename), @@ -266,7 +266,7 @@ def predict( Parameters ---------- - peak_path : iterable of str + peak_path : Iterable[str] The path with the MS data files for predicting peptide sequences. results_path : str Sequencing results file path @@ -431,12 +431,12 @@ def initialize_model( ) if self.model_filename is None: - # Train a model from scratch if no model file is provided. if db_search: logger.error("DB search mode requires a model file") raise ValueError( "A model file must be provided for DB search mode" ) + # Train a model from scratch if no model file is provided. if train: self.model = Spec2Pep(**model_params) return @@ -456,19 +456,13 @@ def initialize_model( # First try loading model details from the weights file, otherwise use # the provided configuration. device = torch.empty(1).device # Use the default device. 
+ Model = DbSpec2Pep if db_search else Spec2Pep try: - if db_search: - self.model = DbSpec2Pep.load_from_checkpoint( - self.model_filename, - map_location=device, - **loaded_model_params, - ) - else: - self.model = Spec2Pep.load_from_checkpoint( - self.model_filename, - map_location=device, - **loaded_model_params, - ) + self.model = Model.load_from_checkpoint( + self.model_filename, + map_location=device, + **loaded_model_params, + ) architecture_params = set(model_params.keys()) - set( loaded_model_params.keys() @@ -484,18 +478,11 @@ def initialize_model( except RuntimeError: # This only doesn't work if the weights are from an older version try: - if db_search: - self.model = DbSpec2Pep.load_from_checkpoint( - self.model_filename, - map_location=device, - **model_params, - ) - else: - self.model = Spec2Pep.load_from_checkpoint( - self.model_filename, - map_location=device, - **model_params, - ) + self.model = Model.load_from_checkpoint( + self.model_filename, + map_location=device, + **model_params, + ) except RuntimeError: raise RuntimeError( "Weights file incompatible with the current version of " diff --git a/tests/conftest.py b/tests/conftest.py index 94b2d744..1729dcb3 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,5 +1,6 @@ """Fixtures used for testing.""" +import depthcharge import numpy as np import pandas as pd import psims @@ -108,9 +109,9 @@ def _create_mgf_entry(peptide, charge=2, mod_aa_mass=None, annotate=True): The PSM entry in an MGF file format. """ if mod_aa_mass is None: - precursor_mz = calculate_mass(peptide, charge=int(charge)) + precursor_mz = fast_mass(peptide, charge=int(charge)) else: - aa_mass = std_aa_mass + aa_mass = std_aa_mass.copy() aa_mass.update(mod_aa_mass) precursor_mz = fast_mass(peptide, charge=int(charge), aa_mass=aa_mass) mzs, intensities = _peptide_to_peaks(peptide, charge) @@ -332,99 +333,4 @@ def tiny_config(tmp_path): @pytest.fixture def residues_dict(): - return { - "G": 57.021464, - "A": 71.037114, - "S": 87.032028, - "P": 97.052764, - "V": 99.068414, - "T": 101.047670, - "C+57.021": 160.030649, - "L": 113.084064, - "I": 113.084064, - "N": 114.042927, - "D": 115.026943, - "Q": 128.058578, - "K": 128.094963, - "E": 129.042593, - "M": 131.040485, - "H": 137.058912, - "F": 147.068414, - "R": 156.101111, - "Y": 163.063329, - "W": 186.079313, - "M+15.995": 147.035400, - "N+0.984": 115.026943, - "Q+0.984": 129.042594, - "+42.011": 42.010565, - "+43.006": 43.005814, - "-17.027": -17.026549, - "+43.006-17.027": 25.980265, - } - - -@pytest.fixture -def tide_dir_small(tmp_path): - """A directory with a very small TIDE search result.""" - tide_dir = tmp_path / "tide_results" - tide_dir.mkdir() - - # Key is the scan number - built_dict = { - 0: { - "targets": ["LESLIEK", "PEPTIDEK"], - "decoys": ["KEILSEL", "KEDITEPP"], - }, - 1: { - "targets": ["LESLIEK", "PEPTIDEK"], - "decoys": ["KEILSEL", "KEDITEPP"], - }, - 2: { - "targets": [ - "L[42.011]EM[15.9]SLIM[15.995]EK", - "P[43.01]EN[0.99]PTIQ[0.984]DEK", - ], - "decoys": [ - "K[-17.03]M[15.995]EILSEL", - "K[25.1]EDITEPP", - "KEDIQ[0.984]TEPPQ[0.984]", - ], - }, - } - - _create_tide_results_target(tide_dir, built_dict) - _create_tide_results_decoy(tide_dir, built_dict) - - return tide_dir - - -def _create_tide_results_target(tide_dir, built_dict): - """Create a fake TIDE search result file (target).""" - out_file = tide_dir / "tide-search.target.txt" - df = pd.DataFrame(columns=["scan", "sequence", "target/decoy"]) - for scan, peptides in built_dict.items(): - entry = pd.DataFrame.from_dict( 
- { - "scan": [scan] * len(peptides["targets"]), - "sequence": peptides["targets"], - "target/decoy": ["target"] * len(peptides["targets"]), - } - ) - df = pd.concat([df, entry], ignore_index=True) - df.to_csv(out_file, sep="\t", index=True) - - -def _create_tide_results_decoy(tide_dir, built_dict): - """Create a fake TIDE search result file (decoy).""" - out_file = tide_dir / "tide-search.decoy.txt" - df = pd.DataFrame(columns=["scan", "sequence", "target/decoy"]) - for scan, peptides in built_dict.items(): - entry = pd.DataFrame.from_dict( - { - "scan": [scan] * len(peptides["decoys"]), - "sequence": peptides["decoys"], - "target/decoy": ["decoy"] * len(peptides["decoys"]), - } - ) - df = pd.concat([df, entry], ignore_index=True) - df.to_csv(out_file, sep="\t", index=True) + return depthcharge.masses.PeptideMass("massivekb").masses From d5819442011e29c4b24743cedb56a604e21a7afb Mon Sep 17 00:00:00 2001 From: VarunAnanth2003 Date: Sun, 3 Nov 2024 12:26:18 -0800 Subject: [PATCH 56/84] near final changes for all but db_utils --- casanovo/data/db_utils.py | 20 ------------ tests/unit_tests/test_unit.py | 59 +++++++++++++++++++++++++++++------ 2 files changed, 49 insertions(+), 30 deletions(-) diff --git a/casanovo/data/db_utils.py b/casanovo/data/db_utils.py index 8d141117..e704907c 100644 --- a/casanovo/data/db_utils.py +++ b/casanovo/data/db_utils.py @@ -313,26 +313,6 @@ def _peptide_generator( yield pep, protein -@njit -def _to_mz(precursor_mass: float, charge: int) -> float: - """ - Convert precursor neutral mass to m/z value. - - Parameters - ---------- - precursor_mass : float - The precursor neutral mass. - charge : int - The precursor charge. - - Returns - ------- - mz : float - The calculated precursor mass-to-charge ratio. - """ - return (precursor_mass + (charge * PROTON)) / charge - - @njit def _to_neutral_mass(mz_mass: float, charge: int) -> float: """ diff --git a/tests/unit_tests/test_unit.py b/tests/unit_tests/test_unit.py index 482ef853..034f4874 100644 --- a/tests/unit_tests/test_unit.py +++ b/tests/unit_tests/test_unit.py @@ -453,6 +453,35 @@ def test_aa_pep_score(): assert peptide_score == pytest.approx(0.5) +def test_peptide_generator_errors(residues_dict, tiny_fasta_file): + with pytest.raises(FileNotFoundError) as e_info: + [ + (a, b) + for a, b in db_utils._peptide_generator( + "fail.fasta", "trypsin", "full", 0, 5, 10, residues_dict + ) + ] + with pytest.raises(ValueError) as e_info: + [ + (a, b) + for a, b in db_utils._peptide_generator( + tiny_fasta_file, "trypsin", "fail", 0, 5, 10, residues_dict + ) + ] + + +def test_to_neutral_mass(): + mz = 500 + charge = 2 + neutral_mass = db_utils._to_neutral_mass(mz, charge) + assert neutral_mass == 997.98544706646 + + mz = 500 + charge = 1 + neutral_mass = db_utils._to_neutral_mass(mz, charge) + assert neutral_mass == 498.99272353323 + + def test_calc_match_score(): """ Test the calculation of geometric scores using teacher-forced @@ -518,19 +547,29 @@ def test_calc_match_score(): batch_all_aa_scores, truth_aa_indices, True ) - assert all_scores.numpy()[0] == 0 - assert all_scores.numpy()[1] == 0 - assert all_scores.numpy()[2] == pytest.approx( - np.log(0.5 * 0.5 * 1 * 1) / 4 + assert all_scores[0] == np.exp(0) + assert all_scores[1] == np.exp(0) + assert all_scores[2] == pytest.approx( + np.exp(np.log(0.5 * 0.5 * 1 * 1) / 4) ) - assert all_scores.numpy()[3] == pytest.approx( - np.log(1e-10 * 1 * 1 * 1) / 4 + assert all_scores[3] == pytest.approx( + np.exp(np.log(1e-10 * 1 * 1 * 1) / 4) ) - assert 
np.sum(masked_per_aa_scores.numpy()[0]) == 4 - assert np.sum(masked_per_aa_scores.numpy()[1]) == 3 - assert np.sum(masked_per_aa_scores.numpy()[2]) == 3 - assert np.sum(masked_per_aa_scores.numpy()[3]) == 3 + aa_scores = np.array([1, 1, 1, 1]) + assert np.allclose(masked_per_aa_scores[0], (aa_scores + 1) / 2) + aa_scores = np.array([1, 1, 1]) + assert np.allclose(masked_per_aa_scores[1], (aa_scores + 1) / 2) + aa_scores = np.array([0.5, 0.5, 1, 1]) + assert np.allclose( + masked_per_aa_scores[2], + (aa_scores + np.exp(np.log(0.5 * 0.5 * 1 * 1) / 4)) / 2, + ) + aa_scores = np.array([1e-10, 1, 1, 1]) + assert np.allclose( + masked_per_aa_scores[3], + (aa_scores + np.exp(np.log(1e-10 * 1 * 1 * 1) / 4)) / 2, + ) def test_digest_fasta_cleave(tiny_fasta_file, residues_dict): From 092fa2a6da155c93898c8581a1bc6de7c72a9827 Mon Sep 17 00:00:00 2001 From: VarunAnanth2003 Date: Sun, 3 Nov 2024 12:45:35 -0800 Subject: [PATCH 57/84] line length fixes --- casanovo/casanovo.py | 5 +++-- casanovo/denovo/model.py | 4 ++-- tests/conftest.py | 8 ++++++-- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/casanovo/casanovo.py b/casanovo/casanovo.py index 5547a807..01098255 100644 --- a/casanovo/casanovo.py +++ b/casanovo/casanovo.py @@ -62,8 +62,9 @@ def __init__(self, *args, **kwargs) -> None: click.Option( ("-m", "--model"), help=""" - Either the model weights (.ckpt file) or a URL pointing to the model weights - file. If not provided, Casanovo will try to download the latest release automatically. + Either the model weights (.ckpt file) or a URL pointing to + the model weights file. If not provided, + Casanovo will try to download the latest release automatically. """, ), click.Option( diff --git a/casanovo/denovo/model.py b/casanovo/denovo/model.py index 40d0cc3d..5e807153 100644 --- a/casanovo/denovo/model.py +++ b/casanovo/denovo/model.py @@ -1021,8 +1021,8 @@ def predict_step(self, batch, *args): predictions: List[Tuple[List[str], int, float, str, np.ndarray, np.ndarray, str]] Model predictions for the given batch of spectra containing spectrum ids, precursor charge and m/z, candidate peptide sequences, peptide - scores, amino acid-level scores, and associated proteins. Stored separately by - spectrum id. + scores, amino acid-level scores, and associated proteins. + Stored separately by spectrum id. 
""" store_dict = collections.defaultdict(list) for start_idx in range(0, len(batch[0]), self.psm_batch_size): diff --git a/tests/conftest.py b/tests/conftest.py index 1729dcb3..009c0737 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -22,14 +22,18 @@ def tiny_fasta_file(tmp_path): fasta_file = tmp_path / "tiny_fasta.fasta" with fasta_file.open("w+") as fasta_ref: fasta_ref.write( - ">foo\nMEAPAQLLFLLLLWLPDTTREIVMTQSPPTLSLSPGERVTLSCRASQSVSSSYLTWYQQKPGQAPRLLIYGASTRATSIPARFSGSGSGTDFTLTISSLQPEDFAVYYCQQDYNLP" + ( + ">foo\nMEAPAQLLFLLLLWLPDTTREIVMTQSPPTLSLSPGERVTLSCRASQSVSSSYLTWYQ" + "QKPGQAPRLLIYGASTRATSIPARFSGSGSGTDFTLTISSLQPEDFAVYYCQQDYNLP" + ) ) return fasta_file @pytest.fixture def mgf_medium(tmp_path): - """An MGF file with 7 spectra and scan numbers, C+57.021 mass modification considered""" + """An MGF file with 7 spectra and scan numbers, + C+57.021 mass modification considered""" peptides = [ "ATSIPAR", "VTLSCR", From 6d0868c6141f3d329783139929c2db63928875e9 Mon Sep 17 00:00:00 2001 From: Wout Bittremieux Date: Sun, 10 Nov 2024 13:42:57 +0100 Subject: [PATCH 58/84] Minor refactoring and type hint fixes --- casanovo/data/db_utils.py | 130 +++++++++++++++++++------------------- 1 file changed, 66 insertions(+), 64 deletions(-) diff --git a/casanovo/data/db_utils.py b/casanovo/data/db_utils.py index e704907c..b1121780 100644 --- a/casanovo/data/db_utils.py +++ b/casanovo/data/db_utils.py @@ -6,13 +6,14 @@ import re import string from collections import defaultdict -from typing import List, Tuple, Iterator +from typing import DefaultDict, Dict, Iterator, Pattern, Set, Tuple import depthcharge.masses +import numba as nb +import numpy as np import pandas as pd -import pyteomics.fasta as fasta -import pyteomics.parser as parser -from numba import njit +import pyteomics.fasta +import pyteomics.parser logger = logging.getLogger("casanovo") @@ -24,8 +25,8 @@ class ProteinDatabase: """ - Store digested .fasta data and return candidate peptides - for a given precursor mass. + Store digested FASTA data and return candidate peptides for a given + precursor mass. Parameters ---------- @@ -36,7 +37,7 @@ class ProteinDatabase: See pyteomics.parser.expasy_rules for valid enzymes. digestion : str The type of digestion to perform. - Either 'full', 'partial' or 'non-specific'. + Either 'full', 'partial', or 'non-specific'. missed_cleavages : int The number of missed cleavages to allow. min_peptide_len : int @@ -51,10 +52,10 @@ class ProteinDatabase: Isotope range [min, max] to consider when comparing predicted and observed precursor m/z's. allowed_fixed_mods : str - A comma separated string of fixed modifications to consider. + A comma-separated string of fixed modifications to consider. allowed_var_mods : str - A comma separated string of variable modifications to consider. - residues : dict[str, float] + A comma-separated string of variable modifications to consider. + residues : Dict[str, float] A dictionary of amino acid masses. """ @@ -71,7 +72,7 @@ def __init__( isotope_error: Tuple[int, int], allowed_fixed_mods: str, allowed_var_mods: str, - residues: dict[str, float], + residues: Dict[str, float], ): self.fixed_mods, self.var_mods, self.swap_map = _construct_mods_dict( allowed_fixed_mods, allowed_var_mods @@ -99,8 +100,8 @@ def get_candidates( charge: int, ) -> pd.Series: """ - Returns a list of candidate peptides that fall within the - specified mass range. + Returns candidate peptides that fall within the search + parameter's precursor mass tolerance. 
Parameters ---------- @@ -141,7 +142,7 @@ def get_candidates( def get_associated_protein(self, peptide: str) -> str: """ - Returns the associated protein for a given peptide. + Returns the associated protein(s) for a given peptide. Parameters ---------- @@ -151,17 +152,17 @@ def get_associated_protein(self, peptide: str) -> str: Returns ------- protein : str - The associated protein(s). + The associated protein(s) identifiers, separated by commas. """ return ",".join(self.prot_map[peptide]) def _digest_fasta( self, peptide_generator: Iterator[Tuple[str, str]], - ) -> Tuple[pd.DataFrame, dict[str, str]]: + ) -> Tuple[pd.DataFrame, DefaultDict[str, Set]]: """ Digests a FASTA file and returns the peptides, their masses, - and associated protein. + and associated protein(s). Parameters ---------- @@ -173,14 +174,14 @@ def _digest_fasta( pep_table : pd.DataFrame A Pandas DataFrame with peptide and mass columns. Sorted by neutral mass in ascending order. - prot_map : dict[str, str] + prot_map : DefaultDict[str, Set] A dictionary mapping peptides to associated proteins. """ - # Generate modified peptides + # Generate all possible peptide isoforms. mass_calculator = depthcharge.masses.PeptideMass(residues="massivekb") peptide_isoforms = [ ( - parser.isoforms( + pyteomics.parser.isoforms( pep, variable_mods=self.var_mods, fixed_mods=self.fixed_mods, @@ -203,12 +204,13 @@ def _digest_fasta( ) ] - # Create a dictionary mapping for easy accession of associated proteins - prot_map = defaultdict(set) + # Create a dictionary mapping for easy accession of associated + # proteins. + prot_map: DefaultDict[str, Set] = defaultdict(set) for pep, _, prot in mod_peptide_list: prot_map[pep].add(prot) - # Create a DataFrame for easy sorting and filtering + # Create a DataFrame for easy sorting and filtering. pep_table = pd.DataFrame( [(pep, mass) for pep, mass, _ in mod_peptide_list], columns=["peptide", "calc_mass"], @@ -230,11 +232,11 @@ def _peptide_generator( missed_cleavages: int, min_peptide_len: int, max_peptide_len: int, - valid_aa: set[str], -) -> Iterator[str]: + valid_aa: Set[str], +) -> Iterator[Tuple[str, str]]: """ - Create a generator the yields peptides from a FASTA file - depending on the type of digestion specified. + Creates a generator that yields peptides from a FASTA file depending + on the type of digestion specified. Parameters ---------- @@ -246,74 +248,73 @@ def _peptide_generator( Can also be a regex. digestion : str The type of digestion to perform. - Either 'full', 'partial' or 'non-specific'. + Either 'full', 'partial', or 'non-specific'. missed_cleavages : int The number of missed cleavages to allow. min_peptide_len : int The minimum length of peptides to consider. max_peptide_len : int The maximum length of peptides to consider. - valid_aa : set[str] + valid_aa : Set[str] A set of valid amino acids. Yields ------ - pep : str + peptide : str A peptide sequence, unmodified. protein : str The associated protein. """ - # Verify the existence of the file: + # Verify the existence of the file. 
if not os.path.isfile(fasta_filename): logger.error("File %s does not exist.", fasta_filename) raise FileNotFoundError(f"File {fasta_filename} does not exist.") - if digestion not in ["full", "partial", "non-specific"]: + if digestion not in ("full", "partial", "non-specific"): logger.error("Digestion type %s not recognized.", digestion) raise ValueError(f"Digestion type {digestion} not recognized.") - if enzyme not in parser.expasy_rules: + if enzyme not in pyteomics.parser.expasy_rules: logger.info( "Enzyme %s not recognized. Interpreting as cleavage rule.", enzyme, ) if digestion == "non-specific": - for header, seq in fasta.read(fasta_filename): + for header, seq in pyteomics.fasta.read(fasta_filename): protein = header.split()[0] - # Generate all possible peptides + # Generate all possible peptides. for i in range(len(seq)): for j in range( i + min_peptide_len, min(i + max_peptide_len + 1, len(seq) + 1), ): - pep = seq[i:j] - if any(aa not in valid_aa for aa in pep): + peptide = seq[i:j] + if any(aa not in valid_aa for aa in peptide): logger.warning( "Skipping peptide with unknown amino acids: %s", - pep, + peptide, ) else: - yield pep, protein + yield peptide, protein else: - semi = digestion == "partial" - for header, seq in fasta.read(fasta_filename): - pep_set = parser.cleave( + for header, seq in pyteomics.fasta.read(fasta_filename): + peptides = pyteomics.parser.cleave( seq, rule=enzyme, missed_cleavages=missed_cleavages, - semi=semi, + semi=digestion == "partial", ) protein = header.split()[0] - for pep in pep_set: - if len(pep) >= min_peptide_len and len(pep) <= max_peptide_len: - if any(aa not in valid_aa for aa in pep): + for peptide in peptides: + if min_peptide_len <= len(peptide) <= max_peptide_len: + if any(aa not in valid_aa for aa in peptide): logger.warning( "Skipping peptide with unknown amino acids: %s", - pep, + peptide, ) else: - yield pep, protein + yield peptide, protein -@njit +@nb.njit def _to_neutral_mass(mz_mass: float, charge: int) -> float: """ Convert precursor m/z value to neutral mass. @@ -334,7 +335,7 @@ def _to_neutral_mass(mz_mass: float, charge: int) -> float: def _convert_from_modx( - seq: str, swap_map: dict[str, str], swap_regex: str + seq: str, swap_map: dict[str, str], swap_regex: Pattern ) -> str: """ Converts peptide sequence from modX format to @@ -345,50 +346,51 @@ def _convert_from_modx( seq : str Peptide in modX format swap_map : dict[str, str] - Dictionary that allows for swapping of modX to Casanovo-acceptable modifications. - swap_regex : str + Dictionary that allows for swapping of modX to + Casanovo-acceptable modifications. + swap_regex : Pattern Regular expression to match modX format. Returns: -------- - swap_regex : str + str Peptide in Casanovo-acceptable modifications. """ + # FIXME: This might be handled by the DepthCharge residues vocabulary + # instead. return swap_regex.sub(lambda x: swap_map[x.group()], seq) def _construct_mods_dict( allowed_fixed_mods: str, allowed_var_mods: str -) -> Tuple[dict[str, str], dict[str, str], dict[str, str]]: +) -> Tuple[Dict[str, str], Dict[str, str], Dict[str, str]]: """ Constructs dictionaries of fixed and variable modifications. Parameters ---------- allowed_fixed_mods : str - A comma separated string of fixed modifications to consider. + A comma-separated string of fixed modifications to consider. allowed_var_mods : str - A comma separated string of variable modifications to consider. + A comma-separated string of variable modifications to consider. 
Returns ------- - fixed_mods : dict[str, str] + fixed_mods : Dict[str, str] A dictionary of fixed modifications. - var_mods : dict[str, str] + var_mods : Dict[str, str] A dictionary of variable modifications. - swap_map : dict[str, str] + swap_map : Dict[str, str] A dictionary that allows for swapping of modX to Casanovo-acceptable modifications. """ - swap_map = {} - fixed_mods = {} - var_mods = {} + swap_map, fixed_mods, var_mods = {}, {}, {} for mod_map, allowed_mods in zip( [fixed_mods, var_mods], [allowed_fixed_mods, allowed_var_mods] ): - for idx, mod in enumerate(allowed_mods.split(",")): + for i, mod in enumerate(allowed_mods.split(",")): aa, mod_aa = mod.split(":") - mod_id = string.ascii_lowercase[idx] + mod_id = string.ascii_lowercase[i] if aa == "nterm": mod_map[f"{mod_id}-"] = True swap_map[f"{mod_id}-"] = f"{mod_aa}" From 6ea037813fe5ab966e2d6e14497487d45dd7d1b7 Mon Sep 17 00:00:00 2001 From: Wout Bittremieux Date: Sun, 10 Nov 2024 13:43:17 +0100 Subject: [PATCH 59/84] Use mask for more efficient candidate filtering --- casanovo/data/db_utils.py | 26 +++++++++----------------- 1 file changed, 9 insertions(+), 17 deletions(-) diff --git a/casanovo/data/db_utils.py b/casanovo/data/db_utils.py index b1121780..2e60663b 100644 --- a/casanovo/data/db_utils.py +++ b/casanovo/data/db_utils.py @@ -115,30 +115,22 @@ def get_candidates( candidates : pd.Series A series of candidate peptides. """ - candidates = [] - + # FIXME: This could potentially be sped up with only a single pass + # through the database. + mask = np.zeros(len(self.db_peptides), dtype=bool) + precursor_tol_ppm = self.precursor_tolerance / 1e6 for e in range(self.isotope_error[0], self.isotope_error[1] + 1): iso_shift = ISOTOPE_SPACING * e shift_raw_mass = float( _to_neutral_mass(precursor_mz, charge) - iso_shift ) - upper_bound = shift_raw_mass * ( - 1 + (self.precursor_tolerance / 1e6) - ) - lower_bound = shift_raw_mass * ( - 1 - (self.precursor_tolerance / 1e6) - ) - - window = self.db_peptides[ + upper_bound = shift_raw_mass * (1 + precursor_tol_ppm) + lower_bound = shift_raw_mass * (1 - precursor_tol_ppm) + mask |= ( (self.db_peptides["calc_mass"] >= lower_bound) & (self.db_peptides["calc_mass"] <= upper_bound) - ] - candidates.append(window[["peptide", "calc_mass"]]) - - candidates = pd.concat(candidates) - candidates.drop_duplicates(inplace=True) - candidates.sort_values(by=["calc_mass", "peptide"], inplace=True) - return candidates["peptide"] + ) + return self.db_peptides[mask]["peptide"] def get_associated_protein(self, peptide: str) -> str: """ From 408aa4d2359847cc8834ba70aef5b1179bb8ee91 Mon Sep 17 00:00:00 2001 From: Wout Bittremieux Date: Sun, 10 Nov 2024 13:46:11 +0100 Subject: [PATCH 60/84] Reorder methods in logical order --- casanovo/data/db_utils.py | 208 +++++++++++++++++++------------------- 1 file changed, 104 insertions(+), 104 deletions(-) diff --git a/casanovo/data/db_utils.py b/casanovo/data/db_utils.py index 2e60663b..024452c8 100644 --- a/casanovo/data/db_utils.py +++ b/casanovo/data/db_utils.py @@ -94,60 +94,6 @@ def __init__( self.precursor_tolerance = precursor_tolerance self.isotope_error = isotope_error - def get_candidates( - self, - precursor_mz: float, - charge: int, - ) -> pd.Series: - """ - Returns candidate peptides that fall within the search - parameter's precursor mass tolerance. - - Parameters - ---------- - precursor_mz : float - The precursor mass-to-charge ratio. - charge : int - The precursor charge. 
- - Returns - ------- - candidates : pd.Series - A series of candidate peptides. - """ - # FIXME: This could potentially be sped up with only a single pass - # through the database. - mask = np.zeros(len(self.db_peptides), dtype=bool) - precursor_tol_ppm = self.precursor_tolerance / 1e6 - for e in range(self.isotope_error[0], self.isotope_error[1] + 1): - iso_shift = ISOTOPE_SPACING * e - shift_raw_mass = float( - _to_neutral_mass(precursor_mz, charge) - iso_shift - ) - upper_bound = shift_raw_mass * (1 + precursor_tol_ppm) - lower_bound = shift_raw_mass * (1 - precursor_tol_ppm) - mask |= ( - (self.db_peptides["calc_mass"] >= lower_bound) - & (self.db_peptides["calc_mass"] <= upper_bound) - ) - return self.db_peptides[mask]["peptide"] - - def get_associated_protein(self, peptide: str) -> str: - """ - Returns the associated protein(s) for a given peptide. - - Parameters - ---------- - peptide : str - The peptide sequence. - - Returns - ------- - protein : str - The associated protein(s) identifiers, separated by commas. - """ - return ",".join(self.prot_map[peptide]) - def _digest_fasta( self, peptide_generator: Iterator[Tuple[str, str]], @@ -216,6 +162,100 @@ def _digest_fasta( ) return pep_table, prot_map + def get_candidates( + self, + precursor_mz: float, + charge: int, + ) -> pd.Series: + """ + Returns candidate peptides that fall within the search + parameter's precursor mass tolerance. + + Parameters + ---------- + precursor_mz : float + The precursor mass-to-charge ratio. + charge : int + The precursor charge. + + Returns + ------- + candidates : pd.Series + A series of candidate peptides. + """ + # FIXME: This could potentially be sped up with only a single pass + # through the database. + mask = np.zeros(len(self.db_peptides), dtype=bool) + precursor_tol_ppm = self.precursor_tolerance / 1e6 + for e in range(self.isotope_error[0], self.isotope_error[1] + 1): + iso_shift = ISOTOPE_SPACING * e + shift_raw_mass = float( + _to_neutral_mass(precursor_mz, charge) - iso_shift + ) + upper_bound = shift_raw_mass * (1 + precursor_tol_ppm) + lower_bound = shift_raw_mass * (1 - precursor_tol_ppm) + mask |= ( + (self.db_peptides["calc_mass"] >= lower_bound) + & (self.db_peptides["calc_mass"] <= upper_bound) + ) + return self.db_peptides[mask]["peptide"] + + def get_associated_protein(self, peptide: str) -> str: + """ + Returns the associated protein(s) for a given peptide. + + Parameters + ---------- + peptide : str + The peptide sequence. + + Returns + ------- + protein : str + The associated protein(s) identifiers, separated by commas. + """ + return ",".join(self.prot_map[peptide]) + + +def _construct_mods_dict( + allowed_fixed_mods: str, allowed_var_mods: str +) -> Tuple[Dict[str, str], Dict[str, str], Dict[str, str]]: + """ + Constructs dictionaries of fixed and variable modifications. + + Parameters + ---------- + allowed_fixed_mods : str + A comma-separated string of fixed modifications to consider. + allowed_var_mods : str + A comma-separated string of variable modifications to consider. + + Returns + ------- + fixed_mods : Dict[str, str] + A dictionary of fixed modifications. + var_mods : Dict[str, str] + A dictionary of variable modifications. + swap_map : Dict[str, str] + A dictionary that allows for swapping of modX to + Casanovo-acceptable modifications. 
+ """ + swap_map, fixed_mods, var_mods = {}, {}, {} + for mod_map, allowed_mods in zip( + [fixed_mods, var_mods], [allowed_fixed_mods, allowed_var_mods] + ): + for i, mod in enumerate(allowed_mods.split(",")): + aa, mod_aa = mod.split(":") + mod_id = string.ascii_lowercase[i] + if aa == "nterm": + mod_map[f"{mod_id}-"] = True + swap_map[f"{mod_id}-"] = f"{mod_aa}" + else: + mod_map[mod_id] = [aa] + swap_map[f"{mod_id}{aa}"] = f"{mod_aa}" + + return fixed_mods, var_mods, swap_map + def _peptide_generator( fasta_filename: str, @@ -306,26 +346,6 @@ def _peptide_generator( yield peptide, protein -@nb.njit -def _to_neutral_mass(mz_mass: float, charge: int) -> float: - """ - Convert precursor m/z value to neutral mass. - - Parameters - ---------- - mz_mass : float - The precursor mass-to-charge ratio. - charge : int - The precursor charge. - - Returns - ------- - mass : float - The calculated precursor neutral mass. - """ - return charge * (mz_mass - PROTON) - - def _convert_from_modx( seq: str, swap_map: dict[str, str], swap_regex: Pattern ) -> str: @@ -353,41 +373,21 @@ def _convert_from_modx( return swap_regex.sub(lambda x: swap_map[x.group()], seq) -def _construct_mods_dict( - allowed_fixed_mods: str, allowed_var_mods: str -) -> Tuple[Dict[str, str], Dict[str, str], Dict[str, str]]: +@nb.njit +def _to_neutral_mass(mz_mass: float, charge: int) -> float: """ - Constructs dictionaries of fixed and variable modifications. + Convert precursor m/z value to neutral mass. Parameters ---------- - allowed_fixed_mods : str - A comma-separated string of fixed modifications to consider. - allowed_var_mods : str - A comma-separated string of variable modifications to consider. + mz_mass : float + The precursor mass-to-charge ratio. + charge : int + The precursor charge. Returns ------- - fixed_mods : Dict[str, str] - A dictionary of fixed modifications. - var_mods : Dict[str, str] - A dictionary of variable modifications. - swap_map : Dict[str, str] - A dictionary that allows for swapping of modX to - Casanovo-acceptable modifications. + mass : float + The calculated precursor neutral mass. 
""" - swap_map, fixed_mods, var_mods = {}, {}, {} - for mod_map, allowed_mods in zip( - [fixed_mods, var_mods], [allowed_fixed_mods, allowed_var_mods] - ): - for i, mod in enumerate(allowed_mods.split(",")): - aa, mod_aa = mod.split(":") - mod_id = string.ascii_lowercase[i] - if aa == "nterm": - mod_map[f"{mod_id}-"] = True - swap_map[f"{mod_id}-"] = f"{mod_aa}" - else: - mod_map[mod_id] = [aa] - swap_map[f"{mod_id}{aa}"] = f"{mod_aa}" - - return fixed_mods, var_mods, swap_map + return charge * (mz_mass - PROTON) From 65189ee142ea686d1f148da000a684a88bacdbea Mon Sep 17 00:00:00 2001 From: Wout Bittremieux Date: Sun, 10 Nov 2024 13:50:50 +0100 Subject: [PATCH 61/84] Fix unit tests --- tests/unit_tests/test_unit.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/unit_tests/test_unit.py b/tests/unit_tests/test_unit.py index 034f4874..a863b1f7 100644 --- a/tests/unit_tests/test_unit.py +++ b/tests/unit_tests/test_unit.py @@ -699,10 +699,10 @@ def test_digest_fasta_mods(tiny_fasta_file, residues_dict): "+43.006ASQSVSSSYLTWYQQKPGQAPR", "-17.027FSGSGSGTDFTLTISSLQPEDFAVYYC+57.021QQDYNLP", "FSGSGSGTDFTLTISSLQPEDFAVYYC+57.021QQDYNLP", + "FSGSGSGTDFTLTISSLQ+0.984PEDFAVYYC+57.021QQDYNLP", "FSGSGSGTDFTLTISSLQPEDFAVYYC+57.021Q+0.984QDYNLP", "FSGSGSGTDFTLTISSLQPEDFAVYYC+57.021QQ+0.984DYNLP", "FSGSGSGTDFTLTISSLQPEDFAVYYC+57.021QQDYN+0.984LP", - "FSGSGSGTDFTLTISSLQ+0.984PEDFAVYYC+57.021QQDYNLP", "+43.006-17.027FSGSGSGTDFTLTISSLQPEDFAVYYC+57.021QQDYNLP", "+42.011FSGSGSGTDFTLTISSLQPEDFAVYYC+57.021QQDYNLP", "+43.006FSGSGSGTDFTLTISSLQPEDFAVYYC+57.021QQDYNLP", @@ -873,8 +873,8 @@ def test_digest_fasta_enzyme(tiny_fasta_file, residues_dict): "QQKPGQ", "SLQPED", "PAQLLF", - "SIPARF", "IPARFS", + "SIPARF", "LSC+57.021RAS", "TDFTLT", "QAPRLL", From 1efd9dda358d1b84df9028bb2fa2654466ff8c53 Mon Sep 17 00:00:00 2001 From: Wout Bittremieux Date: Sun, 10 Nov 2024 14:11:38 +0100 Subject: [PATCH 62/84] Directly generate DB peptides as DataFrame --- casanovo/data/db_utils.py | 81 ++++++++++++++++------------------- tests/unit_tests/test_unit.py | 29 +++++-------- 2 files changed, 46 insertions(+), 64 deletions(-) diff --git a/casanovo/data/db_utils.py b/casanovo/data/db_utils.py index 024452c8..55127cff 100644 --- a/casanovo/data/db_utils.py +++ b/casanovo/data/db_utils.py @@ -5,8 +5,7 @@ import os import re import string -from collections import defaultdict -from typing import DefaultDict, Dict, Iterator, Pattern, Set, Tuple +from typing import Dict, Iterator, Pattern, Set, Tuple import depthcharge.masses import numba as nb @@ -90,14 +89,14 @@ def __init__( max_peptide_len, set([aa[0] for aa in residues.keys() if aa[0].isalpha()]), ) - self.db_peptides, self.prot_map = self._digest_fasta(peptide_generator) + self.db_peptides = self._digest_fasta(peptide_generator) self.precursor_tolerance = precursor_tolerance self.isotope_error = isotope_error def _digest_fasta( self, peptide_generator: Iterator[Tuple[str, str]], - ) -> Tuple[pd.DataFrame, DefaultDict[str, Set]]: + ) -> pd.DataFrame: """ Digests a FASTA file and returns the peptides, their masses, and associated protein(s). @@ -109,58 +108,50 @@ def _digest_fasta( Returns ------- - pep_table : pd.DataFrame - A Pandas DataFrame with peptide and mass columns. - Sorted by neutral mass in ascending order. - prot_map : DefaultDict[str, Set] - A dictionary mapping peptides to associated proteins. 
+ peptides : pd.DataFrame + A Pandas DataFrame with index "peptide" (the peptide + sequence), and columns "calc_mass" (the peptide neutral + mass) and "protein" (a list of associated protein(s)). """ # Generate all possible peptide isoforms. - mass_calculator = depthcharge.masses.PeptideMass(residues="massivekb") - peptide_isoforms = [ - ( - pyteomics.parser.isoforms( + peptides = pd.DataFrame( + data=[ + (iso, prot) + for pep, prot in peptide_generator + for iso in pyteomics.parser.isoforms( pep, variable_mods=self.var_mods, fixed_mods=self.fixed_mods, max_mods=self.max_mods, - ), - prot, - ) - for pep, prot in peptide_generator - ] - mod_peptide_list = [ - (mod_pep, mass_calculator.mass(mod_pep), prot) - for isos, prot in peptide_isoforms - for mod_pep in map( - functools.partial( - _convert_from_modx, - swap_map=self.swap_map, - swap_regex=self.swap_regex, - ), - isos, + ) + ], + columns=["peptide", "protein"], + ) + # Convert modX peptide to Casanovo format. + peptides["peptide"] = peptides["peptide"].apply( + functools.partial( + _convert_from_modx, + swap_map=self.swap_map, + swap_regex=self.swap_regex, ) - ] - - # Create a dictionary mapping for easy accession of associated - # proteins. - prot_map: DefaultDict[str, Set] = defaultdict(set) - for pep, _, prot in mod_peptide_list: - prot_map[pep].add(prot) - - # Create a DataFrame for easy sorting and filtering. - pep_table = pd.DataFrame( - [(pep, mass) for pep, mass, _ in mod_peptide_list], - columns=["peptide", "calc_mass"], ) - pep_table.sort_values( + # Merge proteins from duplicate peptides. + peptides = peptides.groupby("peptide")["protein"].apply( + lambda proteins: sorted(set(proteins)) + ).reset_index() + # Calculate the mass of each peptide. + mass_calculator = depthcharge.masses.PeptideMass(residues="massivekb") + peptides["calc_mass"] = peptides["peptide"].apply(mass_calculator.mass) + # Sort by peptide mass and index by peptide sequence. + peptides.sort_values( by=["calc_mass", "peptide"], ascending=True, inplace=True ) + peptides.set_index("peptide", inplace=True) logger.info( - "Digestion complete. %d peptides generated.", len(pep_table) + "Digestion complete. %d peptides generated.", len(peptides) ) - return pep_table, prot_map + return peptides def get_candidates( self, @@ -198,7 +189,7 @@ def get_candidates( (self.db_peptides["calc_mass"] >= lower_bound) & (self.db_peptides["calc_mass"] <= upper_bound) ) - return self.db_peptides[mask]["peptide"] + return self.db_peptides.index[mask] def get_associated_protein(self, peptide: str) -> str: """ @@ -214,7 +205,7 @@ def get_associated_protein(self, peptide: str) -> str: protein : str The associated protein(s) identifiers, separated by commas. 
""" - return ",".join(self.prot_map[peptide]) + return ",".join(self.db_peptides.loc[peptide, "protein"]) def _construct_mods_dict( diff --git a/tests/unit_tests/test_unit.py b/tests/unit_tests/test_unit.py index a863b1f7..0d4812f9 100644 --- a/tests/unit_tests/test_unit.py +++ b/tests/unit_tests/test_unit.py @@ -645,8 +645,7 @@ def test_digest_fasta_cleave(tiny_fasta_file, residues_dict): ), residues=residues_dict, ) - peptide_list = list(pdb.db_peptides["peptide"]) - assert peptide_list == expected + assert pdb.db_peptides.index.to_list() == expected def test_digest_fasta_mods(tiny_fasta_file, residues_dict): @@ -724,8 +723,7 @@ def test_digest_fasta_mods(tiny_fasta_file, residues_dict): ), residues=residues_dict, ) - peptide_list = list(pdb.db_peptides["peptide"]) - assert peptide_list == expected_1mod + assert pdb.db_peptides.index.to_list() == expected_1mod def test_length_restrictions(tiny_fasta_file, residues_dict): @@ -756,8 +754,7 @@ def test_length_restrictions(tiny_fasta_file, residues_dict): ), residues=residues_dict, ) - peptide_list = list(pdb.db_peptides["peptide"]) - assert peptide_list == expected_long + assert pdb.db_peptides.index.to_list() == expected_long pdb = db_utils.ProteinDatabase( fasta_path=str(tiny_fasta_file), @@ -776,8 +773,7 @@ def test_length_restrictions(tiny_fasta_file, residues_dict): ), residues=residues_dict, ) - peptide_list = list(pdb.db_peptides["peptide"]) - assert peptide_list == expected_short + assert pdb.db_peptides.index.to_list() == expected_short def test_digest_fasta_enzyme(tiny_fasta_file, residues_dict): @@ -942,8 +938,7 @@ def test_digest_fasta_enzyme(tiny_fasta_file, residues_dict): ), residues=residues_dict, ) - peptide_list = list(pdb.db_peptides["peptide"]) - assert peptide_list == expected_argc + assert pdb.db_peptides.index.to_list() == expected_argc pdb = db_utils.ProteinDatabase( fasta_path=str(tiny_fasta_file), @@ -962,8 +957,7 @@ def test_digest_fasta_enzyme(tiny_fasta_file, residues_dict): ), residues=residues_dict, ) - peptide_list = list(pdb.db_peptides["peptide"]) - assert peptide_list == expected_aspn + assert pdb.db_peptides.index.to_list() == expected_aspn # Test regex rule instead of named enzyme pdb = db_utils.ProteinDatabase( @@ -983,8 +977,7 @@ def test_digest_fasta_enzyme(tiny_fasta_file, residues_dict): ), residues=residues_dict, ) - peptide_list = list(pdb.db_peptides["peptide"]) - assert peptide_list == expected_argc + assert pdb.db_peptides.index.to_list() == expected_argc # Test semispecific digest pdb = db_utils.ProteinDatabase( @@ -1004,8 +997,7 @@ def test_digest_fasta_enzyme(tiny_fasta_file, residues_dict): ), residues=residues_dict, ) - peptide_list = list(pdb.db_peptides["peptide"]) - assert peptide_list == expected_semispecific + assert pdb.db_peptides.index.to_list() == expected_semispecific # Test nonspecific digest pdb = db_utils.ProteinDatabase( @@ -1025,8 +1017,7 @@ def test_digest_fasta_enzyme(tiny_fasta_file, residues_dict): ), residues=residues_dict, ) - peptide_list = list(pdb.db_peptides["peptide"]) - assert peptide_list == expected_nonspecific + assert pdb.db_peptides.index.to_list() == expected_nonspecific def test_get_candidates(tiny_fasta_file, residues_dict): @@ -1139,7 +1130,7 @@ def test_get_candidates_isotope_error(tiny_fasta_file, residues_dict): peptide_list = pd.DataFrame( peptide_list, columns=["peptide", "calc_mass", "protein"] - ) + ).set_index("peptide") peptide_list.sort_values("calc_mass", inplace=True) expected_isotope0 = list("UTSRQPONMLKJIHGFEDCB") From 
f679cdc595d6a0a65e0a6f8cdefebc0909fe391f Mon Sep 17 00:00:00 2001 From: Wout Bittremieux Date: Sun, 10 Nov 2024 14:23:41 +0100 Subject: [PATCH 63/84] Fix type hints and line lengths --- casanovo/casanovo.py | 142 +++++++++++++++++++++++-------------------- casanovo/utils.py | 14 ++--- 2 files changed, 82 insertions(+), 74 deletions(-) diff --git a/casanovo/casanovo.py b/casanovo/casanovo.py index 01098255..17786793 100644 --- a/casanovo/casanovo.py +++ b/casanovo/casanovo.py @@ -12,7 +12,7 @@ import urllib.parse import warnings from pathlib import Path -from typing import Optional, Tuple, List +from typing import Optional, Tuple warnings.formatwarning = lambda message, category, *args, **kwargs: ( f"{category.__name__}: {message}" @@ -62,19 +62,19 @@ def __init__(self, *args, **kwargs) -> None: click.Option( ("-m", "--model"), help=""" - Either the model weights (.ckpt file) or a URL pointing to - the model weights file. If not provided, - Casanovo will try to download the latest release automatically. + Either the model weights (.ckpt file) or a URL pointing to the + model weights file. If not provided, Casanovo will try to + download the latest release automatically. """, ), click.Option( ("-d", "--output_dir"), - help="The destination directory for output files", + help="The destination directory for output files.", type=click.Path(dir_okay=True), ), click.Option( ("-o", "--output_root"), - help="The root name for all output files", + help="The root name for all output files.", type=click.Path(dir_okay=False), ), click.Option( @@ -113,9 +113,9 @@ def main() -> None: ======== Casanovo de novo sequences peptides from tandem mass spectra using a - Transformer model. Casanovo currently supports mzML, mzXML, and MGF files - for de novo sequencing and annotated MGF files, such as those from - MassIVE-KB, for training new models. + Transformer model. Casanovo currently supports mzML, mzXML, and MGF + files for de novo sequencing and annotated MGF files, such as those + from MassIVE-KB, for training new models. Links: @@ -124,10 +124,10 @@ def main() -> None: If you use Casanovo in your work, please cite: - - Yilmaz, M., Fondrie, W. E., Bittremieux, W., Oh, S. & Noble, W. S. De novo - mass spectrometry peptide sequencing with a transformer model. Proceedings - of the 39th International Conference on Machine Learning - ICML '22 (2022) - doi:10.1101/2022.02.07.479481. + - Yilmaz, M., Fondrie, W. E., Bittremieux, W., Oh, S. & Noble, W. S. + De novo mass spectrometry peptide sequencing with a transformer + model. Proceedings of the 39th International Conference on Machine + Learning - ICML '22 (2022) doi:10.1101/2022.02.07.479481. """ @@ -147,9 +147,9 @@ def main() -> None: is_flag=True, default=False, help=""" - Run in evaluation mode. When this flag is set the peptide and amino - acid precision will be calculated and logged at the end of the sequencing - run. All input files must be annotated MGF files if running in evaluation + Run in evaluation mode. When this flag is set the peptide and amino acid + precision will be calculated and logged at the end of the sequencing run. + All input files must be annotated MGF files if running in evaluation mode. """, ) @@ -290,8 +290,9 @@ def train( ) -> None: """Train a Casanovo model on your own data. - TRAIN_PEAK_PATH must be one or more annoated MGF files, such as those - provided by MassIVE-KB, from which to train a new Casnovo model. 
+ TRAIN_PEAK_PATH must be one or more annoated MGF files, such as + those provided by MassIVE-KB, from which to train a new Casnovo + model. """ output_path, output_root_name = _setup_output( output_dir, output_root, force_overwrite, verbosity @@ -324,7 +325,7 @@ def train( @main.command() def version() -> None: - """Get the Casanovo version information""" + """Get the Casanovo version information.""" versions = [ f"Casanovo: {__version__}", f"Depthcharge: {depthcharge.__version__}", @@ -342,20 +343,20 @@ def version() -> None: default="casanovo.yaml", type=click.Path(dir_okay=False), ) -def configure(output: str) -> None: +def configure(output: Path) -> None: """Generate a Casanovo configuration file to customize. The casanovo configuration file is in the YAML format. """ - Config.copy_default(output) - output = setup_logging(output, "info") + Config.copy_default(str(output)) + setup_logging(output, "info") logger.info(f"Wrote {output}\n") def setup_logging( log_file_path: Path, verbosity: str, -) -> Path: +) -> None: """Set up the logger. Logging occurs to the command-line and to the given log file. @@ -423,10 +424,11 @@ def setup_model( Parameters ---------- model : str | None - May be a file system path, a URL pointing to a .ckpt file, or None. - If `model` is a URL the weights will be downloaded and cached from - `model`. If `model` is `None` the weights from the latest matching - official release will be used (downloaded and cached). + May be a file system path, a URL pointing to a .ckpt file, or + None. If `model` is a URL the weights will be downloaded and + cached from `model`. If `model` is `None` the weights from the + latest matching official release will be used (downloaded and + cached). config : str | None Config file path. If None the default config will be used. output_dir: : Path | str @@ -434,20 +436,21 @@ def setup_model( output_root_name : str, The base name for the output files. is_train : bool - Are we training? If not, we need to retrieve weights when the model is - None. + Are we training? If not, we need to retrieve weights when the + model is None. Return ------ Tuple[Config, Path] - Initialized Casanovo config, local path to model weights if any (may be - `None` if training using random starting weights). + Initialized Casanovo config, local path to model weights if any + (may be `None` if training using random starting weights). """ # Read parameters from the config file. config = Config(config) seed_everything(seed=config["random_seed"], workers=True) - # Download model weights if these were not specified (except when training). + # Download model weights if these were not specified (except when + # training). cache_dir = Path(appdirs.user_cache_dir("casanovo", False, opinion=False)) if model is None: if not is_train: @@ -455,16 +458,16 @@ def setup_model( model = _get_model_weights(cache_dir) except github.RateLimitExceededException: logger.error( - "GitHub API rate limit exceeded while trying to download the " - "model weights. Please download compatible model weights " - "manually from the official Casanovo code website " - "(https://github.com/Noble-Lab/casanovo) and specify these " - "explicitly using the `--model` parameter when running " - "Casanovo." + "GitHub API rate limit exceeded while trying to download " + "the model weights. 
Please download compatible model " + "weights manually from the official Casanovo code website " + "(https://github.com/Noble-Lab/casanovo) and specify " + "these explicitly using the `--model` parameter when " + "running Casanovo." ) raise PermissionError( - "GitHub API rate limit exceeded while trying to download the " - "model weights" + "GitHub API rate limit exceeded while trying to download " + "the model weights" ) from None else: if _is_valid_url(model): @@ -489,29 +492,30 @@ def setup_model( return config, model -def _get_model_weights(cache_dir: Path) -> str: +def _get_model_weights(cache_dir: Path) -> Path: """ Use cached model weights or download them from GitHub. - If no weights file (extension: .ckpt) is available in the cache directory, - it will be downloaded from a release asset on GitHub. - Model weights are retrieved by matching release version. If no model weights - for an identical release (major, minor, patch), alternative releases with - matching (i) major and minor, or (ii) major versions will be used. - If no matching release can be found, no model weights will be downloaded. + If no weights file (extension: .ckpt) is available in the cache + directory, it will be downloaded from a release asset on GitHub. + Model weights are retrieved by matching release version. If no model + weights for an identical release (major, minor, patch), alternative + releases with matching (i) major and minor, or (ii) major versions + will be used. If no matching release can be found, no model weights + will be downloaded. - Note that the GitHub API is limited to 60 requests from the same IP per - hour. + Note that the GitHub API is limited to 60 requests from the same IP + per hour. Parameters ---------- cache_dir : Path - model weights cache directory path + Model weights cache directory path. Returns ------- - str - The name of the model weights file. + Path + The path of the model weights file. """ os.makedirs(cache_dir, exist_ok=True) version = utils.split_version(__version__) @@ -598,11 +602,11 @@ def _setup_output( Parameters: ----------- output_dir : str | None - The path to the output directory. If `None`, the output directory will - be resolved to the current working directory. + The path to the output directory. If `None`, the output + directory will be resolved to the current working directory. output_root : str | None - The base name for the output files. If `None` the output root name will - be resolved to casanovo_ + The base name for the output files. If `None` the output root + name will be resolved to casanovo_ overwrite: bool Whether to overwrite log file if it already exists in the output directory. @@ -612,8 +616,8 @@ def _setup_output( Returns: -------- Tuple[Path, str] - A tuple containing the resolved output directory and root name for - output files. + A tuple containing the resolved output directory and root name + for output files. """ if output_root is None: output_root = ( @@ -627,7 +631,8 @@ def _setup_output( if not output_path.is_dir(): output_path.mkdir(parents=True) logger.warning( - "Target output directory %s does not exists, so it will be created.", + "Target output directory %s does not exists, so it will be " + "created.", output_path, ) @@ -647,8 +652,8 @@ def _get_weights_from_url( Resolve weight file from URL Attempt to download weight file from URL if weights are not already - cached - otherwise use cached weights. Downloaded weight files will be - cached. + cached - otherwise use cached weights. Downloaded weight files will + be cached. 
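The cache-or-download policy described here can be pictured with a short stand-alone sketch: reuse the cached file unless it is missing, the remote copy reports a newer Last-Modified time, or a re-download is forced, and fall back to the cached copy when the remote cannot be reached. This is a simplified illustration, not the project's helper; the URL and cache path are placeholders.

from email.utils import parsedate_to_datetime
from pathlib import Path

import requests

def cached_or_download(file_url: str, cache_path: Path, force_download: bool = False) -> Path:
    """Reuse the cached weights file unless it is missing, stale, or a download is forced."""
    stale = force_download or not cache_path.is_file()
    if not stale:
        try:
            response = requests.head(file_url, timeout=10, allow_redirects=True)
            last_modified = response.headers.get("Last-Modified")
            if response.ok and last_modified is not None:
                remote_time = parsedate_to_datetime(last_modified).timestamp()
                stale = remote_time > cache_path.stat().st_mtime
        except requests.ConnectionError:
            pass  # Remote unreachable: keep using the cached copy.
    if stale:
        cache_path.parent.mkdir(parents=True, exist_ok=True)
        with requests.get(file_url, stream=True, timeout=10) as response:
            with cache_path.open("wb") as out:
                for chunk in response.iter_content(chunk_size=1 << 20):
                    out.write(chunk)
    return cache_path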
Parameters ---------- @@ -657,8 +662,8 @@ def _get_weights_from_url( cache_dir : Path Model weights cache directory path. force_download : Optional[bool], default=False - If True, forces a new download of the weight file even if it exists in - the cache. + If True, forces a new download of the weight file even if it + exists in the cache. Returns ------- @@ -688,7 +693,8 @@ def _get_weights_from_url( ).timestamp() else: logger.warning( - "Attempted HEAD request to %s yielded non-ok status code - using cached file", + "Attempted HEAD request to %s yielded non-ok status code—" + "using cached file", file_url, ) except ( @@ -697,7 +703,8 @@ def _get_weights_from_url( requests.TooManyRedirects, ): logger.warning( - "Failed to reach %s to get remote last modified time - using cached file", + "Failed to reach %s to get remote last modified time—using " + "cached file", file_url, ) @@ -715,8 +722,9 @@ def _download_weights(file_url: str, download_path: Path) -> None: """ Download weights file from URL - Download the model weights file from the specified URL and save it to the - given path. Ensures the download directory exists, and uses a progress + Download the model weights file from the specified URL and save it + to the given path. Ensures the download directory exists, and uses a + progress bar to indicate download status. Parameters diff --git a/casanovo/utils.py b/casanovo/utils.py index 43b1cb7d..6e4273e3 100644 --- a/casanovo/utils.py +++ b/casanovo/utils.py @@ -161,16 +161,16 @@ def get_report_dict( def log_run_report( - start_time: Optional[int] = None, end_time: Optional[int] = None + start_time: Optional[float] = None, end_time: Optional[float] = None ) -> None: """ Log general run report Parameters ---------- - start_time : Optional[int], default=None + start_time : Optional[float], default=None The start time of the sequencing run in seconds since the epoch. - end_time : Optional[int], default=None + end_time : Optional[float], default=None The end time of the sequencing run in seconds since the epoch. """ logger.info("======= End of Run Report =======") @@ -197,8 +197,8 @@ def log_run_report( def log_sequencing_report( predictions: List[PepSpecMatch], - start_time: Optional[int] = None, - end_time: Optional[int] = None, + start_time: Optional[float] = None, + end_time: Optional[float] = None, score_bins: List[float] = SCORE_BINS, ) -> None: """ @@ -210,9 +210,9 @@ def log_sequencing_report( str, Tuple[str, str], float, float, float, float, str ] PSM predictions - start_time : Optional[int], default=None + start_time : Optional[float], default=None The start time of the sequencing run in seconds since the epoch. - end_time : Optional[int], default=None + end_time : Optional[float], default=None The end time of the sequencing run in seconds since the epoch. 
score_bins: List[float], Optional Confidence scores for creating confidence score distribution, From c07ef5773eb238a94789545ca8188496cd2787a2 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Sun, 10 Nov 2024 13:28:11 +0000 Subject: [PATCH 64/84] Generate new screengrabs with rich-codex --- docs/images/help.svg | 162 ++++++++++++------------- docs/images/sequence-help.svg | 216 +++++++++++++++++----------------- docs/images/train-help.svg | 204 ++++++++++++++++---------------- 3 files changed, 291 insertions(+), 291 deletions(-) diff --git a/docs/images/help.svg b/docs/images/help.svg index 5418b95a..d25376e4 100644 --- a/docs/images/help.svg +++ b/docs/images/help.svg @@ -19,132 +19,132 @@ font-weight: 700; } - .terminal-782331977-matrix { + .terminal-1140158551-matrix { font-family: Fira Code, monospace; font-size: 20px; line-height: 24.4px; font-variant-east-asian: full-width; } - .terminal-782331977-title { + .terminal-1140158551-title { font-size: 18px; font-weight: bold; font-family: arial; } - .terminal-782331977-r1 { fill: #c5c8c6 } -.terminal-782331977-r2 { fill: #d0b344 } -.terminal-782331977-r3 { fill: #c5c8c6;font-weight: bold } -.terminal-782331977-r4 { fill: #68a0b3;font-weight: bold } -.terminal-782331977-r5 { fill: #d0b344;font-weight: bold } -.terminal-782331977-r6 { fill: #868887 } -.terminal-782331977-r7 { fill: #98a84b;font-weight: bold } + .terminal-1140158551-r1 { fill: #c5c8c6 } +.terminal-1140158551-r2 { fill: #d0b344 } +.terminal-1140158551-r3 { fill: #c5c8c6;font-weight: bold } +.terminal-1140158551-r4 { fill: #68a0b3;font-weight: bold } +.terminal-1140158551-r5 { fill: #d0b344;font-weight: bold } +.terminal-1140158551-r6 { fill: #868887 } +.terminal-1140158551-r7 { fill: #98a84b;font-weight: bold } - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -156,44 +156,44 @@ - + - - $ casanovo --help - -Usage:casanovo [OPTIONSCOMMAND [ARGS]...                                     - - ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓  - ┃                                  Casanovo                                  ┃  - ┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛  - Casanovo de novo sequences peptides from tandem mass spectra using a            - Transformer model. Casanovo currently supports mzML, mzXML, and MGF files for   - de novo sequencing and annotated MGF files, such as those from MassIVE-KB, for  - training new models.                                                            - - Links:                                                                          - - • Documentation: https://casanovo.readthedocs.io                               - • Official code repository: https://github.com/Noble-Lab/casanovo              - - If you use Casanovo in your work, please cite:                                  - - • Yilmaz, M., Fondrie, W. E., Bittremieux, W., Oh, S. & Noble, W. S. De novo   -mass spectrometry peptide sequencing with a transformer model. Proceedings   -of the 39th International Conference on Machine Learning - ICML '22 (2022)   -doi:10.1101/2022.02.07.479481.                                               - -╭─ Options ────────────────────────────────────────────────────────────────────╮ ---help-h    Show this message and exit.                                     
-╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Commands ───────────────────────────────────────────────────────────────────╮ -configure Generate a Casanovo configuration file to customize.               -db-search Perform a database search on MS/MS data using Casanovo-DB.         -sequence  De novo sequence peptides from tandem mass spectra.                -train     Train a Casanovo model on your own data.                           -version   Get the Casanovo version information                               -╰──────────────────────────────────────────────────────────────────────────────╯ - + + $ casanovo --help + +Usage:casanovo [OPTIONSCOMMAND [ARGS]...                                     + + ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓  + ┃                                  Casanovo                                  ┃  + ┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛  + Casanovo de novo sequences peptides from tandem mass spectra using a            + Transformer model. Casanovo currently supports mzML, mzXML, and MGF files for   + de novo sequencing and annotated MGF files, such as those from MassIVE-KB, for  + training new models.                                                            + + Links:                                                                          + + • Documentation: https://casanovo.readthedocs.io                               + • Official code repository: https://github.com/Noble-Lab/casanovo              + + If you use Casanovo in your work, please cite:                                  + + • Yilmaz, M., Fondrie, W. E., Bittremieux, W., Oh, S. & Noble, W. S. De novo   +mass spectrometry peptide sequencing with a transformer model. Proceedings   +of the 39th International Conference on Machine Learning - ICML '22 (2022)   +doi:10.1101/2022.02.07.479481.                                               + +╭─ Options ────────────────────────────────────────────────────────────────────╮ +--help-h    Show this message and exit.                                     +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Commands ───────────────────────────────────────────────────────────────────╮ +configure Generate a Casanovo configuration file to customize.               +db-search Perform a database search on MS/MS data using Casanovo-DB.         +sequence  De novo sequence peptides from tandem mass spectra.                +train     Train a Casanovo model on your own data.                           +version   Get the Casanovo version information.                              
+╰──────────────────────────────────────────────────────────────────────────────╯ + diff --git a/docs/images/sequence-help.svg b/docs/images/sequence-help.svg index ea6ff078..6354851d 100644 --- a/docs/images/sequence-help.svg +++ b/docs/images/sequence-help.svg @@ -19,171 +19,171 @@ font-weight: 700; } - .terminal-3610042700-matrix { + .terminal-3608076648-matrix { font-family: Fira Code, monospace; font-size: 20px; line-height: 24.4px; font-variant-east-asian: full-width; } - .terminal-3610042700-title { + .terminal-3608076648-title { font-size: 18px; font-weight: bold; font-family: arial; } - .terminal-3610042700-r1 { fill: #c5c8c6 } -.terminal-3610042700-r2 { fill: #d0b344 } -.terminal-3610042700-r3 { fill: #c5c8c6;font-weight: bold } -.terminal-3610042700-r4 { fill: #68a0b3;font-weight: bold } -.terminal-3610042700-r5 { fill: #868887 } -.terminal-3610042700-r6 { fill: #cc555a } -.terminal-3610042700-r7 { fill: #d0b344;font-weight: bold } -.terminal-3610042700-r8 { fill: #8a4346 } -.terminal-3610042700-r9 { fill: #98a84b;font-weight: bold } -.terminal-3610042700-r10 { fill: #8d7b39;font-weight: bold } + .terminal-3608076648-r1 { fill: #c5c8c6 } +.terminal-3608076648-r2 { fill: #d0b344 } +.terminal-3608076648-r3 { fill: #c5c8c6;font-weight: bold } +.terminal-3608076648-r4 { fill: #68a0b3;font-weight: bold } +.terminal-3608076648-r5 { fill: #868887 } +.terminal-3608076648-r6 { fill: #cc555a } +.terminal-3608076648-r7 { fill: #d0b344;font-weight: bold } +.terminal-3608076648-r8 { fill: #8a4346 } +.terminal-3608076648-r9 { fill: #98a84b;font-weight: bold } +.terminal-3608076648-r10 { fill: #8d7b39;font-weight: bold } - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -195,56 +195,56 @@ - + - - $ casanovo sequence --help - -Usage:casanovo sequence [OPTIONSPEAK_PATH...                                 - - De novo sequence peptides from tandem mass spectra.                             - PEAK_PATH must be one or more mzML, mzXML, or MGF files from which to sequence  - peptides. If evaluate is set to True PEAK_PATH must be one or more annotated    - MGF file.                                                                       - -╭─ Arguments ──────────────────────────────────────────────────────────────────╮ -*  PEAK_PATH    FILE[required] -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Options ────────────────────────────────────────────────────────────────────╮ ---evaluate-e  Run in evaluation mode.     -                                                   When this flag is set the   -                                                   peptide and amino acid      -                                                   precision will be           -                                                   calculated and logged at    -                                                   the end of the sequencing   -                                                   run. All input files must   -                                                   be annotated MGF files if   -                                                   running in evaluation       -                                                   mode.                       
---model-mTEXT                       Either the model weights    -                                                   (.ckpt file) or a URL       -                                                   pointing to the model       -                                                   weights file. If not        -                                                   provided, Casanovo will     -                                                   try to download the latest  -                                                   release automatically.      ---output_dir-dPATH                       The destination directory   -                                                   for output files            ---output_root-oFILE                       The root name for all       -                                                   output files                ---config-cFILE                       The YAML configuration      -                                                   file overriding the         -                                                   default options.            ---verbosity-v[debug|info|warning|error  Set the verbosity of        -]  console logging messages.   -                                                   Log files are always set    -                                                   to 'debug'.                 ---force_overwrite-f  Whether to overwrite        -                                                   output files.               ---help-h  Show this message and       -                                                   exit.                       -╰──────────────────────────────────────────────────────────────────────────────╯ - + + $ casanovo sequence --help + +Usage:casanovo sequence [OPTIONSPEAK_PATH...                                 + + De novo sequence peptides from tandem mass spectra.                             + PEAK_PATH must be one or more mzML, mzXML, or MGF files from which to sequence  + peptides. If evaluate is set to True PEAK_PATH must be one or more annotated    + MGF file.                                                                       + +╭─ Arguments ──────────────────────────────────────────────────────────────────╮ +*  PEAK_PATH    FILE[required] +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Options ────────────────────────────────────────────────────────────────────╮ +--evaluate-e  Run in evaluation mode.     +                                                   When this flag is set the   +                                                   peptide and amino acid      +                                                   precision will be           +                                                   calculated and logged at    +                                                   the end of the sequencing   +                                                   run. All input files must   +                                                   be annotated MGF files if   +                                                   running in evaluation       +                                                   mode.                       +--model-mTEXT                       Either the model weights    +                                                   (.ckpt file) or a URL       +                                                   pointing to the model       +                                                   weights file. 
If not        +                                                   provided, Casanovo will     +                                                   try to download the latest  +                                                   release automatically.      +--output_dir-dPATH                       The destination directory   +                                                   for output files.           +--output_root-oFILE                       The root name for all       +                                                   output files.               +--config-cFILE                       The YAML configuration      +                                                   file overriding the         +                                                   default options.            +--verbosity-v[debug|info|warning|error  Set the verbosity of        +]  console logging messages.   +                                                   Log files are always set    +                                                   to 'debug'.                 +--force_overwrite-f  Whether to overwrite        +                                                   output files.               +--help-h  Show this message and       +                                                   exit.                       +╰──────────────────────────────────────────────────────────────────────────────╯ + diff --git a/docs/images/train-help.svg b/docs/images/train-help.svg index 783a0660..8aab62d4 100644 --- a/docs/images/train-help.svg +++ b/docs/images/train-help.svg @@ -19,162 +19,162 @@ font-weight: 700; } - .terminal-2920970231-matrix { + .terminal-3079567379-matrix { font-family: Fira Code, monospace; font-size: 20px; line-height: 24.4px; font-variant-east-asian: full-width; } - .terminal-2920970231-title { + .terminal-3079567379-title { font-size: 18px; font-weight: bold; font-family: arial; } - .terminal-2920970231-r1 { fill: #c5c8c6 } -.terminal-2920970231-r2 { fill: #d0b344 } -.terminal-2920970231-r3 { fill: #c5c8c6;font-weight: bold } -.terminal-2920970231-r4 { fill: #68a0b3;font-weight: bold } -.terminal-2920970231-r5 { fill: #868887 } -.terminal-2920970231-r6 { fill: #cc555a } -.terminal-2920970231-r7 { fill: #d0b344;font-weight: bold } -.terminal-2920970231-r8 { fill: #8a4346 } -.terminal-2920970231-r9 { fill: #98a84b;font-weight: bold } -.terminal-2920970231-r10 { fill: #8d7b39;font-weight: bold } + .terminal-3079567379-r1 { fill: #c5c8c6 } +.terminal-3079567379-r2 { fill: #d0b344 } +.terminal-3079567379-r3 { fill: #c5c8c6;font-weight: bold } +.terminal-3079567379-r4 { fill: #68a0b3;font-weight: bold } +.terminal-3079567379-r5 { fill: #868887 } +.terminal-3079567379-r6 { fill: #cc555a } +.terminal-3079567379-r7 { fill: #d0b344;font-weight: bold } +.terminal-3079567379-r8 { fill: #8a4346 } +.terminal-3079567379-r9 { fill: #98a84b;font-weight: bold } +.terminal-3079567379-r10 { fill: #8d7b39;font-weight: bold } - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -186,53 +186,53 @@ - + - - $ casanovo train --help - -Usage:casanovo train [OPTIONSTRAIN_PEAK_PATH...                              - - Train a Casanovo model on your own data.                                        - TRAIN_PEAK_PATH must be one or more annoated MGF files, such as those provided  - by MassIVE-KB, from which to train a new Casnovo model.                         
- -╭─ Arguments ──────────────────────────────────────────────────────────────────╮ -*  TRAIN_PEAK_PATH    FILE[required] -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Options ────────────────────────────────────────────────────────────────────╮ ---validation_peak_path-pFILE                    An annotated MGF file     -                                                     for validation, like      -                                                     from MassIVE-KB. Use      -                                                     this option multiple      -                                                     times to specify          -                                                     multiple files.           ---model-mTEXT                    Either the model weights  -                                                     (.ckpt file) or a URL     -                                                     pointing to the model     -                                                     weights file. If not      -                                                     provided, Casanovo will   -                                                     try to download the       -                                                     latest release            -                                                     automatically.            ---output_dir-dPATH                    The destination           -                                                     directory for output      -                                                     files                     ---output_root-oFILE                    The root name for all     -                                                     output files              ---config-cFILE                    The YAML configuration    -                                                     file overriding the       -                                                     default options.          ---verbosity-v[debug|info|warning|er  Set the verbosity of      -ror]  console logging           -                                                     messages. Log files are   -                                                     always set to 'debug'.    ---force_overwrite-f  Whether to overwrite      -                                                     output files.             ---help-h  Show this message and     -                                                     exit.                     -╰──────────────────────────────────────────────────────────────────────────────╯ - + + $ casanovo train --help + +Usage:casanovo train [OPTIONSTRAIN_PEAK_PATH...                              + + Train a Casanovo model on your own data.                                        + TRAIN_PEAK_PATH must be one or more annoated MGF files, such as those provided  + by MassIVE-KB, from which to train a new Casnovo model.                         + +╭─ Arguments ──────────────────────────────────────────────────────────────────╮ +*  TRAIN_PEAK_PATH    FILE[required] +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Options ────────────────────────────────────────────────────────────────────╮ +--validation_peak_path-pFILE                    An annotated MGF file     +                                                     for validation, like      +                                                     from MassIVE-KB. 
Use      +                                                     this option multiple      +                                                     times to specify          +                                                     multiple files.           +--model-mTEXT                    Either the model weights  +                                                     (.ckpt file) or a URL     +                                                     pointing to the model     +                                                     weights file. If not      +                                                     provided, Casanovo will   +                                                     try to download the       +                                                     latest release            +                                                     automatically.            +--output_dir-dPATH                    The destination           +                                                     directory for output      +                                                     files.                    +--output_root-oFILE                    The root name for all     +                                                     output files.             +--config-cFILE                    The YAML configuration    +                                                     file overriding the       +                                                     default options.          +--verbosity-v[debug|info|warning|er  Set the verbosity of      +ror]  console logging           +                                                     messages. Log files are   +                                                     always set to 'debug'.    +--force_overwrite-f  Whether to overwrite      +                                                     output files.             +--help-h  Show this message and     +                                                     exit.                     +╰──────────────────────────────────────────────────────────────────────────────╯ + From 09ffdfb6ebe5dba1b9b52c1b6c6462a2a76a8753 Mon Sep 17 00:00:00 2001 From: Wout Bittremieux Date: Sun, 10 Nov 2024 14:52:30 +0100 Subject: [PATCH 65/84] Refactor batching to avoid code repetition --- casanovo/denovo/dataloaders.py | 150 ++++++++++++++++----------------- 1 file changed, 72 insertions(+), 78 deletions(-) diff --git a/casanovo/denovo/dataloaders.py b/casanovo/denovo/dataloaders.py index 4eb4d2e2..f929b1e0 100644 --- a/casanovo/denovo/dataloaders.py +++ b/casanovo/denovo/dataloaders.py @@ -11,10 +11,7 @@ from depthcharge.data import AnnotatedSpectrumIndex from ..data import db_utils -from ..data.datasets import ( - AnnotatedSpectrumDataset, - SpectrumDataset, -) +from ..data.datasets import AnnotatedSpectrumDataset, SpectrumDataset logger = logging.getLogger("casanovo") @@ -37,25 +34,25 @@ class DeNovoDataModule(pl.LightningDataModule): eval_batch_size : int The batch size to use for inference. n_peaks : Optional[int] - The number of top-n most intense peaks to keep in each spectrum. `None` - retains all peaks. + The number of top-n most intense peaks to keep in each spectrum. + `None` retains all peaks. min_mz : float - The minimum m/z to include. The default is 140 m/z, in order to exclude - TMT and iTRAQ reporter ions. + The minimum m/z to include. The default is 140 m/z, in order to + exclude TMT and iTRAQ reporter ions. max_mz : float The maximum m/z to include. 
min_intensity : float - Remove peaks whose intensity is below `min_intensity` percentage of the - base peak intensity. + Remove peaks whose intensity is below `min_intensity` percentage + of the base peak intensity. remove_precursor_tol : float - Remove peaks within the given mass tolerance in Dalton around the - precursor mass. + Remove peaks within the given mass tolerance in Dalton around + the precursor mass. n_workers : int, optional - The number of workers to use for data loading. By default, the number of - available CPU cores on the current machine is used. + The number of workers to use for data loading. By default, the + number of available CPU cores on the current machine is used. random_state : Optional[int] - The NumPy random state. ``None`` leaves mass spectra in the order they - were parsed. + The NumPy random state. ``None`` leaves mass spectra in the + order they were parsed. """ def __init__( @@ -74,12 +71,12 @@ def __init__( random_state: Optional[int] = None, ): super().__init__() - self.train_index = train_index - self.valid_index = valid_index - self.test_index = test_index + self.train_index: Optional[AnnotatedSpectrumIndex] = train_index + self.valid_index: Optional[AnnotatedSpectrumIndex] = valid_index + self.test_index: Optional[AnnotatedSpectrumIndex] = test_index self.train_batch_size = train_batch_size self.eval_batch_size = eval_batch_size - self.n_peaks = n_peaks + self.n_peaks: Optional[int] = n_peaks self.min_mz = min_mz self.max_mz = max_mz self.min_intensity = min_intensity @@ -98,11 +95,11 @@ def setup(self, stage: str = None, annotated: bool = True) -> None: Parameters ---------- stage : str {"fit", "validate", "test"} - The stage indicating which Datasets to prepare. All are prepared by - default. + The stage indicating which Datasets to prepare. All are + prepared by default. annotated: bool - True if peptide sequence annotations are available for the test - data. + True if peptide sequence annotations are available for the + test data. """ if stage in (None, "fit", "validate"): make_dataset = functools.partial( @@ -186,7 +183,7 @@ def predict_dataloader(self) -> torch.utils.data.DataLoader: return self._make_loader(self.test_dataset, self.eval_batch_size) def db_dataloader(self) -> torch.utils.data.DataLoader: - """Get a special dataloader for DB search""" + """Get a special dataloader for DB search.""" return self._make_loader( self.test_dataset, self.eval_batch_size, @@ -202,21 +199,23 @@ def prepare_batch( """ Collate MS/MS spectra into a batch. - The MS/MS spectra will be padded so that they fit nicely as a tensor. - However, the padded elements are ignored during the subsequent steps. + The MS/MS spectra will be padded so that they fit nicely as a + tensor. However, the padded elements are ignored during the + subsequent steps. Parameters ---------- batch : List[Tuple[torch.Tensor, float, int, str]] - A batch of data from an AnnotatedSpectrumDataset, consisting of for each - spectrum (i) a tensor with the m/z and intensity peak values, (ii), the - precursor m/z, (iii) the precursor charge, (iv) the spectrum identifier. + A batch of data from an AnnotatedSpectrumDataset, consisting of + for each spectrum (i) a tensor with the m/z and intensity peak + values, (ii), the precursor m/z, (iii) the precursor charge, + (iv) the spectrum identifier. Returns ------- spectra : torch.Tensor of shape (batch_size, n_peaks, 2) - The padded mass spectra tensor with the m/z and intensity peak values - for each spectrum. 
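As a stand-alone illustration of the collation step documented here (toy tensors, not the module's prepare_batch): variable-length spectra are padded into a single tensor, and the precursor neutral mass is derived from the observed m/z and charge using the 1.007276 Da proton mass, giving one (neutral mass, charge, m/z) row per spectrum, i.e. a tensor of shape (batch_size, 3).

import torch

# Two toy spectra with different numbers of peaks, each of shape (n_peaks, 2).
spectra = [torch.rand(5, 2), torch.rand(8, 2)]
precursor_mzs = torch.tensor([500.30, 650.70])
precursor_charges = torch.tensor([2.0, 3.0])

# Pad to a common length so the batch fits in a single tensor.
padded = torch.nn.utils.rnn.pad_sequence(spectra, batch_first=True)
# Neutral mass from m/z and charge, using the proton mass constant.
masses = (precursor_mzs - 1.007276) * precursor_charges
# One row per spectrum: (neutral mass, charge, m/z), shape (batch_size, 3).
precursors = torch.stack([masses, precursor_charges, precursor_mzs], dim=1).float()

print(padded.shape)      # torch.Size([2, 8, 2])
print(precursors.shape)  # torch.Size([2, 3])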
+ The padded mass spectra tensor with the m/z and intensity peak + values for each spectrum. precursors : torch.Tensor of shape (batch_size, 3) A tensor with the precursor neutral mass, precursor charge, and precursor m/z. @@ -229,80 +228,75 @@ def prepare_batch( precursor_mzs = torch.tensor(precursor_mzs) precursor_charges = torch.tensor(precursor_charges) precursor_masses = (precursor_mzs - 1.007276) * precursor_charges - precursors = torch.vstack( + precursors = torch.hstack( [precursor_masses, precursor_charges, precursor_mzs] - ).T.float() + ).float() return spectra, precursors, np.asarray(spectrum_ids) def prepare_psm_batch( batch: List[Tuple[torch.Tensor, float, int, str]], protein_database: db_utils.ProteinDatabase, -) -> Tuple[torch.Tensor, torch.Tensor, np.ndarray, List[str], List[str]]: +) -> Tuple[torch.Tensor, torch.Tensor, np.ndarray, np.ndarray]: """ Collate MS/MS spectra into a batch for DB search. - The MS/MS spectra will be padded so that they fit nicely as a tensor. - However, the padded elements are ignored during the subsequent steps. + The MS/MS spectra will be padded so that they fit nicely as a + tensor. However, the padded elements are ignored during the + subsequent steps. Parameters ---------- batch : List[Tuple[torch.Tensor, float, int, str]] - A batch of data from an AnnotatedSpectrumDataset, consisting of for each - spectrum (i) a tensor with the m/z and intensity peak values, (ii), the - precursor m/z, (iii) the precursor charge, (iv) the spectrum identifier. + A batch of data from an AnnotatedSpectrumDataset, consisting of + for each spectrum (i) a tensor with the m/z and intensity peak + values, (ii), the precursor m/z, (iii) the precursor charge, + (iv) the spectrum identifier. protein_database : db_utils.ProteinDatabase The protein database to use for candidate peptide retrieval. Returns ------- - all_spectra : torch.Tensor of shape (batch_size, n_peaks, 2) - The padded mass spectra tensor with the m/z and intensity peak values - for each spectrum. - all_precursors : torch.Tensor of shape (batch_size, 3) + batch_spectra : torch.Tensor of shape (batch_size, n_peaks, 2) + The padded mass spectra tensor with the m/z and intensity peak + values for each spectrum. + batch_precursors : torch.Tensor of shape (batch_size, 3) A tensor with the precursor neutral mass, precursor charge, and precursor m/z. - all_spectrum_ids : np.ndarray + batch_spectrum_ids : np.ndarray The spectrum identifiers. - all_peptides : List[str] + batch_peptides : np.ndarray The candidate peptides for each spectrum. """ - spectra, precursor_mzs, precursor_charges, spectrum_ids = list(zip(*batch)) - spectra = torch.nn.utils.rnn.pad_sequence(spectra, batch_first=True) - - precursor_mzs_t = torch.tensor(precursor_mzs) - precursor_charges_t = torch.tensor(precursor_charges) - precursor_masses_t = (precursor_mzs_t - 1.007276) * precursor_charges_t - precursors = torch.vstack( - [precursor_masses_t, precursor_charges_t, precursor_mzs_t] - ).T.float() - - all_spectra = [] - all_precursors = [] - all_spectrum_ids = [] - all_peptides = [] - for idx in range(len(batch)): - spec_peptides = protein_database.get_candidates( - precursor_mzs[idx], - precursor_charges[idx], + spectra, precursors, spectrum_ids = prepare_batch(batch) + + batch_spectra = [] + batch_precursors = [] + batch_spectrum_ids = [] + batch_peptides = [] + # FIXME: This can be optmized by using a sliding window instead of + # retrieving candidates for each spectrum indendently. 
+ for i in range(len(batch)): + candidate_pep = protein_database.get_candidates( + precursors[i][2], precursors[i][1] ) - try: - all_spectra.append( - spectra[idx].unsqueeze(0).repeat(len(spec_peptides), 1, 1) + if len(candidate_pep) == 0: + logger.info( + "No candidate peptides found for spectrum %s", spectrum_ids[i] ) - all_precursors.append( - precursors[idx].unsqueeze(0).repeat(len(spec_peptides), 1) + else: + batch_spectra.append( + spectra[i].unsqueeze(0).repeat(len(candidate_pep), 1, 1) ) - all_spectrum_ids.extend([spectrum_ids[idx]] * len(spec_peptides)) - all_peptides.extend(spec_peptides) - except ValueError: - logger.warning( - "No candidates found for spectrum %s", spectrum_ids[idx] + batch_precursors.append( + precursors[i].unsqueeze(0).repeat(len(candidate_pep), 1) ) + batch_spectrum_ids.extend([spectrum_ids[i]] * len(candidate_pep)) + batch_peptides.extend(candidate_pep) return ( - torch.cat(all_spectra, dim=0), - torch.cat(all_precursors, dim=0), - all_spectrum_ids, - all_peptides, + torch.cat(batch_spectra, dim=0), + torch.cat(batch_precursors, dim=0), + np.asarray(batch_spectrum_ids), + np.asarray(batch_peptides), ) From ee784421b7981717817168614c569f2818d2f432 Mon Sep 17 00:00:00 2001 From: Wout Bittremieux Date: Sun, 10 Nov 2024 15:46:37 +0100 Subject: [PATCH 66/84] More minor refactoring --- casanovo/config.py | 8 +- casanovo/config.yaml | 2 +- casanovo/data/psm.py | 19 +- casanovo/denovo/model.py | 477 ++++++++++++++++---------------- casanovo/denovo/model_runner.py | 90 +++--- 5 files changed, 298 insertions(+), 298 deletions(-) diff --git a/casanovo/config.py b/casanovo/config.py index ea25428c..e276e12d 100644 --- a/casanovo/config.py +++ b/casanovo/config.py @@ -13,8 +13,8 @@ logger = logging.getLogger("casanovo") -# FIXME: This contains deprecated config options to be removed in the next major -# version update. +# FIXME: This contains deprecated config options to be removed in the next +# major version update. _config_deprecated = dict( every_n_train_steps="val_check_interval", max_iters="cosine_schedule_period_iters", @@ -27,8 +27,8 @@ class Config: """The Casanovo configuration options. - If a parameter is missing from a user's configuration file, the default - value is assumed. + If a parameter is missing from a user's configuration file, the + default value is assumed. Parameters ---------- diff --git a/casanovo/config.yaml b/casanovo/config.yaml index 014f02ee..b7179347 100644 --- a/casanovo/config.yaml +++ b/casanovo/config.yaml @@ -33,7 +33,7 @@ devices: ### -# The following parameters are unique to Casanovo's inference/finetuning mode. +# The following parameters are unique to Casanovo's de novo sequencing mode. ### # Number of beams used in beam search. diff --git a/casanovo/data/psm.py b/casanovo/data/psm.py index e4ef3af7..eece07a4 100644 --- a/casanovo/data/psm.py +++ b/casanovo/data/psm.py @@ -1,4 +1,4 @@ -"""Peptide spectrum match dataclass""" +"""Peptide spectrum match dataclass.""" import dataclasses from typing import Tuple, Iterable @@ -15,23 +15,24 @@ class PepSpecMatch: The amino acid sequence of the peptide. spectrum_id : Tuple[str, str] A tuple containing the spectrum identifier in the form - (spectrum file name, spectrum file idx) + (spectrum file name, spectrum file idx). peptide_score : float Score of the match between the full peptide sequence and the spectrum. charge : int - The precursor charge state of the peptide ion observed in the spectrum. + The precursor charge state of the peptide ion observed in the + spectrum. 
calc_mz : float - The calculated mass-to-charge ratio (m/z) of the peptide based on its - sequence and charge state. + The calculated mass-to-charge ratio (m/z) of the peptide based + on its sequence and charge state. exp_mz : float - The observed (experimental) precursor mass-to-charge ratio (m/z) of the - peptide as detected in the spectrum. + The observed (experimental) precursor mass-to-charge ratio (m/z) + of the peptide as detected in the spectrum. aa_scores : Iterable[float] A list of scores for individual amino acids in the peptide - sequence, where len(aa_scores) == len(sequence) + sequence, where len(aa_scores) == len(sequence). protein : str - Protein associated with the peptide sequence (for db mode) + Protein associated with the peptide sequence (for db mode). """ sequence: str diff --git a/casanovo/denovo/model.py b/casanovo/denovo/model.py index 5e807153..d309d11c 100644 --- a/casanovo/denovo/model.py +++ b/casanovo/denovo/model.py @@ -4,6 +4,7 @@ import heapq import logging import warnings +from pathlib import Path from typing import Any, Dict, Iterable, List, Optional, Tuple, Union import depthcharge.masses @@ -32,37 +33,39 @@ class Spec2Pep(pl.LightningModule, ModelMixin): dim_model : int The latent dimensionality used by the transformer model. n_head : int - The number of attention heads in each layer. ``dim_model`` must be - divisible by ``n_head``. + The number of attention heads in each layer. ``dim_model`` must + be divisible by ``n_head``. dim_feedforward : int - The dimensionality of the fully connected layers in the transformer - model. + The dimensionality of the fully connected layers in the + transformer model. n_layers : int The number of transformer layers. dropout : float The dropout probability for all layers. dim_intensity : Optional[int] - The number of features to use for encoding peak intensity. The remaining - (``dim_model - dim_intensity``) are reserved for encoding the m/z value. - If ``None``, the intensity will be projected up to ``dim_model`` using a - linear layer, then summed with the m/z encoding for each peak. + The number of features to use for encoding peak intensity. The + remaining (``dim_model - dim_intensity``) are reserved for + encoding the m/z value. If ``None``, the intensity will be + projected up to ``dim_model`` using a linear layer, then summed + with the m/z encoding for each peak. max_peptide_len : int The maximum peptide length to decode. residues : Union[Dict[str, float], str] - The amino acid dictionary and their masses. By default ("canonical) this - is only the 20 canonical amino acids, with cysteine carbamidomethylated. - If "massivekb", this dictionary will include the modifications found in - MassIVE-KB. Additionally, a dictionary can be used to specify a custom + The amino acid dictionary and their masses. By default + ("canonical) this is only the 20 canonical amino acids, with + cysteine carbamidomethylated. If "massivekb", this dictionary + will include the modifications found in MassIVE-KB. + Additionally, a dictionary can be used to specify a custom collection of amino acids and masses. max_charge : int The maximum precursor charge to consider. precursor_mass_tol : float, optional - The maximum allowable precursor mass tolerance (in ppm) for correct - predictions. + The maximum allowable precursor mass tolerance (in ppm) for + correct predictions. 
isotope_error_range : Tuple[int, int] - Take into account the error introduced by choosing a non-monoisotopic - peak for fragmentation by not penalizing predicted precursor m/z's that - fit the specified isotope error: + Take into account the error introduced by choosing a + non-monoisotopic peak for fragmentation by not penalizing + predicted precursor m/z's that fit the specified isotope error: `abs(calc_mz - (precursor_mz - isotope * 1.00335 / precursor_charge)) < precursor_mass_tol` min_peptide_len : int @@ -73,16 +76,18 @@ class Spec2Pep(pl.LightningModule, ModelMixin): Number of PSMs to return for each spectrum. n_log : int The number of epochs to wait between logging messages. - tb_summarywriter : Optional[str] - Folder path to record performance metrics during training. If ``None``, - don't use a ``SummaryWriter``. + tb_summarywriter : Optional[Path] + Folder path to record performance metrics during training. If + ``None``, don't use a ``SummaryWriter``. train_label_smoothing : float Smoothing factor when calculating the training loss. warmup_iters : int - The number of iterations for the linear warm-up of the learning rate. + The number of iterations for the linear warm-up of the learning + rate. cosine_schedule_period_iters : int - The number of iterations for the cosine half period of the learning rate. - out_writer : Optional[str] + The number of iterations for the cosine half period of the + learning rate. + out_writer : Optional[ms_io.MztabWriter] The output writer for the prediction results. calculate_precision : bool Calculate the validation set precision during training. @@ -108,9 +113,7 @@ def __init__( n_beams: int = 1, top_match: int = 1, n_log: int = 10, - tb_summarywriter: Optional[ - torch.utils.tensorboard.SummaryWriter - ] = None, + tb_summarywriter: Optional[Path] = None, train_label_smoothing: float = 0.01, warmup_iters: int = 100_000, cosine_schedule_period_iters: int = 600_000, @@ -147,8 +150,9 @@ def __init__( # Optimizer settings. self.warmup_iters = warmup_iters self.cosine_schedule_period_iters = cosine_schedule_period_iters - # `kwargs` will contain additional arguments as well as unrecognized - # arguments, including deprecated ones. Remove the deprecated ones. + # `kwargs` will contain additional arguments as well as + # unrecognized arguments, including deprecated ones. Remove the + # deprecated ones. for k in config._config_deprecated: kwargs.pop(k, None) warnings.warn( @@ -175,12 +179,12 @@ def __init__( self.n_log = n_log self._history = [] if tb_summarywriter is not None: - self.tb_summarywriter = SummaryWriter(tb_summarywriter) + self.tb_summarywriter = SummaryWriter(str(tb_summarywriter)) else: - self.tb_summarywriter = tb_summarywriter + self.tb_summarywriter = None # Output writer during predicting. - self.out_writer = out_writer + self.out_writer: ms_io.MztabWriter = out_writer def forward( self, spectra: torch.Tensor, precursors: torch.Tensor @@ -192,20 +196,22 @@ def forward( ---------- spectra : torch.Tensor of shape (n_spectra, n_peaks, 2) The spectra for which to predict peptide sequences. - Axis 0 represents an MS/MS spectrum, axis 1 contains the peaks in - the MS/MS spectrum, and axis 2 is essentially a 2-tuple specifying - the m/z-intensity pair for each peak. These should be zero-padded, - such that all the spectra in the batch are the same length. + Axis 0 represents an MS/MS spectrum, axis 1 contains the + peaks in the MS/MS spectrum, and axis 2 is essentially a + 2-tuple specifying the m/z-intensity pair for each peak. 
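The isotope-aware precursor check described in the docstring above can be sketched as a small stand-alone function (not the model's implementation): a peptide is accepted if, for some isotope offset in isotope_error_range, the error between its calculated m/z and the isotope-corrected observed m/z falls within precursor_mass_tol in ppm. The sketch assumes calc_mz = neutral_mass / charge + 1.007276 and the 1.00335 Da isotope spacing from the formula above.

def fits_precursor_mz(calc_mass, obs_mz, charge, tol_ppm, isotope_error_range):
    calc_mz = calc_mass / charge + 1.007276
    for isotope in range(isotope_error_range[0], isotope_error_range[1] + 1):
        # Correct the observed m/z for a possible non-monoisotopic peak selection.
        corrected_mz = obs_mz - isotope * 1.00335 / charge
        delta_ppm = (calc_mz - corrected_mz) / corrected_mz * 1e6
        if abs(delta_ppm) < tol_ppm:
            return True
    return False

calc_mass, charge = 829.46, 2
calc_mz = calc_mass / charge + 1.007276
# Simulate picking the M+1 isotope peak instead of the monoisotopic peak:
obs_mz = calc_mz + 1.00335 / charge
print(fits_precursor_mz(calc_mass, obs_mz, charge, 50.0, (0, 1)))  # True, matched at isotope = 1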
+ These should be zero-padded, such that all the spectra in + the batch are the same length. precursors : torch.Tensor of size (n_spectra, 3) - The measured precursor mass (axis 0), precursor charge (axis 1), and - precursor m/z (axis 2) of each MS/MS spectrum. + The measured precursor mass (axis 0), precursor charge + (axis 1), and precursor m/z (axis 2) of each MS/MS spectrum. Returns ------- pred_peptides : List[List[Tuple[float, np.ndarray, str]]] - For each spectrum, a list with the top peptide predictions. A - peptide predictions consists of a tuple with the peptide score, - the amino acid scores, and the predicted peptide sequence. + For each spectrum, a list with the top peptide predictions. + A peptide predictions consists of a tuple with the peptide + score, the amino acid scores, and the predicted peptide + sequence. """ return self.beam_search_decode( spectra.to(self.encoder.device), @@ -222,20 +228,22 @@ def beam_search_decode( ---------- spectra : torch.Tensor of shape (n_spectra, n_peaks, 2) The spectra for which to predict peptide sequences. - Axis 0 represents an MS/MS spectrum, axis 1 contains the peaks in - the MS/MS spectrum, and axis 2 is essentially a 2-tuple specifying - the m/z-intensity pair for each peak. These should be zero-padded, - such that all the spectra in the batch are the same length. + Axis 0 represents an MS/MS spectrum, axis 1 contains the + peaks in the MS/MS spectrum, and axis 2 is essentially a + 2-tuple specifying the m/z-intensity pair for each peak. + These should be zero-padded, such that all the spectra in + the batch are the same length. precursors : torch.Tensor of size (n_spectra, 3) - The measured precursor mass (axis 0), precursor charge (axis 1), and - precursor m/z (axis 2) of each MS/MS spectrum. + The measured precursor mass (axis 0), precursor charge + (axis 1), and precursor m/z (axis 2) of each MS/MS spectrum. Returns ------- pred_peptides : List[List[Tuple[float, np.ndarray, str]]] - For each spectrum, a list with the top peptide prediction(s). A - peptide predictions consists of a tuple with the peptide score, - the amino acid scores, and the predicted peptide sequence. + For each spectrum, a list with the top peptide + prediction(s). A peptide predictions consists of a tuple + with the peptide score, the amino acid scores, and the + predicted peptide sequence. """ memories, mem_masks = self.encoder(spectra) @@ -270,15 +278,16 @@ def beam_search_decode( # The main decoding loop. for step in range(0, self.max_peptide_len): - # Terminate beams exceeding the precursor m/z tolerance and track - # all finished beams (either terminated or stop token predicted). + # Terminate beams exceeding the precursor m/z tolerance and + # track all finished beams (either terminated or stop token + # predicted). ( finished_beams, beam_fits_precursor, discarded_beams, ) = self._finish_beams(tokens, precursors, step) - # Cache peptide predictions from the finished beams (but not the - # discarded beams). + # Cache peptide predictions from the finished beams (but not + # the discarded beams). self._cache_finished_beams( tokens, scores, @@ -289,7 +298,8 @@ def beam_search_decode( ) # Stop decoding when all current beams have been finished. - # Continue with beams that have not been finished and not discarded. + # Continue with beams that have not been finished and not + # discarded. 
finished_beams |= discarded_beams if finished_beams.all(): break @@ -300,14 +310,14 @@ def beam_search_decode( memories[~finished_beams, :, :], mem_masks[~finished_beams, :], ) - # Find the top-k beams with the highest scores and continue decoding - # those. + # Find the top-k beams with the highest scores and continue + # decoding those. tokens, scores = self._get_topk_beams( tokens, scores, finished_beams, batch, step + 1 ) - # Return the peptide with the highest confidence score, within the - # precursor m/z tolerance if possible. + # Return the peptide with the highest confidence score, within + # the precursor m/z tolerance if possible. return list(self._get_top_peptide(pred_cache)) def _finish_beams( @@ -317,9 +327,9 @@ def _finish_beams( step: int, ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: """ - Track all beams that have been finished, either by predicting the stop - token or because they were terminated due to exceeding the precursor - m/z tolerance. + Track all beams that have been finished, either by predicting + the stop token or because they were terminated due to exceeding + the precursor m/z tolerance. Parameters ---------- @@ -327,23 +337,23 @@ def _finish_beams( Predicted amino acid tokens for all beams and all spectra. scores : torch.Tensor of shape (n_spectra * n_beams, max_peptide_len, n_amino_acids) - Scores for the predicted amino acid tokens for all beams and all - spectra. + Scores for the predicted amino acid tokens for all beams and + all spectra. step : int Index of the current decoding step. Returns ------- finished_beams : torch.Tensor of shape (n_spectra * n_beams) - Boolean tensor indicating whether the current beams have been - finished. + Boolean tensor indicating whether the current beams have + been finished. beam_fits_precursor: torch.Tensor of shape (n_spectra * n_beams) - Boolean tensor indicating if current beams are within precursor m/z - tolerance. + Boolean tensor indicating if current beams are within + precursor m/z tolerance. discarded_beams : torch.Tensor of shape (n_spectra * n_beams) - Boolean tensor indicating whether the current beams should be - discarded (e.g. because they were predicted to end but violate the - minimum peptide length). + Boolean tensor indicating whether the current beams should + be discarded (e.g. because they were predicted to end but + violate the minimum peptide length). """ # Check for tokens with a negative mass (i.e. neutral loss). aa_neg_mass = [None] @@ -362,7 +372,8 @@ def _finish_beams( beam_fits_precursor = torch.zeros( tokens.shape[0], dtype=torch.bool ).to(self.encoder.device) - # Beams with a stop token predicted in the current step can be finished. + # Beams with a stop token predicted in the current step can be + # finished. finished_beams = torch.zeros(tokens.shape[0], dtype=torch.bool).to( self.encoder.device ) @@ -374,8 +385,9 @@ def _finish_beams( self.encoder.device ) discarded_beams[tokens[:, step] == 0] = True - # Discard beams with invalid modification combinations (i.e. N-terminal - # modifications occur multiple times or in internal positions). + # Discard beams with invalid modification combinations (i.e. + # N-terminal modifications occur multiple times or in internal + # positions). if step > 1: # Only relevant for longer predictions. 
dim0 = torch.arange(tokens.shape[0]) final_pos = torch.full((ends_stop_token.shape[0],), step) @@ -392,8 +404,8 @@ def _finish_beams( ).any(dim=1) discarded_beams[multiple_mods | internal_mods] = True - # Check which beams should be terminated or discarded based on the - # predicted peptide. + # Check which beams should be terminated or discarded based on + # the predicted peptide. for i in range(len(finished_beams)): # Skip already discarded beams. if discarded_beams[i]: @@ -408,15 +420,15 @@ def _finish_beams( elif not self.decoder.reverse and peptide[-1] == "$": peptide = peptide[:-1] peptide_len -= 1 - # Discard beams that were predicted to end but don't fit the minimum - # peptide length. + # Discard beams that were predicted to end but don't fit the + # minimum peptide length. if finished_beams[i] and peptide_len < self.min_peptide_len: discarded_beams[i] = True continue - # Terminate the beam if it has not been finished by the model but - # the peptide mass exceeds the precursor m/z to an extent that it - # cannot be corrected anymore by a subsequently predicted AA with - # negative mass. + # Terminate the beam if it has not been finished by the + # model but the peptide mass exceeds the precursor m/z to an + # extent that it cannot be corrected anymore by a + # subsequently predicted AA with negative mass. precursor_charge = precursors[i, 1] precursor_mz = precursors[i, 2] matches_precursor_mz = exceeds_precursor_mz = False @@ -442,16 +454,18 @@ def _finish_beams( self.isotope_error_range[1] + 1, ) ] - # Terminate the beam if the calculated m/z for the predicted - # peptide (without potential additional AAs with negative - # mass) is within the precursor m/z tolerance. + # Terminate the beam if the calculated m/z for the + # predicted peptide (without potential additional + # AAs with negative mass) is within the precursor + # m/z tolerance. matches_precursor_mz = aa is None and any( abs(d) < self.precursor_mass_tol for d in delta_mass_ppm ) - # Terminate the beam if the calculated m/z exceeds the - # precursor m/z + tolerance and hasn't been corrected by a - # subsequently predicted AA with negative mass. + # Terminate the beam if the calculated m/z exceeds + # the precursor m/z + tolerance and hasn't been + # corrected by a subsequently predicted AA with + # negative mass. if matches_precursor_mz: exceeds_precursor_mz = False else: @@ -466,8 +480,8 @@ def _finish_beams( except KeyError: matches_precursor_mz = exceeds_precursor_mz = False # Finish beams that fit or exceed the precursor m/z. - # Don't finish beams that don't include a stop token if they don't - # exceed the precursor m/z tolerance yet. + # Don't finish beams that don't include a stop token if they + # don't exceed the precursor m/z tolerance yet. if finished_beams[i]: beam_fits_precursor[i] = matches_precursor_mz elif exceeds_precursor_mz: @@ -495,13 +509,13 @@ def _cache_finished_beams( Predicted amino acid tokens for all beams and all spectra. scores : torch.Tensor of shape (n_spectra * n_beams, max_peptide_len, n_amino_acids) - Scores for the predicted amino acid tokens for all beams and all - spectra. + Scores for the predicted amino acid tokens for all beams and + all spectra. step : int Index of the current decoding step. beams_to_cache : torch.Tensor of shape (n_spectra * n_beams) - Boolean tensor indicating whether the current beams are ready for - caching. + Boolean tensor indicating whether the current beams are + ready for caching. 
beam_fits_precursor: torch.Tensor of shape (n_spectra * n_beams) Boolean tensor indicating whether the beams are within the precursor m/z tolerance. @@ -509,9 +523,9 @@ def _cache_finished_beams( int, List[Tuple[float, float, np.ndarray, torch.Tensor]] ] Priority queue with finished beams for each spectrum, ordered by - peptide score. For each finished beam, a tuple with the (negated) - peptide score, a random tie-breaking float, the amino acid-level - scores, and the predicted tokens is stored. + peptide score. For each finished beam, a tuple with the + (negated) peptide score, a random tie-breaking float, the + amino acid-level scores, and the predicted tokens is stored. """ for i in range(len(beams_to_cache)): if not beams_to_cache[i]: @@ -533,8 +547,8 @@ def _cache_finished_beams( continue smx = self.softmax(scores[i : i + 1, : step + 1, :]) aa_scores = smx[0, range(len(pred_tokens)), pred_tokens].tolist() - # Add an explicit score 0 for the missing stop token in case this - # was not predicted (i.e. early stopping). + # Add an explicit score 0 for the missing stop token in case + # this was not predicted (i.e. early stopping). if not has_stop_token: aa_scores.append(0) aa_scores = np.asarray(aa_scores) @@ -544,8 +558,8 @@ def _cache_finished_beams( ) # Omit the stop token from the amino acid-level scores. aa_scores = aa_scores[:-1] - # Add the prediction to the cache (minimum priority queue, maximum - # the number of beams elements). + # Add the prediction to the cache (minimum priority queue, + # maximum the number of beams elements). if len(pred_cache[spec_idx]) < self.n_beams: heapadd = heapq.heappush else: @@ -569,8 +583,8 @@ def _get_topk_beams( step: int, ) -> Tuple[torch.tensor, torch.tensor]: """ - Find the top-k beams with the highest scores and continue decoding - those. + Find the top-k beams with the highest scores and continue + decoding those. Stop decoding for beams that have been finished. @@ -580,11 +594,11 @@ def _get_topk_beams( Predicted amino acid tokens for all beams and all spectra. scores : torch.Tensor of shape (n_spectra * n_beams, max_peptide_len, n_amino_acids) - Scores for the predicted amino acid tokens for all beams and all - spectra. + Scores for the predicted amino acid tokens for all beams and + all spectra. finished_beams : torch.Tensor of shape (n_spectra * n_beams) - Boolean tensor indicating whether the current beams are ready for - caching. + Boolean tensor indicating whether the current beams are + ready for caching. batch: int Number of spectra in the batch. step : int @@ -596,8 +610,8 @@ def _get_topk_beams( Predicted amino acid tokens for all beams and all spectra. scores : torch.Tensor of shape (n_spectra * n_beams, max_peptide_len, n_amino_acids) - Scores for the predicted amino acid tokens for all beams and all - spectra. + Scores for the predicted amino acid tokens for all beams and + all spectra. """ beam = self.n_beams # S vocab = self.decoder.vocab_size + 1 # V @@ -632,7 +646,7 @@ def _get_topk_beams( ).float() # Mask out the index '0', i.e. padding token, by default. # FIXME: Set this to a very small, yet non-zero value, to only - # get padding after stop token. + # get padding after stop token. active_mask[:, :beam] = 1e-8 # Figure out the top K decodings. @@ -660,24 +674,26 @@ def _get_top_peptide( ], ) -> Iterable[List[Tuple[float, np.ndarray, str]]]: """ - Return the peptide with the highest confidence score for each spectrum. + Return the peptide with the highest confidence score for each + spectrum. 
Parameters ---------- pred_cache : Dict[ int, List[Tuple[float, float, np.ndarray, torch.Tensor]] ] - Priority queue with finished beams for each spectrum, ordered by - peptide score. For each finished beam, a tuple with the peptide - score, a random tie-breaking float, the amino acid-level scores, - and the predicted tokens is stored. + Priority queue with finished beams for each spectrum, + ordered by peptide score. For each finished beam, a tuple + with the peptide score, a random tie-breaking float, the + amino acid-level scores, and the predicted tokens is stored. Returns ------- pred_peptides : Iterable[List[Tuple[float, np.ndarray, str]]] - For each spectrum, a list with the top peptide prediction(s). A - peptide predictions consists of a tuple with the peptide score, - the amino acid scores, and the predicted peptide sequence. + For each spectrum, a list with the top peptide + prediction(s). A peptide predictions consists of a tuple + with the peptide score, the amino acid scores, and the + predicted peptide sequence. """ for peptides in pred_cache.values(): if len(peptides) > 0: @@ -707,13 +723,14 @@ def _forward_step( ---------- spectra : torch.Tensor of shape (n_spectra, n_peaks, 2) The spectra for which to predict peptide sequences. - Axis 0 represents an MS/MS spectrum, axis 1 contains the peaks in - the MS/MS spectrum, and axis 2 is essentially a 2-tuple specifying - the m/z-intensity pair for each peak. These should be zero-padded, - such that all the spectra in the batch are the same length. + Axis 0 represents an MS/MS spectrum, axis 1 contains the + peaks in the MS/MS spectrum, and axis 2 is essentially a + 2-tuple specifying the m/z-intensity pair for each peak. + These should be zero-padded, such that all the spectra in + the batch are the same length. precursors : torch.Tensor of size (n_spectra, 3) - The measured precursor mass (axis 0), precursor charge (axis 1), and - precursor m/z (axis 2) of each MS/MS spectrum. + The measured precursor mass (axis 0), precursor charge + (axis 1), and precursor m/z (axis 2) of each MS/MS spectrum. sequences : List[str] of length n_spectra The partial peptide sequences to predict. @@ -738,8 +755,8 @@ def training_step( Parameters ---------- batch : Tuple[torch.Tensor, torch.Tensor, List[str]] - A batch of (i) MS/MS spectra, (ii) precursor information, (iii) - peptide sequences as torch Tensors. + A batch of (i) MS/MS spectra, (ii) precursor information, + (iii) peptide sequences as torch Tensors. mode : str Logging key to describe the current stage. @@ -772,8 +789,8 @@ def validation_step( Parameters ---------- batch : Tuple[torch.Tensor, torch.Tensor, List[str]] - A batch of (i) MS/MS spectra, (ii) precursor information, (iii) - peptide sequences. + A batch of (i) MS/MS spectra, (ii) precursor information, + (iii) peptide sequences. Returns ------- @@ -785,8 +802,8 @@ def validation_step( if not self.calculate_precision: return loss - # Calculate and log amino acid and peptide match evaluation metrics from - # the predicted peptides. + # Calculate and log amino acid and peptide match evaluation + # metrics from the predicted peptides. 
peptides_pred, peptides_true = [], batch[2] for spectrum_preds in self.forward(batch[0], batch[1]): for _, _, pred in spectrum_preds: @@ -794,42 +811,30 @@ def validation_step( aa_precision, _, pep_precision = evaluate.aa_match_metrics( *evaluate.aa_match_batch( - peptides_true, - peptides_pred, - self.decoder._peptide_mass.masses, + peptides_true, peptides_pred, self.decoder._peptide_mass.masses ) ) log_args = dict(on_step=False, on_epoch=True, sync_dist=True) - self.log( - "Peptide precision at coverage=1", - pep_precision, - **log_args, - ) - self.log( - "AA precision at coverage=1", - aa_precision, - **log_args, - ) + self.log("Peptide precision at coverage=1", pep_precision, **log_args) + self.log("AA precision at coverage=1", aa_precision, **log_args) return loss def predict_step( self, batch: Tuple[torch.Tensor, torch.Tensor, torch.Tensor], *args - ) -> List[Tuple[np.ndarray, float, float, str, float, np.ndarray]]: + ) -> List[ms_io.PepSpecMatch]: """ A single prediction step. Parameters ---------- batch : Tuple[torch.Tensor, torch.Tensor, torch.Tensor] - A batch of (i) MS/MS spectra, (ii) precursor information, (iii) - spectrum identifiers as torch Tensors. + A batch of (i) MS/MS spectra, (ii) precursor information, + (iii) spectrum identifiers as torch Tensors. Returns ------- - predictions: List[Tuple[np.ndarray, float, float, str, float, np.ndarray]] - Model predictions for the given batch of spectra containing spectrum - ids, precursor information, peptide sequences as well as peptide - and amino acid-level confidence scores. + predictions: List[ms_io.PepSpecMatch] + Predicted PSMs for the given batch of spectra. """ predictions = [] for ( @@ -845,13 +850,16 @@ def predict_step( ): for peptide_score, aa_scores, peptide in spectrum_preds: predictions.append( - ( - spectrum_i, - precursor_charge, - precursor_mz, - peptide, - peptide_score, - aa_scores, + ms_io.PepSpecMatch( + sequence=peptide, + spectrum_id=tuple(spectrum_i), + peptide_score=peptide_score, + charge=int(precursor_charge), + calc_mz=self.peptide_mass_calculator.mass( + peptide, precursor_charge + ), + exp_mz=precursor_mz, + aa_scores=aa_scores, ) ) @@ -897,8 +905,8 @@ def on_predict_batch_end( *args, ) -> None: """ - Write the predicted peptide sequences and amino acid scores to the - output file. + Write the predicted peptide sequences and amino acid scores to + the output file. """ if self.out_writer is None: return @@ -970,16 +978,18 @@ def _log_history(self) -> None: def configure_optimizers( self, - ) -> Tuple[torch.optim.Optimizer, Dict[str, Any]]: + ) -> Tuple[List[torch.optim.Optimizer], Dict[str, Any]]: """ Initialize the optimizer. - This is used by pytorch-lightning when preparing the model for training. + This is used by pytorch-lightning when preparing the model for + training. Returns ------- - Tuple[torch.optim.Optimizer, Dict[str, Any]] - The initialized Adam optimizer and its learning rate scheduler. + Tuple[List[torch.optim.Optimizer], Dict[str, Any]] + The initialized Adam optimizer and its learning rate + scheduler. """ optimizer = torch.optim.Adam(self.parameters(), **self.opt_kwargs) # Apply learning rate scheduler per step. @@ -991,8 +1001,8 @@ def configure_optimizers( class DbSpec2Pep(Spec2Pep): """ - Subclass of Spec2Pep for the use of Casanovo as an - MS/MS database search score function. + Subclass of Spec2Pep for the use of Casanovo as an MS/MS database + search score function. Uses teacher forcing to 'query' Casanovo to score a peptide-spectrum pair. 
Higher scores indicate a better match between the peptide and @@ -1006,34 +1016,35 @@ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.psm_batch_size = None - def predict_step(self, batch, *args): + def predict_step( + self, + batch: Tuple[torch.Tensor, torch.Tensor, np.ndarray, np.ndarray], + *args + ) -> List[ms_io.PepSpecMatch]: """ - A single prediction step for Casanovo-DB + A single prediction step. Parameters ---------- - batch : Tuple[torch.Tensor, torch.Tensor, np.array, List[str]] - A batch of (i) MS/MS spectra, (ii) precursor information, (iii) - spectrum identifiers, (iv) candidate peptides + batch : Tuple[torch.Tensor, torch.Tensor, np.ndarray, np.ndarray] + A batch of (i) MS/MS spectra, (ii) precursor information, + (iii) spectrum identifiers, (iv) candidate peptides. Returns ------- - predictions: List[Tuple[List[str], int, float, str, np.ndarray, np.ndarray, str]] - Model predictions for the given batch of spectra containing spectrum - ids, precursor charge and m/z, candidate peptide sequences, peptide - scores, amino acid-level scores, and associated proteins. - Stored separately by spectrum id. + predictions: List[ms_io.PepSpecMatch] + Predicted PSMs for the given batch of spectra. """ - store_dict = collections.defaultdict(list) - for start_idx in range(0, len(batch[0]), self.psm_batch_size): - current_batch = [ - b[start_idx : start_idx + self.psm_batch_size] for b in batch + predictions_all = collections.defaultdict(list) + for start_i in range(0, len(batch[0]), self.psm_batch_size): + psm_batch = [ + b[start_i : start_i + self.psm_batch_size] for b in batch ] pred, truth = self._forward_step( - current_batch[0], current_batch[1], current_batch[3] + psm_batch[0], psm_batch[1], psm_batch[3] ) pred = self.softmax(pred) - all_peptide_scores, all_aa_scores = _calc_match_score( + batch_peptide_scores, batch_aa_scores = _calc_match_score( pred, truth, self.decoder.reverse ) for ( @@ -1044,46 +1055,44 @@ def predict_step(self, batch, *args): aa_scores, peptide, ) in zip( - current_batch[1][:, 1].cpu().detach().numpy(), - current_batch[1][:, 2].cpu().detach().numpy(), - current_batch[2], - all_peptide_scores, - all_aa_scores, - current_batch[3], + psm_batch[1][:, 1].cpu().detach().numpy(), + psm_batch[1][:, 2].cpu().detach().numpy(), + psm_batch[2], + batch_peptide_scores, + batch_aa_scores, + psm_batch[3], ): - store_dict[spectrum_i].append( + predictions_all[spectrum_i].append( ms_io.PepSpecMatch( sequence=peptide, spectrum_id=tuple(spectrum_i), peptide_score=peptide_score, charge=int(charge), - calc_mz=precursor_mz, - exp_mz=self.peptide_mass_calculator.mass( + calc_mz=self.peptide_mass_calculator.mass( peptide, charge ), + exp_mz=precursor_mz, aa_scores=aa_scores, protein=self.protein_database.get_associated_protein( peptide ), ) ) - predictions = [] - for spectrum_i in store_dict: - predictions.extend( + # Filter the top-scoring prediction(s) for each spectrum. + predictions = [ + *( sorted( - store_dict[spectrum_i], - key=lambda x: x.peptide_score, + spectrum_predictions, + key=lambda p: p.peptide_score, reverse=True, )[: self.top_match] + for spectrum_predictions in predictions_all.values() ) + ] return predictions def on_predict_batch_end( - self, - outputs: List[ - Tuple[List[str], int, float, str, np.ndarray, np.ndarray, str] - ], - *args, + self, outputs: List[ms_io.PepSpecMatch], *args ) -> None: """ Write the database search results to the output file. 
@@ -1095,37 +1104,35 @@ def _calc_match_score( batch_all_aa_scores: torch.Tensor, truth_aa_indices: torch.Tensor, decoder_reverse: bool = False, -) -> Tuple[torch.Tensor, torch.Tensor]: +) -> Tuple[List[float], List[np.ndarray]]: """ - Calculate the score between the input spectra and associated peptide. + Calculate the score between the input spectra and associated + peptide. - Take in teacher-forced scoring of amino acids - of the peptides (in a batch) and use the truth labels - to calculate a score between the input spectra and - associated peptide. + Take in teacher-forced scoring of amino acids of the peptides (in a + batch) and use the truth labels to calculate a score between the + input spectra and associated peptide. Parameters ---------- batch_all_aa_scores : torch.Tensor - Amino acid scores for all amino acids in - the vocabulary for every prediction made to generate - the associated peptide (for an entire batch) + Amino acid scores for all amino acids in the vocabulary for + every prediction made to generate the associated peptide (for an + entire batch). truth_aa_indices : torch.Tensor - Indices of the score for each actual amino acid - in the peptide (for an entire batch) + Indices of the score for each actual amino acid in the peptide + (for an entire batch). decoder_reverse : bool Whether the decoder is reversed. Returns ------- - all_peptide_scores: List[float] - The score between the input spectra and associated peptide - for each PSM in the batch. - all_aa_scores : List[List[float]] - A list of lists of per amino acid scores - for each PSM in the batch. + peptide_scores: List[float] + The peptide score for each PSM in the batch. + aa_scores : List[np.ndarray] + The amino acid scores for each PSM in the batch. """ - # Remove trailing tokens from predictions based on decoder reversal + # Remove trailing tokens from predictions based on decoder reversal. if not decoder_reverse: batch_all_aa_scores = batch_all_aa_scores[:, 1:] else: @@ -1144,29 +1151,31 @@ def _calc_match_score( per_aa_scores[per_aa_scores == 0] += 1e-10 score_mask = truth_aa_indices != 0 per_aa_scores[~score_mask] = 0 - all_peptide_scores = [] - all_aa_scores = [] + peptide_scores, aa_scores = [], [] for psm_score in per_aa_scores: psm_score = np.trim_zeros(psm_score) - aa_scores, peptide_score = _aa_pep_score(psm_score, True) - all_peptide_scores.append(peptide_score) - all_aa_scores.append(aa_scores) + psm_aa_scores, psm_peptide_score = _aa_pep_score(psm_score, True) + peptide_scores.append(psm_peptide_score) + aa_scores.append(psm_aa_scores) - return all_peptide_scores, all_aa_scores + return peptide_scores, aa_scores class CosineWarmupScheduler(torch.optim.lr_scheduler._LRScheduler): """ - Learning rate scheduler with linear warm-up followed by cosine shaped decay. + Learning rate scheduler with linear warm-up followed by cosine + shaped decay. Parameters ---------- optimizer : torch.optim.Optimizer Optimizer object. warmup_iters : int - The number of iterations for the linear warm-up of the learning rate. + The number of iterations for the linear warm-up of the learning + rate. cosine_schedule_period_iters : int - The number of iterations for the cosine half period of the learning rate. + The number of iterations for the cosine half period of the + learning rate. 
""" def __init__( @@ -1196,8 +1205,8 @@ def _calc_mass_error( calc_mz: float, obs_mz: float, charge: int, isotope: int = 0 ) -> float: """ - Calculate the mass error in ppm between the theoretical m/z and the observed - m/z, optionally accounting for an isotopologue mismatch. + Calculate the mass error in ppm between the theoretical m/z and the + observed m/z, optionally accounting for an isotopologue mismatch. Parameters ---------- @@ -1222,18 +1231,20 @@ def _aa_pep_score( aa_scores: np.ndarray, fits_precursor_mz: bool ) -> Tuple[np.ndarray, float]: """ - Calculate amino acid and peptide-level confidence score from the raw amino - acid scores. + Calculate amino acid and peptide-level confidence score from the raw + amino acid scores. - The peptide score is the mean of the raw amino acid scores. The amino acid - scores are the mean of the raw amino acid scores and the peptide score. + The peptide score is the mean of the raw amino acid scores. The + amino acid scores are the mean of the raw amino acid scores and the + peptide score. Parameters ---------- aa_scores : np.ndarray Amino acid level confidence scores. fits_precursor_mz : bool - Flag indicating whether the prediction fits the precursor m/z filter. + Flag indicating whether the prediction fits the precursor m/z + filter. Returns ------- diff --git a/casanovo/denovo/model_runner.py b/casanovo/denovo/model_runner.py index b097f6d5..4e61b164 100644 --- a/casanovo/denovo/model_runner.py +++ b/casanovo/denovo/model_runner.py @@ -4,7 +4,6 @@ import glob import logging import os -import re import tempfile import uuid import warnings @@ -25,7 +24,7 @@ from ..data import db_utils, ms_io from ..denovo.dataloaders import DeNovoDataModule from ..denovo.evaluate import aa_match_batch, aa_match_metrics -from ..denovo.model import Spec2Pep, DbSpec2Pep +from ..denovo.model import DbSpec2Pep, Spec2Pep logger = logging.getLogger("casanovo") @@ -45,11 +44,12 @@ class ModelRunner: The directory where checkpoint files will be saved. If `None` no checkpoint files will be saved and a warning will be triggered. output_rootname : str | None, optional - The root name for checkpoint files (e.g., checkpoints or results). If - `None` no base name will be used for checkpoint files. - overwrite_ckpt_check: bool, optional - Whether to check output_dir (if not `None`) for conflicting checkpoint + The root name for checkpoint files (e.g., checkpoints or + results). If `None` no base name will be used for checkpoint files. + overwrite_ckpt_check: bool, optional + Whether to check output_dir (if not `None`) for conflicting + checkpoint files. """ def __init__( @@ -138,11 +138,7 @@ def db_search( fasta_path : str The path with the FASTA file for database search. results_path : str - Sequencing results file path - - Returns - ------- - self + Sequencing results file path. """ self.writer = ms_io.MztabWriter(results_path) self.writer.set_metadata( @@ -189,10 +185,6 @@ def train( The path to the MS data files for training. valid_peak_path : iterable of str The path to the MS data files for validation. - - Returns - ------- - self """ self.initialize_trainer(train=True) self.initialize_model(train=True) @@ -209,16 +201,16 @@ def train( ) def log_metrics(self, test_index: AnnotatedSpectrumIndex) -> None: - """Log peptide precision and amino acid precision + """Log peptide precision and amino acid precision. 
Calculate and log peptide precision and amino acid precision - based off of model predictions and spectrum annotations + based off of model predictions and spectrum annotations. Parameters ---------- test_index : AnnotatedSpectrumIndex - Index containing the annotated spectra used to generate model - predictions + Index containing the annotated spectra used to generate + model predictions. """ seq_pred = [] seq_true = [] @@ -245,8 +237,9 @@ def log_metrics(self, test_index: AnnotatedSpectrumIndex) -> None: if self.config["top_match"] > 1: logger.warning( - "The behavior for calculating evaluation metrics is undefined when " - "the 'top_match' configuration option is set to a value greater than 1." + "The behavior for calculating evaluation metrics is undefined " + "when the 'top_match' configuration option is set to a value " + "greater than 1." ) logger.info("Peptide Precision: %.2f%%", 100 * pep_precision) @@ -261,13 +254,14 @@ def predict( ) -> None: """Predict peptide sequences with a trained Casanovo model. - Can also evaluate model during prediction if provided with annotated - peak files. + Can also evaluate model during prediction if provided with + annotated peak files. Parameters ---------- peak_path : Iterable[str] - The path with the MS data files for predicting peptide sequences. + The path with the MS data files for predicting peptide + sequences. results_path : str Sequencing results file path evaluate: bool @@ -275,10 +269,6 @@ def predict( Note: peak_path most point to annotated MS data files when running model evaluation. Files that are not an annotated peak file format will be ignored if evaluate is set to true. - - Returns - ------- - self """ self.writer = ms_io.MztabWriter(results_path) self.writer.set_metadata( @@ -363,7 +353,7 @@ def initialize_trainer(self, train: bool) -> None: self.trainer = pl.Trainer(**trainer_cfg) def initialize_model( - self, train: bool, db_search: Optional[bool] = False + self, train: bool, db_search: bool = False ) -> None: """Initialize the Casanovo model. @@ -372,7 +362,7 @@ def initialize_model( train : bool Determines whether to set the model up for model training or evaluation / inference. - db_search : Optional[bool] + db_search : bool Determines whether to use the DB search model subclass. """ tb_summarywriter = None @@ -411,7 +401,8 @@ def initialize_model( calculate_precision=self.config.calculate_precision, ) - # Reconfigurable non-architecture related parameters for a loaded model. + # Reconfigurable non-architecture related parameters for a + # loaded model. loaded_model_params = dict( max_peptide_len=self.config.max_peptide_len, precursor_mass_tol=self.config.precursor_mass_tol, @@ -432,10 +423,8 @@ def initialize_model( if self.model_filename is None: if db_search: - logger.error("DB search mode requires a model file") - raise ValueError( - "A model file must be provided for DB search mode" - ) + logger.error("A model file must be provided for DB search") + raise ValueError("A model file must be provided for DB search") # Train a model from scratch if no model file is provided. if train: self.model = Spec2Pep(**model_params) @@ -444,7 +433,8 @@ def initialize_model( else: logger.error("A model file must be provided") raise ValueError("A model file must be provided") - # Else a model file is provided (to continue training or for inference). + # Else a model file is provided (to continue training or for + # inference). 
if not Path(self.model_filename).exists(): logger.error( @@ -453,15 +443,13 @@ def initialize_model( ) raise FileNotFoundError("Could not find the model weights file") - # First try loading model details from the weights file, otherwise use - # the provided configuration. + # First try loading model details from the weights file, + # otherwise use the provided configuration. device = torch.empty(1).device # Use the default device. Model = DbSpec2Pep if db_search else Spec2Pep try: self.model = Model.load_from_checkpoint( - self.model_filename, - map_location=device, - **loaded_model_params, + self.model_filename, map_location=device, **loaded_model_params ) architecture_params = set(model_params.keys()) - set( @@ -476,7 +464,8 @@ def initialize_model( "using the checkpoint." ) except RuntimeError: - # This only doesn't work if the weights are from an older version + # This only doesn't work if the weights are from an older + # version. try: self.model = Model.load_from_checkpoint( self.model_filename, @@ -497,7 +486,7 @@ def initialize_data_module( Union[AnnotatedSpectrumIndex, SpectrumIndex] ] = None, ) -> None: - """Initialize the data module + """Initialize the data module. Parameters ---------- @@ -536,8 +525,8 @@ def _get_index( ) -> Union[SpectrumIndex, AnnotatedSpectrumIndex]: """Get the spectrum index. - If the file is a SpectrumIndex, only one is allowed. Otherwise multiple - may be specified. + If the file is a SpectrumIndex, only one is allowed. Otherwise + multiple may be specified. Parameters ---------- @@ -597,15 +586,14 @@ def _get_index( def _get_strategy(self) -> Union[str, DDPStrategy]: """Get the strategy for the Trainer. - The DDP strategy works best when multiple GPUs are used. It can work - for CPU-only, but definitely fails using MPS (the Apple Silicon chip) - due to Gloo. + The DDP strategy works best when multiple GPUs are used. It can + work for CPU-only, but definitely fails using MPS (the Apple + Silicon chip) due to Gloo. Returns ------- Union[str, DDPStrategy] The strategy parameter for the Trainer. - """ if self.config.accelerator in ("cpu", "mps"): return "auto" @@ -623,8 +611,8 @@ def _get_peak_filenames( """ Get all matching peak file names from the path pattern. - Performs cross-platform path expansion akin to the Unix shell (glob, expand - user, expand vars). + Performs cross-platform path expansion akin to the Unix shell (glob, + expand user, expand vars). Parameters ---------- From 7fa5f6f68c17c6e55aec25abac14b7cd9c3d7d2d Mon Sep 17 00:00:00 2001 From: Wout Bittremieux Date: Sun, 10 Nov 2024 15:53:56 +0100 Subject: [PATCH 67/84] Reformat with black --- casanovo/data/db_utils.py | 13 +++++++------ casanovo/denovo/model.py | 2 +- casanovo/denovo/model_runner.py | 4 +--- tests/conftest.py | 1 - 4 files changed, 9 insertions(+), 11 deletions(-) diff --git a/casanovo/data/db_utils.py b/casanovo/data/db_utils.py index 55127cff..fb9255db 100644 --- a/casanovo/data/db_utils.py +++ b/casanovo/data/db_utils.py @@ -136,9 +136,11 @@ def _digest_fasta( ) ) # Merge proteins from duplicate peptides. - peptides = peptides.groupby("peptide")["protein"].apply( - lambda proteins: sorted(set(proteins)) - ).reset_index() + peptides = ( + peptides.groupby("peptide")["protein"] + .apply(lambda proteins: sorted(set(proteins))) + .reset_index() + ) # Calculate the mass of each peptide. 
mass_calculator = depthcharge.masses.PeptideMass(residues="massivekb") peptides["calc_mass"] = peptides["peptide"].apply(mass_calculator.mass) @@ -185,9 +187,8 @@ def get_candidates( ) upper_bound = shift_raw_mass * (1 + precursor_tol_ppm) lower_bound = shift_raw_mass * (1 - precursor_tol_ppm) - mask |= ( - (self.db_peptides["calc_mass"] >= lower_bound) - & (self.db_peptides["calc_mass"] <= upper_bound) + mask |= (self.db_peptides["calc_mass"] >= lower_bound) & ( + self.db_peptides["calc_mass"] <= upper_bound ) return self.db_peptides.index[mask] diff --git a/casanovo/denovo/model.py b/casanovo/denovo/model.py index d309d11c..1c577815 100644 --- a/casanovo/denovo/model.py +++ b/casanovo/denovo/model.py @@ -1019,7 +1019,7 @@ def __init__(self, *args, **kwargs): def predict_step( self, batch: Tuple[torch.Tensor, torch.Tensor, np.ndarray, np.ndarray], - *args + *args, ) -> List[ms_io.PepSpecMatch]: """ A single prediction step. diff --git a/casanovo/denovo/model_runner.py b/casanovo/denovo/model_runner.py index 4e61b164..30f86f24 100644 --- a/casanovo/denovo/model_runner.py +++ b/casanovo/denovo/model_runner.py @@ -352,9 +352,7 @@ def initialize_trainer(self, train: bool) -> None: self.trainer = pl.Trainer(**trainer_cfg) - def initialize_model( - self, train: bool, db_search: bool = False - ) -> None: + def initialize_model(self, train: bool, db_search: bool = False) -> None: """Initialize the Casanovo model. Parameters diff --git a/tests/conftest.py b/tests/conftest.py index 009c0737..a35c5834 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -2,7 +2,6 @@ import depthcharge import numpy as np -import pandas as pd import psims import pytest import yaml From 7a42e8b0d2209adeb6445c324938d1b45f78ddff Mon Sep 17 00:00:00 2001 From: Wout Bittremieux Date: Sun, 10 Nov 2024 16:02:52 +0100 Subject: [PATCH 68/84] Minor fix --- casanovo/data/db_utils.py | 2 -- casanovo/denovo/dataloaders.py | 8 ++++---- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/casanovo/data/db_utils.py b/casanovo/data/db_utils.py index fb9255db..1ee9fab8 100644 --- a/casanovo/data/db_utils.py +++ b/casanovo/data/db_utils.py @@ -8,7 +8,6 @@ from typing import Dict, Iterator, Pattern, Set, Tuple import depthcharge.masses -import numba as nb import numpy as np import pandas as pd import pyteomics.fasta @@ -365,7 +364,6 @@ def _convert_from_modx( return swap_regex.sub(lambda x: swap_map[x.group()], seq) -@nb.njit def _to_neutral_mass(mz_mass: float, charge: int) -> float: """ Convert precursor m/z value to neutral mass. diff --git a/casanovo/denovo/dataloaders.py b/casanovo/denovo/dataloaders.py index f929b1e0..e9759eac 100644 --- a/casanovo/denovo/dataloaders.py +++ b/casanovo/denovo/dataloaders.py @@ -228,9 +228,9 @@ def prepare_batch( precursor_mzs = torch.tensor(precursor_mzs) precursor_charges = torch.tensor(precursor_charges) precursor_masses = (precursor_mzs - 1.007276) * precursor_charges - precursors = torch.hstack( + precursors = torch.vstack( [precursor_masses, precursor_charges, precursor_mzs] - ).float() + ).T.float() return spectra, precursors, np.asarray(spectrum_ids) @@ -274,8 +274,8 @@ def prepare_psm_batch( batch_precursors = [] batch_spectrum_ids = [] batch_peptides = [] - # FIXME: This can be optmized by using a sliding window instead of - # retrieving candidates for each spectrum indendently. + # FIXME: This can be optimized by using a sliding window instead of + # retrieving candidates for each spectrum independently. 
for i in range(len(batch)): candidate_pep = protein_database.get_candidates( precursors[i][2], precursors[i][1] From 17d58805419bf62b61bbd10a000d13327c66bde7 Mon Sep 17 00:00:00 2001 From: Wout Bittremieux Date: Sun, 10 Nov 2024 17:37:22 +0100 Subject: [PATCH 69/84] Fix output name crash --- casanovo/casanovo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/casanovo/casanovo.py b/casanovo/casanovo.py index 17786793..3d1811d3 100644 --- a/casanovo/casanovo.py +++ b/casanovo/casanovo.py @@ -255,7 +255,7 @@ def db_search( runner.db_search( peak_path, fasta_path, - str((output_path / output_root).with_suffix(".mztab")), + str((output_path / output_root_name).with_suffix(".mztab")), ) utils.log_run_report(start_time=start_time, end_time=time.time()) From fff5ca418828727d190755b1ebc16d98a69dcb5e Mon Sep 17 00:00:00 2001 From: Wout Bittremieux Date: Sun, 10 Nov 2024 17:58:52 +0100 Subject: [PATCH 70/84] Fix AA score masking --- casanovo/denovo/model.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/casanovo/denovo/model.py b/casanovo/denovo/model.py index 1c577815..88f3aaca 100644 --- a/casanovo/denovo/model.py +++ b/casanovo/denovo/model.py @@ -1149,12 +1149,12 @@ def _calc_match_score( per_aa_scores = batch_all_aa_scores[rows, cols, truth_aa_indices] per_aa_scores = per_aa_scores.cpu().detach().numpy() per_aa_scores[per_aa_scores == 0] += 1e-10 - score_mask = truth_aa_indices != 0 - per_aa_scores[~score_mask] = 0 + score_mask = (truth_aa_indices != 0).cpu().detach().numpy() peptide_scores, aa_scores = [], [] - for psm_score in per_aa_scores: - psm_score = np.trim_zeros(psm_score) - psm_aa_scores, psm_peptide_score = _aa_pep_score(psm_score, True) + for psm_score, psm_mask in zip(per_aa_scores, score_mask): + psm_aa_scores, psm_peptide_score = _aa_pep_score( + psm_score[psm_mask], True + ) peptide_scores.append(psm_peptide_score) aa_scores.append(psm_aa_scores) From d18d874301ea18c845db5483a81c4486a29f64c0 Mon Sep 17 00:00:00 2001 From: Wout Bittremieux Date: Sun, 10 Nov 2024 18:13:51 +0100 Subject: [PATCH 71/84] Fix PSM export --- casanovo/denovo/model.py | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/casanovo/denovo/model.py b/casanovo/denovo/model.py index 88f3aaca..716dd747 100644 --- a/casanovo/denovo/model.py +++ b/casanovo/denovo/model.py @@ -2,6 +2,7 @@ import collections import heapq +import itertools import logging import warnings from pathlib import Path @@ -1062,10 +1063,11 @@ def predict_step( batch_aa_scores, psm_batch[3], ): + spectrum_i = tuple(spectrum_i) predictions_all[spectrum_i].append( ms_io.PepSpecMatch( sequence=peptide, - spectrum_id=tuple(spectrum_i), + spectrum_id=spectrum_i, peptide_score=peptide_score, charge=int(charge), calc_mz=self.peptide_mass_calculator.mass( @@ -1079,16 +1081,20 @@ def predict_step( ) ) # Filter the top-scoring prediction(s) for each spectrum. 
- predictions = [ - *( - sorted( - spectrum_predictions, - key=lambda p: p.peptide_score, - reverse=True, - )[: self.top_match] - for spectrum_predictions in predictions_all.values() + predictions = list( + itertools.chain.from_iterable( + [ + *( + sorted( + spectrum_predictions, + key=lambda p: p.peptide_score, + reverse=True, + )[: self.top_match] + for spectrum_predictions in predictions_all.values() + ) + ] ) - ] + ) return predictions def on_predict_batch_end( From b12abd6fc05d868253ad765540b69ef8b4625395 Mon Sep 17 00:00:00 2001 From: Wout Bittremieux Date: Thu, 14 Nov 2024 14:19:22 +0100 Subject: [PATCH 72/84] Less verbose logging of skipped peptides --- casanovo/data/db_utils.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/casanovo/data/db_utils.py b/casanovo/data/db_utils.py index 1ee9fab8..81b9daf8 100644 --- a/casanovo/data/db_utils.py +++ b/casanovo/data/db_utils.py @@ -300,6 +300,7 @@ def _peptide_generator( "Enzyme %s not recognized. Interpreting as cleavage rule.", enzyme, ) + n_skipped = 0 if digestion == "non-specific": for header, seq in pyteomics.fasta.read(fasta_filename): protein = header.split()[0] @@ -311,7 +312,8 @@ def _peptide_generator( ): peptide = seq[i:j] if any(aa not in valid_aa for aa in peptide): - logger.warning( + n_skipped += 1 + logger.debug( "Skipping peptide with unknown amino acids: %s", peptide, ) @@ -329,12 +331,17 @@ def _peptide_generator( for peptide in peptides: if min_peptide_len <= len(peptide) <= max_peptide_len: if any(aa not in valid_aa for aa in peptide): - logger.warning( + n_skipped += 1 + logger.debug( "Skipping peptide with unknown amino acids: %s", peptide, ) else: yield peptide, protein + if n_skipped > 0: + logger.warning( + "Skipped %d peptides with unknown amino acids", n_skipped + ) def _convert_from_modx( From b577d594e38c3d25dd3dead874d8fdde6c6c39b8 Mon Sep 17 00:00:00 2001 From: Wout Bittremieux Date: Thu, 14 Nov 2024 16:02:02 +0100 Subject: [PATCH 73/84] Appropriate end-of-run reporting --- casanovo/casanovo.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/casanovo/casanovo.py b/casanovo/casanovo.py index 3d1811d3..fef73a9b 100644 --- a/casanovo/casanovo.py +++ b/casanovo/casanovo.py @@ -196,9 +196,8 @@ def sequence( str((output_path / output_root_name).with_suffix(".mztab")), evaluate=evaluate, ) - psms = runner.writer.psms - utils.log_sequencing_report( - psms, start_time=start_time, end_time=time.time() + utils.log_annotate_report( + runner.writer.psms, start_time=start_time, end_time=time.time() ) @@ -257,7 +256,9 @@ def db_search( fasta_path, str((output_path / output_root_name).with_suffix(".mztab")), ) - utils.log_run_report(start_time=start_time, end_time=time.time()) + utils.log_annotate_report( + runner.writer.psms, start_time=start_time, end_time=time.time() + ) @main.command(cls=_SharedParams) From 510953c5c065f616f64c215483cfca8d05717d23 Mon Sep 17 00:00:00 2001 From: Wout Bittremieux Date: Thu, 14 Nov 2024 16:04:43 +0100 Subject: [PATCH 74/84] Fix PSM export from de novo --- casanovo/denovo/model.py | 29 ++++------------------------- 1 file changed, 4 insertions(+), 25 deletions(-) diff --git a/casanovo/denovo/model.py b/casanovo/denovo/model.py index 716dd747..e5a22760 100644 --- a/casanovo/denovo/model.py +++ b/casanovo/denovo/model.py @@ -901,9 +901,7 @@ def on_validation_epoch_end(self) -> None: self._log_history() def on_predict_batch_end( - self, - outputs: List[Tuple[np.ndarray, List[str], torch.Tensor]], - *args, + self, outputs: 
List[ms_io.PepSpecMatch], *args ) -> None: """ Write the predicted peptide sequences and amino acid scores to @@ -911,28 +909,9 @@ def on_predict_batch_end( """ if self.out_writer is None: return - # Triply nested lists: results -> batch -> step -> spectrum. - for ( - spectrum_i, - charge, - precursor_mz, - peptide, - peptide_score, - aa_scores, - ) in outputs: - if len(peptide) == 0: - continue - self.out_writer.psms.append( - psm.PepSpecMatch( - sequence=peptide, - spectrum_id=tuple(spectrum_i), - peptide_score=peptide_score, - charge=int(charge), - calc_mz=precursor_mz, - exp_mz=self.peptide_mass_calculator.mass(peptide, charge), - aa_scores=aa_scores, - ) - ) + for pred in outputs: + if len(pred.sequence) > 0: + self.out_writer.psms.append(pred) def _log_history(self) -> None: """ From 3c69711b08045b81aa9637eb97950c0bbb4669d9 Mon Sep 17 00:00:00 2001 From: Wout Bittremieux Date: Thu, 14 Nov 2024 16:05:33 +0100 Subject: [PATCH 75/84] Generalize end-of-run reporting --- casanovo/utils.py | 47 +++++++++++++++++++++++------------------------ 1 file changed, 23 insertions(+), 24 deletions(-) diff --git a/casanovo/utils.py b/casanovo/utils.py index 6e4273e3..86e0748f 100644 --- a/casanovo/utils.py +++ b/casanovo/utils.py @@ -8,7 +8,7 @@ import socket import sys from datetime import datetime -from typing import Tuple, Dict, List, Optional, Iterable +from typing import Dict, Iterable, List, Optional, Tuple import numpy as np import pandas as pd @@ -18,7 +18,7 @@ from .data.psm import PepSpecMatch -SCORE_BINS = [0.0, 0.5, 0.9, 0.95, 0.99] +SCORE_BINS = (0.0, 0.5, 0.9, 0.95, 0.99) logger = logging.getLogger("casanovo") @@ -27,8 +27,8 @@ def n_workers() -> int: """ Get the number of workers to use for data loading. - This is the maximum number of CPUs allowed for the process, scaled for the - number of GPUs being used. + This is the maximum number of CPUs allowed for the process, scaled + for the number of GPUs being used. On Windows and MacOS, we only use the main process. See: https://discuss.pytorch.org/t/errors-when-using-num-workers-0-in-dataloader/97564/4 @@ -79,7 +79,7 @@ def split_version(version: str) -> Tuple[str, str, str]: def get_score_bins( - scores: pd.Series, score_bins: List[float] + scores: pd.Series, score_bins: Iterable[float] ) -> Dict[float, int]: """ Get binned confidence scores @@ -92,14 +92,14 @@ def get_score_bins( ---------- scores: pd.Series Series of assigned peptide scores. - score_bins: List[float] + score_bins: Iterable[float] Confidence scores to map. Returns ------- score_bin_dict: Dict[float, int] - Dictionary mapping each confidence score to the number of spectra - with a confidence greater than or equal to it. + Dictionary mapping each confidence score to the number of + spectra with a confidence greater than or equal to it. """ return {score: (scores >= score).sum() for score in score_bins} @@ -116,8 +116,8 @@ def get_peptide_lengths(sequences: pd.Series) -> np.ndarray: Returns ------- sequence_lengths: np.ndarray - Numpy array containing the length of each sequence, listed in the - same order that the sequences are provided in. + Numpy array containing the length of each sequence, listed in + the same order that the sequences are provided in. 
""" # Mass modifications do not contribute to sequence length # FIXME: If PTMs are represented in ProForma notation this filtering @@ -126,7 +126,7 @@ def get_peptide_lengths(sequences: pd.Series) -> np.ndarray: def get_report_dict( - results_table: pd.DataFrame, score_bins: List[float] = SCORE_BINS + results_table: pd.DataFrame, score_bins: Iterable[float] = SCORE_BINS ) -> Optional[Dict]: """ Generate sequencing run report @@ -134,15 +134,16 @@ def get_report_dict( Parameters ---------- results_table: pd.DataFrame - Parsed spectrum match table - score_bins: List[float], Optional - Confidence scores for creating confidence CMF, see get_score_bins + Parsed spectrum match table. + score_bins: Iterable[float], Optional + Confidence scores for creating confidence CMF, see + `get_score_bins`. Returns ------- report_gen: Dict Generated report represented as a dictionary, or None if no - sequencing predictions were logged + sequencing predictions were logged. """ if results_table.empty: return None @@ -195,28 +196,26 @@ def log_run_report( logger.info("Max GPU Memory Utilization: %d MiB", gpu_util >> 20) -def log_sequencing_report( +def log_annotate_report( predictions: List[PepSpecMatch], start_time: Optional[float] = None, end_time: Optional[float] = None, - score_bins: List[float] = SCORE_BINS, + score_bins: Iterable[float] = SCORE_BINS, ) -> None: """ - Log sequencing run report + Log run annotation report. Parameters ---------- - next_prediction : Tuple[ - str, Tuple[str, str], float, float, float, float, str - ] - PSM predictions + predictions: List[PepSpecMatch] + PSM predictions. start_time : Optional[float], default=None The start time of the sequencing run in seconds since the epoch. end_time : Optional[float], default=None The end time of the sequencing run in seconds since the epoch. - score_bins: List[float], Optional + score_bins: Iterable[float], Optional Confidence scores for creating confidence score distribution, - see get_score_bins + see `get_score_bins`. 
""" log_run_report(start_time=start_time, end_time=end_time) run_report = get_report_dict( From 15265048b78960c7b933d81a13a1a3e024d782d5 Mon Sep 17 00:00:00 2001 From: Wout Bittremieux Date: Thu, 14 Nov 2024 16:05:54 +0100 Subject: [PATCH 76/84] Log additional information on spectra with no matching candidates --- casanovo/denovo/dataloaders.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/casanovo/denovo/dataloaders.py b/casanovo/denovo/dataloaders.py index e9759eac..4db36b33 100644 --- a/casanovo/denovo/dataloaders.py +++ b/casanovo/denovo/dataloaders.py @@ -282,7 +282,11 @@ def prepare_psm_batch( ) if len(candidate_pep) == 0: logger.info( - "No candidate peptides found for spectrum %s", spectrum_ids[i] + "No candidate peptides found for spectrum %s with precursor " + "charge %d and precursor m/z %f", + spectrum_ids[i], + precursors[i][1], + precursors[i][2], ) else: batch_spectra.append( From f18332d48a2f65cde554ea2bfefab8ea10a7a416 Mon Sep 17 00:00:00 2001 From: Wout Bittremieux Date: Thu, 14 Nov 2024 16:16:37 +0100 Subject: [PATCH 77/84] Fix linting issue --- casanovo/data/db_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/casanovo/data/db_utils.py b/casanovo/data/db_utils.py index 81b9daf8..516e91a4 100644 --- a/casanovo/data/db_utils.py +++ b/casanovo/data/db_utils.py @@ -341,7 +341,7 @@ def _peptide_generator( if n_skipped > 0: logger.warning( "Skipped %d peptides with unknown amino acids", n_skipped - ) + ) def _convert_from_modx( From a71c4404ea2220ba966e74669bb0b2ab0348f3c2 Mon Sep 17 00:00:00 2001 From: Wout Bittremieux Date: Thu, 14 Nov 2024 16:41:15 +0100 Subject: [PATCH 78/84] Fix some testing warnings --- casanovo/denovo/model.py | 2 +- tests/test_integration.py | 4 ++-- tests/unit_tests/test_unit.py | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/casanovo/denovo/model.py b/casanovo/denovo/model.py index e5a22760..68a8fcc5 100644 --- a/casanovo/denovo/model.py +++ b/casanovo/denovo/model.py @@ -18,7 +18,7 @@ from . import evaluate from .. import config -from ..data import ms_io, psm +from ..data import ms_io logger = logging.getLogger("casanovo") diff --git a/tests/test_integration.py b/tests/test_integration.py index eeeb498f..7dab1b5b 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -177,8 +177,8 @@ def test_train_and_run( mztab = pyteomics.mztab.MzTab(str(output_filename)) filename = "small.mgf" # Verify that the input annotated peak file is listed in the metadata. - assert f"ms_run[1]-location" in mztab.metadata - assert mztab.metadata[f"ms_run[1]-location"].endswith(filename) + assert "ms_run[1]-location" in mztab.metadata + assert mztab.metadata["ms_run[1]-location"].endswith(filename) # Verify that the spectrum predictions are correct # and indexed according to the peak input file type. 
diff --git a/tests/unit_tests/test_unit.py b/tests/unit_tests/test_unit.py index 0d4812f9..00617457 100644 --- a/tests/unit_tests/test_unit.py +++ b/tests/unit_tests/test_unit.py @@ -454,14 +454,14 @@ def test_aa_pep_score(): def test_peptide_generator_errors(residues_dict, tiny_fasta_file): - with pytest.raises(FileNotFoundError) as e_info: + with pytest.raises(FileNotFoundError): [ (a, b) for a, b in db_utils._peptide_generator( "fail.fasta", "trypsin", "full", 0, 5, 10, residues_dict ) ] - with pytest.raises(ValueError) as e_info: + with pytest.raises(ValueError): [ (a, b) for a, b in db_utils._peptide_generator( From 4aa257b1e43d944dda4160206a30e69e7b56c817 Mon Sep 17 00:00:00 2001 From: Wout Bittremieux Date: Thu, 14 Nov 2024 17:03:08 +0100 Subject: [PATCH 79/84] Log digestion settings --- casanovo/data/db_utils.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/casanovo/data/db_utils.py b/casanovo/data/db_utils.py index 516e91a4..32d975bb 100644 --- a/casanovo/data/db_utils.py +++ b/casanovo/data/db_utils.py @@ -88,6 +88,13 @@ def __init__( max_peptide_len, set([aa[0] for aa in residues.keys() if aa[0].isalpha()]), ) + logger.info( + "Digesting FASTA file (enzyme = %s, digestion = %s, missed " + "cleavages = %d)...", + enzyme, + digestion, + missed_cleavages, + ) self.db_peptides = self._digest_fasta(peptide_generator) self.precursor_tolerance = precursor_tolerance self.isotope_error = isotope_error @@ -150,7 +157,7 @@ def _digest_fasta( peptides.set_index("peptide", inplace=True) logger.info( - "Digestion complete. %d peptides generated.", len(peptides) + "Digestion complete. %s peptides generated.", f"{len(peptides):,d}" ) return peptides From d54b66fa5242d50a625bcb5654db99c8d7a849a9 Mon Sep 17 00:00:00 2001 From: Wout Bittremieux Date: Thu, 14 Nov 2024 17:05:16 +0100 Subject: [PATCH 80/84] Reduce logging level for spectra without candidates --- casanovo/denovo/dataloaders.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/casanovo/denovo/dataloaders.py b/casanovo/denovo/dataloaders.py index 4db36b33..cdbf71bf 100644 --- a/casanovo/denovo/dataloaders.py +++ b/casanovo/denovo/dataloaders.py @@ -281,7 +281,7 @@ def prepare_psm_batch( precursors[i][2], precursors[i][1] ) if len(candidate_pep) == 0: - logger.info( + logger.debug( "No candidate peptides found for spectrum %s with precursor " "charge %d and precursor m/z %f", spectrum_ids[i], From d97e251428a6f6108d4d3c954d648efde4ce82fd Mon Sep 17 00:00:00 2001 From: Wout Bittremieux Date: Mon, 18 Nov 2024 16:30:54 +0100 Subject: [PATCH 81/84] Round peptide masses for consistent sorting --- casanovo/data/db_utils.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/casanovo/data/db_utils.py b/casanovo/data/db_utils.py index 32d975bb..95ef2d13 100644 --- a/casanovo/data/db_utils.py +++ b/casanovo/data/db_utils.py @@ -149,7 +149,10 @@ def _digest_fasta( ) # Calculate the mass of each peptide. mass_calculator = depthcharge.masses.PeptideMass(residues="massivekb") - peptides["calc_mass"] = peptides["peptide"].apply(mass_calculator.mass) + peptides["calc_mass"] = ( + peptides["peptide"].apply(mass_calculator.mass) + .round(5) + ) # Sort by peptide mass and index by peptide sequence. 
peptides.sort_values( by=["calc_mass", "peptide"], ascending=True, inplace=True From db5e00f063829e12f731a1106f61ab0e8bab1788 Mon Sep 17 00:00:00 2001 From: Wout Bittremieux Date: Mon, 18 Nov 2024 16:39:37 +0100 Subject: [PATCH 82/84] Fox linting --- casanovo/data/db_utils.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/casanovo/data/db_utils.py b/casanovo/data/db_utils.py index 95ef2d13..d3670930 100644 --- a/casanovo/data/db_utils.py +++ b/casanovo/data/db_utils.py @@ -150,8 +150,7 @@ def _digest_fasta( # Calculate the mass of each peptide. mass_calculator = depthcharge.masses.PeptideMass(residues="massivekb") peptides["calc_mass"] = ( - peptides["peptide"].apply(mass_calculator.mass) - .round(5) + peptides["peptide"].apply(mass_calculator.mass).round(5) ) # Sort by peptide mass and index by peptide sequence. peptides.sort_values( From 1e565c4acaead600e67c0b1c59ec51ca7ebb2c57 Mon Sep 17 00:00:00 2001 From: Wout Bittremieux Date: Mon, 18 Nov 2024 20:31:56 +0100 Subject: [PATCH 83/84] Remove superfluous PSM export --- casanovo/denovo/model.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/casanovo/denovo/model.py b/casanovo/denovo/model.py index 68a8fcc5..f350f3b3 100644 --- a/casanovo/denovo/model.py +++ b/casanovo/denovo/model.py @@ -1076,14 +1076,6 @@ def predict_step( ) return predictions - def on_predict_batch_end( - self, outputs: List[ms_io.PepSpecMatch], *args - ) -> None: - """ - Write the database search results to the output file. - """ - self.out_writer.psms.extend(outputs) - def _calc_match_score( batch_all_aa_scores: torch.Tensor, From 18999cf2f0e682437854fe7f0db746321e27f641 Mon Sep 17 00:00:00 2001 From: Wout Bittremieux Date: Mon, 18 Nov 2024 20:37:44 +0100 Subject: [PATCH 84/84] Update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 240185d1..c73eec8b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ### Added +- Casanovo-DB mode (`casanovo db_search`) to use Casanovo as a learned score function for sequence database searching (given a FASTA protein database). - During training, model checkpoints will be saved at the end of each training epoch in addition to the checkpoints saved at the end of every validation run. - Besides as a local file, model weights can be specified from a URL. Upon initial download, the weights file is cached for future re-use. - Training and optimizer metrics can now be logged to a CSV file by setting the `log_metrics` config file option to true - the CSV file will be written to under a sub-directory of the output directory named `csv_logs`.
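
As a supplement to the Casanovo-DB patches above, the snippet below is a minimal, self-contained sketch of the teacher-forced scoring idea documented in `_calc_match_score` and `_aa_pep_score`: softmax amino acid probabilities are gathered at the ground-truth token indices, padding positions are masked out, the peptide score is taken as the mean of the per-residue probabilities, and the residue scores are then averaged with that peptide score. It is not code from the Casanovo repository; all function and variable names are illustrative assumptions, and the tensor shapes (PSMs x decoding steps x vocabulary size, with token index 0 as padding) are assumed from the docstrings in the diffs.

# Illustrative sketch only -- not part of the patch series above.
import numpy as np
import torch


def sketch_psm_scores(aa_probs: torch.Tensor, truth_tokens: torch.Tensor):
    """Score each PSM as the mean per-residue probability of its peptide.

    aa_probs : (n_psms, n_steps, n_tokens) softmax scores from teacher forcing.
    truth_tokens : (n_psms, n_steps) ground-truth token indices (0 = padding).
    """
    n_psms, n_steps, _ = aa_probs.shape
    rows = torch.arange(n_psms).unsqueeze(1).expand(-1, n_steps)
    cols = torch.arange(n_steps).unsqueeze(0).expand(n_psms, -1)
    # Gather the probability assigned to the true residue at every step.
    per_aa = aa_probs[rows, cols, truth_tokens].cpu().numpy()
    keep_mask = (truth_tokens != 0).cpu().numpy()  # Drop padding positions.
    peptide_scores, residue_scores = [], []
    for probs, keep in zip(per_aa, keep_mask):
        scores = probs[keep]
        pep_score = float(scores.mean())  # Peptide score = mean residue score.
        # Residue scores are the mean of the raw scores and the peptide score.
        residue_scores.append((scores + pep_score) / 2)
        peptide_scores.append(pep_score)
    return peptide_scores, residue_scores


# Example usage: two PSMs, three decoding steps, a five-token vocabulary.
probs = torch.softmax(torch.randn(2, 3, 5), dim=-1)
truth = torch.tensor([[3, 1, 0], [2, 4, 1]])  # 0 marks padding.
pep_scores, aa_scores = sketch_psm_scores(probs, truth)

Reusing the same mean-based aggregation for database-search candidates presumably keeps the Casanovo-DB scores on a scale comparable to the de novo peptide scores, since both modes report scores through the same `_aa_pep_score` helper in the diffs above.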