Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rename max_iters to cosine_schedule_period_iters #300

Merged
merged 16 commits into from
Feb 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,14 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),

## [Unreleased]

### Added

- A deprecation warning will be issued when deprecated config options are used in the config file or in the model weights file.

### Changed

- Config option `max_iters` has been renamed to `cosine_schedule_period_iters` to better reflect that it controls the number of iterations for the cosine half period of the learning rate.

## [4.1.0] - 2024-02-16

### Changed
Expand Down
20 changes: 19 additions & 1 deletion casanovo/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import logging
import shutil
import warnings
from pathlib import Path
from typing import Optional, Dict, Callable, Tuple, Union

Expand All @@ -12,6 +13,14 @@
logger = logging.getLogger("casanovo")


# FIXME: This contains deprecated config options to be removed in the next major
# version update.
_config_deprecated = dict(
every_n_train_steps="val_check_interval",
max_iters="cosine_schedule_period_iters",
)


class Config:
"""The Casanovo configuration options.

Expand Down Expand Up @@ -56,7 +65,7 @@ class Config:
tb_summarywriter=str,
train_label_smoothing=float,
warmup_iters=int,
max_iters=int,
cosine_schedule_period_iters=int,
learning_rate=float,
weight_decay=float,
train_batch_size=int,
Expand Down Expand Up @@ -84,6 +93,15 @@ def __init__(self, config_file: Optional[str] = None):
else:
with Path(config_file).open() as f_in:
self._user_config = yaml.safe_load(f_in)
# Remap deprecated config entries.
for old, new in _config_deprecated.items():
if old in self._user_config:
self._user_config[new] = self._user_config.pop(old)
warnings.warn(
f"Deprecated config option '{old}' remapped to "
f"'{new}'",
DeprecationWarning,
)
# Check for missing entries in config file.
config_missing = self._params.keys() - self._user_config.keys()
if len(config_missing) > 0:
Expand Down
89 changes: 44 additions & 45 deletions casanovo/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,103 +4,102 @@
###

###
# The following parameters can be modified when running inference or
# when fine-tuning an existing Casanovo model.
# The following parameters can be modified when running inference or when
# fine-tuning an existing Casanovo model.
###

# Max absolute difference allowed with respect to observed precursor m/z
# Max absolute difference allowed with respect to observed precursor m/z.
# Predictions outside the tolerance range are assigned a negative peptide score.
precursor_mass_tol: 50 # ppm
# Isotopes to consider when comparing predicted and observed precursor m/z's
# Isotopes to consider when comparing predicted and observed precursor m/z's.
isotope_error_range: [0, 1]
# The minimum length of predicted peptides
# The minimum length of predicted peptides.
min_peptide_len: 6
# Number of spectra in one inference batch
# Number of spectra in one inference batch.
predict_batch_size: 1024
# Number of beams used in beam search
# Number of beams used in beam search.
n_beams: 1
# Number of PSMs for each spectrum
# Number of PSMs for each spectrum.
top_match: 1
# The hardware accelerator to use. Must be one of:
# "cpu", "gpu", "tpu", "ipu", "hpu", "mps", or "auto"
# "cpu", "gpu", "tpu", "ipu", "hpu", "mps", or "auto".
accelerator: "auto"
# The devices to use. Can be set to a positive number int,
# or the value -1 to indicate all available devices should be used,
# If left empty, the appropriate number will be automatically
# selected for automatic selected on the chosen accelerator.
# The devices to use. Can be set to a positive integer, or the value -1 to
# indicate all available devices should be used. If left empty, the appropriate
# number will be automatically selected based on the chosen accelerator.
devices:

###
# The following parameters should only be modified if you are training a new
# Casanovo model from scratch.
###

# Random seed to ensure reproducible results
# Random seed to ensure reproducible results.
random_seed: 454

# OUTPUT OPTIONS
# Logging frequency in training steps
# Logging frequency in training steps.
n_log: 1
# Tensorboard directory to use for keeping track of training metrics
# Tensorboard directory to use for keeping track of training metrics.
tb_summarywriter:
# Save the top k model checkpoints during training. -1 saves all, and
# leaving this field empty saves none.
# Save the top k model checkpoints during training. -1 saves all, and leaving
# this field empty saves none.
save_top_k: 5
# Path to saved checkpoints
# Path to saved checkpoints.
model_save_folder_path: ""
# Model validation and checkpointing frequency in training steps
# Model validation and checkpointing frequency in training steps.
val_check_interval: 50_000

# SPECTRUM PROCESSING OPTIONS
# Number of the most intense peaks to retain, any remaining peaks are discarded
# Number of the most intense peaks to retain, any remaining peaks are discarded.
n_peaks: 150
# Min peak m/z allowed, peaks with smaller m/z are discarded
# Min peak m/z allowed, peaks with smaller m/z are discarded.
min_mz: 50.0
# Max peak m/z allowed, peaks with larger m/z are discarded
# Max peak m/z allowed, peaks with larger m/z are discarded.
max_mz: 2500.0
# Min peak intensity allowed, less intense peaks are discarded
# Min peak intensity allowed, less intense peaks are discarded.
min_intensity: 0.01
# Max absolute m/z difference allowed when removing the precursor peak
# Max absolute m/z difference allowed when removing the precursor peak.
remove_precursor_tol: 2.0 # Da
# Max precursor charge allowed, spectra with larger charge are skipped
# Max precursor charge allowed, spectra with larger charge are skipped.
max_charge: 10

# MODEL ARCHITECTURE OPTIONS
# Dimensionality of latent representations, i.e. peak embeddings
# Dimensionality of latent representations, i.e. peak embeddings.
dim_model: 512
# Number of attention heads
# Number of attention heads.
n_head: 8
# Dimensionality of fully connected layers
# Dimensionality of fully connected layers.
dim_feedforward: 1024
# Number of transformer layers in spectrum encoder and peptide decoder
# Number of transformer layers in spectrum encoder and peptide decoder.
n_layers: 9
# Dropout rate for model weights
# Dropout rate for model weights.
dropout: 0.0
# Number of dimensions to use for encoding peak intensity
# Projected up to ``dim_model`` by default and summed with the peak m/z encoding
# Number of dimensions to use for encoding peak intensity.
# Projected up to `dim_model` by default and summed with the peak m/z encoding.
dim_intensity:
# Max decoded peptide length
# Max decoded peptide length.
max_length: 100
# Number of warmup iterations for learning rate scheduler
# The number of iterations for the linear warm-up of the learning rate.
warmup_iters: 100_000
# Max number of iterations for learning rate scheduler
max_iters: 600_000
# Learning rate for weight updates during training
# The number of iterations for the cosine half period of the learning rate.
cosine_schedule_period_iters: 600_000
# Learning rate for weight updates during training.
learning_rate: 5e-4
# Regularization term for weight updates
# Regularization term for weight updates.
weight_decay: 1e-5
# Amount of label smoothing when computing the training loss
# Amount of label smoothing when computing the training loss.
train_label_smoothing: 0.01

# TRAINING/INFERENCE OPTIONS
# Number of spectra in one training batch
# Number of spectra in one training batch.
train_batch_size: 32
# Max number of training epochs
# Max number of training epochs.
max_epochs: 30
# Number of validation steps to run before training begins
# Number of validation steps to run before training begins.
num_sanity_val_steps: 0
# Calculate peptide and amino acid precision during training. this
# is expensive, so we recommend against it.
# Calculate peptide and amino acid precision during training.
# This is expensive, so we recommend against it.
calculate_precision: False

# AMINO ACID AND MODIFICATION VOCABULARY
Expand Down
64 changes: 40 additions & 24 deletions casanovo/denovo/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import collections
import heapq
import logging
import warnings
from typing import Any, Dict, Iterable, List, Optional, Tuple, Union

import depthcharge.masses
Expand All @@ -14,6 +15,7 @@
from depthcharge.components import ModelMixin, PeptideDecoder, SpectrumEncoder

from . import evaluate
from .. import config
from ..data import ms_io

logger = logging.getLogger("casanovo")
Expand Down Expand Up @@ -46,7 +48,7 @@ class Spec2Pep(pl.LightningModule, ModelMixin):
linear layer, then summed with the m/z encoding for each peak.
max_length : int
The maximum peptide length to decode.
residues: Union[Dict[str, float], str]
residues : Union[Dict[str, float], str]
The amino acid dictionary and their masses. By default ("canonical") this
is only the 20 canonical amino acids, with cysteine carbamidomethylated.
If "massivekb", this dictionary will include the modifications found in
Expand All @@ -65,24 +67,24 @@ class Spec2Pep(pl.LightningModule, ModelMixin):
< precursor_mass_tol`
min_peptide_len : int
The minimum length of predicted peptides.
n_beams: int
n_beams : int
Number of beams used during beam search decoding.
top_match: int
top_match : int
Number of PSMs to return for each spectrum.
n_log : int
The number of epochs to wait between logging messages.
tb_summarywriter: Optional[str]
tb_summarywriter : Optional[str]
Folder path to record performance metrics during training. If ``None``,
don't use a ``SummaryWriter``.
train_label_smoothing: float
train_label_smoothing : float
Smoothing factor when calculating the training loss.
warmup_iters: int
The number of warm up iterations for the learning rate scheduler.
max_iters: int
The total number of iterations for the learning rate scheduler.
out_writer: Optional[str]
warmup_iters : int
The number of iterations for the linear warm-up of the learning rate.
cosine_schedule_period_iters : int
The number of iterations for the cosine half period of the learning rate.
out_writer : Optional[str]
The output writer for the prediction results.
calculate_precision: bool
calculate_precision : bool
Calculate the validation set precision during training.
This is expensive.
**kwargs : Dict
Expand Down Expand Up @@ -111,7 +113,7 @@ def __init__(
] = None,
train_label_smoothing: float = 0.01,
warmup_iters: int = 100_000,
max_iters: int = 600_000,
cosine_schedule_period_iters: int = 600_000,
out_writer: Optional[ms_io.MztabWriter] = None,
calculate_precision: bool = False,
**kwargs: Dict,
Expand Down Expand Up @@ -144,7 +146,15 @@ def __init__(
self.val_celoss = torch.nn.CrossEntropyLoss(ignore_index=0)
# Optimizer settings.
self.warmup_iters = warmup_iters
self.max_iters = max_iters
self.cosine_schedule_period_iters = cosine_schedule_period_iters
# `kwargs` will contain additional arguments as well as unrecognized
# arguments, including deprecated ones. Remove the deprecated ones.
for k in config._config_deprecated:
kwargs.pop(k, None)
warnings.warn(
f"Deprecated hyperparameter '{k}' removed from the model.",
DeprecationWarning,
)
self.opt_kwargs = kwargs

# Data properties.
Expand Down Expand Up @@ -960,39 +970,45 @@ def configure_optimizers(
optimizer = torch.optim.Adam(self.parameters(), **self.opt_kwargs)
# Apply learning rate scheduler per step.
lr_scheduler = CosineWarmupScheduler(
optimizer, warmup=self.warmup_iters, max_iters=self.max_iters
optimizer, self.warmup_iters, self.cosine_schedule_period_iters
)
return [optimizer], {"scheduler": lr_scheduler, "interval": "step"}


class CosineWarmupScheduler(torch.optim.lr_scheduler._LRScheduler):
"""
Learning rate scheduler with linear warm up followed by cosine shaped decay.
Learning rate scheduler with linear warm-up followed by cosine shaped decay.

Parameters
----------
optimizer : torch.optim.Optimizer
Optimizer object.
warmup : int
The number of warm up iterations.
max_iters : torch.optim
The total number of iterations.
warmup_iters : int
The number of iterations for the linear warm-up of the learning rate.
cosine_schedule_period_iters : int
The number of iterations for the cosine half period of the learning rate.
"""

def __init__(
self, optimizer: torch.optim.Optimizer, warmup: int, max_iters: int
self,
optimizer: torch.optim.Optimizer,
warmup_iters: int,
cosine_schedule_period_iters: int,
):
self.warmup, self.max_iters = warmup, max_iters
self.warmup_iters = warmup_iters
self.cosine_schedule_period_iters = cosine_schedule_period_iters
super().__init__(optimizer)

def get_lr(self):
lr_factor = self.get_lr_factor(epoch=self.last_epoch)
return [base_lr * lr_factor for base_lr in self.base_lrs]

def get_lr_factor(self, epoch):
lr_factor = 0.5 * (1 + np.cos(np.pi * epoch / self.max_iters))
if epoch <= self.warmup:
lr_factor *= epoch / self.warmup
lr_factor = 0.5 * (
1 + np.cos(np.pi * epoch / self.cosine_schedule_period_iters)
)
if epoch <= self.warmup_iters:
lr_factor *= epoch / self.warmup_iters
return lr_factor


Expand Down
12 changes: 6 additions & 6 deletions casanovo/denovo/model_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,8 +204,8 @@ def initialize_model(self, train: bool) -> None:
Parameters
----------
train : bool
Determines whether to set the model up for model training
or evaluation / inference.
Determines whether to set the model up for model training or
evaluation / inference.
"""
model_params = dict(
dim_model=self.config.dim_model,
Expand All @@ -226,14 +226,14 @@ def initialize_model(self, train: bool) -> None:
tb_summarywriter=self.config.tb_summarywriter,
train_label_smoothing=self.config.train_label_smoothing,
warmup_iters=self.config.warmup_iters,
max_iters=self.config.max_iters,
cosine_schedule_period_iters=self.config.cosine_schedule_period_iters,
lr=self.config.learning_rate,
weight_decay=self.config.weight_decay,
out_writer=self.writer,
calculate_precision=self.config.calculate_precision,
)

# Reconfigurable non-architecture related parameters for a loaded model
# Reconfigurable non-architecture related parameters for a loaded model.
loaded_model_params = dict(
max_length=self.config.max_length,
precursor_mass_tol=self.config.precursor_mass_tol,
Expand All @@ -245,7 +245,7 @@ def initialize_model(self, train: bool) -> None:
tb_summarywriter=self.config.tb_summarywriter,
train_label_smoothing=self.config.train_label_smoothing,
warmup_iters=self.config.warmup_iters,
max_iters=self.config.max_iters,
cosine_schedule_period_iters=self.config.cosine_schedule_period_iters,
lr=self.config.learning_rate,
weight_decay=self.config.weight_decay,
out_writer=self.writer,
Expand Down Expand Up @@ -300,7 +300,7 @@ def initialize_model(self, train: bool) -> None:
except RuntimeError:
raise RuntimeError(
"Weights file incompatible with the current version of "
"Casanovo. "
"Casanovo."
)

def initialize_data_module(
Expand Down
Loading
Loading