From b18442b818770f793a22279e6505455bcc6e3b6e Mon Sep 17 00:00:00 2001
From: IKostric
Date: Fri, 25 Aug 2023 12:07:52 +0200
Subject: [PATCH] Address review comments

---
 moviebot/core/core_types.py                   |  1 +
 moviebot/nlu/annotation/joint_bert/dataset.py | 28 ++++++-
 .../nlu/annotation/joint_bert/joint_bert.py   |  9 ++-
 .../annotation/joint_bert/joint_bert_train.py | 78 +++++++------------
 .../nlu/annotation/joint_bert/slot_mapping.py |  1 +
 moviebot/nlu/neural_nlu.py                    |  8 +-
 moviebot/nlu/nlu.py                           |  8 +-
 moviebot/nlu/rule_based_nlu.py                | 13 ++--
 8 files changed, 80 insertions(+), 66 deletions(-)

diff --git a/moviebot/core/core_types.py b/moviebot/core/core_types.py
index e0ded91..f5553cd 100644
--- a/moviebot/core/core_types.py
+++ b/moviebot/core/core_types.py
@@ -1,3 +1,4 @@
+"""Core types for the moviebot package."""
 from typing import TYPE_CHECKING, Dict, List, Union
 
 if TYPE_CHECKING:
diff --git a/moviebot/nlu/annotation/joint_bert/dataset.py b/moviebot/nlu/annotation/joint_bert/dataset.py
index e87ca42..bb00b46 100644
--- a/moviebot/nlu/annotation/joint_bert/dataset.py
+++ b/moviebot/nlu/annotation/joint_bert/dataset.py
@@ -1,3 +1,5 @@
+"""Dataset loading for training and evaluating the JointBERT model."""
+import os
 import re
 from typing import Dict, Generator, List, Tuple
 
@@ -26,6 +28,9 @@ def load_yaml(path: str) -> Dict[str, List[str]]:
     Returns:
         The data in the YAML file.
     """
+    if not os.path.isfile(path):
+        raise FileNotFoundError(f"File not found: {path}")
+
     with open(path) as f:
         return yaml.safe_load(f)
 
@@ -53,7 +58,13 @@ def parse_data(data: Dict[str, List[str]]) -> Generator[Tuple, None, None]:
 
 
 class JointBERTDataset(Dataset):
-    def __init__(self, path: str, max_length: int = 32):
+    def __init__(self, path: str, max_length: int = 32) -> None:
+        """Initializes the dataset.
+
+        Args:
+            path: The path to the YAML file containing the data.
+            max_length: The maximum length of the input sequence.
+        """
         self.data = load_yaml(path)
         self.max_length = max_length
 
@@ -105,6 +116,21 @@ def _tokenize_and_label(
     ) -> Tuple[int, List[str], List[int]]:
         """Tokenizes the text and assigns labels based on slot annotations.
 
+        The main purpose of this method is to convert the slot annotations
+        into labels that can be used to train the model. The labels need to
+        have the same length as the tokenized utterance.
+
+        For example:
+
+          Input: "I like scifi."
+          Tokens: ["I", "like", "sci", "##fi", "."]
+          Labels: ["OUT", "OUT", "B_GENRE", -100, "OUT"]
+          Indexes: [0, 0, 3, -100, 0]
+
+        Note that we use -100 to exclude tokens that are not the beginning
+        of a slot from the loss computation. This also makes it easier to
+        decode the labels later.
+
         Args:
             intent: The intent of the text.
             text: The text to tokenize.
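
Editor's note: as background for the labeling scheme described in the `_tokenize_and_label` docstring above, here is a standalone sketch. It is not part of the patch; the word-piece split and the slot indexes (0 = OUT, 3 = B_GENRE) are hard-coded to mirror the docstring example.

```python
# Align word-level slot labels with word-piece tokens, mirroring the
# "I like scifi." example from the _tokenize_and_label docstring.
IGNORE_INDEX = -100  # assumed to match _IGNORE_INDEX in the training code

# Word-level annotation for ["I", "like", "scifi", "."].
word_labels = [0, 0, 3, 0]

# Pretend word-piece split: "scifi" -> ["sci", "##fi"].
word_pieces = [["I"], ["like"], ["sci", "##fi"], ["."]]

token_labels = []
for pieces, label in zip(word_pieces, word_labels):
    # Only the first piece of each word carries the label; continuation
    # pieces get IGNORE_INDEX so both the loss and later decoding skip them.
    token_labels.append(label)
    token_labels.extend([IGNORE_INDEX] * (len(pieces) - 1))

print(token_labels)  # [0, 0, 3, -100, 0]
```
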
""" outputs = self.bert(input_ids, attention_mask=attention_mask) intent_logits = self.intent_classifier(outputs.pooler_output) @@ -76,11 +78,14 @@ def predict( return predicted_intent, predicted_slots @classmethod - def from_pretrained(cls, path: str) -> None: + def from_pretrained(cls, path: str) -> JointBERT: """Loads the model and tokenizer from the specified directory. Args: path: The path to the directory containing the model and tokenizer. + + Returns: + The loaded model. """ # Load the state dictionary diff --git a/moviebot/nlu/annotation/joint_bert/joint_bert_train.py b/moviebot/nlu/annotation/joint_bert/joint_bert_train.py index c9844e8..b6e59ac 100644 --- a/moviebot/nlu/annotation/joint_bert/joint_bert_train.py +++ b/moviebot/nlu/annotation/joint_bert/joint_bert_train.py @@ -3,7 +3,7 @@ import argparse import json import os -from typing import Tuple +from typing import List, Tuple import pytorch_lightning as pl import torch @@ -42,29 +42,43 @@ def __init__( ) self.save_hyperparameters() - def training_step(self, batch: Batch, batch_idx: int) -> torch.Tensor: - """Training step for the JointBERT model. + def _calculate_losses( + self, batch: Batch + ) -> Tuple[torch.Tensor, torch.Tensor]: + """Calculates losses for a batch. Args: batch: A batch of data. - batch_idx: Index of the batch. Returns: - The loss for the batch. + Tuple of intent and slot losses. """ input_ids, attention_mask, intent_labels, slot_labels = batch relevant_labels = slot_labels.view(-1) != _IGNORE_INDEX intent_logits, slot_logits = self(input_ids, attention_mask) + loss_intent = F.cross_entropy( intent_logits.view(-1, self.intent_label_count), intent_labels.view(-1), ) - loss_slot = F.cross_entropy( slot_logits.view(-1, self.slot_label_count)[relevant_labels], slot_labels.view(-1)[relevant_labels], ) + return loss_intent, loss_slot + + def training_step(self, batch: Batch, batch_idx: int) -> torch.Tensor: + """Training step for the JointBERT model. + + Args: + batch: A batch of data. + batch_idx: Index of the batch. + + Returns: + The loss for the batch. + """ + loss_intent, loss_slot = self._calculate_losses(batch) self.log( "train_loss_intent", @@ -94,32 +108,9 @@ def validation_step(self, batch: Batch, batch_idx: int) -> torch.Tensor: Returns: The loss for the batch. 
""" - input_ids, attention_mask, intent_labels, slot_labels = batch - intent_logits, slot_logits = self(input_ids, attention_mask) - - loss_intent = F.cross_entropy( - intent_logits.view(-1, self.intent_label_count), - intent_labels.view(-1), - ) - loss_slot = F.cross_entropy( - slot_logits.view(-1, self.slot_label_count), - slot_labels.view(-1), - ignore_index=_IGNORE_INDEX, - ) + loss_intent, loss_slot = self._calculate_losses(batch) val_loss = loss_intent + loss_slot - intent_acc = ( - (intent_logits.argmax(dim=1) == intent_labels).float().mean() - ) - slot_acc = ( - ( - (slot_logits.argmax(dim=2) == slot_labels) - & (slot_labels != _IGNORE_INDEX) - ) - .float() - .mean() - ) - # Log the metrics self.log( "val_loss", @@ -129,26 +120,10 @@ def validation_step(self, batch: Batch, batch_idx: int) -> torch.Tensor: prog_bar=True, logger=True, ) - self.log( - "intent_acc", - intent_acc, - on_step=False, - on_epoch=True, - prog_bar=True, - logger=True, - ) - self.log( - "slot_acc", - slot_acc, - on_step=False, - on_epoch=True, - prog_bar=True, - logger=True, - ) return val_loss - def configure_optimizers(self): + def configure_optimizers(self) -> Tuple[List, List]: """Configures the optimizer and scheduler for training.""" no_decay = ["bias", "LayerNorm.weight"] optimizer_grouped_parameters = [ @@ -205,6 +180,9 @@ def parse_arguments(): parser.add_argument("--max_epochs", type=int, default=5) parser.add_argument("--learning_rate", type=float, default=5e-5) parser.add_argument("--weight_decay", type=float, default=0.0) + parser.add_argument( + "--model_output_path", type=str, default=_MODEL_OUTPUT_PATH + ) args = parser.parse_args() return args @@ -227,6 +205,6 @@ def parse_arguments(): trainer = pl.Trainer(max_epochs=args.max_epochs, logger=wandb_logger) trainer.fit(model, dataloader) - model.save_pretrained(_MODEL_OUTPUT_PATH) - dataset.tokenizer.save_pretrained(_MODEL_OUTPUT_PATH) - print(f"Model saved to {_MODEL_OUTPUT_PATH}") + model.save_pretrained(args.model_output_path) + dataset.tokenizer.save_pretrained(args.model_output_path) + print(f"Model saved to {args.model_output_path}") diff --git a/moviebot/nlu/annotation/joint_bert/slot_mapping.py b/moviebot/nlu/annotation/joint_bert/slot_mapping.py index 80e9c7a..431beec 100644 --- a/moviebot/nlu/annotation/joint_bert/slot_mapping.py +++ b/moviebot/nlu/annotation/joint_bert/slot_mapping.py @@ -1,3 +1,4 @@ +"""Mapping of slot and intent labels to indices for JointBERT model.""" from __future__ import annotations from enum import Enum, auto diff --git a/moviebot/nlu/neural_nlu.py b/moviebot/nlu/neural_nlu.py index 840a0dc..aaa50b5 100644 --- a/moviebot/nlu/neural_nlu.py +++ b/moviebot/nlu/neural_nlu.py @@ -1,4 +1,4 @@ -from typing import Any, Dict, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple from transformers import BertTokenizerFast @@ -40,12 +40,12 @@ def generate_dacts( user_utterance: UserUtterance, options: DialogueOptions = None, dialogue_state: DialogueState = None, - ): + ) -> List[DialogueAct]: """Processes the utterance according to dialogue state and generate a user dialogue act for Agent to understand. Args: - user_utterance: UserUtterance class containing user input. + user_utterance: User utterance class containing user input. options: A list of options provided to the user to choose from. dialogue_state: The current dialogue state, if available. Defaults to None. @@ -83,7 +83,7 @@ def annotate_utterance( """Annotates the utterance with intent and slot information. 
diff --git a/moviebot/nlu/neural_nlu.py b/moviebot/nlu/neural_nlu.py
index 840a0dc..aaa50b5 100644
--- a/moviebot/nlu/neural_nlu.py
+++ b/moviebot/nlu/neural_nlu.py
@@ -1,4 +1,4 @@
-from typing import Any, Dict, Optional, Tuple
+from typing import Any, Dict, List, Optional, Tuple
 
 from transformers import BertTokenizerFast
 
@@ -40,12 +40,12 @@ def generate_dacts(
         user_utterance: UserUtterance,
         options: DialogueOptions = None,
         dialogue_state: DialogueState = None,
-    ):
+    ) -> List[DialogueAct]:
         """Processes the utterance according to dialogue state and generate
         a user dialogue act for Agent to understand.
 
         Args:
-            user_utterance: UserUtterance class containing user input.
+            user_utterance: User utterance class containing user input.
             options: A list of options provided to the user to choose from.
             dialogue_state: The current dialogue state, if available.
                 Defaults to None.
@@ -83,7 +83,7 @@ def annotate_utterance(
         """Annotates the utterance with intent and slot information.
 
         Args:
-            user_utterance: UserUtterance class containing user input.
+            user_utterance: User utterance class containing user input.
 
         Returns:
             A tuple of the intent and slot information.
diff --git a/moviebot/nlu/nlu.py b/moviebot/nlu/nlu.py
index 495d29f..a8e73fe 100644
--- a/moviebot/nlu/nlu.py
+++ b/moviebot/nlu/nlu.py
@@ -17,16 +17,18 @@
 from moviebot.nlu.annotation.item_constraint import ItemConstraint
 from moviebot.nlu.annotation.operator import Operator
 from moviebot.nlu.annotation.slots import Slots
+from moviebot.nlu.user_intents_checker import UserIntentsChecker
 
 
 class NLU(ABC):
-    def __init__(self, config):
+    def __init__(self, config: Dict[str, Any]) -> None:
         """The abstract NLU class.
 
         Args:
             config: Paths to ontology, database and tag words for slots in NLU.
         """
         self.config = config
+        self.intents_checker = UserIntentsChecker(config)
 
     @abstractmethod
     def generate_dacts(
@@ -34,7 +36,7 @@ def generate_dacts(
         user_utterance: UserUtterance,
         options: DialogueOptions,
         dialogue_state: DialogueState = None,
-    ):
+    ) -> List[DialogueAct]:
         """Processes the utterance according to dialogue state and generate
         user dialogue acts for Agent to understand.
 
@@ -44,6 +46,8 @@ def generate_dacts(
             dialogue_state: The current dialogue state, if available.
                 Defaults to None.
 
         Returns:
             A list of dialogue acts.
+        Raises:
+            NotImplementedError: If the method is not implemented.
         """
diff --git a/moviebot/nlu/rule_based_nlu.py b/moviebot/nlu/rule_based_nlu.py
index cd67234..565a11d 100644
--- a/moviebot/nlu/rule_based_nlu.py
+++ b/moviebot/nlu/rule_based_nlu.py
@@ -15,11 +15,10 @@
 from moviebot.dialogue_manager.dialogue_state import DialogueState
 from moviebot.nlu.annotation.values import Values
 from moviebot.nlu.nlu import NLU
-from moviebot.nlu.user_intents_checker import UserIntentsChecker
 
 
 class RuleBasedNLU(NLU):
-    def __init__(self, config):
+    def __init__(self, config: Dict[str, Any]) -> None:
         """RuleBasedNLU is a basic natural language understander to generate
         dialogue acts for the Conversational Agent.
 
@@ -34,14 +33,13 @@ def __init__(self, config):
             config: Paths to ontology, database and tag words for slots in NLU.
         """
         super().__init__(config)
-        self.intents_checker = UserIntentsChecker(config)
 
     def _process_first_turn(
         self, user_utterance: UserUtterance
     ) -> List[DialogueAct]:
         """Generates dialogue acts for the first turn of the conversation.
 
-        The system chceks if the user provided any voluntary preferences or
+        The system checks if the user provided any voluntary preferences or
         if the user is just saying hi.
 
         Args:
@@ -88,9 +86,10 @@ def _process_last_agent_dacts(
     def _process_recommendation_feedback(
         self, user_utterance: UserUtterance
     ) -> List[DialogueAct]:
-        """Processes recommendation feedback from the user. The function checks
-        if the user is rejecting the recommendation, inquiring about the
-        recommendation, or providing voluntary preferences.
+        """Processes recommendation feedback from the user.
+
+        The function checks if the user is rejecting the recommendation,
+        inquiring about the recommendation, or providing voluntary preferences.
 
         Args:
             user_utterance: User utterance.
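
Editor's note: taken together, the last three files move the `UserIntentsChecker` construction from `RuleBasedNLU` into the abstract `NLU` base class. A reduced sketch of the resulting shape, with class bodies trimmed to the relevant lines:

```python
from abc import ABC


class UserIntentsChecker:
    def __init__(self, config: dict) -> None:
        self.config = config


class NLU(ABC):
    def __init__(self, config: dict) -> None:
        self.config = config
        # Constructed once here, so every NLU subclass (rule-based or
        # neural) inherits self.intents_checker without re-creating it.
        self.intents_checker = UserIntentsChecker(config)


class RuleBasedNLU(NLU):
    def __init__(self, config: dict) -> None:
        super().__init__(config)  # no duplicate UserIntentsChecker here
```
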