Merge branch '2.1.x' into 3900_domain_dump_order

RasaHQ · Dec 4, 2020 · bfb4f09 · bfb4f09
2 parents 75b8116 + 8042424
commit bfb4f09
Show file tree

Hide file tree

Showing 4 changed files with 50 additions and 4 deletions.
diff --git a/changelog/7316.bugfix.md b/changelog/7316.bugfix.md
@@ -0,0 +1,2 @@
+`SingleStateFeaturizer` checks whether it was trained with `RegexInterpreter` as
+nlu interpreter. If that is the case, `RegexInterpreter` is used during prediction.
diff --git a/rasa/core/featurizers/single_state_featurizer.py b/rasa/core/featurizers/single_state_featurizer.py
@@ -6,7 +6,7 @@
 
 import rasa.shared.utils.io
 from rasa.shared.core.domain import SubState, State, Domain
-from rasa.shared.nlu.interpreter import NaturalLanguageInterpreter
+from rasa.shared.nlu.interpreter import NaturalLanguageInterpreter, RegexInterpreter
 from rasa.shared.core.constants import PREVIOUS_ACTION, ACTIVE_LOOP, USER, SLOTS
 from rasa.shared.constants import DOCS_URL_MIGRATION_GUIDE
 from rasa.shared.core.trackers import is_prev_action_listen_in_state
@@ -34,15 +34,29 @@ class SingleStateFeaturizer:
  """
 
  def __init__(self) -> None:
+ """Initialize the single state featurizer."""
+ # Rasa Core can be trained separately, in which case the interpreter used
+ # during training is `RegexInterpreter`. If the model is later combined with
+ # a Rasa NLU model, the interpreter supplied during prediction might differ.
+ # If that is the case, we need to make sure to "reset" the interpreter.
+ self._use_regex_interpreter = False
  self._default_feature_states = {}
  self.action_texts = []
 
- def prepare_from_domain(self, domain: Domain) -> None:
+ def prepare_for_training(
+ self, domain: Domain, interpreter: NaturalLanguageInterpreter
+ ) -> None:
  """Gets necessary information for featurization from domain.
 
  Args:
  domain: An instance of :class:`rasa.shared.core.domain.Domain`.
+ interpreter: The interpreter used to encode the state
  """
+ if isinstance(interpreter, RegexInterpreter):
+ # this method is called during training,
+ # RegexInterpreter means that core was trained separately
+ self._use_regex_interpreter = True
+
  # store feature states for each attribute in order to create binary features
  def convert_to_dict(feature_states: List[Text]) -> Dict[Text, int]:
  return {
@@ -156,6 +170,15 @@ def _extract_state_features(
  interpreter: NaturalLanguageInterpreter,
  sparse: bool = False,
  ) -> Dict[Text, List["Features"]]:
+ # This method is called during both prediction and training.
+ # `self._use_regex_interpreter == True` means that core was trained
+ # separately. In that case, replace an interpreter backed by a trained
+ # NLU model with the default `RegexInterpreter` to make sure that
+ # prediction-time and training-time features are the same.
+ if self._use_regex_interpreter and not isinstance(
+ interpreter, RegexInterpreter
+ ):
+ interpreter = RegexInterpreter()
 
  message = Message(data=sub_state)
  # remove entities from possible attributes

diff --git a/rasa/core/featurizers/tracker_featurizers.py b/rasa/core/featurizers/tracker_featurizers.py
@@ -132,7 +132,7 @@ def featurize_trackers(
  f"to get numerical features for trackers."
  )
 
- self.state_featurizer.prepare_from_domain(domain)
+ self.state_featurizer.prepare_for_training(domain, interpreter)
 
  trackers_as_states, trackers_as_actions = self.training_states_and_actions(
  trackers, domain

diff --git a/tests/core/test_featurizer.py b/tests/core/test_featurizer.py
@@ -122,7 +122,7 @@ def test_single_state_featurizer_creates_encoded_all_actions():
  action_names=["a", "b", "c", "d"],
  )
  f = SingleStateFeaturizer()
- f.prepare_from_domain(domain)
+ f.prepare_for_training(domain, RegexInterpreter())
  encoded_actions = f.encode_all_actions(domain, RegexInterpreter())
  assert len(encoded_actions) == len(domain.action_names)
  assert all(
@@ -295,3 +295,24 @@ def test_single_state_featurizer_with_interpreter_state_with_no_action_name(
  assert (
  encoded[ACTIVE_LOOP][0].features != scipy.sparse.coo_matrix([[0, 0, 0, 1]])
  ).nnz == 0
+
+
+def test_single_state_featurizer_uses_regex_interpreter(
+ unpacked_trained_moodbot_path: Text,
+):
+ from rasa.core.agent import Agent
+
+ domain = Domain(
+ intents=[], entities=[], slots=[], templates={}, forms=[], action_names=[],
+ )
+ f = SingleStateFeaturizer()
+ # simulate that core was trained separately by passing
+ # RegexInterpreter to prepare_for_training
+ f.prepare_for_training(domain, RegexInterpreter())
+ # simulate that nlu and core models were manually combined for prediction
+ # by passing trained interpreter to _extract_state_features
+ interpreter = Agent.load(unpacked_trained_moodbot_path).interpreter
+ features = f._extract_state_features({TEXT: "some text"}, interpreter)
+ # RegexInterpreter cannot create features for text; since the featurizer
+ # was trained without nlu, the features for text should be empty
+ assert not features