Skip to content

Commit

Permalink
Merge branch '2.1.x' into 3900_domain_dump_order
Browse files Browse the repository at this point in the history
  • Loading branch information
rasabot authored Dec 4, 2020
2 parents 75b8116 + 8042424 commit bfb4f09
Show file tree
Hide file tree
Showing 4 changed files with 50 additions and 4 deletions.
2 changes: 2 additions & 0 deletions changelog/7316.bugfix.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
`SingleStateFeaturizer` now checks whether it was trained with `RegexInterpreter` as
the NLU interpreter. If that is the case, `RegexInterpreter` is also used during prediction.
27 changes: 25 additions & 2 deletions rasa/core/featurizers/single_state_featurizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

import rasa.shared.utils.io
from rasa.shared.core.domain import SubState, State, Domain
from rasa.shared.nlu.interpreter import NaturalLanguageInterpreter
from rasa.shared.nlu.interpreter import NaturalLanguageInterpreter, RegexInterpreter
from rasa.shared.core.constants import PREVIOUS_ACTION, ACTIVE_LOOP, USER, SLOTS
from rasa.shared.constants import DOCS_URL_MIGRATION_GUIDE
from rasa.shared.core.trackers import is_prev_action_listen_in_state
Expand Down Expand Up @@ -34,15 +34,29 @@ class SingleStateFeaturizer:
"""

def __init__(self) -> None:
"""Initialize the single state featurizer with its default, untrained state."""
# Rasa Core can be trained on its own; in that case the interpreter seen
# during training is a `RegexInterpreter`. If such a model is later combined
# with a Rasa NLU model, a different interpreter may be handed in at
# prediction time. This flag records that situation so the interpreter can
# be "reset" to a `RegexInterpreter`; it starts out as False.
self._use_regex_interpreter = False
# NOTE(review): presumably populated from the domain when preparing for
# training -- the assignment is not visible in this hunk, confirm.
self._default_feature_states = {}
# NOTE(review): presumably the action texts taken from the domain -- confirm
# against the code that fills it.
self.action_texts = []

def prepare_from_domain(self, domain: Domain) -> None:
def prepare_for_training(
self, domain: Domain, interpreter: NaturalLanguageInterpreter
) -> None:
"""Gets necessary information for featurization from domain.
Args:
domain: An instance of :class:`rasa.shared.core.domain.Domain`.
interpreter: The interpreter used to encode the state
"""
if isinstance(interpreter, RegexInterpreter):
# this method is called during training,
# RegexInterpreter means that core was trained separately
self._use_regex_interpreter = True

# store feature states for each attribute in order to create binary features
def convert_to_dict(feature_states: List[Text]) -> Dict[Text, int]:
return {
Expand Down Expand Up @@ -156,6 +170,15 @@ def _extract_state_features(
interpreter: NaturalLanguageInterpreter,
sparse: bool = False,
) -> Dict[Text, List["Features"]]:
# this method is called during both prediction and training,
# `self._use_regex_interpreter == True` means that core was trained
# separately, therefore substitute interpreter based on some trained
# nlu model with default RegexInterpreter to make sure
# that prediction and train time features are the same
if self._use_regex_interpreter and not isinstance(
interpreter, RegexInterpreter
):
interpreter = RegexInterpreter()

message = Message(data=sub_state)
# remove entities from possible attributes
Expand Down
2 changes: 1 addition & 1 deletion rasa/core/featurizers/tracker_featurizers.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ def featurize_trackers(
f"to get numerical features for trackers."
)

self.state_featurizer.prepare_from_domain(domain)
self.state_featurizer.prepare_for_training(domain, interpreter)

trackers_as_states, trackers_as_actions = self.training_states_and_actions(
trackers, domain
Expand Down
23 changes: 22 additions & 1 deletion tests/core/test_featurizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ def test_single_state_featurizer_creates_encoded_all_actions():
action_names=["a", "b", "c", "d"],
)
f = SingleStateFeaturizer()
f.prepare_from_domain(domain)
f.prepare_for_training(domain, RegexInterpreter())
encoded_actions = f.encode_all_actions(domain, RegexInterpreter())
assert len(encoded_actions) == len(domain.action_names)
assert all(
Expand Down Expand Up @@ -295,3 +295,24 @@ def test_single_state_featurizer_with_interpreter_state_with_no_action_name(
assert (
encoded[ACTIVE_LOOP][0].features != scipy.sparse.coo_matrix([[0, 0, 0, 1]])
).nnz == 0


def test_single_state_featurizer_uses_regex_interpreter(
unpacked_trained_moodbot_path: Text,
):
"""Check that a featurizer prepared with `RegexInterpreter` produces no text features.

The featurizer is prepared with a `RegexInterpreter`, then asked to extract
features for a text sub-state using the interpreter of a loaded agent; the
resulting features are expected to be empty.
"""
from rasa.core.agent import Agent

domain = Domain(
intents=[], entities=[], slots=[], templates={}, forms=[], action_names=[],
)
f = SingleStateFeaturizer()
# simulate that core was trained separately by passing
# RegexInterpreter to prepare_for_training
f.prepare_for_training(domain, RegexInterpreter())
# simulate that nlu and core models were manually combined for prediction
# by passing the interpreter of a loaded agent to _extract_state_features
interpreter = Agent.load(unpacked_trained_moodbot_path).interpreter
features = f._extract_state_features({TEXT: "some text"}, interpreter)
# RegexInterpreter cannot create features for text, therefore since featurizer
# was trained without nlu, features for text should be empty
assert not features

0 comments on commit bfb4f09

Please sign in to comment.