From 573b9dc73adb4b3e1ddeba02fc716a947145c103 Mon Sep 17 00:00:00 2001 From: Alexander Khizov Date: Mon, 30 Nov 2020 19:13:45 +0100 Subject: [PATCH] Preserve slot ordering Add changelog Preserve multiline strings in respones text inside domain Preserve responses in NLU data as well fix changelog entry Take care of possible multiple text examples Address PR comments --- changelog/7418.bugfix.md | 2 + rasa/model.py | 2 +- rasa/shared/core/domain.py | 36 +++++- .../nlu/training_data/formats/rasa_yaml.py | 5 +- tests/shared/core/test_domain.py | 105 +++++++++++++++++- .../training_data/formats/test_rasa_yaml.py | 30 +++++ 6 files changed, 175 insertions(+), 5 deletions(-) create mode 100644 changelog/7418.bugfix.md diff --git a/changelog/7418.bugfix.md b/changelog/7418.bugfix.md new file mode 100644 index 000000000000..43cac3b7ce50 --- /dev/null +++ b/changelog/7418.bugfix.md @@ -0,0 +1,2 @@ +- Preserve `domain` slot ordering while dumping it back to the file. +- Preserve multiline `text` examples of `responses` defined in `domain` and `NLU` training data. \ No newline at end of file diff --git a/rasa/model.py b/rasa/model.py index b4a37357f4a1..97b2b2b3569e 100644 --- a/rasa/model.py +++ b/rasa/model.py @@ -315,7 +315,7 @@ async def model_fingerprint(file_importer: "TrainingDataImporter") -> Fingerprin domain = copy.copy(domain) # don't include the response texts in the fingerprint. # Their fingerprint is separate. - domain.templates = [] + domain.templates = {} return { FINGERPRINT_CONFIG_KEY: _get_fingerprint_of_config( diff --git a/rasa/shared/core/domain.py b/rasa/shared/core/domain.py index e0be1110359c..493b6fa16136 100644 --- a/rasa/shared/core/domain.py +++ b/rasa/shared/core/domain.py @@ -52,6 +52,7 @@ KEY_ACTIONS = "actions" KEY_FORMS = "forms" KEY_E2E_ACTIONS = "e2e_actions" +KEY_RESPONSES_TEXT = "text" ALL_DOMAIN_KEYS = [ KEY_SLOTS, @@ -286,8 +287,8 @@ def collect_slots(slot_dict: Dict[Text, Any]) -> List[Slot]: slots = [] # make a copy to not alter the input dictionary slot_dict = copy.deepcopy(slot_dict) - # Sort the slots so that the order of the slot states is consistent - for slot_name in sorted(slot_dict): + # Don't sort the slots, see https://github.com/RasaHQ/rasa-x/issues/3900 + for slot_name in slot_dict: slot_type = slot_dict[slot_name].pop("type", None) slot_class = Slot.resolve_by_type(slot_type) @@ -1057,6 +1058,33 @@ def as_dict(self) -> Dict[Text, Any]: KEY_E2E_ACTIONS: self.action_texts, } + @staticmethod + def get_responses_with_multilines( + responses: Dict[Text, List[Dict[Text, Any]]] + ) -> Dict[Text, List[Dict[Text, Any]]]: + """Returns `responses` with preserved multilines in the `text` key. + + Args: + responses: Original `responses`. + + Returns: + `responses` with preserved multilines in the `text` key. + """ + from ruamel.yaml.scalarstring import LiteralScalarString + + final_responses = responses.copy() + for response_name, examples in final_responses.items(): + for i, example in enumerate(examples): + response_text = example.get(KEY_RESPONSES_TEXT, "") + if not response_text or "\n" not in response_text: + continue + # Has new lines, use `LiteralScalarString` + final_responses[response_name][i][ + KEY_RESPONSES_TEXT + ] = LiteralScalarString(response_text) + + return final_responses + def _transform_intents_for_file(self) -> List[Union[Text, Dict[Text, Any]]]: """Transform intent properties for displaying or writing into a domain file. @@ -1200,6 +1228,10 @@ def as_yaml(self, clean_before_dump: bool = False) -> Text: domain_data.update(self.cleaned_domain()) else: domain_data.update(self.as_dict()) + if domain_data.get(KEY_RESPONSES, {}): + domain_data[KEY_RESPONSES] = self.get_responses_with_multilines( + domain_data[KEY_RESPONSES] + ) return rasa.shared.utils.io.dump_obj_as_yaml_to_string( domain_data, should_preserve_key_order=True diff --git a/rasa/shared/nlu/training_data/formats/rasa_yaml.py b/rasa/shared/nlu/training_data/formats/rasa_yaml.py index 8b93614fc51c..ce254b109b01 100644 --- a/rasa/shared/nlu/training_data/formats/rasa_yaml.py +++ b/rasa/shared/nlu/training_data/formats/rasa_yaml.py @@ -4,6 +4,7 @@ from typing import Text, Any, List, Dict, Tuple, Union, Iterator, Optional import rasa.shared.data +from rasa.shared.core.domain import Domain from rasa.shared.exceptions import YamlException from rasa.shared.utils import validation from ruamel.yaml import StringIO @@ -409,7 +410,9 @@ def training_data_to_dict( result[KEY_NLU] = nlu_items if training_data.responses: - result[KEY_RESPONSES] = training_data.responses + result[KEY_RESPONSES] = Domain.get_responses_with_multilines( + training_data.responses + ) return result diff --git a/tests/shared/core/test_domain.py b/tests/shared/core/test_domain.py index 05020ba67f71..124bea433c65 100644 --- a/tests/shared/core/test_domain.py +++ b/tests/shared/core/test_domain.py @@ -7,7 +7,10 @@ from rasa.shared.exceptions import YamlSyntaxException import rasa.shared.utils.io -from rasa.shared.constants import DEFAULT_SESSION_EXPIRATION_TIME_IN_MINUTES +from rasa.shared.constants import ( + DEFAULT_SESSION_EXPIRATION_TIME_IN_MINUTES, + LATEST_TRAINING_DATA_FORMAT_VERSION, +) from rasa.core import training, utils from rasa.core.featurizers.tracker_featurizers import MaxHistoryTrackerFeaturizer from rasa.shared.core.slots import InvalidSlotTypeException, TextSlot @@ -1142,3 +1145,103 @@ def test_valid_slot_mappings(domain_as_dict: Dict[Text, Any]): def test_form_invalid_mappings(domain_as_dict: Dict[Text, Any]): with pytest.raises(InvalidDomain): Domain.from_dict(domain_as_dict) + + +def test_slot_order_is_preserved(): + test_yaml = f"""version: '{LATEST_TRAINING_DATA_FORMAT_VERSION}' +session_config: + session_expiration_time: 60 + carry_over_slots_to_new_session: true +slots: + confirm: + type: bool + influence_conversation: false + previous_email: + type: text + influence_conversation: false + caller_id: + type: text + influence_conversation: false + email: + type: text + influence_conversation: false + incident_title: + type: text + influence_conversation: false + priority: + type: text + influence_conversation: false + problem_description: + type: text + influence_conversation: false + requested_slot: + type: text + influence_conversation: false + handoff_to: + type: text + influence_conversation: false +""" + + domain = Domain.from_yaml(test_yaml) + assert domain.as_yaml(clean_before_dump=True) == test_yaml + + +def test_slot_order_is_preserved_when_merging(): + + slot_1 = """ + b: + type: text + influence_conversation: false + a: + type: text + influence_conversation: false""" + + test_yaml_1 = f""" +slots:{slot_1} +""" + + slot_2 = """ + d: + type: text + influence_conversation: false + c: + type: text + influence_conversation: false""" + + test_yaml_2 = f""" +slots:{slot_2} +""" + + test_yaml_merged = f"""version: '{LATEST_TRAINING_DATA_FORMAT_VERSION}' +session_config: + session_expiration_time: 60 + carry_over_slots_to_new_session: true +slots:{slot_2}{slot_1} +""" + + domain_1 = Domain.from_yaml(test_yaml_1) + domain_2 = Domain.from_yaml(test_yaml_2) + domain_merged = domain_1.merge(domain_2) + + assert domain_merged.as_yaml(clean_before_dump=True) == test_yaml_merged + + +def test_responses_text_multiline_is_preserved(): + test_yaml = f"""version: '{LATEST_TRAINING_DATA_FORMAT_VERSION}' +session_config: + session_expiration_time: 60 + carry_over_slots_to_new_session: true +responses: + utter_confirm: + - text: |- + First line + Second line + Third line + - text: One more response + utter_cancel: + - text: First line + - text: Second line +""" + + domain = Domain.from_yaml(test_yaml) + assert domain.as_yaml(clean_before_dump=True) == test_yaml diff --git a/tests/shared/nlu/training_data/formats/test_rasa_yaml.py b/tests/shared/nlu/training_data/formats/test_rasa_yaml.py index b5ec1c12db20..7369d86c58fc 100644 --- a/tests/shared/nlu/training_data/formats/test_rasa_yaml.py +++ b/tests/shared/nlu/training_data/formats/test_rasa_yaml.py @@ -441,3 +441,33 @@ def test_responses_are_converted_from_markdown(): # dumping again should also not change the format assert dumped == RasaYAMLWriter().dumps(dumped_result) + + +def test_responses_text_multiline_is_preserved(): + responses_yml = textwrap.dedent( + """ + responses: + utter_confirm: + - text: |- + First line + Second line + Third line + - text: One more response + utter_cancel: + - text: First line + - text: Second line + """ + ) + + reader = RasaYAMLReader() + result = reader.reads(responses_yml) + + dumped = RasaYAMLWriter().dumps(result) + + validation_reader = RasaYAMLReader() + dumped_result = validation_reader.reads(dumped) + + assert dumped_result.responses == result.responses + + # dumping again should also not change the format + assert dumped == RasaYAMLWriter().dumps(dumped_result)