diff --git a/changelog/7418.bugfix.md b/changelog/7418.bugfix.md new file mode 100644 index 000000000000..43cac3b7ce50 --- /dev/null +++ b/changelog/7418.bugfix.md @@ -0,0 +1,2 @@ +- Preserve `domain` slot ordering while dumping it back to the file. +- Preserve multiline `text` examples of `responses` defined in `domain` and `NLU` training data. \ No newline at end of file diff --git a/rasa/model.py b/rasa/model.py index b4a37357f4a1..97b2b2b3569e 100644 --- a/rasa/model.py +++ b/rasa/model.py @@ -315,7 +315,7 @@ async def model_fingerprint(file_importer: "TrainingDataImporter") -> Fingerprin domain = copy.copy(domain) # don't include the response texts in the fingerprint. # Their fingerprint is separate. - domain.templates = [] + domain.templates = {} return { FINGERPRINT_CONFIG_KEY: _get_fingerprint_of_config( diff --git a/rasa/shared/core/domain.py b/rasa/shared/core/domain.py index e0be1110359c..493b6fa16136 100644 --- a/rasa/shared/core/domain.py +++ b/rasa/shared/core/domain.py @@ -52,6 +52,7 @@ KEY_ACTIONS = "actions" KEY_FORMS = "forms" KEY_E2E_ACTIONS = "e2e_actions" +KEY_RESPONSES_TEXT = "text" ALL_DOMAIN_KEYS = [ KEY_SLOTS, @@ -286,8 +287,8 @@ def collect_slots(slot_dict: Dict[Text, Any]) -> List[Slot]: slots = [] # make a copy to not alter the input dictionary slot_dict = copy.deepcopy(slot_dict) - # Sort the slots so that the order of the slot states is consistent - for slot_name in sorted(slot_dict): + # Don't sort the slots, see https://github.com/RasaHQ/rasa-x/issues/3900 + for slot_name in slot_dict: slot_type = slot_dict[slot_name].pop("type", None) slot_class = Slot.resolve_by_type(slot_type) @@ -1057,6 +1058,33 @@ def as_dict(self) -> Dict[Text, Any]: KEY_E2E_ACTIONS: self.action_texts, } + @staticmethod + def get_responses_with_multilines( + responses: Dict[Text, List[Dict[Text, Any]]] + ) -> Dict[Text, List[Dict[Text, Any]]]: + """Returns `responses` with preserved multilines in the `text` key. + + Args: + responses: Original `responses`. + + Returns: + `responses` with preserved multilines in the `text` key. + """ + from ruamel.yaml.scalarstring import LiteralScalarString + + final_responses = responses.copy() + for response_name, examples in final_responses.items(): + for i, example in enumerate(examples): + response_text = example.get(KEY_RESPONSES_TEXT, "") + if not response_text or "\n" not in response_text: + continue + # Has new lines, use `LiteralScalarString` + final_responses[response_name][i][ + KEY_RESPONSES_TEXT + ] = LiteralScalarString(response_text) + + return final_responses + def _transform_intents_for_file(self) -> List[Union[Text, Dict[Text, Any]]]: """Transform intent properties for displaying or writing into a domain file. @@ -1200,6 +1228,10 @@ def as_yaml(self, clean_before_dump: bool = False) -> Text: domain_data.update(self.cleaned_domain()) else: domain_data.update(self.as_dict()) + if domain_data.get(KEY_RESPONSES, {}): + domain_data[KEY_RESPONSES] = self.get_responses_with_multilines( + domain_data[KEY_RESPONSES] + ) return rasa.shared.utils.io.dump_obj_as_yaml_to_string( domain_data, should_preserve_key_order=True diff --git a/rasa/shared/nlu/training_data/formats/rasa_yaml.py b/rasa/shared/nlu/training_data/formats/rasa_yaml.py index 8b93614fc51c..ce254b109b01 100644 --- a/rasa/shared/nlu/training_data/formats/rasa_yaml.py +++ b/rasa/shared/nlu/training_data/formats/rasa_yaml.py @@ -4,6 +4,7 @@ from typing import Text, Any, List, Dict, Tuple, Union, Iterator, Optional import rasa.shared.data +from rasa.shared.core.domain import Domain from rasa.shared.exceptions import YamlException from rasa.shared.utils import validation from ruamel.yaml import StringIO @@ -409,7 +410,9 @@ def training_data_to_dict( result[KEY_NLU] = nlu_items if training_data.responses: - result[KEY_RESPONSES] = training_data.responses + result[KEY_RESPONSES] = Domain.get_responses_with_multilines( + training_data.responses + ) return result diff --git a/tests/shared/core/test_domain.py b/tests/shared/core/test_domain.py index 05020ba67f71..124bea433c65 100644 --- a/tests/shared/core/test_domain.py +++ b/tests/shared/core/test_domain.py @@ -7,7 +7,10 @@ from rasa.shared.exceptions import YamlSyntaxException import rasa.shared.utils.io -from rasa.shared.constants import DEFAULT_SESSION_EXPIRATION_TIME_IN_MINUTES +from rasa.shared.constants import ( + DEFAULT_SESSION_EXPIRATION_TIME_IN_MINUTES, + LATEST_TRAINING_DATA_FORMAT_VERSION, +) from rasa.core import training, utils from rasa.core.featurizers.tracker_featurizers import MaxHistoryTrackerFeaturizer from rasa.shared.core.slots import InvalidSlotTypeException, TextSlot @@ -1142,3 +1145,103 @@ def test_valid_slot_mappings(domain_as_dict: Dict[Text, Any]): def test_form_invalid_mappings(domain_as_dict: Dict[Text, Any]): with pytest.raises(InvalidDomain): Domain.from_dict(domain_as_dict) + + +def test_slot_order_is_preserved(): + test_yaml = f"""version: '{LATEST_TRAINING_DATA_FORMAT_VERSION}' +session_config: + session_expiration_time: 60 + carry_over_slots_to_new_session: true +slots: + confirm: + type: bool + influence_conversation: false + previous_email: + type: text + influence_conversation: false + caller_id: + type: text + influence_conversation: false + email: + type: text + influence_conversation: false + incident_title: + type: text + influence_conversation: false + priority: + type: text + influence_conversation: false + problem_description: + type: text + influence_conversation: false + requested_slot: + type: text + influence_conversation: false + handoff_to: + type: text + influence_conversation: false +""" + + domain = Domain.from_yaml(test_yaml) + assert domain.as_yaml(clean_before_dump=True) == test_yaml + + +def test_slot_order_is_preserved_when_merging(): + + slot_1 = """ + b: + type: text + influence_conversation: false + a: + type: text + influence_conversation: false""" + + test_yaml_1 = f""" +slots:{slot_1} +""" + + slot_2 = """ + d: + type: text + influence_conversation: false + c: + type: text + influence_conversation: false""" + + test_yaml_2 = f""" +slots:{slot_2} +""" + + test_yaml_merged = f"""version: '{LATEST_TRAINING_DATA_FORMAT_VERSION}' +session_config: + session_expiration_time: 60 + carry_over_slots_to_new_session: true +slots:{slot_2}{slot_1} +""" + + domain_1 = Domain.from_yaml(test_yaml_1) + domain_2 = Domain.from_yaml(test_yaml_2) + domain_merged = domain_1.merge(domain_2) + + assert domain_merged.as_yaml(clean_before_dump=True) == test_yaml_merged + + +def test_responses_text_multiline_is_preserved(): + test_yaml = f"""version: '{LATEST_TRAINING_DATA_FORMAT_VERSION}' +session_config: + session_expiration_time: 60 + carry_over_slots_to_new_session: true +responses: + utter_confirm: + - text: |- + First line + Second line + Third line + - text: One more response + utter_cancel: + - text: First line + - text: Second line +""" + + domain = Domain.from_yaml(test_yaml) + assert domain.as_yaml(clean_before_dump=True) == test_yaml diff --git a/tests/shared/nlu/training_data/formats/test_rasa_yaml.py b/tests/shared/nlu/training_data/formats/test_rasa_yaml.py index b5ec1c12db20..7369d86c58fc 100644 --- a/tests/shared/nlu/training_data/formats/test_rasa_yaml.py +++ b/tests/shared/nlu/training_data/formats/test_rasa_yaml.py @@ -441,3 +441,33 @@ def test_responses_are_converted_from_markdown(): # dumping again should also not change the format assert dumped == RasaYAMLWriter().dumps(dumped_result) + + +def test_responses_text_multiline_is_preserved(): + responses_yml = textwrap.dedent( + """ + responses: + utter_confirm: + - text: |- + First line + Second line + Third line + - text: One more response + utter_cancel: + - text: First line + - text: Second line + """ + ) + + reader = RasaYAMLReader() + result = reader.reads(responses_yml) + + dumped = RasaYAMLWriter().dumps(result) + + validation_reader = RasaYAMLReader() + dumped_result = validation_reader.reads(dumped) + + assert dumped_result.responses == result.responses + + # dumping again should also not change the format + assert dumped == RasaYAMLWriter().dumps(dumped_result)