From 845ca7f9c7ff4e2fe1473baf17749c79113aec63 Mon Sep 17 00:00:00 2001 From: Tanja Bergmann Date: Wed, 2 Sep 2020 14:22:05 +0200 Subject: [PATCH 1/2] fix slow training --- rasa/nlu/extractors/crf_entity_extractor.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/rasa/nlu/extractors/crf_entity_extractor.py b/rasa/nlu/extractors/crf_entity_extractor.py index 305e9d902c64..b4dd619ad996 100644 --- a/rasa/nlu/extractors/crf_entity_extractor.py +++ b/rasa/nlu/extractors/crf_entity_extractor.py @@ -406,14 +406,18 @@ def _create_features_for_token( # get the features to extract for the token we are currently looking at current_feature_idx = pointer_position + half_window_size features = configured_features[current_feature_idx] + + prefix = prefixes[current_feature_idx] + # we add the 'entity' feature to include the entity type as features # for the role and group CRFs + # (do not modify features, otherwise we will end up adding 'entity' + # over and over again, making training very slow) + additional_features = [] if include_tag_features: - features.append("entity") - - prefix = prefixes[current_feature_idx] + additional_features.append("entity") - for feature in features: + for feature in features + additional_features: if feature == "pattern": # add all regexes extracted from the 'RegexFeaturizer' as a # feature: 'pattern_name' is the name of the pattern the user From bb9754840686542be9500010fe75567b60e26e13 Mon Sep 17 00:00:00 2001 From: Tanja Bergmann Date: Wed, 2 Sep 2020 14:24:58 +0200 Subject: [PATCH 2/2] add changelog --- changelog/6549.bugfix.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog/6549.bugfix.rst diff --git a/changelog/6549.bugfix.rst b/changelog/6549.bugfix.rst new file mode 100644 index 000000000000..7e5200cf79b0 --- /dev/null +++ b/changelog/6549.bugfix.rst @@ -0,0 +1 @@ +Fix slow training of ``CRFEntityExtractor`` when using Entity Roles and Groups.