diff --git a/rasa/nlu/extractors/crf_entity_extractor.py b/rasa/nlu/extractors/crf_entity_extractor.py index 305e9d902c64..b4dd619ad996 100644 --- a/rasa/nlu/extractors/crf_entity_extractor.py +++ b/rasa/nlu/extractors/crf_entity_extractor.py @@ -406,14 +406,18 @@ def _create_features_for_token( # get the features to extract for the token we are currently looking at current_feature_idx = pointer_position + half_window_size features = configured_features[current_feature_idx] + + prefix = prefixes[current_feature_idx] + # we add the 'entity' feature to include the entity type as features # for the role and group CRFs + # (do not modify features, otherwise we will end up adding 'entity' + # over and over again, making training very slow) + additional_features = [] if include_tag_features: - features.append("entity") - - prefix = prefixes[current_feature_idx] + additional_features.append("entity") - for feature in features: + for feature in features + additional_features: if feature == "pattern": # add all regexes extracted from the 'RegexFeaturizer' as a # feature: 'pattern_name' is the name of the pattern the user