From 480181769c2f30a0ec3c89aa6baa890e949a67c3 Mon Sep 17 00:00:00 2001 From: Alonso Guevara Date: Wed, 28 Aug 2024 17:33:05 -0600 Subject: [PATCH] Fix/entity extraction strategy (#1046) * fix strategy config in entity_extraction * update init content --------- Co-authored-by: KylinMountain --- .semversioner/next-release/patch-20240712071506108985.json | 4 ++++ graphrag/config/create_graphrag_config.py | 1 + graphrag/index/init_content.py | 2 ++ 3 files changed, 7 insertions(+) create mode 100644 .semversioner/next-release/patch-20240712071506108985.json diff --git a/.semversioner/next-release/patch-20240712071506108985.json b/.semversioner/next-release/patch-20240712071506108985.json new file mode 100644 index 0000000000..ac0891c7e9 --- /dev/null +++ b/.semversioner/next-release/patch-20240712071506108985.json @@ -0,0 +1,4 @@ +{ + "type": "patch", + "description": "fix strategy config in entity_extraction" +} diff --git a/graphrag/config/create_graphrag_config.py b/graphrag/config/create_graphrag_config.py index 3504507be2..a883658cd0 100644 --- a/graphrag/config/create_graphrag_config.py +++ b/graphrag/config/create_graphrag_config.py @@ -429,6 +429,7 @@ def hydrate_parallelization_params( or defs.ENTITY_EXTRACTION_ENTITY_TYPES, max_gleanings=max_gleanings, prompt=reader.str("prompt", Fragment.prompt_file), + strategy=entity_extraction_config.get("strategy"), encoding_model=reader.str(Fragment.encoding_model), ) diff --git a/graphrag/index/init_content.py b/graphrag/index/init_content.py index fe8dd7cc40..c63a6578fa 100644 --- a/graphrag/index/init_content.py +++ b/graphrag/index/init_content.py @@ -89,6 +89,8 @@ # container_name: entity_extraction: + ## strategy: fully override the entity extraction strategy. + ## type: one of graph_intelligence, graph_intelligence_json and nltk ## llm: override the global llm settings for this task ## parallelization: override the global parallelization settings for this task ## async_mode: override the global async_mode settings for this task