From 7619a16c163abd1a9fb52cfad097815d3d8b7364 Mon Sep 17 00:00:00 2001
From: wangzaistone
Date: Mon, 30 Oct 2023 17:06:24 +0800
Subject: [PATCH 1/5] support DB-GPT-Hub sft codellama

---
 .env.template                 |  3 ++-
 pilot/configs/model_config.py |  2 ++
 pilot/model/adapter.py        | 13 +++++++++++++
 pilot/model/conversation.py   | 22 ++++++++++++++++++++++
 pilot/model/model_adapter.py  |  1 +
 pilot/server/chat_adapter.py  | 10 ++++++++++
 6 files changed, 50 insertions(+), 1 deletion(-)

diff --git a/.env.template b/.env.template
index e03650033..272ee2922 100644
--- a/.env.template
+++ b/.env.template
@@ -22,7 +22,8 @@ WEB_SERVER_PORT=7860
 #** LLM MODELS **#
 #*******************************************************************#
 # LLM_MODEL, see /pilot/configs/model_config.LLM_MODEL_CONFIG
-LLM_MODEL=vicuna-13b-v1.5
+# LLM_MODEL=vicuna-13b-v1.5
+LLM_MODEL=codellama-13b-sql-sft
 ## LLM model path, by default, DB-GPT will read the model path from LLM_MODEL_CONFIG based on the LLM_MODEL.
 ## Of course you can specify your model path according to LLM_MODEL_PATH
 ## In DB-GPT, the priority from high to low to read model path:

diff --git a/pilot/configs/model_config.py b/pilot/configs/model_config.py
index e1575ea03..16deee50a 100644
--- a/pilot/configs/model_config.py
+++ b/pilot/configs/model_config.py
@@ -78,6 +78,8 @@ def get_device() -> str:
     "internlm-7b": os.path.join(MODEL_PATH, "internlm-chat-7b"),
     "internlm-7b-8k": os.path.join(MODEL_PATH, "internlm-chat-7b-8k"),
     "internlm-20b": os.path.join(MODEL_PATH, "internlm-chat-20b"),
+    "codellama-13b-sql-sft": os.path.join(MODEL_PATH, "codellama-13b-sql-sft"),
+
     # For test now
     "opt-125m": os.path.join(MODEL_PATH, "opt-125m"),
 }

diff --git a/pilot/model/adapter.py b/pilot/model/adapter.py
index 69b159a13..02fbe8aa9 100644
--- a/pilot/model/adapter.py
+++ b/pilot/model/adapter.py
@@ -319,6 +319,18 @@ def loader(self, model_path: str, from_pretrained_kwargs: dict):
         model.config.pad_token_id = tokenizer.pad_token_id
         return model, tokenizer
 
+class CodeLlamaAdapter(BaseLLMAdaper):
+    """The model adapter for codellama """
+
+    def match(self, model_path: str):
+        return "codelama" in model_path.lower()
+
+    def loader(self, model_path: str, from_pretrained_kwargs: dict):
+        model, tokenizer = super().loader(model_path, from_pretrained_kwargs)
+        model.config.eos_token_id = tokenizer.eos_token_id
+        model.config.pad_token_id = tokenizer.pad_token_id
+        return model, tokenizer
+
 class BaichuanAdapter(BaseLLMAdaper):
     """The model adapter for Baichuan models (e.g., baichuan-inc/Baichuan-13B-Chat)"""
@@ -420,6 +432,7 @@ def loader(self, model_path: str, from_pretrained_kwargs: dict):
 register_llm_model_adapters(GorillaAdapter)
 register_llm_model_adapters(GPT4AllAdapter)
 register_llm_model_adapters(Llama2Adapter)
+register_llm_model_adapters(CodeLlamaAdapter)
 register_llm_model_adapters(BaichuanAdapter)
 register_llm_model_adapters(WizardLMAdapter)
 register_llm_model_adapters(LlamaCppAdapater)

diff --git a/pilot/model/conversation.py b/pilot/model/conversation.py
index b3674e946..98dfc720d 100644
--- a/pilot/model/conversation.py
+++ b/pilot/model/conversation.py
@@ -339,6 +339,28 @@ def get_conv_template(name: str) -> Conversation:
     )
 )
 
+
+
+# codellama template
+# reference: https://github.com/facebookresearch/llama/blob/cfc3fc8c1968d390eb830e65c63865e980873a06/llama/generation.py#L212
+# reference2 : https://github.com/eosphoros-ai/DB-GPT-Hub/blob/main/README.zh.md
+register_conv_template(
+    Conversation(
+        name="codellama",
+        system="[INST] <<SYS>>\nI want you to act as a SQL terminal in front of an example database, you need only to return the sql command to me.Below is an instruction that describes a task, Write a response that appropriately completes the request."
+        "If you don't know the answer to the request, please don't share false information.\n<</SYS>>\n\n",
+        roles=("[INST]", "[/INST]"),
+        messages=(),
+        offset=0,
+        sep_style=SeparatorStyle.LLAMA2,
+        sep=" ",
+        sep2=" </s><s>",
+        stop_token_ids=[2],
+        system_formatter=lambda msg: f"[INST] <<SYS>>\n{msg}\n<</SYS>>\n\n",
+    )
+)
+
+
 # Alpaca default template
 register_conv_template(
     Conversation(

diff --git a/pilot/model/model_adapter.py b/pilot/model/model_adapter.py
index 1580e8863..112fb468a 100644
--- a/pilot/model/model_adapter.py
+++ b/pilot/model/model_adapter.py
@@ -45,6 +45,7 @@
     "llama-cpp",
     "proxyllm",
     "gptj-6b",
+    "codellama-13b-sql-sft"
 ]

diff --git a/pilot/server/chat_adapter.py b/pilot/server/chat_adapter.py
index cb486021b..509305247 100644
--- a/pilot/server/chat_adapter.py
+++ b/pilot/server/chat_adapter.py
@@ -213,6 +213,15 @@ def match(self, model_path: str):
 
     def get_conv_template(self, model_path: str) -> Conversation:
         return get_conv_template("llama-2")
+
+
+class CodeLlamaChatAdapter(BaseChatAdpter):
+    """The model ChatAdapter for codellama ."""
+    def match(self, model_path: str):
+        return "codelama" in model_path.lower()
+
+    def get_conv_template(self, model_path: str) -> Conversation:
+        return get_conv_template("codellama")
 
 
 class BaichuanChatAdapter(BaseChatAdpter):
@@ -268,6 +277,7 @@ def get_conv_template(self, model_path: str) -> Conversation:
 register_llm_model_chat_adapter(GorillaChatAdapter)
 register_llm_model_chat_adapter(GPT4AllChatAdapter)
 register_llm_model_chat_adapter(Llama2ChatAdapter)
+register_llm_model_chat_adapter(CodeLlamaChatAdapter)
 register_llm_model_chat_adapter(BaichuanChatAdapter)
 register_llm_model_chat_adapter(WizardLMChatAdapter)
 register_llm_model_chat_adapter(LlamaCppChatAdapter)
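For reference, here is what the "codellama" template registered above is expected to render. This is a minimal sketch, assuming the LLAMA2 separator semantics referenced in the template's comments; render_prompt is an illustrative helper, not a DB-GPT API.

    # Minimal sketch: how a single-turn prompt should look once the
    # "codellama" template is applied (llama-2 style formatting, where the
    # system block is folded into the first [INST] turn).
    system = (
        "[INST] <<SYS>>\n"
        "I want you to act as a SQL terminal in front of an example database, "
        "you need only to return the sql command to me.\n"
        "<</SYS>>\n\n"
    )

    def render_prompt(user_msg: str) -> str:
        # The system block already opens the [INST] turn; the user message
        # follows it, and the turn is closed with [/INST].
        return f"{system}{user_msg} [/INST]"

    print(render_prompt("List all employees hired after 2020."))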
From 48539d7206a754644306cd1541337ad37b2d1c4c Mon Sep 17 00:00:00 2001
From: wangzaistone
Date: Tue, 31 Oct 2023 17:10:12 +0800
Subject: [PATCH 2/5] codellama bug fix

---
 pilot/server/chat_adapter.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pilot/server/chat_adapter.py b/pilot/server/chat_adapter.py
index 509305247..4b6dd0eed 100644
--- a/pilot/server/chat_adapter.py
+++ b/pilot/server/chat_adapter.py
@@ -218,7 +218,7 @@ def get_conv_template(self, model_path: str) -> Conversation:
 class CodeLlamaChatAdapter(BaseChatAdpter):
     """The model ChatAdapter for codellama ."""
     def match(self, model_path: str):
-        return "codelama" in model_path.lower()
+        return "codellama" in model_path.lower()
 
     def get_conv_template(self, model_path: str) -> Conversation:
         return get_conv_template("codellama")
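The one-character fix above matters because adapter selection is substring-based: "codelama" (single l) is never a contiguous substring of a real CodeLlama model path, so the chat adapter could never match. A quick standalone check (the model path below is hypothetical):

    # "codelama" is not a substring of "codellama", so the old predicate
    # was always False for real model paths and the adapter never fired.
    model_path = "/data/models/codellama-13b-sql-sft"
    assert "codelama" not in model_path.lower()   # old match(): never True
    assert "codellama" in model_path.lower()      # fixed match(): True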
"CodeLlama-7b-Instruct-hf"), + "codellama-7b-sql-sft": os.path.join(MODEL_PATH, "codellama-7b-sql-sft"), + "codellama-13b": os.path.join(MODEL_PATH, "CodeLlama-13b-Instruct-hf"), "codellama-13b-sql-sft": os.path.join(MODEL_PATH, "codellama-13b-sql-sft"), + + + + # For test now "opt-125m": os.path.join(MODEL_PATH, "opt-125m"), } diff --git a/pilot/model/adapter.py b/pilot/model/adapter.py index 02fbe8aa9..cb9885d2a 100644 --- a/pilot/model/adapter.py +++ b/pilot/model/adapter.py @@ -323,7 +323,7 @@ class CodeLlamaAdapter(BaseLLMAdaper): """The model adapter for codellama """ def match(self, model_path: str): - return "codelama" in model_path.lower() + return "codellama" in model_path.lower() def loader(self, model_path: str, from_pretrained_kwargs: dict): model, tokenizer = super().loader(model_path, from_pretrained_kwargs) diff --git a/pilot/model/model_adapter.py b/pilot/model/model_adapter.py index 112fb468a..cadb1cebd 100644 --- a/pilot/model/model_adapter.py +++ b/pilot/model/model_adapter.py @@ -45,7 +45,10 @@ "llama-cpp", "proxyllm", "gptj-6b", - "codellama-13b-sql-sft" + "codellama-13b-sql-sft", + "codellama-7b", + "codellama-7b-sql-sft", + "codellama-13b" ] From 17e21a395bcff6228ad3dacc0c251af0ec38d4b8 Mon Sep 17 00:00:00 2001 From: wangzaistone Date: Tue, 31 Oct 2023 17:26:59 +0800 Subject: [PATCH 4/5] keep as origin default param --- .env.template | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.env.template b/.env.template index 272ee2922..e03650033 100644 --- a/.env.template +++ b/.env.template @@ -22,8 +22,7 @@ WEB_SERVER_PORT=7860 #** LLM MODELS **# #*******************************************************************# # LLM_MODEL, see /pilot/configs/model_config.LLM_MODEL_CONFIG -# LLM_MODEL=vicuna-13b-v1.5 -LLM_MODEL=codellama-13b-sql-sft +LLM_MODEL=vicuna-13b-v1.5 ## LLM model path, by default, DB-GPT will read the model path from LLM_MODEL_CONFIG based on the LLM_MODEL. 
From 3233e260b20e16fa39424dfe8a606b3df4d92b1a Mon Sep 17 00:00:00 2001
From: wangzaistone
Date: Tue, 31 Oct 2023 17:39:14 +0800
Subject: [PATCH 5/5] add conv judge

---
 pilot/configs/model_config.py | 5 -----
 pilot/model/adapter.py        | 3 ++-
 pilot/model/conversation.py   | 1 -
 pilot/model/model_adapter.py  | 8 ++++++--
 pilot/server/chat_adapter.py  | 3 ++-
 5 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/pilot/configs/model_config.py b/pilot/configs/model_config.py
index 803d0fae9..0e1fb3d40 100644
--- a/pilot/configs/model_config.py
+++ b/pilot/configs/model_config.py
@@ -82,11 +82,6 @@ def get_device() -> str:
     "codellama-7b-sql-sft": os.path.join(MODEL_PATH, "codellama-7b-sql-sft"),
     "codellama-13b": os.path.join(MODEL_PATH, "CodeLlama-13b-Instruct-hf"),
     "codellama-13b-sql-sft": os.path.join(MODEL_PATH, "codellama-13b-sql-sft"),
-
-
-
-
-
     # For test now
     "opt-125m": os.path.join(MODEL_PATH, "opt-125m"),
 }

diff --git a/pilot/model/adapter.py b/pilot/model/adapter.py
index cb9885d2a..5ce5b2173 100644
--- a/pilot/model/adapter.py
+++ b/pilot/model/adapter.py
@@ -319,8 +319,9 @@ def loader(self, model_path: str, from_pretrained_kwargs: dict):
         model.config.pad_token_id = tokenizer.pad_token_id
         return model, tokenizer
 
+
 class CodeLlamaAdapter(BaseLLMAdaper):
-    """The model adapter for codellama """
+    """The model adapter for codellama"""
 
     def match(self, model_path: str):
         return "codellama" in model_path.lower()

diff --git a/pilot/model/conversation.py b/pilot/model/conversation.py
index 98dfc720d..5d4309d9f 100644
--- a/pilot/model/conversation.py
+++ b/pilot/model/conversation.py
@@ -360,7 +360,6 @@ def get_conv_template(name: str) -> Conversation:
     )
 )
 
-
 # Alpaca default template
 register_conv_template(
     Conversation(

diff --git a/pilot/model/model_adapter.py b/pilot/model/model_adapter.py
index cadb1cebd..e09b868e7 100644
--- a/pilot/model/model_adapter.py
+++ b/pilot/model/model_adapter.py
@@ -48,7 +48,7 @@
     "codellama-13b-sql-sft",
     "codellama-7b",
     "codellama-7b-sql-sft",
-    "codellama-13b"
+    "codellama-13b",
 ]
@@ -152,8 +152,12 @@ def model_adaptation(
             conv.append_message(conv.roles[1], content)
         else:
             raise ValueError(f"Unknown role: {role}")
+
     if system_messages:
-        conv.set_system_message("".join(system_messages))
+        if isinstance(conv, Conversation):
+            conv.set_system_message("".join(system_messages))
+        else:
+            conv.update_system_message("".join(system_messages))
 
     # Add a blank message for the assistant.
     conv.append_message(conv.roles[1], None)

diff --git a/pilot/server/chat_adapter.py b/pilot/server/chat_adapter.py
index 4b6dd0eed..64b72739b 100644
--- a/pilot/server/chat_adapter.py
+++ b/pilot/server/chat_adapter.py
@@ -213,10 +213,11 @@ def match(self, model_path: str):
 
     def get_conv_template(self, model_path: str) -> Conversation:
         return get_conv_template("llama-2")
-
+
 
 class CodeLlamaChatAdapter(BaseChatAdpter):
     """The model ChatAdapter for codellama ."""
+
     def match(self, model_path: str):
         return "codellama" in model_path.lower()
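The "conv judge" in PATCH 5/5 dispatches on the conversation type because the two conversation implementations in play expose different setters: one provides set_system_message, the other update_system_message. A condensed sketch of that dispatch, written with a duck-typed hasattr check instead of the patch's isinstance test so it stays self-contained (which class owns which method is as implied by the patch, not verified here):

    def apply_system_messages(conv, system_messages):
        # Mirrors the branch added to model_adaptation(): join the collected
        # system messages, then call whichever setter the template exposes.
        joined = "".join(system_messages)
        if hasattr(conv, "set_system_message"):
            conv.set_system_message(joined)
        else:
            # Fallback for template objects that only expose
            # update_system_message.
            conv.update_system_message(joined)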