From 7619a16c163abd1a9fb52cfad097815d3d8b7364 Mon Sep 17 00:00:00 2001
From: wangzaistone
Date: Mon, 30 Oct 2023 17:06:24 +0800
Subject: [PATCH 1/5] support DB-GPT-Hub sft codellama

---
 .env.template                 |  3 ++-
 pilot/configs/model_config.py |  2 ++
 pilot/model/adapter.py        | 13 +++++++++++++
 pilot/model/conversation.py   | 22 ++++++++++++++++++++++
 pilot/model/model_adapter.py  |  1 +
 pilot/server/chat_adapter.py  | 10 ++++++++++
 6 files changed, 50 insertions(+), 1 deletion(-)

diff --git a/.env.template b/.env.template
index e03650033..272ee2922 100644
--- a/.env.template
+++ b/.env.template
@@ -22,7 +22,8 @@ WEB_SERVER_PORT=7860
 #** LLM MODELS **#
 #*******************************************************************#
 # LLM_MODEL, see /pilot/configs/model_config.LLM_MODEL_CONFIG
-LLM_MODEL=vicuna-13b-v1.5
+# LLM_MODEL=vicuna-13b-v1.5
+LLM_MODEL=codellama-13b-sql-sft
 ## LLM model path, by default, DB-GPT will read the model path from LLM_MODEL_CONFIG based on the LLM_MODEL.
 ## Of course you can specify your model path according to LLM_MODEL_PATH
 ## In DB-GPT, the priority from high to low to read model path:

diff --git a/pilot/configs/model_config.py b/pilot/configs/model_config.py
index e1575ea03..16deee50a 100644
--- a/pilot/configs/model_config.py
+++ b/pilot/configs/model_config.py
@@ -78,6 +78,8 @@ def get_device() -> str:
     "internlm-7b": os.path.join(MODEL_PATH, "internlm-chat-7b"),
     "internlm-7b-8k": os.path.join(MODEL_PATH, "internlm-chat-7b-8k"),
     "internlm-20b": os.path.join(MODEL_PATH, "internlm-chat-20b"),
+    "codellama-13b-sql-sft": os.path.join(MODEL_PATH, "codellama-13b-sql-sft"),
+
     # For test now
     "opt-125m": os.path.join(MODEL_PATH, "opt-125m"),
 }

diff --git a/pilot/model/adapter.py b/pilot/model/adapter.py
index 69b159a13..02fbe8aa9 100644
--- a/pilot/model/adapter.py
+++ b/pilot/model/adapter.py
@@ -319,6 +319,18 @@ def loader(self, model_path: str, from_pretrained_kwargs: dict):
         model.config.pad_token_id = tokenizer.pad_token_id
         return model, tokenizer
 
+class CodeLlamaAdapter(BaseLLMAdaper):
+    """The model adapter for codellama """
+
+    def match(self, model_path: str):
+        return "codelama" in model_path.lower()
+
+    def loader(self, model_path: str, from_pretrained_kwargs: dict):
+        model, tokenizer = super().loader(model_path, from_pretrained_kwargs)
+        model.config.eos_token_id = tokenizer.eos_token_id
+        model.config.pad_token_id = tokenizer.pad_token_id
+        return model, tokenizer
+
 class BaichuanAdapter(BaseLLMAdaper):
     """The model adapter for Baichuan models (e.g., baichuan-inc/Baichuan-13B-Chat)"""
@@ -420,6 +432,7 @@ def loader(self, model_path: str, from_pretrained_kwargs: dict):
 register_llm_model_adapters(GorillaAdapter)
 register_llm_model_adapters(GPT4AllAdapter)
 register_llm_model_adapters(Llama2Adapter)
+register_llm_model_adapters(CodeLlamaAdapter)
 register_llm_model_adapters(BaichuanAdapter)
 register_llm_model_adapters(WizardLMAdapter)
 register_llm_model_adapters(LlamaCppAdapater)

diff --git a/pilot/model/conversation.py b/pilot/model/conversation.py
index b3674e946..98dfc720d 100644
--- a/pilot/model/conversation.py
+++ b/pilot/model/conversation.py
@@ -339,6 +339,28 @@ def get_conv_template(name: str) -> Conversation:
     )
 )
 
+
+
+# codellama template
+# reference: https://github.com/facebookresearch/llama/blob/cfc3fc8c1968d390eb830e65c63865e980873a06/llama/generation.py#L212
+# reference2 : https://github.com/eosphoros-ai/DB-GPT-Hub/blob/main/README.zh.md
+register_conv_template(
+    Conversation(
+        name="codellama",
+        system="[INST] <<SYS>>\nI want you to act as a SQL terminal in front of an example database, you need only to return the sql command to me.Below is an instruction that describes a task, Write a response that appropriately completes the request."
+        "If you don't know the answer to the request, please don't share false information.\n<</SYS>>\n\n",
+        roles=("[INST]", "[/INST]"),
+        messages=(),
+        offset=0,
+        sep_style=SeparatorStyle.LLAMA2,
+        sep=" ",
+        sep2=" </s><s>",
+        stop_token_ids=[2],
+        system_formatter=lambda msg: f"[INST] <<SYS>>\n{msg}\n<</SYS>>\n\n",
+    )
+)
+
+
 # Alpaca default template
 register_conv_template(
     Conversation(

diff --git a/pilot/model/model_adapter.py b/pilot/model/model_adapter.py
index 1580e8863..112fb468a 100644
--- a/pilot/model/model_adapter.py
+++ b/pilot/model/model_adapter.py
@@ -45,6 +45,7 @@
     "llama-cpp",
     "proxyllm",
     "gptj-6b",
+    "codellama-13b-sql-sft"
 ]

diff --git a/pilot/server/chat_adapter.py b/pilot/server/chat_adapter.py
index cb486021b..509305247 100644
--- a/pilot/server/chat_adapter.py
+++ b/pilot/server/chat_adapter.py
@@ -213,6 +213,15 @@ def match(self, model_path: str):
 
     def get_conv_template(self, model_path: str) -> Conversation:
         return get_conv_template("llama-2")
+
+
+class CodeLlamaChatAdapter(BaseChatAdpter):
+    """The model ChatAdapter for codellama ."""
+    def match(self, model_path: str):
+        return "codelama" in model_path.lower()
+
+    def get_conv_template(self, model_path: str) -> Conversation:
+        return get_conv_template("codellama")
 
 
 class BaichuanChatAdapter(BaseChatAdpter):
@@ -268,6 +277,7 @@ def get_conv_template(self, model_path: str) -> Conversation:
 register_llm_model_chat_adapter(GorillaChatAdapter)
 register_llm_model_chat_adapter(GPT4AllChatAdapter)
 register_llm_model_chat_adapter(Llama2ChatAdapter)
+register_llm_model_chat_adapter(CodeLlamaChatAdapter)
 register_llm_model_chat_adapter(BaichuanChatAdapter)
 register_llm_model_chat_adapter(WizardLMChatAdapter)
 register_llm_model_chat_adapter(LlamaCppChatAdapter)
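For reference, here is what the "codellama" template registered above is expected to render. This is a minimal sketch, assuming the LLAMA2 separator semantics referenced in the template's comments; render_prompt is an illustrative helper, not a DB-GPT API.

    # Minimal sketch: how a single-turn prompt should look once the
    # "codellama" template is applied (llama-2 style formatting, where the
    # system block is folded into the first [INST] turn).
    system = (
        "[INST] <<SYS>>\n"
        "I want you to act as a SQL terminal in front of an example database, "
        "you need only to return the sql command to me.\n"
        "<</SYS>>\n\n"
    )

    def render_prompt(user_msg: str) -> str:
        # The system block already opens the [INST] turn; the user message
        # follows it, and the turn is closed with [/INST].
        return f"{system}{user_msg} [/INST]"

    print(render_prompt("List all employees hired after 2020."))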
From 48539d7206a754644306cd1541337ad37b2d1c4c Mon Sep 17 00:00:00 2001
From: wangzaistone
Date: Tue, 31 Oct 2023 17:10:12 +0800
Subject: [PATCH 2/5] codellama bug fix

---
 pilot/server/chat_adapter.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pilot/server/chat_adapter.py b/pilot/server/chat_adapter.py
index 509305247..4b6dd0eed 100644
--- a/pilot/server/chat_adapter.py
+++ b/pilot/server/chat_adapter.py
@@ -218,7 +218,7 @@ def get_conv_template(self, model_path: str) -> Conversation:
 class CodeLlamaChatAdapter(BaseChatAdpter):
     """The model ChatAdapter for codellama ."""
     def match(self, model_path: str):
-        return "codelama" in model_path.lower()
+        return "codellama" in model_path.lower()
 
     def get_conv_template(self, model_path: str) -> Conversation:
         return get_conv_template("codellama")
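The one-character fix above matters because adapter selection is substring-based: "codelama" (single l) is never a contiguous substring of a real CodeLlama model path, so the chat adapter could never match. A quick standalone check (the model path below is hypothetical):

    # "codelama" is not a substring of "codellama", so the old predicate
    # was always False for real model paths and the adapter never fired.
    model_path = "/data/models/codellama-13b-sql-sft"
    assert "codelama" not in model_path.lower()   # old match(): never True
    assert "codellama" in model_path.lower()      # fixed match(): True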
"CodeLlama-7b-Instruct-hf"), + "codellama-7b-sql-sft": os.path.join(MODEL_PATH, "codellama-7b-sql-sft"), + "codellama-13b": os.path.join(MODEL_PATH, "CodeLlama-13b-Instruct-hf"), "codellama-13b-sql-sft": os.path.join(MODEL_PATH, "codellama-13b-sql-sft"), + + + + # For test now "opt-125m": os.path.join(MODEL_PATH, "opt-125m"), } diff --git a/pilot/model/adapter.py b/pilot/model/adapter.py index 02fbe8aa9..cb9885d2a 100644 --- a/pilot/model/adapter.py +++ b/pilot/model/adapter.py @@ -323,7 +323,7 @@ class CodeLlamaAdapter(BaseLLMAdaper): """The model adapter for codellama """ def match(self, model_path: str): - return "codelama" in model_path.lower() + return "codellama" in model_path.lower() def loader(self, model_path: str, from_pretrained_kwargs: dict): model, tokenizer = super().loader(model_path, from_pretrained_kwargs) diff --git a/pilot/model/model_adapter.py b/pilot/model/model_adapter.py index 112fb468a..cadb1cebd 100644 --- a/pilot/model/model_adapter.py +++ b/pilot/model/model_adapter.py @@ -45,7 +45,10 @@ "llama-cpp", "proxyllm", "gptj-6b", - "codellama-13b-sql-sft" + "codellama-13b-sql-sft", + "codellama-7b", + "codellama-7b-sql-sft", + "codellama-13b" ] From 17e21a395bcff6228ad3dacc0c251af0ec38d4b8 Mon Sep 17 00:00:00 2001 From: wangzaistone Date: Tue, 31 Oct 2023 17:26:59 +0800 Subject: [PATCH 4/5] keep as origin default param --- .env.template | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.env.template b/.env.template index 272ee2922..e03650033 100644 --- a/.env.template +++ b/.env.template @@ -22,8 +22,7 @@ WEB_SERVER_PORT=7860 #** LLM MODELS **# #*******************************************************************# # LLM_MODEL, see /pilot/configs/model_config.LLM_MODEL_CONFIG -# LLM_MODEL=vicuna-13b-v1.5 -LLM_MODEL=codellama-13b-sql-sft +LLM_MODEL=vicuna-13b-v1.5 ## LLM model path, by default, DB-GPT will read the model path from LLM_MODEL_CONFIG based on the LLM_MODEL. 
From 3233e260b20e16fa39424dfe8a606b3df4d92b1a Mon Sep 17 00:00:00 2001
From: wangzaistone
Date: Tue, 31 Oct 2023 17:39:14 +0800
Subject: [PATCH 5/5] add conv judge

---
 pilot/configs/model_config.py | 5 -----
 pilot/model/adapter.py        | 3 ++-
 pilot/model/conversation.py   | 1 -
 pilot/model/model_adapter.py  | 8 ++++++--
 pilot/server/chat_adapter.py  | 3 ++-
 5 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/pilot/configs/model_config.py b/pilot/configs/model_config.py
index 803d0fae9..0e1fb3d40 100644
--- a/pilot/configs/model_config.py
+++ b/pilot/configs/model_config.py
@@ -82,11 +82,6 @@ def get_device() -> str:
     "codellama-7b-sql-sft": os.path.join(MODEL_PATH, "codellama-7b-sql-sft"),
     "codellama-13b": os.path.join(MODEL_PATH, "CodeLlama-13b-Instruct-hf"),
     "codellama-13b-sql-sft": os.path.join(MODEL_PATH, "codellama-13b-sql-sft"),
-
-
-
-
-
     # For test now
     "opt-125m": os.path.join(MODEL_PATH, "opt-125m"),
 }

diff --git a/pilot/model/adapter.py b/pilot/model/adapter.py
index cb9885d2a..5ce5b2173 100644
--- a/pilot/model/adapter.py
+++ b/pilot/model/adapter.py
@@ -319,8 +319,9 @@ def loader(self, model_path: str, from_pretrained_kwargs: dict):
         model.config.pad_token_id = tokenizer.pad_token_id
         return model, tokenizer
 
+
 class CodeLlamaAdapter(BaseLLMAdaper):
-    """The model adapter for codellama """
+    """The model adapter for codellama"""
 
     def match(self, model_path: str):
         return "codellama" in model_path.lower()

diff --git a/pilot/model/conversation.py b/pilot/model/conversation.py
index 98dfc720d..5d4309d9f 100644
--- a/pilot/model/conversation.py
+++ b/pilot/model/conversation.py
@@ -360,7 +360,6 @@ def get_conv_template(name: str) -> Conversation:
     )
 )
 
-
 # Alpaca default template
 register_conv_template(
     Conversation(

diff --git a/pilot/model/model_adapter.py b/pilot/model/model_adapter.py
index cadb1cebd..e09b868e7 100644
--- a/pilot/model/model_adapter.py
+++ b/pilot/model/model_adapter.py
@@ -48,7 +48,7 @@
     "codellama-13b-sql-sft",
     "codellama-7b",
     "codellama-7b-sql-sft",
-    "codellama-13b"
+    "codellama-13b",
 ]
@@ -152,8 +152,12 @@ def model_adaptation(
             conv.append_message(conv.roles[1], content)
         else:
             raise ValueError(f"Unknown role: {role}")
+
     if system_messages:
-        conv.set_system_message("".join(system_messages))
+        if isinstance(conv, Conversation):
+            conv.set_system_message("".join(system_messages))
+        else:
+            conv.update_system_message("".join(system_messages))
 
     # Add a blank message for the assistant.
     conv.append_message(conv.roles[1], None)

diff --git a/pilot/server/chat_adapter.py b/pilot/server/chat_adapter.py
index 4b6dd0eed..64b72739b 100644
--- a/pilot/server/chat_adapter.py
+++ b/pilot/server/chat_adapter.py
@@ -213,10 +213,11 @@ def match(self, model_path: str):
 
     def get_conv_template(self, model_path: str) -> Conversation:
         return get_conv_template("llama-2")
-
+
 
 class CodeLlamaChatAdapter(BaseChatAdpter):
     """The model ChatAdapter for codellama ."""
+
     def match(self, model_path: str):
         return "codellama" in model_path.lower()
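The "conv judge" in PATCH 5/5 dispatches on the conversation type because the two conversation implementations in play expose different setters: one provides set_system_message, the other update_system_message. A condensed sketch of that dispatch, written with a duck-typed hasattr check instead of the patch's isinstance test so it stays self-contained (which class owns which method is as implied by the patch, not verified here):

    def apply_system_messages(conv, system_messages):
        # Mirrors the branch added to model_adaptation(): join the collected
        # system messages, then call whichever setter the template exposes.
        joined = "".join(system_messages)
        if hasattr(conv, "set_system_message"):
            conv.set_system_message(joined)
        else:
            # Fallback for template objects that only expose
            # update_system_message.
            conv.update_system_message(joined)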