feat: improve local model support and doc #96
chenweize1998 committed Nov 7, 2023
1 parent 42e5799 commit 38cc573
Showing 6 changed files with 66 additions and 22 deletions.
35 changes: 33 additions & 2 deletions README.md
@@ -205,10 +205,14 @@ https://github.com/OpenBMB/AgentVerse/assets/11704492/4d07da68-f942-4205-b558-f1
- [Simulation](#simulation)
- [Framework Required Modules](#framework-required-modules)
- [CLI Example](#cli-example)
- [GUI Example (Local)](#gui-example-local)
- [GUI Example](#gui-example)
- [Task-Solving](#task-solving)
- [Framework Required Modules](#framework-required-modules-1)
- [CLI Example](#cli-example-1)
- [Local Model Support](#local-model-support)
- [1. Install the Additional Dependencies](#1-install-the-additional-dependencies)
- [2. Launch the Local Server](#2-launch-the-local-server)
- [3. Modify the Config File](#3-modify-the-config-file)
- [AgentVerse Showcases](#agentverse-showcases)
- [Simulation Showcases](#simulation-showcases)
- [Task-Solving Showcases](#task-solving-showcases)
@@ -282,7 +286,7 @@ You can create multi-agent environments provided by us. Using the classroom sc
agentverse-simulation --task simulation/nlp_classroom_9players
```

### GUI Example (Local)
### GUI Example

We also provide a local website demo for this environment. You can launch it with

@@ -338,6 +342,33 @@ We have provided more tasks in `agentverse/tasks/tasksolving/tool_using/` that s

Also, you can take a look at `agentverse/tasks/tasksolving` for more experiments we have done in our paper.

## Local Model Support
### 1. Install the Additional Dependencies
If you want to use local models such as LLaMA, you need to install some additional dependencies:
```bash
pip install -r requirements_local.txt
```

### 2. Launch the Local Server
Then modify `MODEL_PATH` and `MODEL_NAME` in the script as needed, and launch the local server with the following command:
```bash
bash scripts/run_local_model_server.sh
```
By default, the script launches a service for the Llama-2 7B chat model.
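
Once the server is up, you can sanity-check it with a plain HTTP request. This is a sketch that assumes the script also starts FastChat's OpenAI-compatible API server (`fastchat.serve.openai_api_server`) on its default port 8000; adjust the host and port if your setup differs:

```bash
# Hypothetical smoke test against FastChat's OpenAI-compatible API.
# Assumes the API server listens on localhost:8000 (FastChat's default).
curl http://localhost:8000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "model": "llama-2-7b-chat-hf",
    "messages": [{"role": "user", "content": "Say hello in one sentence."}]
  }'
```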
The `MODEL_NAME` in AgentVerse currently supports several models, including `llama-2-7b-chat-hf`, `llama-2-13b-chat-hf`, `llama-2-70b-chat-hf`, `vicuna-7b-v1.5`, and `vicuna-13b-v1.5`. If you wish to integrate additional models that are [compatible with FastChat](https://github.com/lm-sys/FastChat/blob/main/docs/model_support.md), you need to make two changes in `agentverse/llms/__init__.py` (sketched below):
1. Add the new `MODEL_NAME` to `LOCAL_LLMS`.
2. Add a mapping from the new `MODEL_NAME` to its corresponding Hugging Face identifier to `LOCAL_LLMS_MAPPING`.
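
For example, registering a hypothetical `vicuna-33b-v1.3` (the model name and identifier below are illustrative, not part of this commit) would look like:

```python
# agentverse/llms/__init__.py (sketch)
LOCAL_LLMS = [
    "llama-2-7b-chat-hf",
    "llama-2-13b-chat-hf",
    "llama-2-70b-chat-hf",
    "vicuna-7b-v1.5",
    "vicuna-13b-v1.5",
    "vicuna-33b-v1.3",  # step 1: add the new MODEL_NAME
]
LOCAL_LLMS_MAPPING = {
    "llama-2-7b-chat-hf": "meta-llama/Llama-2-7b-chat-hf",
    "llama-2-13b-chat-hf": "meta-llama/Llama-2-13b-chat-hf",
    "llama-2-70b-chat-hf": "meta-llama/Llama-2-70b-chat-hf",
    "vicuna-7b-v1.5": "lmsys/vicuna-7b-v1.5",
    "vicuna-13b-v1.5": "lmsys/vicuna-13b-v1.5",
    "vicuna-33b-v1.3": "lmsys/vicuna-33b-v1.3",  # step 2: map it to its Hugging Face ID
}
```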

### 3. Modify the Config File
In your config file, set `llm_type` to `local` and `model` to the `MODEL_NAME`. For example:
```yaml
llm:
  llm_type: local
  model: llama-2-7b-chat-hf
  ...
```

You can refer to `agentverse/tasks/tasksolving/commongen/llama-2-7b-chat-hf/config.yaml` for a more detailed example.
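
For reference, a fuller `llm` block, mirroring the fields this commit sets in the commongen task config, looks like the following (the comments are editorial, not part of the config):

```yaml
llm:
  llm_type: local            # route requests to the local FastChat server
  model: llama-2-7b-chat-hf  # must be listed in LOCAL_LLMS
  temperature: 0             # deterministic decoding
  max_tokens: 1024           # generation length cap
```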

# AgentVerse Showcases

15 changes: 14 additions & 1 deletion agentverse/llms/__init__.py
@@ -1,7 +1,20 @@
 from agentverse.registry import Registry
 
 llm_registry = Registry(name="LLMRegistry")
-LOCAL_LLMS = ["llama-2-7b-chat-hf"]
+LOCAL_LLMS = [
+    "llama-2-7b-chat-hf",
+    "llama-2-13b-chat-hf",
+    "llama-2-70b-chat-hf",
+    "vicuna-7b-v1.5",
+    "vicuna-13b-v1.5",
+]
+LOCAL_LLMS_MAPPING = {
+    "llama-2-7b-chat-hf": "meta-llama/Llama-2-7b-chat-hf",
+    "llama-2-13b-chat-hf": "meta-llama/Llama-2-13b-chat-hf",
+    "llama-2-70b-chat-hf": "meta-llama/Llama-2-70b-chat-hf",
+    "vicuna-7b-v1.5": "lmsys/vicuna-7b-v1.5",
+    "vicuna-13b-v1.5": "lmsys/vicuna-13b-v1.5",
+}
 
 from .base import BaseLLM, BaseChatModel, BaseCompletionModel, LLMResult
 from .openai import OpenAIChat
2 changes: 1 addition & 1 deletion agentverse/llms/openai.py
@@ -97,7 +97,7 @@ class OpenAIChatArgs(BaseModelArgs):
@llm_registry.register("gpt-35-turbo")
@llm_registry.register("gpt-3.5-turbo")
@llm_registry.register("gpt-4")
@llm_registry.register("llama-2-7b-chat-hf")
@llm_registry.register("local")
class OpenAIChat(BaseChatModel):
args: OpenAIChatArgs = Field(default_factory=OpenAIChatArgs)

7 changes: 4 additions & 3 deletions agentverse/llms/utils/token_counter.py
@@ -4,7 +4,7 @@
 from typing import List, Union, Dict
 from agentverse.logging import logger
 from agentverse.message import Message
-from agentverse.llms import LOCAL_LLMS
+from agentverse.llms import LOCAL_LLMS, LOCAL_LLMS_MAPPING
 
 
 def count_string_tokens(prompt: str = "", model: str = "gpt-3.5-turbo") -> int:
@@ -27,9 +27,10 @@ def count_message_tokens(
         tokens_per_message = 3
         tokens_per_name = 1
         encoding_model = "gpt-4"
-    elif model in LOCAL_LLMS:
+    elif model.lower() in LOCAL_LLMS or model in LOCAL_LLMS:
         from transformers import AutoTokenizer
-        encoding = AutoTokenizer.from_pretrained(model)
+
+        encoding = AutoTokenizer.from_pretrained(LOCAL_LLMS_MAPPING[model.lower()])
     else:
         raise NotImplementedError(
             f"count_message_tokens() is not implemented for model {model}.\n"
20 changes: 10 additions & 10 deletions agentverse/tasks/tasksolving/commongen/llama-2-7b-chat-hf/config.yaml
@@ -96,8 +96,8 @@ agents:
     memory:
       memory_type: chat_history
     llm:
-      llm_type: llama-2-7b-chat-hf
-      model: "llama-2-7b-chat-hf"
+      llm_type: local
+      model: llama-2-7b-chat-hf
       temperature: 0
       max_tokens: 512
     output_parser:
@@ -113,8 +113,8 @@ agents:
     memory:
       memory_type: chat_history
     llm:
-      llm_type: llama-2-7b-chat-hf
-      model: "llama-2-7b-chat-hf"
+      llm_type: local
+      model: llama-2-7b-chat-hf
       temperature: 0
       max_tokens: 1024
     output_parser:
@@ -138,8 +138,8 @@ agents:
     memory:
       memory_type: chat_history
     llm:
-      llm_type: llama-2-7b-chat-hf
-      model: "llama-2-7b-chat-hf"
+      llm_type: local
+      model: llama-2-7b-chat-hf
       temperature: 0
       max_tokens: 1024
     output_parser:
@@ -154,7 +154,7 @@ agents:
     memory:
       memory_type: chat_history
     llm:
-      llm_type: llama-2-7b-chat-hf
+      llm_type: local
       model: llama-2-7b-chat-hf
       temperature: 0
       max_tokens: 1024
@@ -172,7 +172,7 @@ agents:
     memory:
       memory_type: chat_history
     llm:
-      llm_type: llama-2-7b-chat-hf
+      llm_type: local
       model: llama-2-7b-chat-hf
       temperature: 0.3
       max_tokens: 1024
@@ -189,8 +189,8 @@ agents:
     memory:
       memory_type: chat_history
     llm:
-      llm_type: llama-2-7b-chat-hf
-      model: "llama-2-7b-chat-hf"
+      llm_type: local
+      model: llama-2-7b-chat-hf
       temperature: 0
       max_tokens: 1024
     output_parser:
9 changes: 4 additions & 5 deletions scripts/run_local_model_server.sh
@@ -1,9 +1,8 @@
-:<<COMMENT
-See https://github.com/lm-sys/FastChat/blob/main/docs/openai_api.md for more usages.
-COMMENT
+# See https://github.com/lm-sys/FastChat/blob/main/docs/openai_api.md for more usages.
 
 # export CUDA_VISIBLE_DEVICES=0
-MODEL_PATH="path_to_the_downloaded_model_dir"
-MODEL_NAME="name_of_the_model"
+MODEL_PATH="meta-llama/Llama-2-7b-chat-hf" # path_to_the_downloaded_model_dir
+MODEL_NAME="llama-2-7b-chat-hf" # name_of_the_model
 python3 -m fastchat.serve.controller & \
 python3 -m fastchat.serve.multi_model_worker \
     --model-path ${MODEL_PATH} \
