Commit
update readme and fix some bug
lwaekfjlk committed Oct 11, 2023
1 parent e74716d commit e328128
Showing 2 changed files with 6 additions and 4 deletions.
4 changes: 3 additions & 1 deletion llm_deploy/README.md
```diff
@@ -1,3 +1,5 @@
+We need to use an unmerged branch to support deploying the LoRA-finetuned model (the forked repo is https://github.com/troph-team/vllm.git).
 
-Go to the vllm dir and pip install -e .
+Go to the vllm dir and pip install -e .
 
+Note https://github.com/vllm-project/vllm/issues/1283: if you hit a CUDA version error, pin the PyTorch version to == 2.0.1 in the config file.
```
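The README steps above can be sketched as a shell session. This is a minimal sketch, assuming the forked repo URL from the README; the torch pin follows vllm issue #1283, and the exact place to pin it (requirements file vs. direct install) may differ per setup:

```shell
# Clone the forked vllm branch that supports deploying LoRA-finetuned models
git clone https://github.com/troph-team/vllm.git
cd vllm

# If you hit a CUDA version error (vllm issue #1283), pin PyTorch first
pip install "torch==2.0.1"

# Editable install from the vllm dir
pip install -e .
```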
6 changes: 3 additions & 3 deletions llm_deploy/vllm_test.py
```diff
@@ -1,11 +1,11 @@
 from vllm import LLM, SamplingParams
 from vllm.model_executor.adapters import lora
 
-# Create an LLM.
-llm = LLM(model="../llm_ft/vicuna-7b-1.5", gpu_memory_utilization=0.05)
+# Create an LLM; adjust gpu_memory_utilization based on our needs.
+llm = LLM(model="../llm_ft/vicuna-7b-1.5", gpu_memory_utilization=0.5)
 
 # Add LoRA adapter
-lora.LoRAModel.from_pretrained(llm.llm_engine.workers[0].model, "../llm_ft/checkpoints/checkpoint-1200")
+lora.LoRAModel.from_pretrained(llm.llm_engine.workers[0].model, "../llm_ft/vicuna_checkpoints/checkpoint-1200")
 
 prompts = [
     "Hello, my name is",
```
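The bump from 0.05 to 0.5 matters because `gpu_memory_utilization` caps the fraction of GPU memory vLLM pre-allocates for model weights plus KV cache. A minimal sketch of the budget arithmetic (the 24 GiB card size and 7B fp16 footprint are illustrative assumptions, not values from this commit):

```python
def vllm_memory_budget_gib(total_gib: float, utilization: float) -> float:
    """Approximate GiB vLLM may reserve for weights + KV cache."""
    if not 0.0 < utilization <= 1.0:
        raise ValueError("utilization must be in (0, 1]")
    return total_gib * utilization

# On a hypothetical 24 GiB card, 0.05 budgets only ~1.2 GiB, far below
# the ~13 GiB an fp16 7B model needs, so loading fails; 0.5 budgets 12 GiB.
low = vllm_memory_budget_gib(24, 0.05)
high = vllm_memory_budget_gib(24, 0.5)
print(low, high)
```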
