[Inference] integrate deepseek-coder-33b-instruct. (#190)

* Support new model: deepseek-coder-33b-instruct. * Update Action config for deepseek-coder-33b-instruct. * Small fix. * Use lowercase letters for the device config value.
intel · Apr 18, 2024 · f536304 · f536304
1 parent 37257e5
commit f536304
Show file tree

Hide file tree

Showing 2 changed files with 26 additions and 1 deletion.
diff --git a/.github/workflows/workflow_inference.yml b/.github/workflows/workflow_inference.yml
@@ -34,7 +34,7 @@ jobs:
     name: inference
     strategy:
       matrix:
-        model: [ gpt-j-6b, gpt2, bloom-560m, opt-125m, mpt-7b, mistral-7b-v0.1, mpt-7b-ipex-llm, neural-chat-7b-v3-1, CodeLlama-7b-hf, falcon-7b, starcoder, llama-2-7b-chat-hf, llama-2-7b-chat-hf-vllm, gemma-2b]
+        model: [ gpt-j-6b, gpt2, bloom-560m, opt-125m, mpt-7b, mistral-7b-v0.1, mpt-7b-ipex-llm, neural-chat-7b-v3-1, CodeLlama-7b-hf, falcon-7b, starcoder, llama-2-7b-chat-hf, llama-2-7b-chat-hf-vllm, gemma-2b, deepseek-coder-33b-instruct]
         isPR:
           - ${{inputs.ci_type == 'pr'}}
 

diff --git a/llm_on_ray/inference/models/deepseek-coder-33b-instruct.yaml b/llm_on_ray/inference/models/deepseek-coder-33b-instruct.yaml
@@ -0,0 +1,25 @@
+# Inference config for deepseek-ai/deepseek-coder-33b-instruct.
+# CPU-only: gpus_per_worker is 0, and DeepSpeed and IPEX are both disabled.
+# NOTE(review): chat_processor is ChatModelGptJ and stop_words has '<human>';
+# confirm both suit this model's chat template.
+port: 8000
+name: deepseek-coder-33b-instruct
+route_prefix: /deepseek-coder-33b-instruct
+num_replicas: 1
+cpus_per_worker: 24
+gpus_per_worker: 0
+deepspeed: false
+workers_per_group: 2
+device: cpu
+ipex:
+  enabled: false
+  precision: bf16
+model_description:
+  model_id_or_path: deepseek-ai/deepseek-coder-33b-instruct
+  tokenizer_name_or_path: deepseek-ai/deepseek-coder-33b-instruct
+  chat_processor: ChatModelGptJ
+  prompt:
+    intro: ''
+    human_id: ''
+    bot_id: ''
+    stop_words: ['<|EOT|>', '<human>']