-
Notifications
You must be signed in to change notification settings - Fork 47
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
add: example for LLM joint inference
Signed-off-by: Yu Fan <fany@buaa.edu.cn>
- Loading branch information
1 parent
4ac560b
commit 6824017
Showing
13 changed files
with
560 additions
and
0 deletions.
There are no files selected for viewing
54 changes: 54 additions & 0 deletions
54
examples/cloud-edge-collaborative-inference-for-llm/README.md
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
# README | ||
|
||
## Simple QA | ||
|
||
### Prepare Data | ||
|
||
The data for the simple-qa example is organized as follows: | ||
|
||
``` | ||
. | ||
├── test_data | ||
│ └── data.jsonl | ||
└── train_data | ||
└── data.jsonl | ||
``` | ||
|
||
`train_data/data.jsonl` is empty, and the `test_data/data.jsonl` is as follows: | ||
|
||
``` | ||
{"question": "如果小明有5个苹果,他给了小华3个,那么小明还剩下多少个苹果?\nA. 2个\nB. 3个\nC. 4个\nD. 5个", "answer": "A"} | ||
{"question": "下列哪个数是最小的质数?\nA. 0\nB. 1\nC. 2\nD. 4", "answer": "C"} | ||
{"question": "一个长方形的长是10厘米,宽是5厘米,它的周长是多少厘米?\nA. 20厘米\nB. 30厘米\nC. 40厘米\nD. 50厘米", "answer": "B"} | ||
{"question": "下列哪个分数是最接近1的?\nA. 1/2\nB. 3/4\nC. 4/5\nD. 5/6", "answer": "D"} | ||
{"question": "如果一个数加上10等于30,那么这个数是多少?\nA. 20\nB. 21\nC. 22\nD. 23", "answer": "A"} | ||
{"question": "下列哪个算式的结果最大?\nA. 3 + 4\nB. 5 - 2\nC. 6 * 2\nD. 7 ÷ 2", "answer": "C"} | ||
{"question": "一个班级有24个学生,如果每个学生都带了2本书,那么总共有多少本书?\nA. 48本\nB. 36本\nC. 24本\nD. 12本", "answer": "A"} | ||
{"question": "下列哪个是正确的乘法口诀?\nA. 三三得七\nB. 四四十六\nC. 五五二十五\nD. 六六三十六", "answer": "B"} | ||
{"question": "如果一个数是另一个数的3倍,并且这个数是15,那么另一个数是多少?\nA. 5\nB. 10\nC. 15\nD. 45", "answer": "A"} | ||
{"question": "下列哪个图形的周长最长?\nA. 正方形\nB. 长方形\nC. 圆形\nD. 三角形", "answer": "C"} | ||
``` | ||
|
||
### Prepare Environment | ||
|
||
You need to install the modified Sedna package, which adds `JsonlDataParse` to `sedna.datasources`. | ||
|
||
Replace the file in `yourpath/anaconda3/envs/ianvs/lib/python3.x/site-packages/sedna` with `examples/resources/sedna-with-jsonl.zip` | ||
|
||
|
||
### Run Ianvs | ||
|
||
Run the following command: | ||
|
||
`ianvs -f examples/llm/singletask_learning_bench/simple_qa/benchmarkingjob.yaml` | ||
|
||
## OpenCompass Evaluation | ||
|
||
### Prepare Environment | ||
|
||
`pip install examples/resources/opencompass-0.2.5-py3-none-any.whl` | ||
|
||
### Run Evaluation | ||
|
||
`python run_op.py examples/llm/singletask_learning_bench/simple_qa/testalgorithms/gen/op_eval.py` | ||
|
73 changes: 73 additions & 0 deletions
73
examples/cloud-edge-collaborative-inference-for-llm/benchmarkingjob.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
benchmarkingjob: | ||
  # job name of benchmarking; string type; | ||
name: "benchmarkingjob" | ||
# the url address of job workspace that will reserve the output of tests; string type; | ||
  # "~/" is not recognized, so this must be a relative path or an absolute path | ||
workspace: "./workspace" | ||
|
||
# the url address of test environment configuration file; string type; | ||
# the file format supports yaml/yml; | ||
testenv: "./examples/cloud-edge-collaborative-inference-for-llm/testenv/testenv.yaml" | ||
|
||
# the configuration of test object | ||
test_object: | ||
# test type; string type; | ||
    # currently the only supported value is "algorithms"; other values will be added later. | ||
type: "algorithms" | ||
# test algorithm configuration files; list type; | ||
algorithms: | ||
# algorithm name; string type; | ||
- name: "query-routing" | ||
# the url address of test algorithm configuration file; string type; | ||
# the file format supports yaml/yml; | ||
url: "./examples/cloud-edge-collaborative-inference-for-llm/testalgorithms/query-routing/test_queryrouting.yaml" | ||
|
||
# the configuration of ranking leaderboard | ||
rank: | ||
# rank leaderboard with metric of test case's evaluation and order ; list type; | ||
# the sorting priority is based on the sequence of metrics in the list from front to back; | ||
sort_by: [ { "acc": "descend" } ] | ||
|
||
# visualization configuration | ||
visualization: | ||
# mode of visualization in the leaderboard; string type; | ||
# There are quite a few possible dataitems in the leaderboard. Not all of them can be shown simultaneously on the screen. | ||
# In the leaderboard, we provide the "selected_only" mode for the user to configure what is shown or is not shown. | ||
mode: "selected_only" | ||
# method of visualization for selected dataitems; string type; | ||
# currently the options of value are as follows: | ||
# 1> "print_table": print selected dataitems; | ||
method: "print_table" | ||
|
||
# selected dataitem configuration | ||
# The user can add his/her interested dataitems in terms of "paradigms", "modules", "hyperparameters" and "metrics", | ||
# so that the selected columns will be shown. | ||
selected_dataitem: | ||
# currently the options of value are as follows: | ||
# 1> "all": select all paradigms in the leaderboard; | ||
# 2> paradigms in the leaderboard, e.g., "singletasklearning" | ||
paradigms: [ "all" ] | ||
# currently the options of value are as follows: | ||
# 1> "all": select all modules in the leaderboard; | ||
# 2> modules in the leaderboard, e.g., "basemodel" | ||
modules: [ "all" ] | ||
# currently the options of value are as follows: | ||
# 1> "all": select all hyperparameters in the leaderboard; | ||
# 2> hyperparameters in the leaderboard, e.g., "momentum" | ||
hyperparameters: [ "all" ] | ||
# currently the options of value are as follows: | ||
# 1> "all": select all metrics in the leaderboard; | ||
# 2> metrics in the leaderboard, e.g., "f1_score" | ||
metrics: [ "acc" , "latency", "throughput", "bandwith"] | ||
|
||
    # mode for saving selected and all dataitems in the workspace; string type; | ||
# currently the options of value are as follows: | ||
# 1> "selected_and_all": save selected and all dataitems; | ||
# 2> "selected_only": save selected dataitems; | ||
save_mode: "selected_and_all" | ||
|
||
|
||
|
||
|
||
|
||
|
51 changes: 51 additions & 0 deletions
51
...es/cloud-edge-collaborative-inference-for-llm/testalgorithms/query-routing/cloud_model.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
# Copyright 2022 The KubeEdge Authors. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
from __future__ import absolute_import, division, print_function | ||
|
||
import os | ||
import tempfile | ||
import time | ||
import zipfile | ||
import logging | ||
|
||
import numpy as np | ||
from sedna.common.config import Context | ||
from sedna.common.class_factory import ClassType, ClassFactory | ||
|
||
from models import HuggingfaceLLM, VllmLLM, APIBasedLLM | ||
|
||
from transformers import AutoModelForCausalLM, AutoTokenizer | ||
device = "cuda" # the device to load the model onto | ||
|
||
|
||
logging.disable(logging.WARNING) | ||
|
||
__all__ = ["BaseModel"] | ||
|
||
@ClassFactory.register(ClassType.GENERAL, alias="CloudModel")
class BaseModel:
    """Cloud-side model that answers queries through an API-based LLM.

    The class is registered with the Sedna ``ClassFactory`` under the alias
    ``CloudModel``.
    """

    def __init__(self, **kwargs):
        """Build the API client used for inference.

        Parameters
        ----------
        **kwargs
            ``model_name`` : str, optional
                Name of the remote model; defaults to ``"gpt-4o-mini"``.
            ``config`` : optional
                Forwarded unchanged to ``APIBasedLLM``; defaults to ``None``.
        """
        # The API KEY and API URL are confidential data and should not be
        # written in yaml.
        self.client = APIBasedLLM(
            model_name=kwargs.get("model_name", "gpt-4o-mini"),
            config=kwargs.get("config", None),
        )

    def inference(self, data, input_shape=None, **kwargs):
        """Run the cloud LLM on every item of ``data``.

        Parameters
        ----------
        data : iterable
            Items to send to the LLM, one call per item.
        input_shape : optional
            Accepted for interface compatibility; not used.
        **kwargs
            Accepted for interface compatibility; not used.

        Returns
        -------
        list
            One response per item of ``data``, in the same order.
        """
        # Use ``self.client``: it is the only attribute ``__init__`` assigns.
        # The previous ``self.model`` lookup raised AttributeError on every
        # call.
        # NOTE(review): ``APIBasedLLM.inference`` looks like it iterates over
        # its argument itself -- confirm each ``line`` is the batch it expects.
        return [self.client.inference(line) for line in data]
74 changes: 74 additions & 0 deletions
74
...les/cloud-edge-collaborative-inference-for-llm/testalgorithms/query-routing/edge_model.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
# Copyright 2022 The KubeEdge Authors. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
from __future__ import absolute_import, division, print_function | ||
|
||
import os | ||
import tempfile | ||
import time | ||
import zipfile | ||
import logging | ||
|
||
import numpy as np | ||
from sedna.common.config import Context | ||
from sedna.common.class_factory import ClassType, ClassFactory | ||
|
||
from models import HuggingfaceLLM, VllmLLM, APIBasedLLM | ||
|
||
from transformers import AutoModelForCausalLM, AutoTokenizer | ||
device = "cuda" # the device to load the model onto | ||
|
||
|
||
logging.disable(logging.WARNING) | ||
|
||
__all__ = ["BaseModel"] | ||
|
||
@ClassFactory.register(ClassType.GENERAL, alias="EdgeModel")
class BaseModel:
    """Edge-side model served by a local LLM backend.

    The class is registered with the Sedna ``ClassFactory`` under the alias
    ``EdgeModel``.
    """

    def __init__(self, **kwargs):
        """Store the configuration and export it for Sedna.

        Parameters
        ----------
        **kwargs
            ``model_name`` : str, optional
                Model identifier/URL; defaults to ``None``.
            ``backend`` : str, optional
                Serving framework, ``"huggingface"`` (default) or ``"vllm"``.
            ``quantization`` : str, optional
                Quantization mode: ``"full"`` (default), ``"4-bit"`` or
                ``"8-bit"``.
        """
        self.kwargs = kwargs
        self.model_url = kwargs.get("model_name", None)
        self.backend = kwargs.get("backend", "huggingface")
        self.quantization = kwargs.get("quantization", "full")
        self._set_config()

    def _set_config(self):
        """Export the model URL through the ``MODEL_URL`` environment variable."""
        # Some parameters are passed to Sedna through environment variables.
        # EdgeModel URL, see at https://github.com/kubeedge/sedna/blob/ac623ab32dc37caa04b9e8480dbe1a8c41c4a6c2/lib/sedna/core/base.py#L132
        # ``os.environ`` only accepts strings; skip the export when no model
        # name was configured instead of failing with a TypeError.
        if self.model_url is not None:
            os.environ["MODEL_URL"] = self.model_url

    def load(self, model_url=None):
        """Instantiate the backend selected by ``self.backend`` and load it.

        Parameters
        ----------
        model_url : str, optional
            Location of the model. When omitted, the ``model_name`` given at
            construction time is used.

        Raises
        ------
        ValueError
            If ``self.backend`` is neither ``"huggingface"`` nor ``"vllm"``.
        """
        if model_url is None:
            model_url = self.model_url

        if self.backend == "huggingface":
            self.model = HuggingfaceLLM(model_url, self.quantization)
        elif self.backend == "vllm":
            self.model = VllmLLM(model_url, self.quantization)
        else:
            # ValueError is a subclass of Exception, so existing handlers
            # that catch Exception keep working.
            raise ValueError(f"Backend {self.backend} is not supported")

        self.model.load(model_url=model_url)

    # TODO cloud service must be configured in JointInference

    def predict(self, data, input_shape=None, **kwargs):
        """Run the loaded edge model on every item of ``data``.

        Parameters
        ----------
        data : iterable
            Items to pass to the model, one call per item.
        input_shape : optional
            Accepted for interface compatibility; not used.
        **kwargs
            Accepted for interface compatibility; not used.

        Returns
        -------
        list
            One response per item of ``data``, in the same order.
        """
        return [self.model.inference(line) for line in data]
64 changes: 64 additions & 0 deletions
64
...d-edge-collaborative-inference-for-llm/testalgorithms/query-routing/hard_sample_mining.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
# Copyright 2021 The KubeEdge Authors. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
"""Hard Example Mining Algorithms""" | ||
|
||
import abc | ||
import math | ||
import random | ||
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline | ||
from sedna.common.class_factory import ClassFactory, ClassType | ||
|
||
# Export only names that this module actually defines; the previous tuple
# listed filters that do not exist here, which breaks ``from ... import *``.
__all__ = ('BERTFilter',)
|
||
class BaseFilter(metaclass=abc.ABCMeta):
    """Common interface shared by hard-example filters."""

    def __call__(self, infer_result=None):
        """Decide whether a sample is hard.

        Subclasses must override this method; the base implementation
        always raises.

        Parameters
        ----------
        infer_result : array_like
            Prediction result to judge.

        Returns
        -------
        is_hard_sample : bool
            `True` for a hard sample, `False` otherwise.

        Raises
        ------
        NotImplementedError
            Always, in this base class.
        """
        raise NotImplementedError

    @classmethod
    def data_check(cls, data):
        """Return whether ``data``, converted to float, lies within [0, 1]."""
        value = float(data)
        return value >= 0 and value <= 1
|
||
|
||
@ClassFactory.register(ClassType.HEM, alias="BERT")
class BERTFilter(BaseFilter, abc.ABC):
    """Filter backed by a Hugging Face ``text-classification`` pipeline.

    The class is registered with the Sedna ``ClassFactory`` under the alias
    ``BERT``.
    """

    def __init__(self, model_path, **kwargs):
        """Build the classification pipeline.

        Parameters
        ----------
        model_path : str
            Model identifier or path handed to ``transformers.pipeline``.
        **kwargs
            Accepted for interface compatibility; not used.
        """
        # NOTE(security): ``trust_remote_code=True`` allows the model
        # repository to run its own Python code at load time. Only point
        # ``model_path`` at sources you trust.
        self.classifier = pipeline(
            "text-classification",
            model=model_path,
            trust_remote_code=True
        )

    def _predict(self, data):
        """Return the classifier pipeline's output for ``data``."""
        return self.classifier(data)

    def __call__(self, data=None):
        """Classify ``data`` and return the raw pipeline output.

        The previous ``-> bool`` annotation was incorrect: the value returned
        is whatever the pipeline produces, not a boolean.
        NOTE(review): for text-classification this is presumably a list of
        ``{"label", "score"}`` dicts -- confirm, and map it to a hard/easy
        decision in the caller.
        """
        return self._predict(data)
3 changes: 3 additions & 0 deletions
3
...loud-edge-collaborative-inference-for-llm/testalgorithms/query-routing/models/__init__.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
from .api_llm import APIBasedLLM | ||
from .huggingface_llm import HuggingfaceLLM | ||
from .vllm_llm import VllmLLM |
38 changes: 38 additions & 0 deletions
38
...cloud-edge-collaborative-inference-for-llm/testalgorithms/query-routing/models/api_llm.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
import os | ||
from openai import OpenAI | ||
|
||
from base_llm import BaseLLM | ||
from sedna.core.joint_inference.joint_inference import BigModelService | ||
|
||
class APIBasedLLM(BaseLLM):
    """LLM served through an OpenAI-compatible chat-completions API."""

    def __init__(self, model_name, **kwargs) -> None:
        """Create the API client.

        Credentials are read from the ``OPENAI_API_KEY`` and
        ``OPENAI_BASE_URL`` environment variables.

        Parameters
        ----------
        model_name : str
            Name of the remote model to query.
        **kwargs
            Accepted for interface compatibility; not used.
        """
        # NOTE(review): the base-class initializer is not invoked here (same
        # as before this change) -- confirm whether it should be.

        # No trailing comma here: ``x = value,`` builds a 1-tuple, which
        # previously handed a tuple to the client instead of the key string.
        api_key = os.environ.get("OPENAI_API_KEY")
        base_url = os.environ.get("OPENAI_BASE_URL")

        self.model = model_name
        self.client = OpenAI(
            api_key=api_key,
            base_url=base_url
        )

    def _infer(self, prompt, system=None):
        """Send one prompt to the chat API and return the reply text.

        Parameters
        ----------
        prompt : str
            Content of the user message.
        system : str, optional
            Content of a system message placed before the user message;
            omitted when falsy.

        Returns
        -------
        The ``message.content`` of the first choice in the completion.
        """
        messages = []
        if system:
            messages.append({"role": "system", "content": system})
        messages.append({"role": "user", "content": prompt})

        # The completion object is kept on the instance, as before.
        self.chat_completion = self.client.chat.completions.create(
            messages=messages,
            model=self.model,
        )

        return self.chat_completion.choices[0].message.content
|
17 changes: 17 additions & 0 deletions
17
...loud-edge-collaborative-inference-for-llm/testalgorithms/query-routing/models/base_llm.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
class BaseLLM:
    """Minimal base class for LLM wrappers.

    Subclasses implement ``load`` and ``_infer``; ``inference`` applies
    ``_infer`` to every item of a batch.
    """

    def __init__(self, **kwargs) -> None:
        """Store common options.

        Parameters
        ----------
        **kwargs
            ``quantization`` : str, optional
                Quantization mode; defaults to ``"full"``.
        """
        # The initializer used to call ``BaseLLM.__init__`` on itself, which
        # recursed until RecursionError. There is no parent initializer that
        # needs arguments, so the call is simply dropped.
        self.quantization = kwargs.get("quantization", "full")

    def load(self):
        """Load the model; must be implemented by subclasses."""
        raise NotImplementedError

    def inference(self, datas):
        """Apply ``_infer`` to each item of ``datas``.

        Parameters
        ----------
        datas : iterable
            Items to run through the model.

        Returns
        -------
        list
            One response per item, in input order.
        """
        return [self._infer(line) for line in datas]

    def _infer(self, data):
        """Answer a single item; must be implemented by subclasses."""
        raise NotImplementedError
Oops, something went wrong.