From b53fb6ba686abfee5ed4b1d742c62acf03ae9838 Mon Sep 17 00:00:00 2001
From: yexiaochuan
Date: Thu, 19 Sep 2024 17:58:42 +0800
Subject: [PATCH] Revert "llm suite benchmark implement"

This reverts commit f341b0f2806acf787c65d62d33317f8a998b028d.
---
 .../README.md                                 |   2 -
 .../benchmarkingjob.yaml                      |  31 ----
 .../testalgorithms/algorithm.yaml             |  33 -----
 .../testalgorithms/basemodel.py               | 134 ------------------
 .../testalgorithms/download.py                |  28 ----
 .../download_model_modelscope.py              |  22 ---
 .../testalgorithms/llama_cpp_inference.py     |  26 ----
 .../testenv/latency.py                        |  36 -----
 .../testenv/testenv.yaml                      |  15 --
 .../testenv/throughput.py                     |  26 ----
 10 files changed, 353 deletions(-)
 delete mode 100644 examples/llm-benchmark-suite/single_task_bench_with_compression/README.md
 delete mode 100644 examples/llm-benchmark-suite/single_task_bench_with_compression/benchmarkingjob.yaml
 delete mode 100644 examples/llm-benchmark-suite/single_task_bench_with_compression/testalgorithms/algorithm.yaml
 delete mode 100644 examples/llm-benchmark-suite/single_task_bench_with_compression/testalgorithms/basemodel.py
 delete mode 100644 examples/llm-benchmark-suite/single_task_bench_with_compression/testalgorithms/download.py
 delete mode 100644 examples/llm-benchmark-suite/single_task_bench_with_compression/testalgorithms/download_model_modelscope.py
 delete mode 100644 examples/llm-benchmark-suite/single_task_bench_with_compression/testalgorithms/llama_cpp_inference.py
 delete mode 100644 examples/llm-benchmark-suite/single_task_bench_with_compression/testenv/latency.py
 delete mode 100644 examples/llm-benchmark-suite/single_task_bench_with_compression/testenv/testenv.yaml
 delete mode 100644 examples/llm-benchmark-suite/single_task_bench_with_compression/testenv/throughput.py

diff --git a/examples/llm-benchmark-suite/single_task_bench_with_compression/README.md b/examples/llm-benchmark-suite/single_task_bench_with_compression/README.md
deleted file mode 100644
index 3a3835c7..00000000
--- a/examples/llm-benchmark-suite/single_task_bench_with_compression/README.md
+++ /dev/null
@@ -1,2 +0,0 @@
-Large Language Model Edge Benchmark Suite: Implementation on KubeEdge-Ianvs
-
diff --git a/examples/llm-benchmark-suite/single_task_bench_with_compression/benchmarkingjob.yaml b/examples/llm-benchmark-suite/single_task_bench_with_compression/benchmarkingjob.yaml
deleted file mode 100644
index 1fe3937a..00000000
--- a/examples/llm-benchmark-suite/single_task_bench_with_compression/benchmarkingjob.yaml
+++ /dev/null
@@ -1,31 +0,0 @@
-benchmarkingjob:
-  # job name of benchmarking; string type;
-  name: "benchmarkingjob"
-  # the path of the job workspace that will store the outputs of tests; string type;
-  workspace: "./workspace"
-
-  testenv: "./examples/llm-benchmark-suite/single_task_bench_with_compression/testenv/testenv.yaml"
-
-  test_object:
-    type: "algorithms"
-    algorithms:
-      - name: "llama_cpp_inference"
-        url: "./examples/llm-benchmark-suite/single_task_bench_with_compression/testalgorithms/algorithm.yaml"
-
-  rank:
-    sort_by:
-      - { "latency": "ascend" }
-      - { "throughput": "descend" }
-      - { "perplexity": "ascend" }
-
-  visualization:
-    mode: "selected_only"
-    method: "print_table"
-
-  selected_dataitem:
-    paradigms: [ "all" ]
-    modules: [ "all" ]
-    hyperparameters: [ "all" ]
-    metrics: [ "latency", "throughput", "perplexity" ]
-
-  save_mode: "selected_and_all"
\ No newline at end of file
diff --git a/examples/llm-benchmark-suite/single_task_bench_with_compression/testalgorithms/algorithm.yaml b/examples/llm-benchmark-suite/single_task_bench_with_compression/testalgorithms/algorithm.yaml
deleted file mode 100644
index f6a07aad..00000000
--- a/examples/llm-benchmark-suite/single_task_bench_with_compression/testalgorithms/algorithm.yaml
+++ /dev/null
@@ -1,33 +0,0 @@
-algorithm:
-  paradigm_type: "singletasklearning"
-
-  initial_model_url: "models/qwen/qwen_1_5_0_5b.gguf"
-
-  modules:
-    - type: "basemodel"
-      name: "LlamaCppModel"
-      url: "./examples/llm-benchmark-suite/single_task_bench_with_compression/testalgorithms/basemodel.py"
-      hyperparameters:
-        - model_path:
-            values:
-              - "models/qwen/qwen_1_5_0_5b.gguf"
-        - n_ctx:
-            values:
-              - 2048
-
-    - type: "inference_integrate"
-      name: "LlamaCppInference"
-      url: "./examples/llm-benchmark-suite/single_task_bench_with_compression/testalgorithms/llama_cpp_inference.py"
-      hyperparameters:
-        - batch_size:
-            values:
-              - 1
-        - max_tokens:
-            values:
-              - 32
-        - stop:
-            values:
-              - ["Q:", "\n"]
-        - echo:
-            values:
-              - true
\ No newline at end of file
diff --git a/examples/llm-benchmark-suite/single_task_bench_with_compression/testalgorithms/basemodel.py b/examples/llm-benchmark-suite/single_task_bench_with_compression/testalgorithms/basemodel.py
deleted file mode 100644
index 2c37b37a..00000000
--- a/examples/llm-benchmark-suite/single_task_bench_with_compression/testalgorithms/basemodel.py
+++ /dev/null
@@ -1,134 +0,0 @@
-from sedna.common.class_factory import ClassFactory, ClassType
-from llama_cpp import Llama
-import os
-
-
-@ClassFactory.register(ClassType.GENERAL, alias="LlamaCppModel")
-class LlamaCppModel:
-    def __init__(self, **kwargs):
-        """
-        Initialize the LlamaCppModel.
-        """
-        model_path = kwargs.get("model_path")
-        if not model_path:
-            raise ValueError("Model path is required.")
-
-        # Initialize the Llama model
-        self.model = Llama(
-            model_path=model_path,
-            n_ctx=kwargs.get("n_ctx", 512),
-            n_gpu_layers=kwargs.get("n_gpu_layers", 0),
-            seed=kwargs.get("seed", -1),
-            f16_kv=kwargs.get("f16_kv", True),
-            logits_all=kwargs.get("logits_all", False),
-            vocab_only=kwargs.get("vocab_only", False),
-            use_mlock=kwargs.get("use_mlock", False),
-            embedding=kwargs.get("embedding", False),
-        )
-
-    def predict(self, data=None, input_shape=None, **kwargs):
-        """
-        Run inference with the model.
-
-        Args:
-            data (list or None): input data; ignored by this implementation
-            input_shape: unused
-            **kwargs: additional generation parameters
-
-        Returns:
-            dict: a dictionary containing the prediction results
-        """
-        # The data argument is deliberately ignored; the prompt is hard-coded here
-        prompt = (
-            "Q: Name the planets in the solar system? A: "
-        )
-
-        # Capture stdout, including the logs printed by llama-cpp-python
-        import io
-        from contextlib import redirect_stdout
-
-        f = io.StringIO()
-        with redirect_stdout(f):
-            # Run generation with the model
-            output = self.model(
-                prompt=prompt,
-                max_tokens=kwargs.get("max_tokens", 32),
-                stop=kwargs.get("stop", ["Q:", "\n"]),
-                echo=kwargs.get("echo", True),
-                temperature=kwargs.get("temperature", 0.8),
-                top_p=kwargs.get("top_p", 0.95),
-                top_k=kwargs.get("top_k", 40),
-                repeat_penalty=kwargs.get("repeat_penalty", 1.1),
-            )
-        # Read back the captured stdout
-        stdout_output = f.getvalue()
-
-        # Parse the timing information
-        timings = self._parse_timings(stdout_output)
-
-        # Extract the generated text
-        generated_text = output['choices'][0]['text']
-
-        predict_dict = {
-            "results": [generated_text],
-            "timings": [timings]
-        }
-        return predict_dict
-
-    def _parse_timings(self, stdout_output):
-        """
-        Parse the timing information printed by llama-cpp-python.
-
-        Args:
-            stdout_output (str): captured stdout content
-
-        Returns:
-            dict: the parsed timing information
-        """
-        import re
-        timings = {}
-        for line in stdout_output.split('\n'):
-            match = re.match(r'llama_print_timings:\s+(.*)\s+=\s+([\d\.]+)\s+ms', line)
-            if match:
-                key = match.group(1).strip()
-                value = float(match.group(2))
-                timings[key] = value
-        return timings
-
-    def evaluate(self, data, model_path=None, **kwargs):
-        """
-        Evaluate the model.
-        """
-        if data is None or data.x is None or data.y is None:
-            raise ValueError("Evaluation data is None.")
-
-        if model_path:
-            self.load(model_path)
-
-        # Run prediction
-        predict_dict = self.predict(data.x, **kwargs)
-
-        # Compute the metric with the provided evaluation function
-        metric_name = kwargs.get("metric_name", "accuracy")
-        metric_func = kwargs.get("metric_func")
-
-        if callable(metric_func):
-            metric_value = metric_func(data.y, predict_dict["results"])
-            return {metric_name: metric_value}
-        else:
-            raise ValueError("Metric function is not callable or not provided.")
-
-    def save(self, model_path):
-        # llama-cpp-python uses a pretrained model file, so there is nothing to save
-        pass
-
-    def load(self, model_url):
-        # The model is already loaded in __init__, so no extra work is needed here
-        pass
-
-    def train(self, train_data, valid_data=None, **kwargs):
-        """
-        LlamaCpp does not support training; skip the training step.
-        """
-        print("Training is not supported for this model. Skipping training step.")
-        return
\ No newline at end of file
diff --git a/examples/llm-benchmark-suite/single_task_bench_with_compression/testalgorithms/download.py b/examples/llm-benchmark-suite/single_task_bench_with_compression/testalgorithms/download.py
deleted file mode 100644
index 6df9a091..00000000
--- a/examples/llm-benchmark-suite/single_task_bench_with_compression/testalgorithms/download.py
+++ /dev/null
@@ -1,28 +0,0 @@
-import os
-import argparse
-from huggingface_hub import hf_hub_download
-
-def download_model(repo_id, filename, local_dir):
-    os.makedirs(local_dir, exist_ok=True)
-
-    try:
-        model_path = hf_hub_download(
-            repo_id=repo_id,
-            filename=filename,
-            cache_dir=local_dir
-        )
-        print(f"Model successfully downloaded to: {model_path}")
-        return model_path
-    except Exception as e:
-        print(f"Error downloading model: {str(e)}")
-        return None
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description="Download a model from Hugging Face Hub")
-    parser.add_argument("--repo_id", type=str, required=True, help="Hugging Face repo ID")
-    parser.add_argument("--filename", type=str, default="*q8_0.gguf", help="Filename or pattern to download")
-    parser.add_argument("--local_dir", type=str, required=True, help="Local directory to save the model")
-
-    args = parser.parse_args()
-
-    download_model(args.repo_id, args.filename, args.local_dir)
\ No newline at end of file
diff --git a/examples/llm-benchmark-suite/single_task_bench_with_compression/testalgorithms/download_model_modelscope.py b/examples/llm-benchmark-suite/single_task_bench_with_compression/testalgorithms/download_model_modelscope.py
deleted file mode 100644
index b8b43094..00000000
--- a/examples/llm-benchmark-suite/single_task_bench_with_compression/testalgorithms/download_model_modelscope.py
+++ /dev/null
@@ -1,22 +0,0 @@
-import os
-import argparse
-from modelscope import snapshot_download
-
-def download_model(model_id, revision, local_dir):
-    try:
-        model_dir = snapshot_download(model_id, revision=revision, cache_dir=local_dir)
-        print(f"Model successfully downloaded to: {model_dir}")
-        return model_dir
-    except Exception as e:
-        print(f"Error downloading model: {str(e)}")
-        return None
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description="Download a model from ModelScope")
-    parser.add_argument("--model_id", type=str, required=True, help="ModelScope model ID")
-    parser.add_argument("--revision", type=str, default="master", help="Model revision")
-    parser.add_argument("--local_dir", type=str, required=True, help="Local directory to save the model")
-
-    args = parser.parse_args()
-
-    download_model(args.model_id, args.revision, args.local_dir)
\ No newline at end of file
diff --git a/examples/llm-benchmark-suite/single_task_bench_with_compression/testalgorithms/llama_cpp_inference.py b/examples/llm-benchmark-suite/single_task_bench_with_compression/testalgorithms/llama_cpp_inference.py
deleted file mode 100644
index 5e70eabf..00000000
--- a/examples/llm-benchmark-suite/single_task_bench_with_compression/testalgorithms/llama_cpp_inference.py
+++ /dev/null
@@ -1,26 +0,0 @@
-from sedna.common.class_factory import ClassFactory, ClassType
-
-@ClassFactory.register(ClassType.STP, alias="LlamaCppInference")
-class LlamaCppInference:
-    def __init__(self, batch_size=1, max_tokens=32, stop=None, echo=False, **kwargs):
-        self.batch_size = batch_size
-        self.max_tokens = max_tokens
-        self.stop = stop
-        self.echo = echo
-
-    def __call__(self, model, data):
-        results = []
-        for i in range(0, len(data), self.batch_size):
-            batch = data[i:i+self.batch_size]
-            responses = model.predict(
-                batch,
-                max_tokens=self.max_tokens,
-                stop=self.stop,
-                echo=self.echo
-            )
-            results.extend(responses["results"])  # predict() returns a dict keyed by "results"
-
-        for r in results:
-            print(r, "\n", "-" * 80, "\n")
-
-        return results
\ No newline at end of file
diff --git a/examples/llm-benchmark-suite/single_task_bench_with_compression/testenv/latency.py b/examples/llm-benchmark-suite/single_task_bench_with_compression/testenv/latency.py
deleted file mode 100644
index a57644fa..00000000
--- a/examples/llm-benchmark-suite/single_task_bench_with_compression/testenv/latency.py
+++ /dev/null
@@ -1,36 +0,0 @@
-# Copyright 2023 The KubeEdge Authors.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from sedna.common.class_factory import ClassType, ClassFactory
-
-__all__ = ["latency"]
-
-@ClassFactory.register(ClassType.GENERAL, alias="latency")
-def latency(predicts, targets=None):
-    """
-    Compute the average inference time.
-    """
-    if isinstance(predicts, dict):
-        timings_list = predicts.get("timings", [])
-    else:
-        # If predicts is not a dict, there are no timings to read
-        timings_list = []
-    total_time = 0.0
-    count = 0
-    for timings in timings_list:
-        if 'total time' in timings:
-            total_time += timings['total time']
-            count += 1
-    average_latency = total_time / count if count > 0 else 0.0
-    return average_latency
\ No newline at end of file
diff --git a/examples/llm-benchmark-suite/single_task_bench_with_compression/testenv/testenv.yaml b/examples/llm-benchmark-suite/single_task_bench_with_compression/testenv/testenv.yaml
deleted file mode 100644
index 1d280431..00000000
--- a/examples/llm-benchmark-suite/single_task_bench_with_compression/testenv/testenv.yaml
+++ /dev/null
@@ -1,15 +0,0 @@
-testenv:
-  # dataset configuration
-  dataset:
-    # URL of the index file of the train dataset
-    train_url: "/home/yxc/ianvs/dataset/train_data/index.txt"
-    # URL of the index file of the test dataset
-    test_url: "/home/yxc/ianvs/dataset/test_data/index.txt"
-
-
-  # metrics configured for test case evaluation
-  metrics:
-    - name: "latency"
-      url: "./examples/llm-benchmark-suite/single_task_bench_with_compression/testenv/latency.py"
-    - name: "throughput"
-      url: "./examples/llm-benchmark-suite/single_task_bench_with_compression/testenv/throughput.py"
diff --git a/examples/llm-benchmark-suite/single_task_bench_with_compression/testenv/throughput.py b/examples/llm-benchmark-suite/single_task_bench_with_compression/testenv/throughput.py
deleted file mode 100644
index c0d91d9c..00000000
--- a/examples/llm-benchmark-suite/single_task_bench_with_compression/testenv/throughput.py
+++ /dev/null
@@ -1,26 +0,0 @@
-# Copyright 2023 The KubeEdge Authors.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from sedna.common.class_factory import ClassType, ClassFactory
-
-__all__ = ["throughput"]
-
-@ClassFactory.register(ClassType.GENERAL, alias="throughput")
-def throughput(y_true, y_pred):
-    num_requests = len(y_pred)
-    fixed_time = 1  # placeholder: assume a fixed one-second window
-
-    fixed_throughput = num_requests / fixed_time  # requests per second
-    print(f"Throughput: {fixed_throughput} requests/second")
-    return fixed_throughput
\ No newline at end of file