From b53fb6ba686abfee5ed4b1d742c62acf03ae9838 Mon Sep 17 00:00:00 2001
From: yexiaochuan
Date: Thu, 19 Sep 2024 17:58:42 +0800
Subject: [PATCH] Revert "llm suite benchmark implement"

This reverts commit f341b0f2806acf787c65d62d33317f8a998b028d.
---
 .../README.md                                 |   2 -
 .../benchmarkingjob.yaml                      |  31 ----
 .../testalgorithms/algorithm.yaml             |  33 -----
 .../testalgorithms/basemodel.py               | 134 ------------------
 .../testalgorithms/download.py                |  28 ----
 .../download_model_modelscope.py              |  22 ---
 .../testalgorithms/llama_cpp_inference.py     |  26 ----
 .../testenv/latency.py                        |  36 -----
 .../testenv/testenv.yaml                      |  15 --
 .../testenv/throughput.py                     |  26 ----
 10 files changed, 353 deletions(-)
 delete mode 100644 examples/llm-benchmark-suite/single_task_bench_with_compression/README.md
 delete mode 100644 examples/llm-benchmark-suite/single_task_bench_with_compression/benchmarkingjob.yaml
 delete mode 100644 examples/llm-benchmark-suite/single_task_bench_with_compression/testalgorithms/algorithm.yaml
 delete mode 100644 examples/llm-benchmark-suite/single_task_bench_with_compression/testalgorithms/basemodel.py
 delete mode 100644 examples/llm-benchmark-suite/single_task_bench_with_compression/testalgorithms/download.py
 delete mode 100644 examples/llm-benchmark-suite/single_task_bench_with_compression/testalgorithms/download_model_modelscope.py
 delete mode 100644 examples/llm-benchmark-suite/single_task_bench_with_compression/testalgorithms/llama_cpp_inference.py
 delete mode 100644 examples/llm-benchmark-suite/single_task_bench_with_compression/testenv/latency.py
 delete mode 100644 examples/llm-benchmark-suite/single_task_bench_with_compression/testenv/testenv.yaml
 delete mode 100644 examples/llm-benchmark-suite/single_task_bench_with_compression/testenv/throughput.py

diff --git a/examples/llm-benchmark-suite/single_task_bench_with_compression/README.md b/examples/llm-benchmark-suite/single_task_bench_with_compression/README.md
deleted file mode 100644
index 3a3835c7..00000000
--- a/examples/llm-benchmark-suite/single_task_bench_with_compression/README.md
+++ /dev/null
@@ -1,2 +0,0 @@
-Large Language Model Edge Benchmark Suite: Implementation on KubeEdge-Ianvs
-
diff --git a/examples/llm-benchmark-suite/single_task_bench_with_compression/benchmarkingjob.yaml b/examples/llm-benchmark-suite/single_task_bench_with_compression/benchmarkingjob.yaml
deleted file mode 100644
index 1fe3937a..00000000
--- a/examples/llm-benchmark-suite/single_task_bench_with_compression/benchmarkingjob.yaml
+++ /dev/null
@@ -1,31 +0,0 @@
-benchmarkingjob:
-  # job name of benchmarking; string type;
-  name: "benchmarkingjob"
-  # the path of the job workspace that will store the outputs of tests; string type;
-  workspace: "./workspace"
-
-  testenv: "./examples/llm-benchmark-suite/single_task_bench_with_compression/testenv/testenv.yaml"
-
-  test_object:
-    type: "algorithms"
-    algorithms:
-      - name: "llama_cpp_inference"
-        url: "./examples/llm-benchmark-suite/single_task_bench_with_compression/testalgorithms/algorithm.yaml"
-
-  rank:
-    sort_by:
-      - { "latency": "ascend" }
-      - { "throughput": "descend" }
-      - { "perplexity": "ascend" }
-
-  visualization:
-    mode: "selected_only"
-    method: "print_table"
-
-  selected_dataitem:
-    paradigms: [ "all" ]
-    modules: [ "all" ]
-    hyperparameters: [ "all" ]
-    metrics: [ "latency", "throughput", "perplexity" ]
-
-  save_mode: "selected_and_all"
\ No newline at end of file
diff --git a/examples/llm-benchmark-suite/single_task_bench_with_compression/testalgorithms/algorithm.yaml b/examples/llm-benchmark-suite/single_task_bench_with_compression/testalgorithms/algorithm.yaml
deleted file mode 100644
index f6a07aad..00000000
--- a/examples/llm-benchmark-suite/single_task_bench_with_compression/testalgorithms/algorithm.yaml
+++ /dev/null
@@ -1,33 +0,0 @@
-algorithm:
-  paradigm_type: "singletasklearning"
-
-  initial_model_url: "models/qwen/qwen_1_5_0_5b.gguf"
-
-  modules:
-    - type: "basemodel"
-      name: "LlamaCppModel"
-      url: "./examples/llm-benchmark-suite/single_task_bench_with_compression/testalgorithms/basemodel.py"
-      hyperparameters:
-        - model_path:
-            values:
-              - "models/qwen/qwen_1_5_0_5b.gguf"
-        - n_ctx:
-            values:
-              - 2048
-
-    - type: "inference_integrate"
-      name: "LlamaCppInference"
-      url: "./examples/llm-benchmark-suite/single_task_bench_with_compression/testalgorithms/llama_cpp_inference.py"
-      hyperparameters:
-        - batch_size:
-            values:
-              - 1
-        - max_tokens:
-            values:
-              - 32
-        - stop:
-            values:
-              - ["Q:", "\n"]
-        - echo:
-            values:
-              - true
\ No newline at end of file
diff --git a/examples/llm-benchmark-suite/single_task_bench_with_compression/testalgorithms/basemodel.py b/examples/llm-benchmark-suite/single_task_bench_with_compression/testalgorithms/basemodel.py
deleted file mode 100644
index 2c37b37a..00000000
--- a/examples/llm-benchmark-suite/single_task_bench_with_compression/testalgorithms/basemodel.py
+++ /dev/null
@@ -1,134 +0,0 @@
-from sedna.common.class_factory import ClassFactory, ClassType
-from llama_cpp import Llama
-import os
-
-
-@ClassFactory.register(ClassType.GENERAL, alias="LlamaCppModel")
-class LlamaCppModel:
-    def __init__(self, **kwargs):
-        """
-        Initialize the LlamaCppModel.
-        """
-        model_path = kwargs.get("model_path")
-        if not model_path:
-            raise ValueError("Model path is required.")
-
-        # Initialize the Llama model
-        self.model = Llama(
-            model_path=model_path,
-            n_ctx=kwargs.get("n_ctx", 512),
-            n_gpu_layers=kwargs.get("n_gpu_layers", 0),
-            seed=kwargs.get("seed", -1),
-            f16_kv=kwargs.get("f16_kv", True),
-            logits_all=kwargs.get("logits_all", False),
-            vocab_only=kwargs.get("vocab_only", False),
-            use_mlock=kwargs.get("use_mlock", False),
-            embedding=kwargs.get("embedding", False),
-        )
-
-    def predict(self, data=None, input_shape=None, **kwargs):
-        """
-        Run inference with the model.
-
-        Args:
-            data (list or None): input data; ignored by this implementation
-            input_shape: unused
-            **kwargs: additional generation parameters
-
-        Returns:
-            dict: a dictionary containing the prediction results
-        """
-        # The data argument is deliberately ignored; the prompt is hard-coded here
-        prompt = (
-            "Q: Name the planets in the solar system? A: "
-        )
-
-        # Capture stdout, including the logs printed by llama-cpp-python
-        import io
-        from contextlib import redirect_stdout
-
-        f = io.StringIO()
-        with redirect_stdout(f):
-            # Run generation with the model
-            output = self.model(
-                prompt=prompt,
-                max_tokens=kwargs.get("max_tokens", 32),
-                stop=kwargs.get("stop", ["Q:", "\n"]),
-                echo=kwargs.get("echo", True),
-                temperature=kwargs.get("temperature", 0.8),
-                top_p=kwargs.get("top_p", 0.95),
-                top_k=kwargs.get("top_k", 40),
-                repeat_penalty=kwargs.get("repeat_penalty", 1.1),
-            )
-        # Read back the captured stdout
-        stdout_output = f.getvalue()
-
-        # Parse the timing information
-        timings = self._parse_timings(stdout_output)
-
-        # Extract the generated text
-        generated_text = output['choices'][0]['text']
-
-        predict_dict = {
-            "results": [generated_text],
-            "timings": [timings]
-        }
-        return predict_dict
-
-    def _parse_timings(self, stdout_output):
-        """
-        Parse the timing information printed by llama-cpp-python.
-
-        Args:
-            stdout_output (str): captured stdout content
-
-        Returns:
-            dict: the parsed timing information
-        """
-        import re
-        timings = {}
-        for line in stdout_output.split('\n'):
-            match = re.match(r'llama_print_timings:\s+(.*)\s+=\s+([\d\.]+)\s+ms', line)
-            if match:
-                key = match.group(1).strip()
-                value = float(match.group(2))
-                timings[key] = value
-        return timings
-
-    def evaluate(self, data, model_path=None, **kwargs):
-        """
-        Evaluate the model.
-        """
-        if data is None or data.x is None or data.y is None:
-            raise ValueError("Evaluation data is None.")
-
-        if model_path:
-            self.load(model_path)
-
-        # Run prediction
-        predict_dict = self.predict(data.x, **kwargs)
-
-        # Compute the metric with the provided evaluation function
-        metric_name = kwargs.get("metric_name", "accuracy")
-        metric_func = kwargs.get("metric_func")
-
-        if callable(metric_func):
-            metric_value = metric_func(data.y, predict_dict["results"])
-            return {metric_name: metric_value}
-        else:
-            raise ValueError("Metric function is not callable or not provided.")
-
-    def save(self, model_path):
-        # llama-cpp-python uses a pretrained model file, so there is nothing to save
-        pass
-
-    def load(self, model_url):
-        # The model is already loaded in __init__, so no extra work is needed here
-        pass
-
-    def train(self, train_data, valid_data=None, **kwargs):
-        """
-        LlamaCpp does not support training; skip the training step.
-        """
-        print("Training is not supported for this model. Skipping training step.")
-        return
\ No newline at end of file
diff --git a/examples/llm-benchmark-suite/single_task_bench_with_compression/testalgorithms/download.py b/examples/llm-benchmark-suite/single_task_bench_with_compression/testalgorithms/download.py
deleted file mode 100644
index 6df9a091..00000000
--- a/examples/llm-benchmark-suite/single_task_bench_with_compression/testalgorithms/download.py
+++ /dev/null
@@ -1,28 +0,0 @@
-import os
-import argparse
-from huggingface_hub import hf_hub_download
-
-def download_model(repo_id, filename, local_dir):
-    os.makedirs(local_dir, exist_ok=True)
-
-    try:
-        model_path = hf_hub_download(
-            repo_id=repo_id,
-            filename=filename,
-            cache_dir=local_dir
-        )
-        print(f"Model successfully downloaded to: {model_path}")
-        return model_path
-    except Exception as e:
-        print(f"Error downloading model: {str(e)}")
-        return None
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description="Download a model from Hugging Face Hub")
-    parser.add_argument("--repo_id", type=str, required=True, help="Hugging Face repo ID")
-    parser.add_argument("--filename", type=str, default="*q8_0.gguf", help="Filename or pattern to download")
-    parser.add_argument("--local_dir", type=str, required=True, help="Local directory to save the model")
-
-    args = parser.parse_args()
-
-    download_model(args.repo_id, args.filename, args.local_dir)
\ No newline at end of file
diff --git a/examples/llm-benchmark-suite/single_task_bench_with_compression/testalgorithms/download_model_modelscope.py b/examples/llm-benchmark-suite/single_task_bench_with_compression/testalgorithms/download_model_modelscope.py
deleted file mode 100644
index b8b43094..00000000
--- a/examples/llm-benchmark-suite/single_task_bench_with_compression/testalgorithms/download_model_modelscope.py
+++ /dev/null
@@ -1,22 +0,0 @@
-import os
-import argparse
-from modelscope import snapshot_download
-
-def download_model(model_id, revision, local_dir):
-    try:
-        model_dir = snapshot_download(model_id, revision=revision, cache_dir=local_dir)
-        print(f"Model successfully downloaded to: {model_dir}")
-        return model_dir
-    except Exception as e:
-        print(f"Error downloading model: {str(e)}")
-        return None
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description="Download a model from ModelScope")
-    parser.add_argument("--model_id", type=str, required=True, help="ModelScope model ID")
-    parser.add_argument("--revision", type=str, default="master", help="Model revision")
-    parser.add_argument("--local_dir", type=str, required=True, help="Local directory to save the model")
-
-    args = parser.parse_args()
-
-    download_model(args.model_id, args.revision, args.local_dir)
\ No newline at end of file
diff --git a/examples/llm-benchmark-suite/single_task_bench_with_compression/testalgorithms/llama_cpp_inference.py b/examples/llm-benchmark-suite/single_task_bench_with_compression/testalgorithms/llama_cpp_inference.py
deleted file mode 100644
index 5e70eabf..00000000
--- a/examples/llm-benchmark-suite/single_task_bench_with_compression/testalgorithms/llama_cpp_inference.py
+++ /dev/null
@@ -1,26 +0,0 @@
-from sedna.common.class_factory import ClassFactory, ClassType
-
-@ClassFactory.register(ClassType.STP, alias="LlamaCppInference")
-class LlamaCppInference:
-    def __init__(self, batch_size=1, max_tokens=32, stop=None, echo=False, **kwargs):
-        self.batch_size = batch_size
-        self.max_tokens = max_tokens
-        self.stop = stop
-        self.echo = echo
-
-    def __call__(self, model, data):
-        results = []
-        for i in range(0, len(data), self.batch_size):
-            batch = data[i:i+self.batch_size]
-            responses = model.predict(
-                batch,
-                max_tokens=self.max_tokens,
-                stop=self.stop,
-                echo=self.echo
-            )
-            results.extend(responses["results"])  # predict() returns a dict keyed by "results"
-
-        for r in results:
-            print(r, "\n", "-" * 80, "\n")
-
-        return results
\ No newline at end of file
diff --git a/examples/llm-benchmark-suite/single_task_bench_with_compression/testenv/latency.py b/examples/llm-benchmark-suite/single_task_bench_with_compression/testenv/latency.py
deleted file mode 100644
index a57644fa..00000000
--- a/examples/llm-benchmark-suite/single_task_bench_with_compression/testenv/latency.py
+++ /dev/null
@@ -1,36 +0,0 @@
-# Copyright 2023 The KubeEdge Authors.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from sedna.common.class_factory import ClassType, ClassFactory
-
-__all__ = ["latency"]
-
-@ClassFactory.register(ClassType.GENERAL, alias="latency")
-def latency(predicts, targets=None):
-    """
-    Compute the average inference time.
-    """
-    if isinstance(predicts, dict):
-        timings_list = predicts.get("timings", [])
-    else:
-        # If predicts is not a dict, there are no timings to read
-        timings_list = []
-    total_time = 0.0
-    count = 0
-    for timings in timings_list:
-        if 'total time' in timings:
-            total_time += timings['total time']
-            count += 1
-    average_latency = total_time / count if count > 0 else 0.0
-    return average_latency
\ No newline at end of file
diff --git a/examples/llm-benchmark-suite/single_task_bench_with_compression/testenv/testenv.yaml b/examples/llm-benchmark-suite/single_task_bench_with_compression/testenv/testenv.yaml
deleted file mode 100644
index 1d280431..00000000
--- a/examples/llm-benchmark-suite/single_task_bench_with_compression/testenv/testenv.yaml
+++ /dev/null
@@ -1,15 +0,0 @@
-testenv:
-  # dataset configuration
-  dataset:
-    # URL of the index file of the train dataset
-    train_url: "/home/yxc/ianvs/dataset/train_data/index.txt"
-    # URL of the index file of the test dataset
-    test_url: "/home/yxc/ianvs/dataset/test_data/index.txt"
-
-
-  # metrics configured for test case evaluation
-  metrics:
-    - name: "latency"
-      url: "./examples/llm-benchmark-suite/single_task_bench_with_compression/testenv/latency.py"
-    - name: "throughput"
-      url: "./examples/llm-benchmark-suite/single_task_bench_with_compression/testenv/throughput.py"
diff --git a/examples/llm-benchmark-suite/single_task_bench_with_compression/testenv/throughput.py b/examples/llm-benchmark-suite/single_task_bench_with_compression/testenv/throughput.py
deleted file mode 100644
index c0d91d9c..00000000
--- a/examples/llm-benchmark-suite/single_task_bench_with_compression/testenv/throughput.py
+++ /dev/null
@@ -1,26 +0,0 @@
-# Copyright 2023 The KubeEdge Authors.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from sedna.common.class_factory import ClassType, ClassFactory
-
-__all__ = ["throughput"]
-
-@ClassFactory.register(ClassType.GENERAL, alias="throughput")
-def throughput(y_true, y_pred):
-    num_requests = len(y_pred)
-    fixed_time = 1  # placeholder: assume a fixed one-second window
-
-    fixed_throughput = num_requests / fixed_time  # requests per second
-    print(f"Throughput: {fixed_throughput} requests/second")
-    return fixed_throughput
\ No newline at end of file