From 17b252e995df82bd7a20ea292a57010e1b6947e2 Mon Sep 17 00:00:00 2001
From: yexiaochuan <yxc2020@foxmail.com>
Date: Sat, 26 Oct 2024 15:14:01 +0800
Subject: [PATCH] fix: remove debug prints, stale comments; update example configs

Signed-off-by: yexiaochuan <yxc2020@foxmail.com>
---
 examples/llm-edge-benchmark-suite/README.md           | 11 +----------
 .../single_task_bench/testalgorithms/basemodel.py     |  6 ------
 .../single_task_bench/testenv/prefill_latency.py      |  3 ---
 .../testalgorithms/algorithm.yaml                     |  2 +-
 .../testenv/testenv.yaml                              |  2 +-
 5 files changed, 3 insertions(+), 21 deletions(-)

diff --git a/examples/llm-edge-benchmark-suite/README.md b/examples/llm-edge-benchmark-suite/README.md
index 9499a005..8ef4ae97 100644
--- a/examples/llm-edge-benchmark-suite/README.md
+++ b/examples/llm-edge-benchmark-suite/README.md
@@ -18,16 +18,7 @@ The data of llm-edge-benchmark-suite example structure is:
 `train_data/data.jsonl` is empty, and the `test_data/data.jsonl` is as follows:
 
 ```
-{"question": "如果小明有5个苹果，他给了小华3个，那么小明还剩下多少个苹果？\nA. 2个\nB. 3个\nC. 4个\nD. 5个", "answer": "A"}
-{"question": "下列哪个数是最小的质数？\nA. 0\nB. 1\nC. 2\nD. 4", "answer": "C"}
-{"question": "一个长方形的长是10厘米，宽是5厘米，它的周长是多少厘米？\nA. 20厘米\nB. 30厘米\nC. 40厘米\nD. 50厘米", "answer": "B"}
-{"question": "下列哪个分数是最接近1的？\nA. 1/2\nB. 3/4\nC. 4/5\nD. 5/6", "answer": "D"}
-{"question": "如果一个数加上10等于30，那么这个数是多少？\nA. 20\nB. 21\nC. 22\nD. 23", "answer": "A"}
-{"question": "下列哪个算式的结果最大？\nA. 3 + 4\nB. 5 - 2\nC. 6 * 2\nD. 7 ÷ 2", "answer": "C"}
-{"question": "一个班级有24个学生，如果每个学生都带了2本书，那么总共有多少本书？\nA. 48本\nB. 36本\nC. 24本\nD. 12本", "answer": "A"}
-{"question": "下列哪个是正确的乘法口诀？\nA. 三三得七\nB. 四四十六\nC. 五五二十五\nD. 六六三十六", "answer": "B"}
-{"question": "如果一个数是另一个数的3倍，并且这个数是15，那么另一个数是多少？\nA. 5\nB. 10\nC. 15\nD. 45", "answer": "A"}
-{"question": "下列哪个图形的周长最长？\nA. 正方形\nB. 长方形\nC. 圆形\nD. 三角形", "answer": "C"}
+{"question": "Which of the following numbers is the smallest prime number?\nA. 0\nB. 1\nC. 2\nD. 4", "answer": "C"}
 ```
 ### prepare env
 
diff --git a/examples/llm-edge-benchmark-suite/single_task_bench/testalgorithms/basemodel.py b/examples/llm-edge-benchmark-suite/single_task_bench/testalgorithms/basemodel.py
index d38fc2f8..06b45875 100644
--- a/examples/llm-edge-benchmark-suite/single_task_bench/testalgorithms/basemodel.py
+++ b/examples/llm-edge-benchmark-suite/single_task_bench/testalgorithms/basemodel.py
@@ -88,9 +88,6 @@ def predict(self, data, input_shape=None, **kwargs):
     def _parse_timings(self, stdout_output):
         import re
         timings = {}
-        print("================================")
-        print(stdout_output)
-        print("================================")
         for line in stdout_output.split('\n'):
             match = re.match(r'llama_print_timings:\s*(.+?)\s*=\s*([0-9\.]+)\s*ms', line)
             if match:
@@ -99,9 +96,6 @@ def _parse_timings(self, stdout_output):
 
                 key = key.lower().replace(' ', '_')
                 timings[key] = value
-                print(f"Captured timing: {key} = {value}")
-            else:
-                print("No match found for this line.")
 
         return timings
 
diff --git a/examples/llm-edge-benchmark-suite/single_task_bench/testenv/prefill_latency.py b/examples/llm-edge-benchmark-suite/single_task_bench/testenv/prefill_latency.py
index 15e94f35..b7743577 100644
--- a/examples/llm-edge-benchmark-suite/single_task_bench/testenv/prefill_latency.py
+++ b/examples/llm-edge-benchmark-suite/single_task_bench/testenv/prefill_latency.py
@@ -4,9 +4,6 @@
 
 @ClassFactory.register(ClassType.GENERAL, alias="prefill_latency")
 def prefill_latency(y_true, y_pred):
-    # avg_prefill_latency = y_pred.get('avg_prefill_latency', [])
-    # return avg_prefill_latency
-    #TODO 前面所有歌predict_dict 的结果，可以通过下面拿出来，我想把计算平均的过程放在这里，帮我实现
     results_list = y_pred.get('results', [])
     num_requests = len(results_list)
     total_prefill_latency = 0.0
diff --git a/examples/llm-edge-benchmark-suite/single_task_bench_with_compression/testalgorithms/algorithm.yaml b/examples/llm-edge-benchmark-suite/single_task_bench_with_compression/testalgorithms/algorithm.yaml
index 749868a1..1fdd5d5b 100644
--- a/examples/llm-edge-benchmark-suite/single_task_bench_with_compression/testalgorithms/algorithm.yaml
+++ b/examples/llm-edge-benchmark-suite/single_task_bench_with_compression/testalgorithms/algorithm.yaml
@@ -1,6 +1,6 @@
 algorithm:
   paradigm_type: "singletasklearning_with_compression"
-
+  mode: "with_compression"
   initial_model_url: "models/qwen/qwen_1_5_0_5b.gguf"
   quantization_type: "q8_0"
   llama_quantize_path: "llama.cpp/llama-quantize"
diff --git a/examples/llm-edge-benchmark-suite/single_task_bench_with_compression/testenv/testenv.yaml b/examples/llm-edge-benchmark-suite/single_task_bench_with_compression/testenv/testenv.yaml
index e4a6e88a..69de256f 100644
--- a/examples/llm-edge-benchmark-suite/single_task_bench_with_compression/testenv/testenv.yaml
+++ b/examples/llm-edge-benchmark-suite/single_task_bench_with_compression/testenv/testenv.yaml
@@ -2,7 +2,7 @@ testenv:
   dataset:
     train_data: "ianvs/government/objective/train_data/data.jsonl"
     test_data: "ianvs/government/objective/test_data/data.jsonl"
-  use_gpu: true
+  use_gpu: false
   metrics:
     - name: "latency"
       url: "./examples/llm-edge-benchmark-suite/single_task_bench_with_compression/testenv/latency.py"