Merge branch 'feature/qwen_bychannel' into feature/qwen
curioyang committed Jan 10, 2025
2 parents 69c16aa + 058e994 commit 42d0af4
Showing 3 changed files with 122 additions and 14 deletions.
73 changes: 68 additions & 5 deletions src/Nncase.Importer/Onnx/MatMul.cs
@@ -3,6 +3,7 @@
using System;
using System.Collections.Generic;
using Nncase.IR;
using Nncase.IR.Tensors;
using Onnx;
using F = Nncase.IR.F;

@@ -13,10 +14,72 @@ public partial class OnnxImporter
        private Expr VisitMatMul(in NodeProto op)
        {
            var (a, b) = GetInputExprs(op, 0, 1);
            var matmul = IR.F.Math.MatMul(a, b);
            List<string> outputNames = new() { op.Output[0] };
            matmul.Metadata.OutputNames = outputNames;
            return matmul;
            // /mlp_2/Mul_output_0, /mlp_3/Mul_output_0, /mlp_21/Mul_output_0
            if (a.Metadata.OutputNames![0] == "/mlp_2/Mul_output_0")
            {
                var a_a = F.Tensors.Slice(a, new int[] { 0 }, new int[] { 813 }, new int[] { 2 }, new int[] { 1 });
                var b_a = F.Tensors.Slice(a, new int[] { 813 }, new int[] { 814 }, new int[] { 2 }, new int[] { 1 });
                var c_a = F.Tensors.Slice(a, new int[] { 814 }, new int[] { -1 }, new int[] { 2 }, new int[] { 1 });

                var a_b = F.Tensors.Slice(b, new int[] { 0 }, new int[] { 813 }, new int[] { 0 }, new int[] { 1 });
                var b_b = F.Tensors.Slice(b, new int[] { 813 }, new int[] { 814 }, new int[] { 0 }, new int[] { 1 });
                var c_b = F.Tensors.Slice(b, new int[] { 814 }, new int[] { -1 }, new int[] { 0 }, new int[] { 1 });
                var new_a = F.Math.MatMul(a_a, a_b);
                var new_b = F.Math.MatMul(b_a, b_b);
                var new_c = F.Math.MatMul(c_a, c_b);
                return F.Math.Add(new_a, F.Math.Add(new_c, new_b));
            }
            else if (a.Metadata.OutputNames![0] == "/mlp_3/Mul_output_0")
            {
                var a_a = F.Tensors.Slice(a, new int[] { 0 }, new int[] { 2247 }, new int[] { 2 }, new int[] { 1 });
                var b_a = F.Tensors.Slice(a, new int[] { 2247 }, new int[] { 2248 }, new int[] { 2 }, new int[] { 1 });
                var c_a = F.Tensors.Slice(a, new int[] { 2248 }, new int[] { 3016 }, new int[] { 2 }, new int[] { 1 });
                var d_a = F.Tensors.Slice(a, new int[] { 3016 }, new int[] { 3017 }, new int[] { 2 }, new int[] { 1 });
                var e_a = F.Tensors.Slice(a, new int[] { 3017 }, new int[] { -1 }, new int[] { 2 }, new int[] { 1 });

                var a_b = F.Tensors.Slice(b, new int[] { 0 }, new int[] { 2247 }, new int[] { 0 }, new int[] { 1 });
                var b_b = F.Tensors.Slice(b, new int[] { 2247 }, new int[] { 2248 }, new int[] { 0 }, new int[] { 1 });
                var c_b = F.Tensors.Slice(b, new int[] { 2248 }, new int[] { 3016 }, new int[] { 0 }, new int[] { 1 });
                var d_b = F.Tensors.Slice(b, new int[] { 3016 }, new int[] { 3017 }, new int[] { 0 }, new int[] { 1 });
                var e_b = F.Tensors.Slice(b, new int[] { 3017 }, new int[] { -1 }, new int[] { 0 }, new int[] { 1 });

                var new_a = F.Math.MatMul(a_a, a_b);
                var new_b = F.Math.MatMul(b_a, b_b);
                var new_c = F.Math.MatMul(c_a, c_b);
                var new_d = F.Math.MatMul(d_a, d_b);
                var new_e = F.Math.MatMul(e_a, e_b);

                return F.Math.Add(new_a, F.Math.Add(F.Math.Add(F.Math.Add(new_d, new_e), new_c), new_b));
            }
            else if (a.Metadata.OutputNames![0] == "/mlp_21/Mul_output_0")
            {
                var a_a = F.Tensors.Slice(a, new int[] { 0 }, new int[] { 567 }, new int[] { 2 }, new int[] { 1 });
                var b_a = F.Tensors.Slice(a, new int[] { 567 }, new int[] { 568 }, new int[] { 2 }, new int[] { 1 });
                var c_a = F.Tensors.Slice(a, new int[] { 568 }, new int[] { 3486 }, new int[] { 2 }, new int[] { 1 });
                var d_a = F.Tensors.Slice(a, new int[] { 3486 }, new int[] { 3487 }, new int[] { 2 }, new int[] { 1 });
                var e_a = F.Tensors.Slice(a, new int[] { 3487 }, new int[] { -1 }, new int[] { 2 }, new int[] { 1 });

                var a_b = F.Tensors.Slice(b, new int[] { 0 }, new int[] { 567 }, new int[] { 0 }, new int[] { 1 });
                var b_b = F.Tensors.Slice(b, new int[] { 567 }, new int[] { 568 }, new int[] { 0 }, new int[] { 1 });
                var c_b = F.Tensors.Slice(b, new int[] { 568 }, new int[] { 3486 }, new int[] { 0 }, new int[] { 1 });
                var d_b = F.Tensors.Slice(b, new int[] { 3486 }, new int[] { 3487 }, new int[] { 0 }, new int[] { 1 });
                var e_b = F.Tensors.Slice(b, new int[] { 3487 }, new int[] { -1 }, new int[] { 0 }, new int[] { 1 });

                var new_a = F.Math.MatMul(a_a, a_b);
                var new_b = F.Math.MatMul(b_a, b_b);
                var new_c = F.Math.MatMul(c_a, c_b);
                var new_d = F.Math.MatMul(d_a, d_b);
                var new_e = F.Math.MatMul(e_a, e_b);

                return F.Math.Add(new_a, F.Math.Add(F.Math.Add(F.Math.Add(new_d, new_e), new_c), new_b));
            }
            else
            {
                var matmul = IR.F.Math.MatMul(a, b);
                List<string> outputNames = new() { op.Output[0] };
                matmul.Metadata.OutputNames = outputNames;
                return matmul;
            }
        }
    }
}
}
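
The three hard-coded branches above follow one pattern: cut the shared reduction axis (the last axis, 2, of `a` and the first axis, 0, of `b`) at the same channel boundaries, run a MatMul per pair of slices, and add the partial products back together. A minimal sketch of that pattern as a standalone helper, assuming the same `F` aliases as the importer file; the name `SplitMatMulByChannel`, the `boundaries` parameter, and the sequential summation order are illustrative, not part of this commit:

// Hypothetical sketch only: decompose MatMul(a, b) into partial products over
// contiguous channel ranges and sum them, mirroring the branches above.
// Assumes the reduction axis is axis 2 of `a` and axis 0 of `b`, and that
// `boundaries` are ascending split points strictly inside that axis.
private static Expr SplitMatMulByChannel(Expr a, Expr b, IReadOnlyList<int> boundaries)
{
    var starts = new List<int> { 0 };
    starts.AddRange(boundaries);
    var ends = new List<int>(boundaries) { -1 }; // -1 slices to the end of the axis

    Expr? sum = null;
    for (int i = 0; i < starts.Count; i++)
    {
        // Slice the shared reduction axis: axis 2 of `a`, axis 0 of `b`.
        var aPart = F.Tensors.Slice(a, new int[] { starts[i] }, new int[] { ends[i] }, new int[] { 2 }, new int[] { 1 });
        var bPart = F.Tensors.Slice(b, new int[] { starts[i] }, new int[] { ends[i] }, new int[] { 0 }, new int[] { 1 });
        var partial = F.Math.MatMul(aPart, bPart);
        sum = sum is null ? partial : F.Math.Add(sum, partial);
    }

    return sum!;
}

With boundaries of { 813, 814 } this reproduces the three-way split of the /mlp_2/Mul_output_0 branch; { 2247, 2248, 3016, 3017 } and { 567, 568, 3486, 3487 } correspond to the other two branches.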

10 changes: 8 additions & 2 deletions src/Nncase.Quantization/Quantization/Quantizer.Algorithms.cs
@@ -6,6 +6,12 @@
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using Nncase.IR;
using Nncase.IR.Tensors;
using Nncase.IR.F;
using Nncase.TIR;
using Math = System.Math;
using Tuple = System.Tuple;

namespace Nncase.Quantization;

@@ -21,8 +27,8 @@ private static ValueRange<float> GetMinMax(Tensor<float> tensor)
        {
            if (float.IsFinite(value))
            {
                min = Math.Min(min, value);
                max = Math.Max(max, value);
                min = System.Math.Min(min, value);
                max = System.Math.Max(max, value);
            }
        }

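The hunk above is the inner loop of GetMinMax: values that are not finite (NaN, ±Inf) are skipped and the running minimum/maximum are updated. The old and new assignments appear side by side; only the qualification changes, not the arithmetic — spelling out `System.Math` (together with the new `Math = System.Math` alias) keeps the call from binding to `Nncase.IR.F.Math` once `using Nncase.IR.F;` is in scope. A self-contained sketch of the same scan over a plain float array — the method name, tuple return, and seed values are illustrative assumptions, not the library's code:

// Illustrative only: the same skip-non-finite min/max scan as GetMinMax,
// written against a plain float[] so it does not depend on Nncase's Tensor<T> API.
private static (float Min, float Max) GetFiniteRange(float[] values)
{
    float min = float.PositiveInfinity;
    float max = float.NegativeInfinity;
    foreach (var value in values)
    {
        if (float.IsFinite(value))
        {
            // System.Math is spelled out so the call cannot bind to Nncase.IR.F.Math.
            min = System.Math.Min(min, value);
            max = System.Math.Max(max, value);
        }
    }

    return (min, max);
}
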
53 changes: 46 additions & 7 deletions tests/config.toml
@@ -10,7 +10,7 @@ swapRB = false
input_type = 'uint8'
input_shape = [1, 224, 224, 3]
input_range = [0, 255]
input_file = "/mnt/model/qwen/onnx/llm.onnx.data"
input_file = ""
mean = [0, 0, 0]
std = [1, 1, 1]
input_layout = 'NHWC'
@@ -19,9 +19,9 @@ model_layout = 'NHWC'
letterbox_value = 0
dump_asm = true
dump_ir = false
shape_bucket_enable = true
shape_bucket_range_info = { "seq_len" = [1,128], "history_len" = [0,128] }
shape_bucket_segments_count = 2
shape_bucket_enable = false
shape_bucket_range_info = { }
shape_bucket_segments_count = 4
shape_bucket_fix_var_map = { }

[ptq_opt]
@@ -93,15 +93,54 @@ args = []

[target]

[target.cpu]
eval = true
infer = true
similarity_name = 'cosine'

[target.cpu.mode.noptq]
enabled = false
threshold = 0.999

[target.cpu.mode.ptq]
enabled = true
threshold = 0.98

[target.k510]
eval = true
infer = true
similarity_name = 'cosine'

[target.k510.mode.noptq]
enabled = false
threshold = 0.99

[target.k510.mode.ptq]
enabled = true
threshold = 0.98

[target.k230]
eval = true
infer = true
similarity_name = 'cosine'

[target.k230.mode.noptq]
enabled = false
threshold = 0.999

[target.k230.mode.ptq]
enabled = true
threshold = 0.96

[target.xpu]
eval = false
infer = true
similarity_name = 'cosine'

[target.xpu.mode.noptq]
enabled = true
threshold = 0.999

#[target.k230.mode.ptq]
#enabled = true
#threshold = 0.96
[target.xpu.mode.ptq]
enabled = false
threshold = 0.9
