Merge branch 'feature/qwen_bychannel' into feature/qwen
curioyang committed Jan 10, 2025
2 parents 69c16aa + 058e994 commit 42d0af4
Showing 3 changed files with 122 additions and 14 deletions.
73 changes: 68 additions & 5 deletions src/Nncase.Importer/Onnx/MatMul.cs
@@ -3,6 +3,7 @@
using System;
using System.Collections.Generic;
using Nncase.IR;
using Nncase.IR.Tensors;
using Onnx;
using F = Nncase.IR.F;

@@ -13,10 +14,72 @@ public partial class OnnxImporter
        private Expr VisitMatMul(in NodeProto op)
        {
            var (a, b) = GetInputExprs(op, 0, 1);
            var matmul = IR.F.Math.MatMul(a, b);
            List<string> outputNames = new() { op.Output[0] };
            matmul.Metadata.OutputNames = outputNames;
            return matmul;
            // /mlp_2/Mul_output_0, /mlp_3/Mul_output_0, /mlp_21/Mul_output_0
            if (a.Metadata.OutputNames![0] == "/mlp_2/Mul_output_0")
            {
                var a_a = F.Tensors.Slice(a, new int[] { 0 }, new int[] { 813 }, new int[] { 2 }, new int[] { 1 });
                var b_a = F.Tensors.Slice(a, new int[] { 813 }, new int[] { 814 }, new int[] { 2 }, new int[] { 1 });
                var c_a = F.Tensors.Slice(a, new int[] { 814 }, new int[] { -1 }, new int[] { 2 }, new int[] { 1 });

                var a_b = F.Tensors.Slice(b, new int[] { 0 }, new int[] { 813 }, new int[] { 0 }, new int[] { 1 });
                var b_b = F.Tensors.Slice(b, new int[] { 813 }, new int[] { 814 }, new int[] { 0 }, new int[] { 1 });
                var c_b = F.Tensors.Slice(b, new int[] { 814 }, new int[] { -1 }, new int[] { 0 }, new int[] { 1 });
                var new_a = F.Math.MatMul(a_a, a_b);
                var new_b = F.Math.MatMul(b_a, b_b);
                var new_c = F.Math.MatMul(c_a, c_b);
                return F.Math.Add(new_a, F.Math.Add(new_c, new_b));
            }
            else if (a.Metadata.OutputNames![0] == "/mlp_3/Mul_output_0")
            {
                var a_a = F.Tensors.Slice(a, new int[] { 0 }, new int[] { 2247 }, new int[] { 2 }, new int[] { 1 });
                var b_a = F.Tensors.Slice(a, new int[] { 2247 }, new int[] { 2248 }, new int[] { 2 }, new int[] { 1 });
                var c_a = F.Tensors.Slice(a, new int[] { 2248 }, new int[] { 3016 }, new int[] { 2 }, new int[] { 1 });
                var d_a = F.Tensors.Slice(a, new int[] { 3016 }, new int[] { 3017 }, new int[] { 2 }, new int[] { 1 });
                var e_a = F.Tensors.Slice(a, new int[] { 3017 }, new int[] { -1 }, new int[] { 2 }, new int[] { 1 });

                var a_b = F.Tensors.Slice(b, new int[] { 0 }, new int[] { 2247 }, new int[] { 0 }, new int[] { 1 });
                var b_b = F.Tensors.Slice(b, new int[] { 2247 }, new int[] { 2248 }, new int[] { 0 }, new int[] { 1 });
                var c_b = F.Tensors.Slice(b, new int[] { 2248 }, new int[] { 3016 }, new int[] { 0 }, new int[] { 1 });
                var d_b = F.Tensors.Slice(b, new int[] { 3016 }, new int[] { 3017 }, new int[] { 0 }, new int[] { 1 });
                var e_b = F.Tensors.Slice(b, new int[] { 3017 }, new int[] { -1 }, new int[] { 0 }, new int[] { 1 });

                var new_a = F.Math.MatMul(a_a, a_b);
                var new_b = F.Math.MatMul(b_a, b_b);
                var new_c = F.Math.MatMul(c_a, c_b);
                var new_d = F.Math.MatMul(d_a, d_b);
                var new_e = F.Math.MatMul(e_a, e_b);

                return F.Math.Add(new_a, F.Math.Add(F.Math.Add(F.Math.Add(new_d, new_e), new_c), new_b));
            }
            else if (a.Metadata.OutputNames![0] == "/mlp_21/Mul_output_0")
            {
                var a_a = F.Tensors.Slice(a, new int[] { 0 }, new int[] { 567 }, new int[] { 2 }, new int[] { 1 });
                var b_a = F.Tensors.Slice(a, new int[] { 567 }, new int[] { 568 }, new int[] { 2 }, new int[] { 1 });
                var c_a = F.Tensors.Slice(a, new int[] { 568 }, new int[] { 3486 }, new int[] { 2 }, new int[] { 1 });
                var d_a = F.Tensors.Slice(a, new int[] { 3486 }, new int[] { 3487 }, new int[] { 2 }, new int[] { 1 });
                var e_a = F.Tensors.Slice(a, new int[] { 3487 }, new int[] { -1 }, new int[] { 2 }, new int[] { 1 });

                var a_b = F.Tensors.Slice(b, new int[] { 0 }, new int[] { 567 }, new int[] { 0 }, new int[] { 1 });
                var b_b = F.Tensors.Slice(b, new int[] { 567 }, new int[] { 568 }, new int[] { 0 }, new int[] { 1 });
                var c_b = F.Tensors.Slice(b, new int[] { 568 }, new int[] { 3486 }, new int[] { 0 }, new int[] { 1 });
                var d_b = F.Tensors.Slice(b, new int[] { 3486 }, new int[] { 3487 }, new int[] { 0 }, new int[] { 1 });
                var e_b = F.Tensors.Slice(b, new int[] { 3487 }, new int[] { -1 }, new int[] { 0 }, new int[] { 1 });

                var new_a = F.Math.MatMul(a_a, a_b);
                var new_b = F.Math.MatMul(b_a, b_b);
                var new_c = F.Math.MatMul(c_a, c_b);
                var new_d = F.Math.MatMul(d_a, d_b);
                var new_e = F.Math.MatMul(e_a, e_b);

                return F.Math.Add(new_a, F.Math.Add(F.Math.Add(F.Math.Add(new_d, new_e), new_c), new_b));
            }
            else
            {
                var matmul = IR.F.Math.MatMul(a, b);
                List<string> outputNames = new() { op.Output[0] };
                matmul.Metadata.OutputNames = outputNames;
                return matmul;
            }
        }
    }
}
}
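
The three hard-coded branches above follow one pattern: cut the shared reduction axis (the last axis, 2, of `a` and the first axis, 0, of `b`) at the same channel boundaries, run a MatMul per pair of slices, and add the partial products back together. A minimal sketch of that pattern as a standalone helper, assuming the same `F` aliases as the importer file; the name `SplitMatMulByChannel`, the `boundaries` parameter, and the sequential summation order are illustrative, not part of this commit:

// Hypothetical sketch only: decompose MatMul(a, b) into partial products over
// contiguous channel ranges and sum them, mirroring the branches above.
// Assumes the reduction axis is axis 2 of `a` and axis 0 of `b`, and that
// `boundaries` are ascending split points strictly inside that axis.
private static Expr SplitMatMulByChannel(Expr a, Expr b, IReadOnlyList<int> boundaries)
{
    var starts = new List<int> { 0 };
    starts.AddRange(boundaries);
    var ends = new List<int>(boundaries) { -1 }; // -1 slices to the end of the axis

    Expr? sum = null;
    for (int i = 0; i < starts.Count; i++)
    {
        // Slice the shared reduction axis: axis 2 of `a`, axis 0 of `b`.
        var aPart = F.Tensors.Slice(a, new int[] { starts[i] }, new int[] { ends[i] }, new int[] { 2 }, new int[] { 1 });
        var bPart = F.Tensors.Slice(b, new int[] { starts[i] }, new int[] { ends[i] }, new int[] { 0 }, new int[] { 1 });
        var partial = F.Math.MatMul(aPart, bPart);
        sum = sum is null ? partial : F.Math.Add(sum, partial);
    }

    return sum!;
}

With boundaries of { 813, 814 } this reproduces the three-way split of the /mlp_2/Mul_output_0 branch; { 2247, 2248, 3016, 3017 } and { 567, 568, 3486, 3487 } correspond to the other two branches.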

10 changes: 8 additions & 2 deletions src/Nncase.Quantization/Quantization/Quantizer.Algorithms.cs
@@ -6,6 +6,12 @@
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using Nncase.IR;
using Nncase.IR.Tensors;
using Nncase.IR.F;
using Nncase.TIR;
using Math = System.Math;
using Tuple = System.Tuple;

namespace Nncase.Quantization;

@@ -21,8 +27,8 @@ private static ValueRange<float> GetMinMax(Tensor<float> tensor)
        {
            if (float.IsFinite(value))
            {
                min = Math.Min(min, value);
                max = Math.Max(max, value);
                min = System.Math.Min(min, value);
                max = System.Math.Max(max, value);
            }
        }

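The hunk above is the inner loop of GetMinMax: values that are not finite (NaN, ±Inf) are skipped and the running minimum/maximum are updated. The old and new assignments appear side by side; only the qualification changes, not the arithmetic — spelling out `System.Math` (together with the new `Math = System.Math` alias) keeps the call from binding to `Nncase.IR.F.Math` once `using Nncase.IR.F;` is in scope. A self-contained sketch of the same scan over a plain float array — the method name, tuple return, and seed values are illustrative assumptions, not the library's code:

// Illustrative only: the same skip-non-finite min/max scan as GetMinMax,
// written against a plain float[] so it does not depend on Nncase's Tensor<T> API.
private static (float Min, float Max) GetFiniteRange(float[] values)
{
    float min = float.PositiveInfinity;
    float max = float.NegativeInfinity;
    foreach (var value in values)
    {
        if (float.IsFinite(value))
        {
            // System.Math is spelled out so the call cannot bind to Nncase.IR.F.Math.
            min = System.Math.Min(min, value);
            max = System.Math.Max(max, value);
        }
    }

    return (min, max);
}
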
53 changes: 46 additions & 7 deletions tests/config.toml
@@ -10,7 +10,7 @@ swapRB = false
input_type = 'uint8'
input_shape = [1, 224, 224, 3]
input_range = [0, 255]
input_file = "/mnt/model/qwen/onnx/llm.onnx.data"
input_file = ""
mean = [0, 0, 0]
std = [1, 1, 1]
input_layout = 'NHWC'
@@ -19,9 +19,9 @@ model_layout = 'NHWC'
letterbox_value = 0
dump_asm = true
dump_ir = false
shape_bucket_enable = true
shape_bucket_range_info = { "seq_len" = [1,128], "history_len" = [0,128] }
shape_bucket_segments_count = 2
shape_bucket_enable = false
shape_bucket_range_info = { }
shape_bucket_segments_count = 4
shape_bucket_fix_var_map = { }

[ptq_opt]
@@ -93,15 +93,54 @@ args = []

[target]

[target.cpu]
eval = true
infer = true
similarity_name = 'cosine'

[target.cpu.mode.noptq]
enabled = false
threshold = 0.999

[target.cpu.mode.ptq]
enabled = true
threshold = 0.98

[target.k510]
eval = true
infer = true
similarity_name = 'cosine'

[target.k510.mode.noptq]
enabled = false
threshold = 0.99

[target.k510.mode.ptq]
enabled = true
threshold = 0.98

[target.k230]
eval = true
infer = true
similarity_name = 'cosine'

[target.k230.mode.noptq]
enabled = false
threshold = 0.999

[target.k230.mode.ptq]
enabled = true
threshold = 0.96

[target.xpu]
eval = false
infer = true
similarity_name = 'cosine'

[target.xpu.mode.noptq]
enabled = true
threshold = 0.999

#[target.k230.mode.ptq]
#enabled = true
#threshold = 0.96
[target.xpu.mode.ptq]
enabled = false
threshold = 0.9
