From 3786b6a2190049e669610acfc7d8523fb141c3d3 Mon Sep 17 00:00:00 2001 From: gamzeisl Date: Thu, 10 Oct 2024 13:25:29 +0200 Subject: [PATCH 1/6] Extend simvector names with activation --- src/hwpe/tb/ita_hwpe_tb.sv | 4 +++- src/tb/ita_tb.sv | 4 +++- testGenerator.py | 3 ++- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/src/hwpe/tb/ita_hwpe_tb.sv b/src/hwpe/tb/ita_hwpe_tb.sv index c31ecbc..888971a 100644 --- a/src/hwpe/tb/ita_hwpe_tb.sv +++ b/src/hwpe/tb/ita_hwpe_tb.sv @@ -128,7 +128,9 @@ module ita_hwpe_tb; "_F", $sformatf("%0d", FEEDFORWARD_SIZE), "_H1_B", - $sformatf("%0d", `ifdef BIAS `BIAS `else 0 `endif) + $sformatf("%0d", `ifdef BIAS `BIAS `else 0 `endif), + "_", + $sformatf( "%s", ACTIVATION) }; // Number of tiles in the sequence dimension N_TILES_SEQUENCE_DIM = SEQUENCE_LEN / M_TILE_LEN; diff --git a/src/tb/ita_tb.sv b/src/tb/ita_tb.sv index 78cdb17..d721247 100644 --- a/src/tb/ita_tb.sv +++ b/src/tb/ita_tb.sv @@ -80,7 +80,9 @@ module ita_tb; "_F", $sformatf("%0d", FEEDFORWARD_SIZE), "_H1_B", - $sformatf("%0d", `ifdef BIAS `BIAS `else 0 `endif) + $sformatf("%0d", `ifdef BIAS `BIAS `else 0 `endif), + "_", + $sformatf( "%s", ACTIVATION) }; N_TILES_SEQUENCE_DIM = SEQUENCE_LEN / M_TILE_LEN; N_TILES_EMBEDDING_DIM = EMBEDDING_SIZE / M_TILE_LEN; diff --git a/testGenerator.py b/testGenerator.py index 993ddfe..7082002 100644 --- a/testGenerator.py +++ b/testGenerator.py @@ -47,7 +47,8 @@ def generateMHA(**args): H = args['H'] NO_BIAS = args['no_bias'] NO_PARTIAL_SOFTMAX = args['no_partial_softmax'] - base_path = f'{current_dir}/simvectors/data_S{S}_E{E}_P{P}_F{F}_H{H}_B{int(not NO_BIAS)}' + ACTIVATION = args['activation'].capitalize() + base_path = f'{current_dir}/simvectors/data_S{S}_E{E}_P{P}_F{F}_H{H}_B{int(not NO_BIAS)}_{ACTIVATION}' if NO_PARTIAL_SOFTMAX: path = f'{base_path}_noPartialSoftmax/' From 4d3294eb74b3b2b0368be017f7f7e2d906df82ca Mon Sep 17 00:00:00 2001 From: gamzeisl Date: Thu, 10 Oct 2024 13:53:06 +0200 Subject: [PATCH 2/6] [ci] Extend tests with no stalls --- .gitlab-ci.yml | 34 ++++++++++++++++++++++++++++++++-- src/hwpe/tb/ita_hwpe_tb.sv | 2 +- 2 files changed, 33 insertions(+), 3 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index d87f8f2..a8a7dcd 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -45,19 +45,34 @@ run_sim: P: 64 F: 64 activation: gelu + no_stalls: 0 + - S: 64 + E: 64 + P: 64 + F: 64 + activation: gelu + no_stalls: 1 + - S: 128 + E: 192 + P: 256 + F: 256 + activation: gelu + no_stalls: 0 - S: 128 E: 192 P: 256 F: 256 activation: gelu + no_stalls: 1 - S: 192 E: 256 P: 128 F: 128 activation: relu + no_stalls: 1 script: - make bender - - make sim VSIM_FLAGS=-c s=$S e=$E p=$P f=$F bias=1 activation=$activation + - make sim VSIM_FLAGS=-c s=$S e=$E p=$P f=$F bias=1 activation=$activation no_stalls=$no_stalls - ./modelsim/return_status.sh modelsim/build/transcript $S $E $P $F ita_tb run_hwpe_sim: @@ -71,17 +86,32 @@ run_hwpe_sim: P: 64 F: 64 activation: gelu + no_stalls: 0 + - S: 64 + E: 64 + P: 64 + F: 64 + activation: gelu + no_stalls: 1 + - S: 128 + E: 192 + P: 256 + F: 256 + activation: gelu + no_stalls: 0 - S: 128 E: 192 P: 256 F: 256 activation: gelu + no_stalls: 1 - S: 192 E: 256 P: 128 F: 128 activation: relu + no_stalls: 1 script: - make bender - - make sim VSIM_FLAGS=-c DEBUG=OFF target=sim_ita_hwpe_tb s=$S e=$E p=$P f=$F bias=1 activation=$activation + - make sim VSIM_FLAGS=-c DEBUG=OFF target=sim_ita_hwpe_tb s=$S e=$E p=$P f=$F bias=1 activation=$activation no_stalls=$no_stalls - ./modelsim/return_status.sh modelsim/build/transcript $S $E $P $F hwpe_tb diff --git a/src/hwpe/tb/ita_hwpe_tb.sv b/src/hwpe/tb/ita_hwpe_tb.sv index 888971a..712e779 100644 --- a/src/hwpe/tb/ita_hwpe_tb.sv +++ b/src/hwpe/tb/ita_hwpe_tb.sv @@ -67,7 +67,7 @@ module ita_hwpe_tb; // HWPE Parameters localparam unsigned ITA_REG_OFFSET = 32'h20; - parameter real PROB_STALL = 0.1; + parameter real PROB_STALL = `ifdef NO_STALLS ((`NO_STALLS == 1) ? 0 : 0.1) `else 0.1 `endif; parameter MEMORY_SIZE = SEQUENCE_LEN*EMBEDDING_SIZE*4+EMBEDDING_SIZE*PROJECTION_SPACE*4+PROJECTION_SPACE*3*3+EMBEDDING_SIZE*3+SEQUENCE_LEN*PROJECTION_SPACE*4+SEQUENCE_LEN*SEQUENCE_LEN+EMBEDDING_SIZE*FEEDFORWARD_SIZE*2+FEEDFORWARD_SIZE*3+EMBEDDING_SIZE*3; parameter int unsigned AccDataWidth = ITA_TCDM_DW; From f1a28bc88a43332c9e4844b473798f74dd9bb477 Mon Sep 17 00:00:00 2001 From: gamzeisl Date: Thu, 10 Oct 2024 13:53:55 +0200 Subject: [PATCH 3/6] [fix] Convert softmax to unsigned in PyITA --- PyITA/ITA.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/PyITA/ITA.py b/PyITA/ITA.py index 039771a..81a2c50 100644 --- a/PyITA/ITA.py +++ b/PyITA/ITA.py @@ -555,8 +555,10 @@ def soft(self, no_partial_softmax = False): write_matrix(A_save, f"A_soft_{h}", self.paths["standalone"]) def step5_AV(self): - self.O_soft = np.array( - [np.matmul(self.A_partial_softmax[i], self.Vp_requant[i], dtype = np.int32) for i in range(self.H)]) + self.O_soft = np.array([ + np.matmul(self.A_partial_softmax[i].astype(np.uint8), self.Vp_requant[i], dtype = np.int32) + for i in range(self.H) + ]) self.O_soft = np.clip(self.O_soft, -2**(self.WO - 1), 2**(self.WO - 1) - 1) self.O_soft_requant = requantize(self.O_soft, self.requant_eps_mult[4], self.requant_right_shift[4], self.requant_add[4]) From dd9b470b87a44c869b937ca41fed9b915e61bb67 Mon Sep 17 00:00:00 2001 From: gamzeisl Date: Thu, 10 Oct 2024 14:07:19 +0200 Subject: [PATCH 4/6] [fix] Correct pipeline stages in activation unit --- src/ita.sv | 22 +++++++++++----------- src/ita_activation.sv | 10 +++------- 2 files changed, 14 insertions(+), 18 deletions(-) diff --git a/src/ita.sv b/src/ita.sv index f164ac9..bd03c42 100644 --- a/src/ita.sv +++ b/src/ita.sv @@ -288,19 +288,19 @@ module ita ); ita_activation i_activation ( - .clk_i (clk_i ), - .rst_ni (rst_ni ), - .activation_i (activation_q10), - .calc_en_i (calc_en_q6 && last_inner_tile_q6 ), - .calc_en_q_i (calc_en_q7 && last_inner_tile_q7 ), - .b_i (ctrl_i.gelu_b ), - .c_i (ctrl_i.gelu_c ), + .clk_i (clk_i ), + .rst_ni (rst_ni ), + .activation_i (activation_q9), + .calc_en_i (calc_en_q6 && last_inner_tile_q6), + .calc_en_q_i (calc_en_q7 && last_inner_tile_q7), + .b_i (ctrl_i.gelu_b), + .c_i (ctrl_i.gelu_c), .requant_mode_i (activation_requant_mode), - .requant_mult_i (activation_requant_mult), + .requant_mult_i (activation_requant_mult), .requant_shift_i (activation_requant_shift), - .requant_add_i (activation_requant_add), - .data_i (requant_oup), - .data_o (post_activation) + .requant_add_i (activation_requant_add), + .data_i (requant_oup), + .data_o (post_activation) ); ita_fifo_controller i_fifo_controller ( diff --git a/src/ita_activation.sv b/src/ita_activation.sv index 95813f4..0f54070 100644 --- a/src/ita_activation.sv +++ b/src/ita_activation.sv @@ -23,7 +23,7 @@ module ita_activation requant_oup_t data_q1, data_q2, data_q3, data_q4; activation_e activation_q1, activation_q2; oup_t gelu_out, requant_in; - requant_oup_t relu_out_d, relu_out_q1, relu_out_q2, requant_out; + requant_oup_t relu_out, requant_out; logic calc_en_q2, calc_en_q3; ita_requantizer i_requantizer ( @@ -43,7 +43,7 @@ module ita_activation for (genvar i = 0; i < N; i++) begin: relu_instances ita_relu i_relu ( .data_i(data_q2[i]), - .data_o(relu_out_d[i]) + .data_o(relu_out[i]) ); end endgenerate @@ -70,7 +70,7 @@ module ita_activation end Relu: begin for (int i = 0; i < N; i++) begin - requant_in[i] = {{(WO-WI){relu_out_q2[i][WI-1]}}, relu_out_q2[i]}; + requant_in[i] = {{(WO-WI){relu_out[i][WI-1]}}, relu_out[i]}; end end default: begin @@ -101,8 +101,6 @@ module ita_activation data_q4 <= '0; calc_en_q2 <= 0; calc_en_q3 <= 0; - relu_out_q1 <= '0; - relu_out_q2 <= '0; end else begin activation_q1 <= activation_i; activation_q2 <= activation_q1; @@ -112,8 +110,6 @@ module ita_activation data_q4 <= data_q3; calc_en_q2 <= calc_en_q_i; calc_en_q3 <= calc_en_q2; - relu_out_q1 <= relu_out_d; - relu_out_q2 <= relu_out_q1; end end endmodule \ No newline at end of file From 0076abc4c4d53f5b13de5717558543c7fbb72766 Mon Sep 17 00:00:00 2001 From: gamzeisl Date: Thu, 10 Oct 2024 14:36:43 +0200 Subject: [PATCH 5/6] [feature] Silence relu and gelu inputs when not used --- src/ita.sv | 3 ++- src/ita_activation.sv | 19 ++++++++++--------- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/src/ita.sv b/src/ita.sv index bd03c42..9bdb1bb 100644 --- a/src/ita.sv +++ b/src/ita.sv @@ -290,7 +290,8 @@ module ita ita_activation i_activation ( .clk_i (clk_i ), .rst_ni (rst_ni ), - .activation_i (activation_q9), + .activation_i (activation_q7), + .activation_q2_i (activation_q9), .calc_en_i (calc_en_q6 && last_inner_tile_q6), .calc_en_q_i (calc_en_q7 && last_inner_tile_q7), .b_i (ctrl_i.gelu_b), diff --git a/src/ita_activation.sv b/src/ita_activation.sv index 0f54070..8631449 100644 --- a/src/ita_activation.sv +++ b/src/ita_activation.sv @@ -14,6 +14,7 @@ module ita_activation input requant_const_t requant_shift_i, input requant_t requant_add_i, input activation_e activation_i, + input activation_e activation_q2_i, input logic calc_en_i, input logic calc_en_q_i, input requant_oup_t data_i, @@ -21,7 +22,7 @@ module ita_activation ); requant_oup_t data_q1, data_q2, data_q3, data_q4; - activation_e activation_q1, activation_q2; + activation_e activation_q3, activation_q4; oup_t gelu_out, requant_in; requant_oup_t relu_out, requant_out; logic calc_en_q2, calc_en_q3; @@ -42,7 +43,7 @@ module ita_activation generate for (genvar i = 0; i < N; i++) begin: relu_instances ita_relu i_relu ( - .data_i(data_q2[i]), + .data_i((calc_en_q2 && activation_q2_i == Relu) ? data_q2[i] : '0), .data_o(relu_out[i]) ); end @@ -57,14 +58,14 @@ module ita_activation .c_i(c_i), .calc_en_i(calc_en_i), .calc_en_q_i(calc_en_q_i), - .data_i(data_i[i]), + .data_i((calc_en_i && activation_i == Gelu) ? data_i[i] : '0), .data_o(gelu_out[i]) ); end endgenerate always_comb begin - case (activation_i) + case (activation_q2_i) Gelu: begin requant_in = gelu_out; end @@ -81,7 +82,7 @@ module ita_activation always_comb begin - case (activation_q2) + case (activation_q4) Gelu, Relu: begin data_o = requant_out; end @@ -93,8 +94,8 @@ module ita_activation always_ff @(posedge clk_i) begin if (rst_ni == 0) begin - activation_q1 <= Identity; - activation_q2 <= Identity; + activation_q3 <= Identity; + activation_q4 <= Identity; data_q1 <= '0; data_q2 <= '0; data_q3 <= '0; @@ -102,8 +103,8 @@ module ita_activation calc_en_q2 <= 0; calc_en_q3 <= 0; end else begin - activation_q1 <= activation_i; - activation_q2 <= activation_q1; + activation_q3 <= activation_q2_i; + activation_q4 <= activation_q3; data_q1 <= data_i; data_q2 <= data_q1; data_q3 <= data_q2; From 41d6265d17f3e1512ee9407c5c832641d63795a6 Mon Sep 17 00:00:00 2001 From: gamzeisl Date: Wed, 16 Oct 2024 10:27:47 +0200 Subject: [PATCH 6/6] [fix] Adapt tests for new features --- tests/run.sh | 15 +++++++++------ tests/run_loop.sh | 42 ++++++++++++++++++++++++++++++------------ 2 files changed, 39 insertions(+), 18 deletions(-) diff --git a/tests/run.sh b/tests/run.sh index ca1c5d7..bf2201e 100755 --- a/tests/run.sh +++ b/tests/run.sh @@ -12,25 +12,28 @@ export buildpath=build export SIM_PATH=modelsim/$buildpath # Set to -gui to use the GUI of QuestaSim -export VSIM_FLAGS=-c +export vsim_flags=-c +export target=ita_tb export no_stalls=0 export s=64 export e=64 export p=64 +export f=64 export bias=1 +export activation=identity # Create test vectors if don't exist -if [ ! -d simvectors/data_S${s}_E${e}_P${p}_H1_B${bias} ] +if [ ! -d simvectors/data_S${s}_E${e}_P${p}_F${f}_H1_B${bias}_${activation^} ] then if [ $bias -eq 1 ] then - python testGenerator.py -S $s -P $p -E $e -H 1 + python testGenerator.py -H 1 -S $s -P $p -E $e -F $f --activation $activation else - python testGenerator.py -S $s -P $p -E $e -H 1 --no-bias + python testGenerator.py -H 1 -S $s -P $p -E $e -F $f --activation $activation --no-bias fi fi # Run the test -make sim VSIM_FLAGS=-c no_stalls=$no_stalls s=$s e=$e p=$p bias=$bias -./modelsim/return_status.sh ${SIM_PATH}/transcript $s $e ita_tb +make sim VSIM_FLAGS=$vsim_flags DEBUG=OFF target=sim_$target no_stalls=$no_stalls s=$s e=$e p=$p f=$f bias=$bias activation=$activation +./modelsim/return_status.sh ${SIM_PATH}/transcript $s $e $p $f $target diff --git a/tests/run_loop.sh b/tests/run_loop.sh index bf8b368..2b563e1 100755 --- a/tests/run_loop.sh +++ b/tests/run_loop.sh @@ -16,6 +16,13 @@ touch $log_file # Activate the virtual environment source venv/bin/activate +# Set the simulation path +export buildpath=build +export SIM_PATH=modelsim/$buildpath + +# Set to -gui to use the GUI of QuestaSim +export vsim_flags=-c + # Set the no_stalls if not set if [ -z "$no_stalls" ] then @@ -48,21 +55,32 @@ do do for p in $(eval echo "{$granularity..512..$granularity}") do - # Create test vectors - python testGenerator.py -S $s -E $e -P $p -H 1 --no-bias - python testGenerator.py -S $s -E $e -P $p -H 1 - - for bias in {0..1} + for f in $(eval echo "{$granularity..512..$granularity}") do - # Log the test - echo "Testing S=$s E=$e P=$p bias=$bias" >> $log_file + for activation in {identity,relu,gelu} + do + # Create test vectors + python testGenerator.py -H 1 -S $s -P $p -E $e -F $f --activation $activation --no-bias + python testGenerator.py -H 1 -S $s -P $p -E $e -F $f --activation $activation + + for target in {ita_tb,ita_hwpe_tb} + do + for bias in {0..1} + do + # Log the test + echo "Testing $target: S=$s E=$e P=$p F=$f Activation=$activation bias=$bias" >> $log_file + + # Run the test + make sim VSIM_FLAGS=$vsim_flags DEBUG=OFF target=sim_$target no_stalls=$no_stalls s=$s e=$e p=$p f=$f bias=$bias activation=$activation + ./modelsim/return_status.sh ${SIM_PATH}/transcript $s $e $p $f $target >> $log_file - # Run the test - make sim VSIM_FLAGS=-c no_stalls=$no_stalls s=$s e=$e p=$p bias=$bias - ./modelsim/return_status.sh modelsim/build/transcript $s $e ita_tb >> $log_file + # read -p "Press Enter to continue" - # Remove the test vectors - rm -rf simvectors/data_S${s}_E${e}_P${p}_H1_B${bias} + # Remove the test vectors + rm -rf simvectors/data_S${s}_E${e}_P${p}_F${f}_H1_B${bias}_${activation^} + done + done + done done done done