pulp-platform · gamzeisl · Oct 16, 2024 · Oct 10, 2024 · Oct 10, 2024 · Oct 10, 2024
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
@@ -45,19 +45,34 @@ run_sim:
       P: 64
       F: 64
       activation: gelu
+      no_stalls: 0
+    - S: 64
+      E: 64
+      P: 64
+      F: 64
+      activation: gelu
+      no_stalls: 1
+    - S: 128
+      E: 192
+      P: 256
+      F: 256
+      activation: gelu
+      no_stalls: 0
     - S: 128
       E: 192
       P: 256
       F: 256
       activation: gelu
+      no_stalls: 1
     - S: 192
       E: 256
       P: 128
       F: 128
       activation: relu
+      no_stalls: 1
   script:
     - make bender
-    - make sim VSIM_FLAGS=-c s=$S e=$E p=$P f=$F bias=1 activation=$activation
+    - make sim VSIM_FLAGS=-c s=$S e=$E p=$P f=$F bias=1 activation=$activation no_stalls=$no_stalls
     - ./modelsim/return_status.sh modelsim/build/transcript $S $E $P $F ita_tb
 
 run_hwpe_sim:
@@ -71,17 +86,32 @@ run_hwpe_sim:
       P: 64
       F: 64
       activation: gelu
+      no_stalls: 0
+    - S: 64
+      E: 64
+      P: 64
+      F: 64
+      activation: gelu
+      no_stalls: 1
+    - S: 128
+      E: 192
+      P: 256
+      F: 256
+      activation: gelu
+      no_stalls: 0
     - S: 128
       E: 192
       P: 256
       F: 256
       activation: gelu
+      no_stalls: 1
     - S: 192
       E: 256
       P: 128
       F: 128
       activation: relu
+      no_stalls: 1
   script:
     - make bender
-    - make sim VSIM_FLAGS=-c DEBUG=OFF target=sim_ita_hwpe_tb s=$S e=$E p=$P f=$F bias=1 activation=$activation
+    - make sim VSIM_FLAGS=-c DEBUG=OFF target=sim_ita_hwpe_tb s=$S e=$E p=$P f=$F bias=1 activation=$activation no_stalls=$no_stalls
     - ./modelsim/return_status.sh modelsim/build/transcript $S $E $P $F hwpe_tb
diff --git a/PyITA/ITA.py b/PyITA/ITA.py
@@ -555,8 +555,10 @@ def soft(self, no_partial_softmax = False):
             write_matrix(A_save, f"A_soft_{h}", self.paths["standalone"])
 
     def step5_AV(self):
-        self.O_soft = np.array(
-            [np.matmul(self.A_partial_softmax[i], self.Vp_requant[i], dtype = np.int32) for i in range(self.H)])
+        self.O_soft = np.array([
+            np.matmul(self.A_partial_softmax[i].astype(np.uint8), self.Vp_requant[i], dtype = np.int32)
+            for i in range(self.H)
+        ])
         self.O_soft = np.clip(self.O_soft, -2**(self.WO - 1), 2**(self.WO - 1) - 1)
         self.O_soft_requant = requantize(self.O_soft, self.requant_eps_mult[4], self.requant_right_shift[4],
                                          self.requant_add[4])

diff --git a/src/hwpe/tb/ita_hwpe_tb.sv b/src/hwpe/tb/ita_hwpe_tb.sv
@@ -67,7 +67,7 @@ module ita_hwpe_tb;
 
   // HWPE Parameters
   localparam unsigned ITA_REG_OFFSET  = 32'h20;
-  parameter real PROB_STALL = 0.1;
+  parameter real PROB_STALL = `ifdef NO_STALLS ((`NO_STALLS == 1) ? 0 : 0.1) `else 0.1 `endif;
   parameter MEMORY_SIZE = SEQUENCE_LEN*EMBEDDING_SIZE*4+EMBEDDING_SIZE*PROJECTION_SPACE*4+PROJECTION_SPACE*3*3+EMBEDDING_SIZE*3+SEQUENCE_LEN*PROJECTION_SPACE*4+SEQUENCE_LEN*SEQUENCE_LEN+EMBEDDING_SIZE*FEEDFORWARD_SIZE*2+FEEDFORWARD_SIZE*3+EMBEDDING_SIZE*3;
 
   parameter int unsigned AccDataWidth = ITA_TCDM_DW;
@@ -128,7 +128,9 @@ module ita_hwpe_tb;
       "_F",
       $sformatf("%0d", FEEDFORWARD_SIZE),
       "_H1_B",
-      $sformatf("%0d", `ifdef BIAS `BIAS `else 0 `endif)
+      $sformatf("%0d", `ifdef BIAS `BIAS `else 0 `endif),
+      "_",
+      $sformatf( "%s", ACTIVATION)
     };
     // Number of tiles in the sequence dimension
     N_TILES_SEQUENCE_DIM = SEQUENCE_LEN / M_TILE_LEN;

diff --git a/src/ita.sv b/src/ita.sv
@@ -288,19 +288,20 @@ module ita
   );
 
   ita_activation i_activation (
-    .clk_i         (clk_i       ),
-    .rst_ni        (rst_ni      ),
-    .activation_i  (activation_q10),
-    .calc_en_i     (calc_en_q6 && last_inner_tile_q6  ),
-    .calc_en_q_i  (calc_en_q7 && last_inner_tile_q7  ),
-    .b_i           (ctrl_i.gelu_b  ),
-    .c_i           (ctrl_i.gelu_c  ),
+    .clk_i           (clk_i        ),
+    .rst_ni          (rst_ni       ),
+    .activation_i    (activation_q7),
+    .activation_q2_i (activation_q9),
+    .calc_en_i       (calc_en_q6 && last_inner_tile_q6),
+    .calc_en_q_i     (calc_en_q7 && last_inner_tile_q7),
+    .b_i             (ctrl_i.gelu_b),
+    .c_i             (ctrl_i.gelu_c),
     .requant_mode_i  (activation_requant_mode),
-    .requant_mult_i    (activation_requant_mult),
+    .requant_mult_i  (activation_requant_mult),
     .requant_shift_i (activation_requant_shift),
-    .requant_add_i         (activation_requant_add),
-    .data_i        (requant_oup),
-    .data_o        (post_activation)
+    .requant_add_i   (activation_requant_add),
+    .data_i          (requant_oup),
+    .data_o          (post_activation)
   );
 
   ita_fifo_controller i_fifo_controller (

diff --git a/src/ita_activation.sv b/src/ita_activation.sv
@@ -14,16 +14,17 @@ module ita_activation
     input requant_const_t requant_shift_i,
     input requant_t requant_add_i,
     input activation_e activation_i,
+    input activation_e activation_q2_i,
     input logic calc_en_i,
     input logic calc_en_q_i,
     input requant_oup_t  data_i,
     output requant_oup_t data_o
   );
 
   requant_oup_t data_q1, data_q2, data_q3, data_q4;
-  activation_e activation_q1, activation_q2;
+  activation_e activation_q3, activation_q4;
   oup_t gelu_out, requant_in;
-  requant_oup_t relu_out_d, relu_out_q1, relu_out_q2, requant_out;
+  requant_oup_t relu_out, requant_out;
   logic calc_en_q2, calc_en_q3;
 
   ita_requantizer i_requantizer (
@@ -42,8 +43,8 @@ module ita_activation
   generate
     for (genvar i = 0; i < N; i++) begin: relu_instances
       ita_relu i_relu (
-        .data_i(data_q2[i]),
-        .data_o(relu_out_d[i])
+        .data_i((calc_en_q2 && activation_q2_i == Relu) ? data_q2[i] : '0),
+        .data_o(relu_out[i])
       );
     end
   endgenerate
@@ -57,20 +58,20 @@ module ita_activation
         .c_i(c_i),
         .calc_en_i(calc_en_i),
         .calc_en_q_i(calc_en_q_i),
-        .data_i(data_i[i]),
+        .data_i((calc_en_i && activation_i == Gelu) ? data_i[i] : '0),
         .data_o(gelu_out[i])
       );
     end
   endgenerate
 
   always_comb begin
-    case (activation_i)
+    case (activation_q2_i)
       Gelu: begin
         requant_in = gelu_out;
       end
       Relu: begin
         for (int i = 0; i < N; i++) begin
-          requant_in[i] = {{(WO-WI){relu_out_q2[i][WI-1]}}, relu_out_q2[i]};
+          requant_in[i] = {{(WO-WI){relu_out[i][WI-1]}}, relu_out[i]};
         end
       end
       default: begin
@@ -81,7 +82,7 @@ module ita_activation
 
 
   always_comb begin
-    case (activation_q2)
+    case (activation_q4)
       Gelu, Relu: begin
         data_o = requant_out;
       end
@@ -93,27 +94,23 @@ module ita_activation
 
   always_ff @(posedge clk_i) begin
     if (rst_ni == 0) begin
-      activation_q1 <= Identity;
-      activation_q2 <= Identity;
+      activation_q3 <= Identity;
+      activation_q4 <= Identity;
       data_q1 <= '0;
       data_q2 <= '0;
       data_q3 <= '0;
       data_q4 <= '0;
       calc_en_q2 <= 0;
       calc_en_q3 <= 0;
-      relu_out_q1 <= '0;
-      relu_out_q2 <= '0;
     end else begin
-      activation_q1 <= activation_i;
-      activation_q2 <= activation_q1;
+      activation_q3 <= activation_q2_i;
+      activation_q4 <= activation_q3;
       data_q1 <= data_i;
       data_q2 <= data_q1;
       data_q3 <= data_q2;
       data_q4 <= data_q3;
       calc_en_q2 <= calc_en_q_i;
       calc_en_q3 <= calc_en_q2;
-      relu_out_q1 <= relu_out_d;
-      relu_out_q2 <= relu_out_q1;
     end
   end
 endmodule
diff --git a/src/tb/ita_tb.sv b/src/tb/ita_tb.sv
@@ -80,7 +80,9 @@ module ita_tb;
       "_F",
       $sformatf("%0d", FEEDFORWARD_SIZE),
       "_H1_B",
-      $sformatf("%0d", `ifdef BIAS `BIAS `else 0 `endif)
+      $sformatf("%0d", `ifdef BIAS `BIAS `else 0 `endif),
+      "_",
+      $sformatf( "%s", ACTIVATION)
     };
     N_TILES_SEQUENCE_DIM = SEQUENCE_LEN / M_TILE_LEN;
     N_TILES_EMBEDDING_DIM = EMBEDDING_SIZE / M_TILE_LEN;

diff --git a/testGenerator.py b/testGenerator.py
@@ -47,7 +47,8 @@ def generateMHA(**args):
     H = args['H']
     NO_BIAS = args['no_bias']
     NO_PARTIAL_SOFTMAX = args['no_partial_softmax']
-    base_path = f'{current_dir}/simvectors/data_S{S}_E{E}_P{P}_F{F}_H{H}_B{int(not NO_BIAS)}'
+    ACTIVATION = args['activation'].capitalize()
+    base_path = f'{current_dir}/simvectors/data_S{S}_E{E}_P{P}_F{F}_H{H}_B{int(not NO_BIAS)}_{ACTIVATION}'
 
     if NO_PARTIAL_SOFTMAX:
         path = f'{base_path}_noPartialSoftmax/'

diff --git a/tests/run.sh b/tests/run.sh
@@ -12,25 +12,28 @@ export buildpath=build
 export SIM_PATH=modelsim/$buildpath
 
 # Set to -gui to use the GUI of QuestaSim
-export VSIM_FLAGS=-c
+export vsim_flags=-c
 
+export target=ita_tb
 export no_stalls=0
 export s=64
 export e=64
 export p=64
+export f=64
 export bias=1
+export activation=identity
 
 # Create test vectors if don't exist
-if [ ! -d simvectors/data_S${s}_E${e}_P${p}_H1_B${bias} ]
+if [ ! -d simvectors/data_S${s}_E${e}_P${p}_F${f}_H1_B${bias}_${activation^} ]
 then
     if [ $bias -eq 1 ]
     then
-        python testGenerator.py -S $s -P $p -E $e -H 1
+        python testGenerator.py -H 1 -S $s -P $p -E $e -F $f --activation $activation
     else
-        python testGenerator.py -S $s -P $p -E $e -H 1 --no-bias
+        python testGenerator.py -H 1 -S $s -P $p -E $e -F $f --activation $activation --no-bias
     fi
 fi
 
 # Run the test
-make sim VSIM_FLAGS=-c no_stalls=$no_stalls s=$s e=$e p=$p bias=$bias
-./modelsim/return_status.sh ${SIM_PATH}/transcript $s $e ita_tb
+make sim VSIM_FLAGS=$vsim_flags DEBUG=OFF target=sim_$target no_stalls=$no_stalls s=$s e=$e p=$p f=$f bias=$bias activation=$activation
+./modelsim/return_status.sh ${SIM_PATH}/transcript $s $e $p $f $target
diff --git a/tests/run_loop.sh b/tests/run_loop.sh
@@ -16,6 +16,13 @@ touch $log_file
 # Activate the virtual environment
 source venv/bin/activate
 
+# Set the simulation path
+export buildpath=build
+export SIM_PATH=modelsim/$buildpath
+
+# Set to -gui to use the GUI of QuestaSim
+export vsim_flags=-c
+
 # Set the no_stalls if not set
 if [ -z "$no_stalls" ]
 then
@@ -48,21 +55,32 @@ do
     do
         for p in $(eval echo "{$granularity..512..$granularity}")
         do
-            # Create test vectors
-            python testGenerator.py -S $s -E $e -P $p -H 1 --no-bias
-            python testGenerator.py -S $s -E $e -P $p -H 1
-
-            for bias in {0..1}
+            for f in $(eval echo "{$granularity..512..$granularity}")
             do
-                # Log the test
-                echo "Testing S=$s E=$e P=$p bias=$bias" >> $log_file
+                for activation in {identity,relu,gelu}
+                do
+                    # Create test vectors
+                    python testGenerator.py -H 1 -S $s -P $p -E $e -F $f --activation $activation --no-bias
+                    python testGenerator.py -H 1 -S $s -P $p -E $e -F $f --activation $activation
+
+                    for target in {ita_tb,ita_hwpe_tb}
+                    do
+                        for bias in {0..1}
+                        do
+                            # Log the test
+                            echo "Testing $target: S=$s E=$e P=$p F=$f Activation=$activation bias=$bias" >> $log_file
+
+                            # Run the test
+                            make sim VSIM_FLAGS=$vsim_flags DEBUG=OFF target=sim_$target no_stalls=$no_stalls s=$s e=$e p=$p f=$f bias=$bias activation=$activation
+                            ./modelsim/return_status.sh ${SIM_PATH}/transcript $s $e $p $f $target >> $log_file
 
-                # Run the test
-                make sim VSIM_FLAGS=-c no_stalls=$no_stalls s=$s e=$e p=$p bias=$bias
-                ./modelsim/return_status.sh modelsim/build/transcript $s $e ita_tb >> $log_file
+                            # read -p "Press Enter to continue"
+                        done
+                    done
 
-                # Remove the test vectors
-                rm -rf simvectors/data_S${s}_E${e}_P${p}_H1_B${bias}
+                    # Remove both bias variants only after every target has consumed them
+                    rm -rf simvectors/data_S${s}_E${e}_P${p}_F${f}_H1_B{0,1}_${activation^}
+                done
             done
         done
     done