diff --git a/Bender.lock b/Bender.lock index 7b9c137ee..f2704947b 100644 --- a/Bender.lock +++ b/Bender.lock @@ -85,7 +85,7 @@ packages: dependencies: - common_cells spatz: - revision: c8e444c6b64e2b905f4767d158fa3a8c718fef39 + revision: fa75ec560c4518bdac4c350481e6474188f88ecc version: null source: Git: git@iis-git.ee.ethz.ch:spatz/spatz.git @@ -96,7 +96,6 @@ packages: - fpnew - idma - register_interface - - reqrsp_interface - riscv-dbg - tech_cells_generic tech_cells_generic: diff --git a/Bender.yml b/Bender.yml index 564328560..eb93d0848 100644 --- a/Bender.yml +++ b/Bender.yml @@ -14,7 +14,7 @@ dependencies: reqrsp_interface: { path: "hardware/deps/reqrsp_interface" } snitch: { path: "hardware/deps/snitch" } tech_cells_generic: { git: "https://github.com/pulp-platform/tech_cells_generic.git", version: 0.2.5 } - spatz: { git: "git@iis-git.ee.ethz.ch:spatz/spatz.git", rev: c8e444c } + spatz: { git: "git@iis-git.ee.ethz.ch:spatz/spatz.git", rev: fa75ec56 } FPnew: { git: "https://github.com/pulp-platform/cvfpu.git", rev: pulp-v0.1.3 } workspace: diff --git a/hardware/Makefile b/hardware/Makefile index 2ce6f25af..8894417df 100644 --- a/hardware/Makefile +++ b/hardware/Makefile @@ -103,6 +103,7 @@ vlog_defs += -DSEQ_MEM_SIZE=$(seq_mem_size) -DXQUEUE_SIZE=$(xqueue_size) ifeq ($(spatz), 1) vlog_defs += -DVLEN=$(vlen) -DN_IPU=$(n_ipu) -DN_FPU=$(n_fpu) -DN_FU=$(shell awk 'BEGIN{print ($(n_ipu) > $(n_fpu)) ? 
$(n_ipu) : $(n_fpu)}') + vlog_defs += -DMEMPOOL_SPATZ=$(spatz) bender_defs += -t spatz endif @@ -148,7 +149,7 @@ $(buildpath)/$(library): .PHONY: compile compile: dpi lib $(buildpath) $(buildpath)/compile.tcl update_opcodes $(buildpath)/compile.tcl: $(bender) $(config_mk) Makefile $(MEMPOOL_DIR)/Bender.yml $(shell find {src,tb,deps} -type f) - $(bender) script vsim --vlog-arg="$(vlog_args)" $(vlog_defs) -t rtl -t mempool_vsim $(bender_defs) > $(buildpath)/compile.tcl + $(bender) script vsim --vlog-arg="$(vlog_args)" $(vlog_defs) -t rtl -t mempool -t mempool_vsim $(bender_defs) > $(buildpath)/compile.tcl echo "exit" >> $(buildpath)/compile.tcl cd $(buildpath) && $(questa_cmd) vsim -work $(library) -c -do compile.tcl @@ -185,7 +186,7 @@ $(buildpath)/$(dpi_library)/mempool_dpi.so: $(dpi) .PHONY: elabvcs elabvcs: dpivcs $(buildpath) $(buildpath)/compilevcs.sh update_opcodes $(buildpath)/compilevcs.sh: $(bender) $(config_mk) Makefile $(MEMPOOL_DIR)/Bender.yml $(shell find {src,tb,deps} -type f) - $(bender) script vcs --vlogan-bin="$(vcs_cmd) vlogan" --vlog-arg="$(vlogan_args)" $(vlog_defs) -t rtl -t mempool_vsim $(bender_defs) > $(buildpath)/compilevcs.sh + $(bender) script vcs --vlogan-bin="$(vcs_cmd) vlogan" --vlog-arg="$(vlogan_args)" $(vlog_defs) -t rtl -t mempool -t mempool_vsim $(bender_defs) > $(buildpath)/compilevcs.sh echo "exit" >> $(buildpath)/compilevcs.sh # Call VCS cd $(buildpath) && \ @@ -259,7 +260,7 @@ $(VERILATOR_MK): $(VERILATOR_CONF) $(VERILATOR_WAIVE) $(MEMPOOL_DIR)/Bender.yml # Overwrite Bootaddress to L2 base while we don't have a DPI to write a wake-up $(eval boot_addr=$(l2_base)) # Create Bender script of all RTL files - $(bender) script verilator $(vlog_defs) -t rtl -t mempool_verilator $(bender_defs) > $(verilator_files) + $(bender) script verilator $(vlog_defs) -t rtl -t mempool -t mempool_verilator $(bender_defs) > $(verilator_files) # Append the verilator library files @echo '' >> $(verilator_files) # Append the verilator library files: 
Includes @@ -291,7 +292,7 @@ lint: spyglass/tmp/files spyglass/sdc/func.sdc spyglass/scripts/run_lint.tcl spyglass/tmp/files: $(bender) mkdir -p spyglass/tmp - $(bender) script verilator $(vlog_defs) -t rtl -t mempool_verilator $(bender_defs) > spyglass/tmp/files + $(bender) script verilator $(vlog_defs) -t rtl -t mempool -t mempool_verilator $(bender_defs) > spyglass/tmp/files ################ # Tracing # diff --git a/hardware/deps/snitch/src/snitch_lsu.sv b/hardware/deps/snitch/src/snitch_lsu.sv index 6ceaf9e0a..5496b07a6 100644 --- a/hardware/deps/snitch/src/snitch_lsu.sv +++ b/hardware/deps/snitch/src/snitch_lsu.sv @@ -11,6 +11,8 @@ module snitch_lsu import cf_math_pkg::idx_width; #( parameter type tag_t = logic [4:0], + parameter type dreq_t = logic, + parameter type drsp_t = logic, parameter int unsigned NumOutstandingLoads = 1, parameter bit NaNBox = 0, // Dependent parameters. DO NOT CHANGE. @@ -20,8 +22,8 @@ module snitch_lsu input logic rst_i, // request channel input tag_t lsu_qtag_i, - input logic lsu_qwrite, - input logic lsu_qsigned, + input logic lsu_qwrite_i, + input logic lsu_qsigned_i, input logic [31:0] lsu_qaddr_i, input logic [31:0] lsu_qdata_i, input logic [1:0] lsu_qsize_i, @@ -104,9 +106,9 @@ module snitch_lsu end assign req_metadata = '{ - write: lsu_qwrite, + write: lsu_qwrite_i, tag: lsu_qtag_i, - sign_ext: lsu_qsigned, + sign_ext: lsu_qsigned_i, offset: lsu_qaddr_i[1:0], size: lsu_qsize_i }; @@ -135,7 +137,7 @@ module snitch_lsu // also check that we can actually store the necessary information to process // it in the upcoming cycle(s). 
assign data_qvalid_o = lsu_qvalid_i && !id_table_full; - assign data_qwrite_o = lsu_qwrite; + assign data_qwrite_o = lsu_qwrite_i; assign data_qaddr_o = {lsu_qaddr_i[31:2], 2'b0}; assign data_qamo_o = lsu_qamo_i; assign data_qid_o = req_id; diff --git a/hardware/deps/snitch/src/snitch_md.sv b/hardware/deps/snitch/src/snitch_md.sv index c5789079b..1f89bf764 100644 --- a/hardware/deps/snitch/src/snitch_md.sv +++ b/hardware/deps/snitch/src/snitch_md.sv @@ -274,8 +274,16 @@ module snitch_md assign acc_qid_o = rd; assign acc_qdata_op_o = inst_data_i; assign acc_qdata_arga_o = {{32{gpr_rdata[0][31]}}, gpr_rdata[0]}; - assign acc_qdata_argb_o = {{32{gpr_rdata[1][31]}}, gpr_rdata[1]}; - assign acc_qdata_argc_o = {{32{gpr_rdata[2][31]}}, gpr_rdata[2]}; + // assign acc_qdata_argb_o = {{32{gpr_rdata[1][31]}}, gpr_rdata[1]}; + assign acc_qdata_argb_o = opb_select inside {IImmediate, SImmediate} ? + {{32{alu_result[31]}}, alu_result} : {{32{gpr_rdata[1][31]}}, gpr_rdata[1]}; + +`ifdef XPULPIMG_EXTENSION + assign acc_qdata_argc_o = {{32{gpr_rdata[2][31]}}, gpr_rdata[2]}; // sign-extend like acc_qdata_arga_o +`else + assign acc_qdata_argc_o = '0; +`endif + // instruction fetch interface assign inst_addr_o = pc_q; @@ -2239,7 +2247,7 @@ module snitch_md opa_select = Reg; opb_select = IImmediate; write_rd = 1'b0; - acc_qvalid_o = valid_instr; + acc_qvalid_o = valid_instr && !acc_mem_stall; ls_size = Word; is_fp_load = 1'b1; end else begin @@ -2251,7 +2259,7 @@ module snitch_md opa_select = Reg; opb_select = SFImmediate; write_rd = 1'b0; - acc_qvalid_o = valid_instr; + acc_qvalid_o = valid_instr && !acc_mem_stall; ls_size = Word; is_fp_store = 1'b1; acc_mem_store = 1'b1; @@ -2265,7 +2273,7 @@ module snitch_md opa_select = Reg; opb_select = IImmediate; write_rd = 1'b0; - acc_qvalid_o = valid_instr; + acc_qvalid_o = valid_instr && !acc_mem_stall; ls_size = Double; is_fp_load = 1'b1; end else begin @@ -2277,7 +2285,7 @@ module snitch_md opa_select = Reg; opb_select = SFImmediate; write_rd = 1'b0; - acc_qvalid_o = valid_instr; + acc_qvalid_o = 
valid_instr && !acc_mem_stall; ls_size = Double; is_fp_store = 1'b1; acc_mem_store = 1'b1; @@ -2291,7 +2299,7 @@ module snitch_md opa_select = Reg; opb_select = IImmediate; write_rd = 1'b0; - acc_qvalid_o = valid_instr; + acc_qvalid_o = valid_instr && !acc_mem_stall; ls_size = HalfWord; is_fp_load = 1'b1; end else begin @@ -2303,7 +2311,7 @@ module snitch_md opa_select = Reg; opb_select = SFImmediate; write_rd = 1'b0; - acc_qvalid_o = valid_instr; + acc_qvalid_o = valid_instr && !acc_mem_stall; ls_size = HalfWord; is_fp_store = 1'b1; acc_mem_store = 1'b1; @@ -2317,7 +2325,7 @@ module snitch_md opa_select = Reg; opb_select = IImmediate; write_rd = 1'b0; - acc_qvalid_o = valid_instr; + acc_qvalid_o = valid_instr && !acc_mem_stall; ls_size = Byte; is_fp_load = 1'b1; end else begin @@ -2329,7 +2337,7 @@ module snitch_md opa_select = Reg; opb_select = SFImmediate; write_rd = 1'b0; - acc_qvalid_o = valid_instr; + acc_qvalid_o = valid_instr && !acc_mem_stall; ls_size = Byte; is_fp_store = 1'b1; acc_mem_store = 1'b1; @@ -2525,7 +2533,9 @@ module snitch_md PC: opb = pc_q; CSR: opb = csr_rvalue; PBImmediate: opb = pbimm; - RegRd: opb = gpr_rdata[2]; +`ifdef XPULPIMG_EXTENSION + RegRd: opb = (RegNrReadPorts < 3) ? 
'0 : gpr_rdata[2]; +`endif default: opb = '0; endcase end @@ -2633,8 +2643,8 @@ module snitch_md .clk_i , .rst_i , .lsu_qtag_i ( rd ), - .lsu_qwrite ( is_store ), - .lsu_qsigned ( is_signed ), + .lsu_qwrite_i ( is_store ), + .lsu_qsigned_i( is_signed ), .lsu_qaddr_i ( lsu_qaddr ), .lsu_qdata_i ( gpr_rdata[1] ), .lsu_qsize_i ( ls_size ), diff --git a/hardware/deps/snitch/src/snitch_pkg.sv b/hardware/deps/snitch/src/snitch_pkg.sv index d736955c6..a79f183cc 100644 --- a/hardware/deps/snitch/src/snitch_pkg.sv +++ b/hardware/deps/snitch/src/snitch_pkg.sv @@ -48,7 +48,7 @@ package snitch_pkg; typedef struct packed { addr_t addr; - logic [4:0] id; + logic [5:0] id; logic [31:0] data_op; data_t data_arga; data_t data_argb; @@ -56,9 +56,10 @@ package snitch_pkg; } acc_req_t; typedef struct packed { - logic [4:0] id; + logic [5:0] id; logic error; data_t data; + logic write; } acc_resp_t; // Number of instructions the sequencer can hold diff --git a/hardware/scripts/questa/wave_core.tcl b/hardware/scripts/questa/wave_core.tcl index 4ab7e06e2..9dfcfc9bd 100644 --- a/hardware/scripts/questa/wave_core.tcl +++ b/hardware/scripts/questa/wave_core.tcl @@ -185,43 +185,74 @@ add wave -noupdate -group core[$1][$2][$3] /mempool_tb/dut/i_mempool_cluster/gen add wave -noupdate -group core[$1][$2][$3] /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/gen_id_remapper/i_id_remapper/gen_remapper/id_q add wave -noupdate -group core[$1][$2][$3] /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/gen_id_remapper/i_id_remapper/gen_remapper/id_d -add wave -noupdate -group core[$1][$2][$3] -divider SPATZ -add wave -noupdate -group core[$1][$2][$3] /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/issue_valid_i -add wave -noupdate -group core[$1][$2][$3] 
/mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/issue_ready_o -add wave -noupdate -group core[$1][$2][$3] /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/issue_req_i -add wave -noupdate -group core[$1][$2][$3] /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/issue_rsp_o -add wave -noupdate -group core[$1][$2][$3] /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/rsp_valid_o -add wave -noupdate -group core[$1][$2][$3] /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/rsp_ready_i -add wave -noupdate -group core[$1][$2][$3] /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/rsp_o - -add wave -noupdate -group core[$1][$2][$3] /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/spatz_mem_req_o -add wave -noupdate -group core[$1][$2][$3] /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/spatz_mem_req_valid_o -add wave -noupdate -group core[$1][$2][$3] /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/spatz_mem_req_ready_i -add wave -noupdate -group core[$1][$2][$3] /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/spatz_mem_rsp_i -add wave -noupdate -group core[$1][$2][$3] /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/spatz_mem_rsp_valid_i -add 
wave -noupdate -group core[$1][$2][$3] /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/spatz_mem_finished_o -add wave -noupdate -group core[$1][$2][$3] /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/spatz_mem_str_finished_o -add wave -noupdate -group core[$1][$2][$3] /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/fp_lsu_mem_req_o -add wave -noupdate -group core[$1][$2][$3] /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/fp_lsu_mem_req_valid_o -add wave -noupdate -group core[$1][$2][$3] /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/fp_lsu_mem_req_ready_i -add wave -noupdate -group core[$1][$2][$3] /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/fp_lsu_mem_rsp_i -add wave -noupdate -group core[$1][$2][$3] /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/fp_lsu_mem_rsp_valid_i -add wave -noupdate -group core[$1][$2][$3] /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/fp_lsu_mem_rsp_ready_o - -add wave -noupdate -group core[$1][$2][$3] -group VFU /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/i_vfu/spatz_req_i -add wave -noupdate -group core[$1][$2][$3] -group VFU /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/i_vfu/spatz_req_valid_i -add wave -noupdate -group core[$1][$2][$3] -group VFU 
/mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/i_vfu/spatz_req_ready_o -add wave -noupdate -group core[$1][$2][$3] -group VFU /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/i_vfu/vfu_rsp_valid_o -add wave -noupdate -group core[$1][$2][$3] -group VFU /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/i_vfu/vfu_rsp_ready_i -add wave -noupdate -group core[$1][$2][$3] -group VFU /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/i_vfu/vfu_rsp_o -add wave -noupdate -group core[$1][$2][$3] -group VFU /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/i_vfu/vrf_waddr_o -add wave -noupdate -group core[$1][$2][$3] -group VFU /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/i_vfu/vrf_wdata_o -add wave -noupdate -group core[$1][$2][$3] -group VFU /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/i_vfu/vrf_we_o -add wave -noupdate -group core[$1][$2][$3] -group VFU /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/i_vfu/vrf_wbe_o -add wave -noupdate -group core[$1][$2][$3] -group VFU /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/i_vfu/vrf_wvalid_i -add wave -noupdate -group core[$1][$2][$3] -group VFU /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/i_vfu/vrf_id_o -add wave -noupdate -group core[$1][$2][$3] -group VFU 
/mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/i_vfu/vrf_raddr_o -add wave -noupdate -group core[$1][$2][$3] -group VFU /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/i_vfu/vrf_re_o -add wave -noupdate -group core[$1][$2][$3] -group VFU /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/i_vfu/vrf_rdata_i -add wave -noupdate -group core[$1][$2][$3] -group VFU /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/i_vfu/vrf_rvalid_i -add wave -noupdate -group core[$1][$2][$3] -group VFU /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/i_vfu/fpu_status_o +# add wave -noupdate -group core[$1][$2][$3] -divider SPATZ +add wave -noupdate -group core[$1][$2][$3] -group Spatz /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/issue_valid_i +add wave -noupdate -group core[$1][$2][$3] -group Spatz /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/issue_ready_o +add wave -noupdate -group core[$1][$2][$3] -group Spatz /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/issue_req_i +add wave -noupdate -group core[$1][$2][$3] -group Spatz /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/issue_rsp_o +add wave -noupdate -group core[$1][$2][$3] -group Spatz /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/rsp_valid_o +add wave -noupdate 
-group core[$1][$2][$3] -group Spatz /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/rsp_ready_i +add wave -noupdate -group core[$1][$2][$3] -group Spatz /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/rsp_o + +add wave -noupdate -group core[$1][$2][$3] -group Spatz /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/spatz_mem_req_o +add wave -noupdate -group core[$1][$2][$3] -group Spatz /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/spatz_mem_req_valid_o +add wave -noupdate -group core[$1][$2][$3] -group Spatz /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/spatz_mem_req_ready_i +add wave -noupdate -group core[$1][$2][$3] -group Spatz /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/spatz_mem_rsp_i +add wave -noupdate -group core[$1][$2][$3] -group Spatz /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/spatz_mem_rsp_valid_i +add wave -noupdate -group core[$1][$2][$3] -group Spatz /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/spatz_mem_finished_o +add wave -noupdate -group core[$1][$2][$3] -group Spatz /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/spatz_mem_str_finished_o +add wave -noupdate -group core[$1][$2][$3] -group Spatz /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/fp_lsu_mem_req_o +add 
wave -noupdate -group core[$1][$2][$3] -group Spatz /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/fp_lsu_mem_req_valid_o +add wave -noupdate -group core[$1][$2][$3] -group Spatz /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/fp_lsu_mem_req_ready_i +add wave -noupdate -group core[$1][$2][$3] -group Spatz /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/fp_lsu_mem_rsp_i +add wave -noupdate -group core[$1][$2][$3] -group Spatz /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/fp_lsu_mem_rsp_valid_i +add wave -noupdate -group core[$1][$2][$3] -group Spatz /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/fp_lsu_mem_rsp_ready_o + +add wave -noupdate -group core[$1][$2][$3] -group Spatz -group VFU /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/i_vfu/spatz_req_i +add wave -noupdate -group core[$1][$2][$3] -group Spatz -group VFU /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/i_vfu/spatz_req_valid_i +add wave -noupdate -group core[$1][$2][$3] -group Spatz -group VFU /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/i_vfu/spatz_req_ready_o +add wave -noupdate -group core[$1][$2][$3] -group Spatz -group VFU /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/i_vfu/vfu_rsp_valid_o +add wave -noupdate -group core[$1][$2][$3] -group Spatz -group VFU 
/mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/i_vfu/vfu_rsp_ready_i +add wave -noupdate -group core[$1][$2][$3] -group Spatz -group VFU /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/i_vfu/vfu_rsp_o +add wave -noupdate -group core[$1][$2][$3] -group Spatz -group VFU /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/i_vfu/vrf_waddr_o +add wave -noupdate -group core[$1][$2][$3] -group Spatz -group VFU /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/i_vfu/vrf_wdata_o +add wave -noupdate -group core[$1][$2][$3] -group Spatz -group VFU /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/i_vfu/vrf_we_o +add wave -noupdate -group core[$1][$2][$3] -group Spatz -group VFU /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/i_vfu/vrf_wbe_o +add wave -noupdate -group core[$1][$2][$3] -group Spatz -group VFU /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/i_vfu/vrf_wvalid_i +add wave -noupdate -group core[$1][$2][$3] -group Spatz -group VFU /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/i_vfu/vrf_id_o +add wave -noupdate -group core[$1][$2][$3] -group Spatz -group VFU /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/i_vfu/vrf_raddr_o +add wave -noupdate -group core[$1][$2][$3] -group Spatz -group VFU 
/mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/i_vfu/vrf_re_o +add wave -noupdate -group core[$1][$2][$3] -group Spatz -group VFU /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/i_vfu/vrf_rdata_i +add wave -noupdate -group core[$1][$2][$3] -group Spatz -group VFU /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/i_vfu/vrf_rvalid_i +add wave -noupdate -group core[$1][$2][$3] -group Spatz -group VFU /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/i_vfu/fpu_status_o + +add wave -noupdate -group core[$1][$2][$3] -group Spatz -group "FPU Sequencer" /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/gen_fpu_sequencer/i_fpu_sequencer/* +add wave -noupdate -group core[$1][$2][$3] -group Spatz -group "FPU Sequencer" -group FPR /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/gen_fpu_sequencer/i_fpu_sequencer/i_fpr/* +add wave -noupdate -group core[$1][$2][$3] -group Spatz -group "FPU Sequencer" -group LSU /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/gen_fpu_sequencer/i_fpu_sequencer/i_fp_lsu/* + +add wave -noupdate -group core[$1][$2][$3] -group Spatz -group Controller /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/i_controller/* + +add wave -noupdate -group core[$1][$2][$3] -group Spatz -group VRF -divider RegisterWrite +add wave -noupdate -group core[$1][$2][$3] -group Spatz -group VRF 
/mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/i_vrf/waddr_i +add wave -noupdate -group core[$1][$2][$3] -group Spatz -group VRF /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/i_vrf/wdata_i +add wave -noupdate -group core[$1][$2][$3] -group Spatz -group VRF /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/i_vrf/we_i +add wave -noupdate -group core[$1][$2][$3] -group Spatz -group VRF /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/i_vrf/wbe_i +add wave -noupdate -group core[$1][$2][$3] -group Spatz -group VRF /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/i_vrf/wvalid_o +add wave -noupdate -group core[$1][$2][$3] -group Spatz -group VRF -divider RegisterRead +add wave -noupdate -group core[$1][$2][$3] -group Spatz -group VRF /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/i_vrf/raddr_i +add wave -noupdate -group core[$1][$2][$3] -group Spatz -group VRF /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/i_vrf/rdata_o +add wave -noupdate -group core[$1][$2][$3] -group Spatz -group VRF /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/i_vrf/re_i +add wave -noupdate -group core[$1][$2][$3] -group Spatz -group VRF /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/i_vrf/rvalid_o +add wave -noupdate -group core[$1][$2][$3] -group Spatz -group VRF -divider Internal +add wave -noupdate 
-group core[$1][$2][$3] -group Spatz -group VRF /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/i_vrf/waddr +add wave -noupdate -group core[$1][$2][$3] -group Spatz -group VRF /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/i_vrf/wdata +add wave -noupdate -group core[$1][$2][$3] -group Spatz -group VRF /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/i_vrf/we +add wave -noupdate -group core[$1][$2][$3] -group Spatz -group VRF /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/i_vrf/wbe +add wave -noupdate -group core[$1][$2][$3] -group Spatz -group VRF /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/i_vrf/raddr +add wave -noupdate -group core[$1][$2][$3] -group Spatz -group VRF /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/i_vrf/rdata + +add wave -noupdate -group core[$1][$2][$3] -group Spatz -group VLSU /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/i_vlsu/* + +add wave -noupdate -group core[$1][$2][$3] -group Spatz -group VSLDU /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/i_vsldu/* + +add wave -noupdate -group core[$1][$2][$3] -group Spatz -group VFU /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_cores[$3]/gen_mempool_cc/riscv_core/i_spatz/i_vfu/* diff --git a/hardware/src/mempool_tile.sv b/hardware/src/mempool_tile.sv index 971a61b0a..03cd65d08 100644 --- a/hardware/src/mempool_tile.sv +++ 
b/hardware/src/mempool_tile.sv @@ -846,7 +846,8 @@ module mempool_tile logic soc_pready; // We don't care about this - assign soc_resp_i.id = 'x; + // assign soc_resp_i.id = 'x; + assign soc_resp_i.id = '0; snitch_demux #( .NrPorts (NumCoresPerTile*NumDataPortsPerCore ), diff --git a/hardware/src/tcdm_adapter.sv b/hardware/src/tcdm_adapter.sv index ee48abd92..a681539d3 100644 --- a/hardware/src/tcdm_adapter.sv +++ b/hardware/src/tcdm_adapter.sv @@ -131,7 +131,8 @@ module tcdm_adapter #( ); localparam int unsigned CoreIdWidth = idx_width(NumCores); - localparam int unsigned IniAddrWidth = idx_width(NumCoresPerTile*NumDataPortsPerCore + NumGroups); + localparam int unsigned IniAddrWidth = idx_width(NumCoresPerTile + NumGroups); + // localparam int unsigned IniAddrWidth = idx_width(NumCoresPerTile*NumDataPortsPerCore + NumGroups); logic sc_successful_d, sc_successful_q; logic sc_q; diff --git a/software/apps/float-simple/main.c b/software/apps/float-simple/main.c index 02a04e257..7195939a1 100644 --- a/software/apps/float-simple/main.c +++ b/software/apps/float-simple/main.c @@ -23,10 +23,12 @@ #include "runtime.h" #include "synchronization.h" -float res[16]; +float res1[16], res2[16]; +const float alpha = 0.123f; static float vec1[16] = {0.035711680f, 0.84912932f, 0.93399322f, 0.67873514f, 0.75774014f, 0.74313247f, 0.39222702f, 0.65547788f, 0.17118669f, 0.70604610f, 0.031832848f, 0.27692297f, 0.046171390f, 0.097131781f, 0.82345784f, 0.69482863f}; static float vec2[16] = {0.31709948f, 0.95022207f, 0.034446079f, 0.43874437f, 0.38155845f, 0.76551682f, 0.79519993f, 0.18687260f, 0.48976439f, 0.44558620f, 0.64631301f, 0.70936483f, 0.75468665f, 0.27602509f, 0.67970270f, 0.65509802f}; static float vec3[16] = {0.35281116f, 1.7993515f, 0.96843928f, 1.1174796f, 1.1392986f, 1.5086493f, 1.1874269f, 0.84235048f, 0.66095108f, 1.1516323f, 0.67814589f, 0.98628783f, 0.80085802f, 0.37315688f, 1.5031605f, 1.3499267f}; +static float vec4[16] = {0.043925366f, 1.04442906f, 1.14881166f, 
0.83484422f, 0.93202037f, 0.91405294f, 0.48243923f, 0.80623779f, 0.21055963f, 0.86843670f, 0.039154403f, 0.34061525f, 0.056790810f, 0.119472091f, 1.01285314f, 0.85463921f}; int main() { @@ -37,8 +39,11 @@ int main() { float *ptr_vec1 = vec1; float *ptr_vec2 = vec2; - float *ptr_vec_res = res; + float *ptr_vec_res1 = res1; + float *ptr_vec_res2 = res2; + mempool_barrier_init(cid); + int32_t count = 0; while (remaining_elem > 0) { uint32_t actual_elem; @@ -47,32 +52,54 @@ int main() { asm volatile("vle32.v v16, (%[vector1])" :: [vector1]"r"(ptr_vec1)); asm volatile("vle32.v v17, (%[vector2])" :: [vector2]"r"(ptr_vec2)); asm volatile("vfadd.vv v14, v16, v17"); - asm volatile("vse32.v v14, (%[vector_res])" :: [vector_res]"r"(ptr_vec_res)); + asm volatile("vfmul.vf v15, v16, %0" ::"f"(alpha)); + asm volatile("vse32.v v14, (%[vector_res])" :: [vector_res]"r"(ptr_vec_res1)); + asm volatile("vse32.v v15, (%[vector_res])" :: [vector_res]"r"(ptr_vec_res2)); remaining_elem -= actual_elem; ptr_vec1 += actual_elem; ptr_vec2 += actual_elem; - ptr_vec_res += actual_elem; + ptr_vec_res1 += actual_elem; + count ++; } + const float ub = 0.1f; + const float lb = -0.1f; + float diff; + // mempool_barrier(num_cores); + // if (cid == 0) { + // printf("CHECK VFADD\n"); + // } mempool_barrier(num_cores); + if (cid == 0) { - printf("CHECK RESULT\n"); + for (int i = 0; i < 16; i++) { + if (vec3[i] != res1[i]) { + printf("error: %d\n", i); + // return 1; + } + } + printf("count:%d\n", count); } + mempool_barrier(num_cores); + if (cid == 0) { + printf("CHECK VFMUL\n"); - float diff; - const float ubound = 0.0001f; - const float lbound = -0.0001f; - for (int i = 0; i < 16; i++) { - diff = vec3[i] - res[i]; - if (diff > ubound | diff < lbound) - error = 1; + for (int i = 0; i < 16; i++) { + // diff = vec4[i] - res2[i]; + if (vec4[i] != res2[i]) { + printf("error: %d", i); + // return i+1; + } + } } + // if (cid == 0) + // printf("COREECT\n"); mempool_barrier(num_cores); - return error; + return 
0; } // clang-format on diff --git a/software/apps/sp-axpy-simple/data/data_axpy-vsim-backup.h b/software/apps/sp-axpy-simple/data/data_axpy-vsim-backup.h new file mode 100644 index 000000000..a52818f09 --- /dev/null +++ b/software/apps/sp-axpy-simple/data/data_axpy-vsim-backup.h @@ -0,0 +1,24 @@ +// Copyright 2022 ETH Zurich and University of Bologna. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 + +#include "layer.h" + +const axpy_layer axpy_l = { + .M = 512, + .dtype = FP32, +}; + + +static const float axpy_X_dram [512] = {1.9269152879714966, 1.4872840642929077, 0.9007171988487244, -2.1055209636688232, 0.6784184575080872, -1.2345448732376099, -0.04306747764348984, -1.6046669483184814, -0.7521352767944336, 1.6487230062484741, -0.3924786448478699, -1.4036071300506592, -0.7278813123703003, -0.5594301819801331, -0.7688388824462891, 0.7624453902244568, 1.6423169374465942, -0.1595974713563919, -0.4973975419998169, 0.439589262008667, -0.7581311464309692, 1.078317642211914, 0.8008005619049072, 1.680620551109314, 1.27912437915802, 1.2964228391647339, 0.610466480255127, 1.334737777709961, -0.2316243201494217, 0.041759490966796875, -0.2515752911567688, 0.859858512878418, -1.3846737146377563, -0.8712361454963684, -0.223365917801857, 1.7173614501953125, 0.3188803195953369, -0.42451897263526917, 0.3057209253311157, -0.7745925188064575, -1.5575724840164185, 0.9956361055374146, -0.8797858357429504, -0.6011420488357544, -1.2741512060165405, 2.1227850914001465, -1.234653115272522, -0.4879138767719269, -0.9138230085372925, -0.6581372618675232, 0.07802387326955795, 0.5258087515830994, -0.48799172043800354, 1.1913690567016602, -0.8140076398849487, -0.7359927892684937, -1.4032478332519531, 0.03600366786122322, -0.06347727030515671, 0.6756148934364319, -0.0978068932890892, 1.8445940017700195, -1.184537410736084, 1.3835493326187134, 1.4451338052749634, 0.8564125299453735, 2.218075752258301, 0.5231655240058899, 
0.34664666652679443, -0.19733144342899323, -1.0545889139175415, 1.2779955863952637, -0.1721901297569275, 0.5237884521484375, 0.056621819734573364, 0.4262961447238922, 0.575005054473877, -0.6417241096496582, -2.2063984870910645, -0.7508030533790588, 0.01086814422160387, -0.33874234557151794, -1.3406795263290405, -0.5853705406188965, 0.5361881256103516, 0.5246226191520691, 1.1412016153335571, 0.05164359509944916, 0.7439519762992859, -0.4815843999385834, -1.0494661331176758, 0.603898823261261, -1.7222950458526611, -0.827768862247467, 1.334702968597412, 0.48353928327560425, -2.5095443725585938, 0.4880010485649109, 0.7845868468284607, 0.02864718623459339, 0.640755295753479, 0.5832474231719971, 1.0669267177581787, -0.4501533806324005, -0.18526747822761536, 0.7527588605880737, 0.4047577977180481, 0.17846599221229553, 0.2649095058441162, 1.2731683254241943, -0.0013108636485412717, -0.30360376834869385, -1.457029104232788, -0.10233523696660995, -0.5991530418395996, 0.4770564138889313, 0.7261772155761719, 0.09115186333656311, -0.3890652060508728, 0.5279164910316467, -0.012685478664934635, 0.24083632230758667, 0.13253536820411682, 0.7642406225204468, 1.095009684562683, 0.3398909568786621, 0.7199674844741821, 0.41140761971473694, 1.931160569190979, 1.0118638277053833, -1.4364064931869507, -1.1298598051071167, -0.1360345333814621, 1.6354095935821533, 0.6547407507896423, 0.5760045647621155, 1.1415079832077026, 0.018564576283097267, -1.8058050870895386, 0.9254348874092102, -0.3753443658351898, 1.0330873727798462, -0.6866509318351746, 0.6368136405944824, -0.9726738929748535, 0.9584577679634094, 1.6192004680633545, 1.450609803199768, 0.2694815397262573, -0.21037597954273224, -0.7328027486801147, 0.10429783165454865, 0.3487516939640045, 0.9675941467285156, -0.46568843722343445, 1.6047972440719604, -2.4801201820373535, -0.4175437390804291, -1.1954537630081177, 0.8123369216918945, -1.9005532264709473, 0.22857652604579926, 0.02485940419137478, -0.34595024585723877, 0.2868328094482422, 
-0.7308424115180969, 0.17482025921344757, -1.0939292907714844, -1.6021603345870972, 1.3528969287872314, 1.288827657699585, 0.05229547247290611, -1.5468504428863525, 0.7567060589790344, 0.7755194902420044, 2.0265355110168457, 0.03581761196255684, 0.12058872729539871, -0.8056637048721313, -0.20757682621479034, -0.9319478273391724, -1.5909662246704102, -1.13597571849823, -0.52259761095047, -0.5187733173370361, -1.5012763738632202, -1.9266542196273804, 0.1278512328863144, 1.0229133367538452, -0.5557951331138611, 0.7042727470397949, 0.7098760008811951, 1.7743884325027466, -0.921550989151001, 0.9624499082565308, -0.33701515197753906, -1.1753336191177368, 0.35805708169937134, 0.47876790165901184, 1.353700041770935, 0.5260620713233948, 2.1120378971099854, -0.5207571387290955, -0.9320061206817627, 0.18516133725643158, 1.0686918497085571, 1.3065344095230103, 0.4598345160484314, -0.8146268725395203, -1.0212392807006836, -0.49492356181144714, -0.5922516584396362, 0.15431594848632812, 0.4407670795917511, -0.14829230308532715, -2.3184432983398438, -0.39799532294273376, 1.0804862976074219, -1.7808643579483032, 1.5080454349517822, 0.30942854285240173, -0.5003090500831604, 1.0350031852722168, 1.6896470785140991, -0.004505051765590906, 1.666792392730713, 0.15392017364501953, -1.0602530241012573, -0.572657585144043, 0.0835680365562439, 0.39990535378456116, 1.989207148551941, -0.07198750972747803, -0.906094491481781, -2.0487122535705566, -1.0810555219650269, 0.01762307994067669, 0.0782259851694107, 0.19315829873085022, 0.40967342257499695, -0.9291303157806396, 0.2761908769607544, -0.5388752818107605, 0.4625823199748993, -0.8718891143798828, -0.027118360623717308, -0.3532457649707794, 1.4638569355010986, 1.255434274673462, -0.7149558067321777, 0.8539193272590637, 0.512991189956665, 0.5397310256958008, 0.5655050277709961, 0.5057917237281799, 0.22245365381240845, -0.685481607913971, 0.5635589957237244, -1.507175087928772, -1.610666036605835, -1.4790465831756592, 0.4322742819786072, 
-0.1250254064798355, 0.7821183800697327, -1.598767638206482, -0.10912995040416718, 0.7151994705200195, 0.03913922235369682, 1.305860161781311, 0.24659274518489838, -1.9775909185409546, 0.01789604313671589, -1.3793021440505981, 0.625802755355835, -2.5849502086639404, -0.02399955503642559, -0.1221928745508194, -0.7469954490661621, 1.7093087434768677, 0.05792269483208656, 1.1929805278778076, 1.9372931718826294, 0.7287133932113647, 0.9808937907218933, 0.41459226608276367, 1.15656316280365, 0.2690545618534088, -0.036629438400268555, 0.9732939004898071, -1.0150787830352783, -0.5419175624847412, -0.44102486968040466, -0.3136177957057953, -0.12925422191619873, -0.7149624228477478, -0.047562163323163986, 2.0207436084747314, 0.25391900539398193, 0.9364385008811951, 0.7122363448143005, -0.031765542924404144, 0.10164086520671844, 1.3433040380477905, 0.7132695913314819, 0.4038029611110687, -0.7139783501625061, 0.8337291479110718, -0.9585452079772949, 0.45363426208496094, 1.2460919618606567, -2.3065085411071777, -1.2868918180465698, 0.17988650500774384, -2.126762628555298, -0.13408313691616058, -1.0407685041427612, -0.7647228837013245, -0.05528254434466362, 1.204850673675537, -0.982473611831665, 0.4334380030632019, -0.7171905636787415, 1.055369257926941, -1.4533969163894653, 0.46515071392059326, 0.37139150500297546, -0.004656785633414984, 0.07954943925142288, 0.3781784772872925, 0.7051141262054443, -1.7236974239349365, -0.8434810638427734, 0.4351435601711273, 0.26588720083236694, -0.5870985388755798, 0.0826888456940651, 0.8853808045387268, 0.1824439913034439, 0.7863810062408447, -0.057920295745134354, 0.5666652917861938, -0.7097623348236084, -0.4875054359436035, 0.050095997750759125, 0.6084084510803223, 1.6308681964874268, -0.08472306281328201, 1.0844124555587769, 0.9477656483650208, -0.676629364490509, -0.5730168223381042, -0.3303174376487732, -0.7939430475234985, 0.3752319812774658, 0.08790969103574753, -1.241483449935913, -0.32025346159935, -0.844377875328064, 
-0.5513465404510498, 1.9889612197875977, 1.900311827659607, 1.6950805187225342, 0.028089528903365135, -0.17536965012550354, -1.7734959125518799, -0.7046414017677307, -0.39465200901031494, 1.8868111371994019, -0.21844321489334106, 0.16629981994628906, 2.1441681385040283, 1.7045671939849854, 0.3459012508392334, 0.6424751281738281, -0.20395424962043762, 0.6853673458099365, -0.13968797028064728, -1.1807502508163452, -1.282929539680481, 0.448485791683197, -0.590737521648407, 0.8540631532669067, -0.4900680184364319, -0.35945725440979004, 0.6663737893104553, -0.07426456362009048, -0.20960482954978943, 0.16632132232189178, 1.4703037738800049, -0.9390866756439209, -0.6013189554214478, -0.09964022785425186, -0.9851518273353577, -2.488459348678589, -0.33131900429725647, 0.8435799479484558, 0.9874473810195923, -0.33197471499443054, -0.8076189756393433, 0.824364185333252, 0.024699924513697624, -1.0641486644744873, -0.7601934671401978, -0.4075061082839966, 0.9623646140098572, -0.14264194667339325, 0.15271379053592682, -0.0388023778796196, 0.9446058869361877, -1.5824053287506104, 0.9871290922164917, 1.1456739902496338, -0.14181147515773773, -0.2763414680957794, -0.19321373105049133, 0.7767809629440308, 0.6838752627372742, -1.3245893716812134, -0.5160817503929138, 0.6001842617988586, -0.4702208340167999, -0.6086435317993164, -0.046192023903131485, -1.6457397937774658, -0.4833274185657501, -0.740294337272644, 0.31428107619285583, 0.1415553092956543, 1.0348176956176758, -0.626437783241272, -0.5150922536849976, 0.6902899742126465, -0.4939993619918823, 1.1366126537322998, -0.46184006333351135, 1.419979453086853, 0.848518967628479, -0.047891248017549515, 0.6685602068901062, 1.0429801940917969, 0.6899017095565796, -1.3129348754882812, 0.03780364990234375, -1.1702114343643188, -0.10318559408187866, 1.1894739866256714, 0.7606944441795349, -0.7463049292564392, -1.3838845491409302, 0.4868715703487396, -1.0020296573638916, 0.0329488180577755, -0.42919591069221497, -0.9817978739738464, 
-0.6420586109161377, 0.8265887498855591, 1.591395616531372, -0.1208132952451706, -0.48302069306373596, 0.11329790204763412, 0.0771508663892746, -0.9228128790855408, -1.2619991302490234, 1.0860532522201538, 1.096641182899475, -0.6836934685707092, 0.06604336202144623, -0.0007737990817986429, 0.1620604395866394, 1.195958137512207, -1.3061535358428955, -1.4039719104766846, -1.0597201585769653, 0.3057299852371216, 0.4150581359863281, -0.7174144983291626, 2.833967924118042, 1.9534740447998047, 2.0486814975738525, -1.0880382061004639, 1.621694564819336, 0.8512656688690186, -0.40046969056129456, -0.6088271737098694, -0.508095383644104, -0.6184902191162109, -1.647040605545044, -1.0362098217010498, -0.4503057301044464, -0.0729660615324974, -0.5479549169540405, -1.1425532102584839, -0.44875210523605347, -0.03045438416302204, 0.3830311596393585, -0.04476971551775932, 1.179942011833191, -0.33142781257629395, 0.6495042443275452, 0.09495851397514343, -0.7525874376296997, -0.647229790687561, -1.2822614908218384, 1.96529221534729, -0.9638485312461853, -2.5667941570281982, 0.7096128463745117, 0.8198426961898804, 0.6214459538459778, 0.42318588495254517, -0.33889833092689514, 0.5179733633995056}; + + +static const float axpy_Y_dram [512] = {-1.363769769668579, 0.1929578185081482, -0.6103342771530151, 0.16323445737361908, 1.51017165184021, 0.21230429410934448, -0.7252011299133301, -0.9527732729911804, 0.5216943025588989, -0.46386733651161194, 0.18237744271755219, -0.38666075468063354, -1.7906768321990967, 0.09329313784837723, -1.9152568578720093, -0.6421752572059631, 1.3438509702682495, -1.2922308444976807, 0.766244113445282, 0.64540034532547, 0.353316068649292, -2.6474881172180176, -1.4575366973876953, -0.9712379574775696, 0.25403109192848206, -0.1790592074394226, 1.1992844343185425, -0.4292171895503998, 1.010284185409546, 0.6110401153564453, 1.2208385467529297, -0.6076440215110779, -1.7376028299331665, -0.12535162270069122, -1.3658148050308228, 1.111746072769165, -0.6227966547012329, 
-0.7891808748245239, -0.167823925614357, 1.6433145999908447, 2.0070879459381104, -1.2531019449234009, 1.118869423866272, 1.7732776403427124, -2.071660280227661, -0.4125255346298218, -0.9769555926322937, -0.03363388776779175, 1.8594977855682373, 2.6221468448638916, 0.36905255913734436, 0.3802972435951233, 0.19898031651973724, -0.23609064519405365, 0.30340856313705444, -0.45007675886154175, 0.47390419244766235, 0.6503364443778992, 1.1662380695343018, 0.01693599671125412, 0.5325868129730225, -0.6035352349281311, -0.1742597371339798, 0.6092063784599304, -0.8032152652740479, -1.1209005117416382, 0.1956406533718109, -0.7815181016921997, -1.7898789644241333, -0.26157355308532715, -0.44025033712387085, 2.1848294734954834, -0.48009708523750305, -1.2871733903884888, 0.7388824224472046, 0.03389474004507065, -0.31229403614997864, -0.2541753351688385, -1.205536127090454, -0.9542103409767151, 0.061276569962501526, 0.08526104688644409, 0.7481252551078796, -0.16356196999549866, -0.9085567593574524, 0.3129958212375641, 0.8050477504730225, -1.1133604049682617, 0.4981626570224762, -1.1999552249908447, 0.12711313366889954, 0.4403660297393799, 0.6377718448638916, 0.15978877246379852, 1.7697970867156982, 0.6268176436424255, -1.8736529350280762, 2.3259060382843018, -0.9203909635543823, 0.6661149263381958, -0.44026491045951843, -2.3179564476013184, 1.294582724571228, 0.22267311811447144, -0.8483412265777588, 1.6489421129226685, 1.6005686521530151, -0.07858924567699432, 0.43104586005210876, 0.3683530390262604, 0.7637977004051208, 1.1792222261428833, -0.4137862026691437, 0.5184088349342346, -0.7015367746353149, -0.4323408901691437, 0.1414770483970642, 0.07110362499952316, 0.5633530616760254, -0.5786357522010803, -1.083811640739441, -0.3889259994029999, 0.8126105666160583, 1.4981187582015991, 0.043896086513996124, 1.4443233013153076, 0.23202891647815704, 0.5064983367919922, -1.2786966562271118, -0.03842746838927269, 1.9138009548187256, 0.3378446102142334, 0.12505611777305603, 
-0.7621514797210693, -1.190559983253479, 0.7756073474884033, 0.455719918012619, 0.2503303289413452, -1.3610970973968506, 1.8018341064453125, -0.07434194535017014, -0.15664155781269073, -0.8708454966545105, -0.6410972476005554, -0.414562851190567, -0.6902380585670471, -0.22995619475841522, -2.172283887863159, 0.08768323808908463, 1.0937845706939697, -0.1177205815911293, -0.29864323139190674, -0.9536206126213074, -0.09247277677059174, -1.01665461063385, -0.007675689645111561, -0.518220841884613, 0.83954256772995, 0.05852266773581505, -1.6682480573654175, 2.129624843597412, -1.5181471109390259, 0.1387282907962799, -1.1797568798065186, -0.5297411680221558, 0.9625157713890076, 0.2794382870197296, -0.5718191266059875, -2.7936289310455322, -0.7111545205116272, 0.5235219597816467, -1.71055006980896, 0.8384853601455688, -0.2698453664779663, 0.12306158244609833, 0.8757511377334595, 0.15132997930049896, 0.739393413066864, 0.27310314774513245, 2.7312309741973877, 0.43200522661209106, -0.30918216705322266, -0.09658124297857285, 1.541925072669983, -0.108744777739048, -0.4189043343067169, 1.4384385347366333, -0.7068426609039307, -1.2519514560699463, 3.0250484943389893, 1.3462589979171753, 0.8556069731712341, 0.3220294117927551, 0.44605663418769836, 1.5229592323303223, 1.2804899215698242, -0.11616043001413345, 1.3705363273620605, -0.4809381365776062, -0.9903622269630432, -1.3641812801361084, 0.008205652236938477, -0.40586018562316895, -0.7110859751701355, -0.3495793640613556, 0.3797488212585449, 0.9993040561676025, 1.2751853466033936, 0.9594927430152893, 0.10350999981164932, 0.8290349841117859, 2.0921294689178467, 0.7953095436096191, 0.2792847752571106, 0.1864478439092636, 0.3547132909297943, 0.09063850343227386, 1.7422553300857544, -1.2660012245178223, 0.38916081190109253, 0.34287506341934204, -1.4590637683868408, -1.4936561584472656, -0.22138521075248718, 0.22523505985736847, -0.07724537700414658, 0.9856945276260376, 1.2783364057540894, 0.28815189003944397, 0.869049608707428, 
-0.8097057938575745, -1.4298604726791382, 0.45901596546173096, 0.5309328436851501, -1.3614802360534668, 1.9562491178512573, 1.7684898376464844, -0.9857985377311707, -1.2370758056640625, -2.301875114440918, -0.0010087001137435436, -0.8494256734848022, -1.6593921184539795, 0.3062905967235565, 1.182044506072998, 0.32602694630622864, -0.3894469738006592, 2.8543806076049805, 0.8243650794029236, 0.7983470559120178, 1.8890222311019897, 0.5934628248214722, 0.0696544423699379, -1.6034338474273682, -0.42982181906700134, 0.5761587619781494, 0.34436315298080444, -3.1016058921813965, -1.4587225914001465, -1.4318257570266724, -0.6071268916130066, -0.25973787903785706, -0.7190185785293579, -0.38583096861839294, 0.5233525037765503, -0.8211768269538879, -0.47086891531944275, 0.6016423106193542, -0.28251126408576965, 0.7692679762840271, -0.7668924331665039, -0.9494866728782654, 0.01691739819943905, 0.08027740567922592, 0.7448412775993347, 1.345484972000122, 0.12682189047336578, -2.4520716667175293, 0.4159761369228363, 1.9025356769561768, -0.7346699833869934, 0.044657133519649506, -1.5211198329925537, 0.3478375971317291, 0.7401772737503052, 1.4161995649337769, 0.6833979487419128, -0.13825182616710663, 0.9212995171546936, 0.5282443761825562, -0.008228386752307415, -1.4493319988250732, -0.605182409286499, -0.17924511432647705, 0.19955870509147644, -1.2461947202682495, -0.41459938883781433, 1.4558700323104858, 0.3316534161567688, -1.00010085105896, -0.6919524669647217, -0.47199076414108276, -1.2894343137741089, 1.0762810707092285, -1.0667427778244019, -1.9893426895141602, 0.29731303453445435, 0.4344584047794342, 0.0033933203667402267, -1.0240145921707153, 0.22404761612415314, -0.7554785013198853, 1.3675810098648071, -0.3197358250617981, -0.9130924344062805, 1.919209361076355, -1.6514869928359985, 2.1477253437042236, -0.6604134440422058, 0.11352583765983582, -0.22056575119495392, 0.7118127346038818, 0.3415871560573578, 1.5885895490646362, -0.3488781750202179, -0.45791950821876526, 
-1.2322070598602295, -0.598077118396759, -0.28154700994491577, 0.05281926319003105, 0.42497751116752625, 0.4825834333896637, 0.48813387751579285, 1.0082393884658813, -0.595004141330719, 0.3926331400871277, 0.8229668736457825, -0.886031985282898, 1.4801039695739746, 0.8391514420509338, -0.20004984736442566, 0.9949536919593811, 0.7201864719390869, -0.13413065671920776, -1.4067999124526978, -2.3609628677368164, -0.2904934287071228, -0.13345853984355927, -0.15693345665931702, 1.138344645500183, -0.2505214214324951, 1.6704555749893188, -0.545271098613739, -2.15816330909729, -1.6607975959777832, -0.6637441515922546, 0.3657907545566559, -0.39920157194137573, 0.49674081802368164, -2.369169235229492, -0.5614708065986633, -0.5949130654335022, 1.2687278985977173, 1.2904434204101562, -1.1755682229995728, -0.0783226415514946, -0.9705761075019836, 1.4723693132400513, 1.4108561277389526, -1.3143675327301025, -1.31621515750885, -1.2524477243423462, -1.5844100713729858, -2.5446670055389404, 1.3719074726104736, -0.5379461050033569, 0.7378400564193726, -0.8505349159240723, 0.03610055148601532, 1.3406710624694824, 0.9199973940849304, -0.3787555396556854, -1.5597758293151855, -0.8009540438652039, -0.7111086845397949, -0.3866667151451111, 0.9578314423561096, -0.8225308656692505, -2.3908050060272217, 0.322247713804245, 1.875388741493225, 1.1042989492416382, -0.5223758816719055, -0.7401803731918335, 0.16235657036304474, -0.2369976043701172, 0.5099347233772278, 1.670624852180481, 1.5921050310134888, -0.41619211435317993, 1.861944556236267, -1.077892780303955, 0.8848565220832825, -0.8342104554176331, 1.0300744771957397, -0.8680985569953918, -0.5701602697372437, 0.32332202792167664, 1.1284750699996948, -1.2123126983642578, 2.602391004562378, -0.09572362899780273, -0.08114803582429886, 1.2586976289749146, 0.8691263794898987, -0.9609367251396179, 0.05182264745235443, -0.3284812867641449, -2.247206211090088, -0.4478967487812042, 0.4234687089920044, -0.3874586224555969, -0.22963792085647583, 
-0.40709349513053894, 0.8702965974807739, -1.0552809238433838, -1.3284013271331787, 0.7060741186141968, 0.35730111598968506, 0.5892837643623352, 0.9187757968902588, 0.6662830114364624, 0.24650610983371735, 0.1328691989183426, 0.12191462516784668, 0.47808775305747986, 0.2761341631412506, -0.5895728468894958, 0.569182813167572, -0.7911050319671631, -0.19896702468395233, -1.3615714311599731, -0.5193602442741394, 0.07648162543773651, 0.34005025029182434, 1.4557304382324219, -0.3461014926433563, -0.2633814215660095, -0.447700172662735, -0.7288169264793396, -0.16066236793994904, -0.32063713669776917, -0.6307737827301025, -0.788766622543335, 1.3061575889587402, -0.9275763630867004, -0.26273947954177856, 0.9314952492713928, -0.4593467116355896, -0.9419456720352173, -0.7089186310768127, 2.1860759258270264, -0.6493158936500549, 0.45214036107063293, 0.8520749807357788, -1.6946725845336914, 1.1805996894836426, -2.8929238319396973, -0.3875778615474701, -0.7124031782150269, -1.6171332597732544, -0.35899198055267334, 0.051366694271564484, 0.6950237154960632, 1.835181474685669, -1.9180361032485962, -1.3923954963684082, 0.540465772151947, 0.4350730776786804, -2.2717032432556152, -0.13386189937591553, -0.058557309210300446, 0.12574470043182373, -0.5525766611099243, 0.07448001205921173, -0.1492866724729538, -0.5522539615631104, -0.09342008084058762, -1.0284309387207031, 0.40444278717041016, 2.1425962448120117, -0.5153723955154419, 1.0827196836471558, 1.2498642206192017, 0.9821351766586304, 0.22690092027187347, 0.4927920699119568, -0.5128253102302551, 0.3006223440170288, 0.07734657824039459, 0.6477669477462769, -0.4324244260787964, 1.1740480661392212, 0.7011352777481079, 0.6674330234527588, -0.8035953640937805, -1.3776048421859741, -0.4410470724105835, 0.1417587399482727, 1.1084681749343872, 0.5544233322143555, 1.5817502737045288}; + + +static float axpy_alpha_dram __attribute__((section(".data"))) = {0.21413196623325348}; + + +static const float axpy_GR_dram[512] = 
{-0.9511556029319763, 0.5114328861236572, -0.41746193170547485, -0.28762489557266235, 1.655442714691162, -0.05205121636390686, -0.7344232797622681, -1.2963837385177612, 0.3606380820274353, -0.11082303524017334, 0.09833522140979767, -0.6872179508209229, -1.9465395212173462, -0.02649874985218048, -2.0798897743225098, -0.4789113402366638, 1.6955235004425049, -1.3264057636260986, 0.659735381603241, 0.7395304441452026, 0.19097594916820526, -2.416585922241211, -1.2860597372055054, -0.611363410949707, 0.5279325246810913, 0.0985463559627533, 1.3300048112869263, -0.1434071660041809, 0.9606860280036926, 0.6199821829795837, 1.1669682264328003, -0.4235208332538605, -2.0341057777404785, -0.31191113591194153, -1.413644552230835, 1.4794880151748657, -0.5545141696929932, -0.8800839781761169, -0.10235930234193802, 1.4774495363235474, 1.673561930656433, -1.0399044752120972, 0.9304791688919067, 1.6445538997650146, -2.344496726989746, 0.04203060269355774, -1.241334319114685, -0.13811184465885162, 1.6638190746307373, 2.4812185764312744, 0.38575994968414307, 0.4928897023200989, 0.09448569267988205, 0.019019559025764465, 0.12910351157188416, -0.6076763272285461, 0.17342397570610046, 0.6580460071563721, 1.1526455879211426, 0.16160672903060913, 0.5116432309150696, -0.20854869484901428, -0.4279070496559143, 0.9054685235023499, -0.4937659204006195, -0.9375151991844177, 0.6706016063690186, -0.6694916486740112, -1.7156507968902588, -0.30382853746414185, -0.6660715341567993, 2.458489179611206, -0.5169684886932373, -1.175013542175293, 0.7510069608688354, 0.12517836689949036, -0.1891670823097229, -0.3915889859199524, -1.6779966354370117, -1.1149812936782837, 0.06360378861427307, 0.012725479900836945, 0.4610429108142853, -0.2889085114002228, -0.793741762638092, 0.42533430457115173, 1.0494154691696167, -1.102301836013794, 0.6574665307998657, -1.3030778169631958, -0.09761111438274384, 0.5696800947189331, 0.2689734101295471, -0.017462998628616333, 2.0555996894836426, 0.7303588390350342, 
-2.4110264778137207, 2.4304027557373047, -0.7523858547210693, 0.6722491979598999, -0.3030587136745453, -2.1930644512176514, 1.5230457782745361, 0.12628088891506195, -0.8880128860473633, 1.8101317882537842, 1.6872402429580688, -0.04037397354841232, 0.4877714514732361, 0.6409790515899658, 0.7635170221328735, 1.1142109632492065, -0.7257827520370483, 0.4964956045150757, -0.8298345804214478, -0.3301878571510315, 0.2969748079776764, 0.09062215685844421, 0.4800417721271515, -0.4655919671058655, -1.0865280628204346, -0.33735525608062744, 0.8409906029701233, 1.6617671251296997, 0.2783726751804352, 1.5171048641204834, 0.38619697093963623, 0.59459388256073, -0.8651734590530396, 0.1782449185848236, 1.6062203645706177, 0.09590551257133484, 0.09592677652835846, -0.4119580090045929, -1.0503590106964111, 0.8989483118057251, 0.7001532316207886, 0.2543056011199951, -1.7477777004241943, 1.9999992847442627, -0.1547151803970337, 0.06457547843456268, -1.0178793668746948, -0.5047351121902466, -0.6228434443473816, -0.4850016236305237, 0.11676637828350067, -1.8616619110107422, 0.14538785815238953, 1.048736333847046, -0.2746370732784271, -0.2763097286224365, -0.8789417147636414, 0.11472006142139435, -1.1163734197616577, 0.33596271276474, -1.049293875694275, 0.7501330971717834, -0.19746220111846924, -1.4943007230758667, 1.7226556539535522, -1.4692015647888184, 0.1440514773130417, -1.253835916519165, -0.4683210849761963, 0.8060190677642822, 0.31687289476394653, -0.8060643672943115, -3.1367027759552, -0.4214560389518738, 0.7995011806488037, -1.6993519067764282, 0.5072551965713501, -0.10781040787696838, 0.28912508487701416, 1.309697151184082, 0.15899968147277832, 0.7652153372764587, 0.10058479011058807, 2.686782121658325, 0.23244540393352509, -0.6498588919639587, -0.3398299515247345, 1.4300202131271362, -0.219830721616745, -0.7403756380081177, 1.0258803367614746, -0.679465651512146, -1.0329129695892334, 2.9060349464416504, 1.4970662593841553, 1.0076141357421875, 0.7019827365875244, 
0.248723104596138, 1.7290505170822144, 1.2083241939544678, -0.36783692240715027, 1.4472078084945679, -0.37841862440109253, -0.7004917860031128, -1.2515345811843872, 0.46046048402786255, -0.5173709392547607, -0.9106582999229431, -0.3099304139614105, 0.6085898876190186, 1.2790749073028564, 1.3736506700515747, 0.7850551009178162, -0.11516997963190079, 0.7230560183525085, 1.965309500694275, 0.8283535242080688, 0.3736670911312103, 0.15469372272491455, -0.14173951745033264, 0.005414985120296478, 1.9736219644546509, -1.647341251373291, 0.7120815515518188, 0.4091336131095886, -1.5661959648132324, -1.272028923034668, 0.1404222548007965, 0.22427038848400116, 0.2796681523323059, 1.0186537504196167, 1.0513023138046265, 0.1655275970697403, 0.8869441747665405, -0.7240732908248901, -1.0039076805114746, 0.443601131439209, 0.33690905570983887, -1.8001749515533447, 1.7247605323791504, 1.772263526916504, -0.9690478444099426, -1.1957144737243652, -2.214150905609131, -0.1999652087688446, -0.7902843952178955, -1.7747825384140015, 0.40534424781799316, 0.9953451752662659, 0.32022005319595337, -0.4650881886482239, 3.1678390502929688, 1.0931936502456665, 0.6452521681785583, 2.071873664855957, 0.7033106088638306, 0.1852281093597412, -1.4823411703109741, -0.32151564955711365, 0.6237931847572327, 0.19757962226867676, -2.9809298515319824, -1.7814569473266602, -1.7767208814620972, -0.9238380193710327, -0.16717413067817688, -0.7457905411720276, -0.21835441887378693, 0.18100523948669434, -0.8445450663566589, -0.3177218437194824, 0.6100232601165771, -0.0028848648071289062, 0.822071373462677, -1.1903579235076904, -0.945654571056366, -0.27843526005744934, 0.21428176760673523, 0.19132083654403687, 1.340345859527588, 0.10065649449825287, -2.612027168273926, 0.7819937467575073, 1.9149388074874878, -0.47921472787857056, 0.4594935476779938, -1.3650790452957153, 0.557878315448761, 0.8289547562599182, 1.6638567447662354, 0.7410111427307129, -0.1460953652858734, 1.1297128200531006, 0.3108835816383362, 
-0.12427026033401489, -1.5437694787979126, -0.6723380088806152, -0.20692257583141327, 0.04646240174770355, -1.256379246711731, 0.018106400966644287, 1.510242223739624, 0.532174825668335, -0.847588300704956, -0.6987544894218445, -0.45022621750831604, -1.0017900466918945, 1.2290148735046387, -0.9802756309509277, -2.142228364944458, 0.47584110498428345, 0.2292032390832901, 0.10053092241287231, -0.7571864724159241, -0.26984959840774536, -1.0310431718826294, 1.4061005115509033, -0.7751436829566956, -0.9418039321899414, 1.6963475942611694, -1.81523859500885, 2.135887622833252, -0.4024164080619812, -0.09685316681861877, -0.12775281071662903, 0.5582393407821655, 0.5675754547119141, 1.277370810508728, -0.24927453696727753, -0.3783927261829376, -1.2332042455673218, -0.5810430645942688, -0.20056691765785217, 0.20380674302577972, 0.055878788232803345, 0.30196717381477356, 0.5813120007514954, 1.0651743412017822, -0.7207207083702087, 0.41033947467803955, 1.0125552415847778, -0.8469648957252502, 1.6484932899475098, 0.8267488479614258, -0.0787086933851242, 0.8429709076881409, 0.6157959699630737, -0.12340350449085236, -1.2765202522277832, -2.011741876602173, -0.3086353540420532, 0.0987488329410553, 0.04601345956325531, 0.9934566617012024, -0.3732226490974426, 1.5997240543365479, -0.7152796983718872, -2.0778141021728516, -1.6419733762741089, -0.9295854568481445, 0.2972142696380615, -0.5800098776817322, 0.37867990136146545, -1.9432690143585205, -0.15455329418182373, -0.23194214701652527, 1.2747427225112915, 1.252891182899475, -1.5553303956985474, -0.22920888662338257, -1.0550837516784668, 1.8763959407806396, 1.3640804290771484, -1.2787574529647827, -0.8570802211761475, -0.8874453902244568, -1.5103415250778198, -2.407092571258545, 1.328234314918518, -0.39118704199790955, 0.7079284191131592, -1.103371262550354, -0.23861567676067352, 1.4367061853408813, 0.793501615524292, -0.19587332010269165, -1.664715051651001, -0.8779253363609314, -0.5684167742729187, -0.40256914496421814, 
0.9129483699798584, -0.7869161367416382, -2.0759658813476562, 0.12115924060344696, 1.7466270923614502, 1.0829627513885498, -0.7333283424377441, -1.2730391025543213, 0.0914105772972107, -0.05636017024517059, 0.7213788032531738, 1.5995384454727173, 1.4191679954528809, -0.23966939747333527, 1.8672336339950562, -1.3057609796524048, 0.7220748066902161, -0.9214705228805542, 1.2361475229263306, -0.8986427783966064, -0.5374593734741211, 0.31501320004463196, 1.3307453393936157, -1.5511562824249268, 2.8137669563293457, 0.1496018022298813, -0.11151440441608429, 1.1995240449905396, 0.8277531266212463, -0.7946031093597412, 0.19826219975948334, -0.6121182441711426, -2.3577158451080322, -0.3193781077861786, 0.3227793872356415, -0.5177886486053467, -0.2395291030406952, -0.7594989538192749, 0.7668007612228394, -1.213801622390747, -1.2611037492752075, 0.7363856434822083, 0.5788886547088623, 0.45514339208602905, 0.808478057384491, 0.8140961527824402, 0.1407250463962555, 0.3762543201446533, 0.023019902408123016, 0.7821507453918457, 0.45782917737960815, -0.5998278856277466, 0.7123429179191589, -0.5677696466445923, -0.051237016916275024, -1.6427128314971924, -0.5112652778625488, -0.17409805953502655, 0.3179549276828766, 1.710434913635254, -0.1832124888896942, -0.4231891632080078, -0.7440340518951416, -0.62456214427948, -0.3752289414405823, -0.3135817348957062, -0.7226783633232117, -0.9990009069442749, 1.1686723232269287, -0.7505772709846497, 0.07802918553352356, 0.9056252837181091, -0.5627768635749817, -0.9176849722862244, -0.692398190498352, 1.9884722232818604, -0.9195502400398254, 0.6846990585327148, 1.0869009494781494, -1.8410732746124268, 1.1947417259216309, -2.893089532852173, -0.3528755307197571, -0.45631030201911926, -1.896822452545166, -0.6596272587776184, -0.17555326223373413, 0.7604902982711792, 1.9240586757659912, -2.071657419204712, -0.7855523824691772, 0.9587669968605042, 0.8737612962722778, -2.5046870708465576, 0.21339476108551025, 0.12372588366270065, 0.0399913415312767, 
-0.6829460263252258, -0.034319452941417694, -0.2817251980304718, -0.9049379825592041, -0.3153057396411896, -1.1248557567596436, 0.3888184130191803, 2.025261640548706, -0.7600295543670654, 0.9866275191307068, 1.2433429956436157, 1.0641543865203857, 0.21731428802013397, 0.7454553842544556, -0.5837945938110352, 0.43970197439193726, 0.0976802334189415, 0.486613929271698, -0.5710170269012451, 0.8994748592376709, 1.1219671964645386, 0.46104222536087036, -1.3532280921936035, -1.2256540060043335, -0.2654925584793091, 0.274830162525177, 1.1990858316421509, 0.4818543791770935, 1.6926649808883667}; + + diff --git a/software/apps/sp-axpy-simple/data/data_axpy.h b/software/apps/sp-axpy-simple/data/data_axpy.h new file mode 100644 index 000000000..f9beb2bc8 --- /dev/null +++ b/software/apps/sp-axpy-simple/data/data_axpy.h @@ -0,0 +1,24 @@ +// Copyright 2022 ETH Zurich and University of Bologna. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 + +#include "layer.h" + +const axpy_layer axpy_l = { + .M = 256, + .dtype = FP32, +}; + + +static float axpy_X_dram [256] __attribute__((section(".data"))) = {1.9269152879714966, 1.4872840642929077, 0.9007171988487244, -2.1055209636688232, 0.6784184575080872, -1.2345448732376099, -0.04306747764348984, -1.6046669483184814, -0.7521352767944336, 1.6487230062484741, -0.3924786448478699, -1.4036071300506592, -0.7278813123703003, -0.5594301819801331, -0.7688388824462891, 0.7624453902244568, 1.6423169374465942, -0.1595974713563919, -0.4973975419998169, 0.439589262008667, -0.7581311464309692, 1.078317642211914, 0.8008005619049072, 1.680620551109314, 1.27912437915802, 1.2964228391647339, 0.610466480255127, 1.334737777709961, -0.2316243201494217, 0.041759490966796875, -0.2515752911567688, 0.859858512878418, -1.3846737146377563, -0.8712361454963684, -0.223365917801857, 1.7173614501953125, 0.3188803195953369, -0.42451897263526917, 0.3057209253311157, -0.7745925188064575, 
-1.5575724840164185, 0.9956361055374146, -0.8797858357429504, -0.6011420488357544, -1.2741512060165405, 2.1227850914001465, -1.234653115272522, -0.4879138767719269, -0.9138230085372925, -0.6581372618675232, 0.07802387326955795, 0.5258087515830994, -0.48799172043800354, 1.1913690567016602, -0.8140076398849487, -0.7359927892684937, -1.4032478332519531, 0.03600366786122322, -0.06347727030515671, 0.6756148934364319, -0.0978068932890892, 1.8445940017700195, -1.184537410736084, 1.3835493326187134, 1.4451338052749634, 0.8564125299453735, 2.218075752258301, 0.5231655240058899, 0.34664666652679443, -0.19733144342899323, -1.0545889139175415, 1.2779955863952637, -0.1721901297569275, 0.5237884521484375, 0.056621819734573364, 0.4262961447238922, 0.575005054473877, -0.6417241096496582, -2.2063984870910645, -0.7508030533790588, 0.01086814422160387, -0.33874234557151794, -1.3406795263290405, -0.5853705406188965, 0.5361881256103516, 0.5246226191520691, 1.1412016153335571, 0.05164359509944916, 0.7439519762992859, -0.4815843999385834, -1.0494661331176758, 0.603898823261261, -1.7222950458526611, -0.827768862247467, 1.334702968597412, 0.48353928327560425, -2.5095443725585938, 0.4880010485649109, 0.7845868468284607, 0.02864718623459339, 0.640755295753479, 0.5832474231719971, 1.0669267177581787, -0.4501533806324005, -0.18526747822761536, 0.7527588605880737, 0.4047577977180481, 0.17846599221229553, 0.2649095058441162, 1.2731683254241943, -0.0013108636485412717, -0.30360376834869385, -1.457029104232788, -0.10233523696660995, -0.5991530418395996, 0.4770564138889313, 0.7261772155761719, 0.09115186333656311, -0.3890652060508728, 0.5279164910316467, -0.012685478664934635, 0.24083632230758667, 0.13253536820411682, 0.7642406225204468, 1.095009684562683, 0.3398909568786621, 0.7199674844741821, 0.41140761971473694, 1.931160569190979, 1.0118638277053833, -1.4364064931869507, -1.1298598051071167, -0.1360345333814621, 1.6354095935821533, 0.6547407507896423, 0.5760045647621155, 1.1415079832077026, 
0.018564576283097267, -1.8058050870895386, 0.9254348874092102, -0.3753443658351898, 1.0330873727798462, -0.6866509318351746, 0.6368136405944824, -0.9726738929748535, 0.9584577679634094, 1.6192004680633545, 1.450609803199768, 0.2694815397262573, -0.21037597954273224, -0.7328027486801147, 0.10429783165454865, 0.3487516939640045, 0.9675941467285156, -0.46568843722343445, 1.6047972440719604, -2.4801201820373535, -0.4175437390804291, -1.1954537630081177, 0.8123369216918945, -1.9005532264709473, 0.22857652604579926, 0.02485940419137478, -0.34595024585723877, 0.2868328094482422, -0.7308424115180969, 0.17482025921344757, -1.0939292907714844, -1.6021603345870972, 1.3528969287872314, 1.288827657699585, 0.05229547247290611, -1.5468504428863525, 0.7567060589790344, 0.7755194902420044, 2.0265355110168457, 0.03581761196255684, 0.12058872729539871, -0.8056637048721313, -0.20757682621479034, -0.9319478273391724, -1.5909662246704102, -1.13597571849823, -0.52259761095047, -0.5187733173370361, -1.5012763738632202, -1.9266542196273804, 0.1278512328863144, 1.0229133367538452, -0.5557951331138611, 0.7042727470397949, 0.7098760008811951, 1.7743884325027466, -0.921550989151001, 0.9624499082565308, -0.33701515197753906, -1.1753336191177368, 0.35805708169937134, 0.47876790165901184, 1.353700041770935, 0.5260620713233948, 2.1120378971099854, -0.5207571387290955, -0.9320061206817627, 0.18516133725643158, 1.0686918497085571, 1.3065344095230103, 0.4598345160484314, -0.8146268725395203, -1.0212392807006836, -0.49492356181144714, -0.5922516584396362, 0.15431594848632812, 0.4407670795917511, -0.14829230308532715, -2.3184432983398438, -0.39799532294273376, 1.0804862976074219, -1.7808643579483032, 1.5080454349517822, 0.30942854285240173, -0.5003090500831604, 1.0350031852722168, 1.6896470785140991, -0.004505051765590906, 1.666792392730713, 0.15392017364501953, -1.0602530241012573, -0.572657585144043, 0.0835680365562439, 0.39990535378456116, 1.989207148551941, -0.07198750972747803, -0.906094491481781, 
-2.0487122535705566, -1.0810555219650269, 0.01762307994067669, 0.0782259851694107, 0.19315829873085022, 0.40967342257499695, -0.9291303157806396, 0.2761908769607544, -0.5388752818107605, 0.4625823199748993, -0.8718891143798828, -0.027118360623717308, -0.3532457649707794, 1.4638569355010986, 1.255434274673462, -0.7149558067321777, 0.8539193272590637, 0.512991189956665, 0.5397310256958008, 0.5655050277709961, 0.5057917237281799, 0.22245365381240845}; + + +static float axpy_Y_dram [256] __attribute__((section(".data"))) = {-0.685481607913971, 0.5635589957237244, -1.507175087928772, -1.610666036605835, -1.4790465831756592, 0.4322742819786072, -0.1250254064798355, 0.7821183800697327, -1.598767638206482, -0.10912995040416718, 0.7151994705200195, 0.03913922235369682, 1.305860161781311, 0.24659274518489838, -1.9775909185409546, 0.01789604313671589, -1.3793021440505981, 0.625802755355835, -2.5849502086639404, -0.02399955503642559, -0.1221928745508194, -0.7469954490661621, 1.7093087434768677, 0.05792269483208656, 1.1929805278778076, 1.9372931718826294, 0.7287133932113647, 0.9808937907218933, 0.41459226608276367, 1.15656316280365, 0.2690545618534088, -0.036629438400268555, 0.9732939004898071, -1.0150787830352783, -0.5419175624847412, -0.44102486968040466, -0.3136177957057953, -0.12925422191619873, -0.7149624228477478, -0.047562163323163986, 2.0207436084747314, 0.25391900539398193, 0.9364385008811951, 0.7122363448143005, -0.031765542924404144, 0.10164086520671844, 1.3433040380477905, 0.7132695913314819, 0.4038029611110687, -0.7139783501625061, 0.8337291479110718, -0.9585452079772949, 0.45363426208496094, 1.2460919618606567, -2.3065085411071777, -1.2868918180465698, 0.17988650500774384, -2.126762628555298, -0.13408313691616058, -1.0407685041427612, -0.7647228837013245, -0.05528254434466362, 1.204850673675537, -0.982473611831665, 0.4334380030632019, -0.7171905636787415, 1.055369257926941, -1.4533969163894653, 0.46515071392059326, 0.37139150500297546, -0.004656785633414984, 
0.07954943925142288, 0.3781784772872925, 0.7051141262054443, -1.7236974239349365, -0.8434810638427734, 0.4351435601711273, 0.26588720083236694, -0.5870985388755798, 0.0826888456940651, 0.8853808045387268, 0.1824439913034439, 0.7863810062408447, -0.057920295745134354, 0.5666652917861938, -0.7097623348236084, -0.4875054359436035, 0.050095997750759125, 0.6084084510803223, 1.6308681964874268, -0.08472306281328201, 1.0844124555587769, 0.9477656483650208, -0.676629364490509, -0.5730168223381042, -0.3303174376487732, -0.7939430475234985, 0.3752319812774658, 0.08790969103574753, -1.241483449935913, -0.32025346159935, -0.844377875328064, -0.5513465404510498, 1.9889612197875977, 1.900311827659607, 1.6950805187225342, 0.028089528903365135, -0.17536965012550354, -1.7734959125518799, -0.7046414017677307, -0.39465200901031494, 1.8868111371994019, -0.21844321489334106, 0.16629981994628906, 2.1441681385040283, 1.7045671939849854, 0.3459012508392334, 0.6424751281738281, -0.20395424962043762, 0.6853673458099365, -0.13968797028064728, -1.1807502508163452, -1.282929539680481, 0.448485791683197, -0.590737521648407, 0.8540631532669067, -0.4900680184364319, -0.35945725440979004, 0.6663737893104553, -0.07426456362009048, -0.20960482954978943, 0.16632132232189178, 1.4703037738800049, -0.9390866756439209, -0.6013189554214478, -0.09964022785425186, -0.9851518273353577, -2.488459348678589, -0.33131900429725647, 0.8435799479484558, 0.9874473810195923, -0.33197471499443054, -0.8076189756393433, 0.824364185333252, 0.024699924513697624, -1.0641486644744873, -0.7601934671401978, -0.4075061082839966, 0.9623646140098572, -0.14264194667339325, 0.15271379053592682, -0.0388023778796196, 0.9446058869361877, -1.5824053287506104, 0.9871290922164917, 1.1456739902496338, -0.14181147515773773, -0.2763414680957794, -0.19321373105049133, 0.7767809629440308, 0.6838752627372742, -1.3245893716812134, -0.5160817503929138, 0.6001842617988586, -0.4702208340167999, -0.6086435317993164, -0.046192023903131485, 
-1.6457397937774658, -0.4833274185657501, -0.740294337272644, 0.31428107619285583, 0.1415553092956543, 1.0348176956176758, -0.626437783241272, -0.5150922536849976, 0.6902899742126465, -0.4939993619918823, 1.1366126537322998, -0.46184006333351135, 1.419979453086853, 0.848518967628479, -0.047891248017549515, 0.6685602068901062, 1.0429801940917969, 0.6899017095565796, -1.3129348754882812, 0.03780364990234375, -1.1702114343643188, -0.10318559408187866, 1.1894739866256714, 0.7606944441795349, -0.7463049292564392, -1.3838845491409302, 0.4868715703487396, -1.0020296573638916, 0.0329488180577755, -0.42919591069221497, -0.9817978739738464, -0.6420586109161377, 0.8265887498855591, 1.591395616531372, -0.1208132952451706, -0.48302069306373596, 0.11329790204763412, 0.0771508663892746, -0.9228128790855408, -1.2619991302490234, 1.0860532522201538, 1.096641182899475, -0.6836934685707092, 0.06604336202144623, -0.0007737990817986429, 0.1620604395866394, 1.195958137512207, -1.3061535358428955, -1.4039719104766846, -1.0597201585769653, 0.3057299852371216, 0.4150581359863281, -0.7174144983291626, 2.833967924118042, 1.9534740447998047, 2.0486814975738525, -1.0880382061004639, 1.621694564819336, 0.8512656688690186, -0.40046969056129456, -0.6088271737098694, -0.508095383644104, -0.6184902191162109, -1.647040605545044, -1.0362098217010498, -0.4503057301044464, -0.0729660615324974, -0.5479549169540405, -1.1425532102584839, -0.44875210523605347, -0.03045438416302204, 0.3830311596393585, -0.04476971551775932, 1.179942011833191, -0.33142781257629395, 0.6495042443275452, 0.09495851397514343, -0.7525874376296997, -0.647229790687561, -1.2822614908218384, 1.96529221534729, -0.9638485312461853, -2.5667941570281982, 0.7096128463745117, 0.8198426961898804, 0.6214459538459778, 0.42318588495254517, -0.33889833092689514, 0.5179733633995056}; + + +static float axpy_alpha_dram __attribute__((section(".data"))) = {0.0354895144701004}; + + +static const float axpy_GR_dram[256] = {-0.6170963048934937, 
0.6163420081138611, -1.47520911693573, -1.6853899955749512, -1.454969882965088, 0.3884608745574951, -0.12655384838581085, 0.7251695394515991, -1.6254605054855347, -0.05061757192015648, 0.701270580291748, -0.010674111545085907, 1.280027985572815, 0.226738840341568, -2.0048766136169434, 0.04495485872030258, -1.3210171461105347, 0.6201387047767639, -2.60260272026062, -0.008398745208978653, -0.14909857511520386, -0.7087264657020569, 1.7377287149429321, 0.11756710708141327, 1.2383760213851929, 1.9833025932312012, 0.7503785490989685, 1.028262972831726, 0.4063720405101776, 1.1580451726913452, 0.2601262629032135, -0.006113477051258087, 0.9241524934768677, -1.0459985733032227, -0.5498446822166443, -0.3800765573978424, -0.30230090022087097, -0.14432018995285034, -0.7041125297546387, -0.07505207508802414, 1.9654661417007446, 0.2892536520957947, 0.905215322971344, 0.6909021139144897, -0.07698455452919006, 0.1769774854183197, 1.299486756324768, 0.6959537863731384, 0.3713718354701996, -0.7373353242874146, 0.8364982008934021, -0.9398844838142395, 0.4363156855106354, 1.2883731126785278, -2.335397243499756, -1.313011884689331, 0.13008591532707214, -2.1254849433898926, -0.13633590936660767, -1.0167912244796753, -0.7681940197944641, 0.010181199759244919, 1.1628119945526123, -0.9333721399307251, 0.48472508788108826, -0.6867969036102295, 1.1340876817703247, -1.4348300695419312, 0.4774530231952667, 0.3643883168697357, -0.042083632200956345, 0.12490488588809967, 0.37206754088401794, 0.723703145980835, -1.721687912940979, -0.8283520340919495, 0.4555502235889435, 0.2431127279996872, -0.6654025316238403, 0.05604320764541626, 0.8857665061950684, 0.17042219638824463, 0.7388009428977966, -0.07869481295347214, 0.5856943726539612, -0.6911437511444092, -0.4470047354698181, 0.0519288033246994, 0.6348109245300293, 1.6137770414352417, -0.1219681054353714, 1.105844497680664, 0.8866422176361084, -0.7060064673423767, -0.525648832321167, -0.3131568729877472, -0.8830055594444275, 0.39255091547966003, 
0.11575429886579514, -1.240466833114624, -0.29751336574554443, -0.823678731918335, -0.513481855392456, 1.9729855060577393, 1.893736720085144, 1.7217955589294434, 0.04245418682694435, -0.16903597116470337, -1.764094352722168, -0.659457266330719, -0.39469853043556213, 1.8760364055633545, -0.2701524794101715, 0.16266798973083496, 2.1229045391082764, 1.721497654914856, 0.37167292833328247, 0.6457100510597229, -0.21776197850704193, 0.7041028738021851, -0.140138179063797, -1.1722030639648438, -1.2782258987426758, 0.47560831904411316, -0.5518761873245239, 0.8661257028579712, -0.4645167291164398, -0.3448565900325775, 0.7349097728729248, -0.03835400938987732, -0.26058220863342285, 0.1262231469154358, 1.4654759168624878, -0.8810467720031738, -0.5780825018882751, -0.07919810712337494, -0.9446402788162231, -2.4878005981445312, -0.39540615677833557, 0.876423180103302, 0.9741265773773193, -0.29531094431877136, -0.8319878578186035, 0.846964418888092, -0.009819800034165382, -1.0301334857940674, -0.702728807926178, -0.35602468252182007, 0.9719283580780029, -0.15010808408260345, 0.12670697271823883, -0.03510089963674545, 0.9569829106330872, -1.5480659008026123, 0.9706020355224609, 1.202627420425415, -0.22982972860336304, -0.29115989804267883, -0.2356398105621338, 0.8056104183197021, 0.6164255738258362, -1.3164772987365723, -0.5151994824409485, 0.5879066586494446, -0.4600412845611572, -0.6345807909965515, -0.03998773917555809, -1.6845628023147583, -0.5401872992515564, -0.6922807097434998, 0.3600209355354309, 0.14341124892234802, 0.9799207448959351, -0.5995826721191406, -0.4875694513320923, 0.7622107267379761, -0.49272820353507996, 1.1408922672271729, -0.4904326796531677, 1.4126126766204834, 0.8154445886611938, -0.10435386747121811, 0.6282449960708618, 1.0244334936141968, 0.6714906692504883, -1.366214394569397, -0.030572369694709778, -1.165674090385437, -0.0668828934431076, 1.169749140739441, 0.7856887578964233, -0.7211117744445801, -1.3209123611450195, 0.4541661739349365, 
-0.9678727984428406, 0.020988313481211662, -0.47090792655944824, -0.9690905809402466, -0.6250673532485962, 0.8746309280395508, 1.6100653409957886, -0.04585809260606766, -0.5015020966529846, 0.08022145926952362, 0.08372215181589127, -0.8848855495452881, -1.2156308889389038, 1.1023725271224976, 1.06773042678833, -0.719936728477478, 0.04847876727581024, -0.021792521700263023, 0.16753703355789185, 1.2116007804870605, -1.3114163875579834, -1.4862523078918457, -1.0738447904586792, 0.34407591819763184, 0.3518561124801636, -0.6638947129249573, 2.84494948387146, 1.935718297958374, 2.0854132175445557, -1.0280734300613403, 1.6215347051620483, 0.9104193449020386, -0.3950071334838867, -0.6464550495147705, -0.5284187197685242, -0.615524411201477, -1.6328481435775757, -0.965613842010498, -0.4528605341911316, -0.10512291640043259, -0.6206626892089844, -1.1809192895889282, -0.4481266736984253, -0.02767818234860897, 0.3898862600326538, -0.030230604112148285, 1.1469676494598389, -0.3216259181499481, 0.6303797960281372, 0.11137533187866211, -0.7835303544998169, -0.6481922268867493, -1.2947980165481567, 2.0172438621520996, -0.9192937612533569, -2.592167615890503, 0.739918053150177, 0.83804851770401, 0.6406007409095764, 0.44325539469718933, -0.3209480345249176, 0.5258681178092957}; + + diff --git a/software/apps/sp-axpy-simple/data/layer.h b/software/apps/sp-axpy-simple/data/layer.h new file mode 100644 index 000000000..3aa676fa7 --- /dev/null +++ b/software/apps/sp-axpy-simple/data/layer.h @@ -0,0 +1,145 @@ +// Copyright 2020 ETH Zurich and University of Bologna. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include + +typedef enum { FP64 = 8, FP32 = 4, FP16 = 2, FP8 = 1 } precision_t; + +/** + * @struct gemm_layer_struct + * @brief This structure contains all parameters necessary for GEMM. 
+ * @var gemm_layer_struct::M + * Dimension of matrix product MxK * KxN + * @var gemm_layer_struct::M_p + * M divided by number of compute cores + * @var gemm_layer_struct::N + * Dimension of matrix product MxK * KxN + * @var gemm_layer_struct::K + * Dimension of matrix product MxK * KxN + * @var gemm_layer_struct::TA + * Transpose matrix A + * @var gemm_layer_struct::TB + * Transpose matrix B + * @var gemm_layer_struct::TILE_M + * Tile factor across M dimension + * @var gemm_layer_struct::TILE_N + * Tile factor across N dimension + * @var gemm_layer_struct::TILE_K + * Tile factor across K dimension + * @var gemm_layer_struct::A + * Pointer to matrix A + * @var gemm_layer_struct::B + * Pointer to matrix B + * @var gemm_layer_struct::C + * Pointer to matrix C + * @var gemm_layer_struct::ALPHA + * constant factor: A * B + ALPHA * C + * @var gemm_layer_struct::dtype + * Precision of GEMM + * @var gemm_layer_struct::expand + * Use expanding DOTP instructions + */ +typedef struct gemm_layer_struct { + uint32_t M; + uint32_t M_p; + uint32_t N; + uint32_t K; + + uint32_t TA; + uint32_t TB; + + uint32_t TILE_M; + uint32_t TILE_N; + uint32_t TILE_K; + + double *A; + double *B; + double *C; + + uint32_t ALPHA; + + precision_t dtype; + uint32_t expand; +} gemm_layer; + +/** + * @struct conv_layer_struct + * @brief This structure contains all parameters necessary for Convolutional + * layers + * @var conv_layer_struct::CO + * Number of output channels + * @var conv_layer_struct::CI + * Number of input channels + * @var conv_layer_struct::IH + * Height of input feature map + * @var conv_layer_struct::IW + * Width of input feature map + * @var conv_layer_struct::OH + * Height of output feature map + * @var conv_layer_struct::OW + * Width of output feature map + * @var conv_layer_struct::FH + * Height of filter + * @var conv_layer_struct::FW + * Width of filter + * @var conv_layer_struct::pad + * Padding on all sides + * @var conv_layer_struct::ifmap + * Pointer to input feature 
map + * @var conv_layer_struct::weights + * Pointer to weights + * @var conv_layer_struct::ofmap + * Pointer to output feature map + * @var conv_layer_struct::TILE_CI + * Tiling factor of input channel + * @var conv_layer_struct::cluster2cluster + * Flag for enabling cluster 2 cluster communication + * @var conv_layer_struct::im2col + * Flag for enabling im2col + GEMM + * @var conv_layer_struct::gamma + * Pointer to gamma for BatchNorm + * @var conv_layer_struct::beta + * Pointer to beta for BatchNorm + * @var conv_layer_struct::dtype + * Precision of Convolution layer + */ +typedef struct conv_layer_struct { + // CONV2D + uint32_t CO; + uint32_t CI; + uint32_t IH; + uint32_t IW; + uint32_t OH; + uint32_t OW; + uint32_t FH; + uint32_t FW; + uint32_t pad; + + double *ifmap; + double *weights; + double *ofmap; + + uint32_t TILE_CI; + uint32_t cluster2cluster; + uint32_t im2col; + + // BATCHNORM + double *gamma; + double *beta; + + precision_t dtype; +} conv_layer; + +/** + * @struct axpy_layer_struct + * @brief This structure contains all parameters necessary for AXPY layers + */ +typedef struct axpy_layer_struct { + // AXPY + uint32_t M; + + precision_t dtype; +} axpy_layer; diff --git a/software/apps/sp-axpy-simple/main-vsim-backup.c b/software/apps/sp-axpy-simple/main-vsim-backup.c new file mode 100644 index 000000000..c62be40e9 --- /dev/null +++ b/software/apps/sp-axpy-simple/main-vsim-backup.c @@ -0,0 +1,206 @@ +// Copyright 2022 ETH Zurich and University of Bologna. +// +// SPDX-License-Identifier: Apache-2.0 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +// Author: Domenic Wüthrich + +#include +#include +#include + +#include "data/data_axpy.h" +#include "printf.h" +#ifdef MEMPOOL +#include "alloc.h" +#include "runtime.h" +#include "synchronization.h" +#endif + +// dump(result, 1); +const unsigned int csize = 32; +const unsigned int core_count = 16; // NOTE(review): esize = 32*16 = 512, but the arrays in data/data_axpy.h are declared [256] - confirm +const unsigned int esize = csize*core_count; + +float x[esize]; +float y[esize]; + +// Copy elements [cid*num_columns, (cid+1)*num_columns) of src into matrix +void init_matrix(float *matrix, const float *src, + const unsigned int num_columns, unsigned int cid) { + for (unsigned int j = cid*num_columns; j < (cid+1)*num_columns; ++j) + matrix[j] = src[j]; +} + +// Count elements in [cid*num_columns, (cid+1)*num_columns) with |matrix - golden| > 0.1 +int verify_matrix(float *matrix, const float *golden, + const unsigned int num_columns, unsigned int cid) { + int error = 0; + for (unsigned int j = cid*num_columns; j < (cid+1)*num_columns; ++j) { + float diff = matrix[j] - golden[j]; + if (diff < 0) + diff = -diff; + if (diff > 0.1f) + error ++; + } + return error; +} + +int main() { + const unsigned int num_cores = mempool_get_core_count(); + const unsigned int cores_per_group = num_cores / NUM_GROUPS; + const unsigned int cid = mempool_get_core_id(); + + unsigned int timer_start, timer_end, timer; + + // Initialize MemPool + mempool_init(cid); + + // Initialize multicore barrier + mempool_barrier_init(cid); + + // Reset timer + timer = (unsigned int)-1; + + // Wait for all cores to finish + mempool_barrier(num_cores); + + if (cid < core_count) { + if (cid == 0) + printf("initialize matrix\n"); + // for (unsigned int j = cid*csize; j < (cid+1)*csize; ++j) { + // x[j] = axpy_X_dram[j]; + // y[j] = axpy_Y_dram[j]; + // } + init_matrix(x, axpy_X_dram, csize, cid); + init_matrix(y, axpy_Y_dram, csize, cid); + } + + // Calculate local pointers + float *x_ = x + cid * csize; + float *y_ = y + cid * csize; + + // Initialize alpha + float alpha = axpy_alpha_dram; + + unsigned int
remaining_elem = csize; + + mempool_barrier(num_cores); + + // if (cid != 0) { + // mempool_wfi(); + // } + + // printf("S:%d\n", cid); + // wake_up(cid + 1); + + // if (cid == 0) { + // printf("start kernel\n"); + // } + // int32_t count = 0; + + // Start the timer once, before the loop, so every iteration is measured + timer_start = mempool_get_timer(); + if (cid < core_count) { + while (remaining_elem > 0) { + // Set the VL + uint32_t actual_elem; + asm volatile("vsetvli %[gvl], %[vl], e32, m1, ta, ma" + : [gvl] "=r"(actual_elem) + : [vl] "r"(remaining_elem)); + + // if (cid == 0) + // printf("Start Load\n"); + // Load vectors + asm volatile("vle32.v v0, (%0)" ::"r"(x_)); + asm volatile("vle32.v v8, (%0)" ::"r"(y_)); + // if (cid == 0) + // printf("Finish Load\n"); + + // Multiply-accumulate + asm volatile("vfmacc.vf v8, %0, v0" ::"f"(alpha)); + // if (cid == 0) + // printf("Finish MACC\n"); + + // Store results + asm volatile("vse32.v v8, (%0)" ::"r"(y_)); + // if (cid == 15) + // printf("Finish Store\n"); + + // Bump pointers + remaining_elem -= actual_elem; + x_ += actual_elem; + y_ += actual_elem; + } + // printf("cid:%u count %d", cid, count); + } + + // if (cid != 0) { + // mempool_wfi(); + // } + + // printf("F:%d\n", cid); + // wake_up(cid + 1); + + // Wait for all cores to finish axpy + mempool_barrier(num_cores); + // End timer before printing so the printf is not measured + timer_end = mempool_get_timer(); + if(cid == 0) + printf("count\n"); + + if (cid == 0) { + unsigned int timer_temp = timer_end - timer_start; + if (timer_temp < timer) { + timer = timer_temp; + } + } + + // Display results: 2 operations per element over all esize elements + if (cid == 0) { + unsigned int performance = 1000 * 2 * esize / timer; + unsigned int utilization = performance / (2 * core_count * N_FPU); + + printf("\n----- (%u) axpy -----\n", esize); + printf("The execution took %u cycles.\n", timer); + printf("The performance is %u OP/1000cycle (%u%%o utilization).\n", + performance, utilization); + } + + mempool_barrier(num_cores); + if (cid < core_count) { + int
error = verify_matrix(y, axpy_GR_dram, csize, cid); + if (cid == 0) + printf("Errors: %d\n", error); + + // if (error != 0) { + // printf("Error: y[%d]=%04x\n", error, *(uint32_t *)(y + error)); + // return error; + // } + // for (int k = 0; k < 16; k ++) { + // if (ptr_vec_res[k] != axpy_GR_dram[k]) + // } + } + + // // Free the matrices + // if (cid == 0) { + // domain_free(get_alloc_tile(0), x); + // domain_free(get_alloc_tile(0), y); + // } + + // Wait for core 0 to finish displaying results + mempool_barrier(num_cores); + + return 0; +} diff --git a/software/apps/sp-axpy-simple/main.c b/software/apps/sp-axpy-simple/main.c new file mode 100644 index 000000000..23de34716 --- /dev/null +++ b/software/apps/sp-axpy-simple/main.c @@ -0,0 +1,199 @@ +// Copyright 2022 ETH Zurich and University of Bologna. +// +// SPDX-License-Identifier: Apache-2.0 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Author: Domenic Wüthrich + +#include +#include +#include + +#include "data/data_axpy.h" +#include "printf.h" +#ifdef MEMPOOL +#include "alloc.h" +#include "runtime.h" +#include "synchronization.h" +#endif + +// dump(result, 1); +const unsigned int csize = 32; +const unsigned int core_count = 8; +const unsigned int esize = csize*core_count; + +float x[esize]; +float y[esize]; + +// Copy elements [cid*num_columns, (cid+1)*num_columns) of src into matrix +void init_matrix(float *matrix, const float *src, + const unsigned int num_columns, unsigned int cid) { + for (unsigned int j = cid*num_columns; j < (cid+1)*num_columns; ++j) + matrix[j] = src[j]; +} + +// Count elements in [cid*num_columns, (cid+1)*num_columns) with |matrix - golden| > 0.01 +int verify_matrix(float *matrix, const float *golden, + const unsigned int num_columns, unsigned int cid) { + int error = 0; + for (unsigned int j = cid*num_columns; j < (cid+1)*num_columns; ++j) { + float diff = matrix[j] - golden[j]; + if (diff < 0) + diff = -diff; + if (diff > 0.01f) + error ++; + } + return error; +} + +int main() { + const unsigned int num_cores = mempool_get_core_count(); + const unsigned int cores_per_group = num_cores / NUM_GROUPS; + const unsigned int cid = mempool_get_core_id(); + + unsigned int timer_start, timer_end, timer; + + // Initialize MemPool + mempool_init(cid); + + // Initialize multicore barrier + mempool_barrier_init(cid); + + // Reset timer + timer = (unsigned int)-1; + + // Wait for all cores to finish + mempool_barrier(num_cores); + + if (cid < core_count) { + if (cid == 0) + printf("initialize matrix\n"); + // for (unsigned int j = cid*csize; j < (cid+1)*csize; ++j) { + // x[j] = axpy_X_dram[j]; + // y[j] = axpy_Y_dram[j]; + // } + init_matrix(x, axpy_X_dram, csize, cid); + init_matrix(y, axpy_Y_dram, csize, cid); + } + + // Calculate local pointers + float *x_ = x + cid * csize; + float *y_ = y + cid * csize; + + // Initialize alpha + float alpha = axpy_alpha_dram; + + unsigned int remaining_elem = csize; + + mempool_barrier(num_cores); + + // if (cid != 0) { + // mempool_wfi(); + // } + + 
// printf("S:%d\n", cid); + // wake_up(cid + 1); + + // if (cid == 0) { + // printf("start kernel\n"); + // } + // Wait for all cores to finish + + // int32_t count = 0; + if (cid < core_count) { + while (remaining_elem > 0) { + timer_start = mempool_get_timer(); + // Set the VL + uint32_t actual_elem; + asm volatile("vsetvli %[gvl], %[vl], e32, m1, ta, ma" + : [gvl] "=r"(actual_elem) + : [vl] "r"(remaining_elem)); + + // if (cid == 0) + // printf("Start Load\n"); + // Load vectors + asm volatile("vle32.v v0, (%0)" ::"r"(x_)); + asm volatile("vle32.v v8, (%0)" ::"r"(y_)); + // if (cid == 0) + // printf("Finish Load\n"); + + // Multiply-accumulate + asm volatile("vfmacc.vf v8, %0, v0" ::"f"(alpha)); + // if (cid == 0) + // printf("Finish MACC\n"); + + // Store results + asm volatile("vse32.v v8, (%0)" ::"r"(y_)); + // if (cid == 7) + // printf("Finish Store\n"); + + // Bump pointers + remaining_elem -= actual_elem; + x_ += actual_elem; + y_ += actual_elem; + } + // printf("cid:%u count %d", cid, count); + } + + // if (cid != 0) { + // mempool_wfi(); + // } + + // printf("F:%d\n", cid); + // wake_up(cid + 1); + + // Wait for all cores to finish matmul + mempool_barrier(num_cores); + if(cid == 0) + printf("finish calc\n"); + + // End timer and check if new best runtime + timer_end = mempool_get_timer(); + if (cid == 0) { + unsigned int timer_temp = timer_end - timer_start; + if (timer_temp < timer) { + timer = timer_temp; + } + } + + // Check and display results + if (cid == 0) { + unsigned int performance = 1000 * 2 * 16 / timer; + unsigned int utilization = performance / (2 * core_count * N_FPU); + + printf("\n----- (%u) axpy -----\n", esize); + printf("The execution took %u cycles.\n", timer); + printf("The performance is %u OP/1000cycle (%u%%o utilization).\n", + performance, utilization); + } + + mempool_barrier(num_cores); + if (cid == 0) + printf("start check\n"); + + // if (cid < core_count) { + // int error = verify_matrix(y, axpy_GR_dram, csize, cid); + // 
if (error > 0) + // printf("%d,Errors: %d\n", cid, error); + // } + if (cid == 0) { + int error = verify_matrix(y, axpy_GR_dram, esize, cid); + printf("Errors: %d\n", error); + } + + // Wait for core 0 to finish displaying results + mempool_barrier(num_cores); + + return 0; +} diff --git a/software/apps/sp-axpy-simple/script/axpy.json b/software/apps/sp-axpy-simple/script/axpy.json new file mode 100644 index 000000000..7b7cad730 --- /dev/null +++ b/software/apps/sp-axpy-simple/script/axpy.json @@ -0,0 +1,11 @@ +// Copyright 2020 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 + +// Parameters for an AXPY + +{ + kernel: "AXPY" + M: 256, + prec: 32 +} diff --git a/software/apps/sp-axpy-simple/script/gen_data.py b/software/apps/sp-axpy-simple/script/gen_data.py new file mode 100755 index 000000000..45084cdd2 --- /dev/null +++ b/software/apps/sp-axpy-simple/script/gen_data.py @@ -0,0 +1,626 @@ +#!/usr/bin/env python3 +# Copyright 2022 ETH Zurich and University of Bologna. +# Licensed under the Apache License, Version 2.0, see LICENSE for details. 
# SPDX-License-Identifier: Apache-2.0

# Author: Tim Fischer

import argparse
import pathlib

import numpy as np
import torch
import torch.nn as nn

np.random.seed(42)
torch.manual_seed(42)

# Module-wide verbosity flag. main() overwrites it from the command line; the
# default keeps fused_conv() usable when this file is imported as a module.
verbose = False

# Maps the precision in bits (as a string) to the C type used in the headers.
_CTYPES = {
    '64': 'double',
    '32': 'float',
    '16': '__fp16',
    '8': 'char'
}


def array_to_cstr(a, fmt=float):
    """Render `a` as a C brace initializer such as '{1.0, 2.0}'.

    With fmt == float, `a` may be a NumPy array, a torch tensor or any
    iterable of numbers; elements are written in flattened order.
    With any other fmt, `a` must be a dict holding the uint8 tensors 'sign',
    'exponent' and 'mantissa'; each element is packed into one byte as
    sign * 2**7 + exponent * 2**2 + mantissa and written as 0xNN.
    An empty input yields '{}'.
    """
    if fmt == float:
        if isinstance(a, torch.Tensor):
            # NumPy has no bfloat16, so widen before converting.
            if a.dtype == torch.bfloat16:
                a = a.float()
            a = a.numpy()
        if isinstance(a, np.ndarray):
            a = a.flat
        elements = ['{}'.format(el) for el in a]
    else:
        elements = [
            '0x{:02x}'.format(int(sign) * 2**7 + int(exp) * 2**2 + int(mant))
            for sign, exp, mant in zip(a['sign'].numpy().flat,
                                       a['exponent'].numpy().flat,
                                       a['mantissa'].numpy().flat)
        ]
    # join() instead of stripping a trailing ', ' so that an empty input does
    # not lose its opening brace.
    return '{' + ', '.join(elements) + '}'


def emit_header_file(layer_type: str, **kwargs):
    """Write the data header for `layer_type` into the sibling 'data' directory.

    `kwargs` are forwarded unchanged to the matching emit_* function.
    Raises ValueError for an unknown `layer_type`.
    """
    emitters = {
        'AXPY': ('data_axpy.h', emit_axpy_layer),
        'Conv2d': ('data_conv2d.h', emit_conv2d_layer),
        'GEMM': ('data_gemm.h', emit_GEMM_layer),
        'BatchNorm': ('data_batchnorm.h', emit_batchnorm_layer),
        'MaxPool': ('data_maxpool.h', emit_maxpool_layer),
        'FusedConv': ('data_fusedconv.h', emit_fusedconv),
    }
    if layer_type not in emitters:
        raise ValueError(f'Unknown layer type: {layer_type!r}')
    file_name, emitter = emitters[layer_type]

    file_path = pathlib.Path(__file__).parent.parent / 'data'
    emit_str = "// Copyright 2022 ETH Zurich and University of Bologna.\n" + \
               "// Licensed under the Apache License, Version 2.0, see LICENSE for details.\n" + \
               "// SPDX-License-Identifier: Apache-2.0\n\n"
    emit_str += emitter(**kwargs)

    # The output directory may not exist on a fresh checkout.
    file_path.mkdir(parents=True, exist_ok=True)
    with (file_path / file_name).open('w') as f:
        f.write(emit_str)


def emit_axpy_layer(name='axpy', **kwargs):
    """Return the header text for an AXPY problem.

    Reads 'X', 'Y', 'alpha', 'result', 'M' and 'prec' from kwargs. For
    prec == 8 it reads the packed bit fields 'bits_X', 'bits_Y' and the alpha
    bits, accepted under either 'bits_alpha' or 'bits_a' (the key main() uses).
    """
    vec_X = kwargs['X']
    vec_Y = kwargs['Y']
    alpha = kwargs['alpha']
    result = kwargs['result']

    m = kwargs['M']

    layer_str = ''
    layer_str += '#include "layer.h"\n\n'
    layer_str += f'const axpy_layer {name}_l = {{\n'
    layer_str += f'\t.M = {m},\n'
    layer_str += f'\t.dtype = FP{kwargs["prec"]},\n'
    layer_str += '};\n\n\n'

    dtype = _CTYPES[str(kwargs['prec'])]
    if dtype != 'char':
        layer_str += f'static {dtype} {name}_X_dram [{m}] __attribute__((section(".data"))) = ' + array_to_cstr(vec_X) + ';\n\n\n'
        layer_str += f'static {dtype} {name}_Y_dram [{m}] __attribute__((section(".data"))) = ' + array_to_cstr(vec_Y) + ';\n\n\n'
        layer_str += f'static {dtype} {name}_alpha_dram __attribute__((section(".data"))) = ' + array_to_cstr(alpha) + ';\n\n\n'
        layer_str += f'static const {dtype} {name}_GR_dram[{m}] = ' + array_to_cstr(result) + ';\n\n\n'
    else:
        bits_alpha = kwargs['bits_alpha'] if 'bits_alpha' in kwargs else kwargs['bits_a']
        layer_str += f'static {dtype} {name}_X_dram [{m}] = ' + \
            array_to_cstr(kwargs['bits_X'], fmt='char') + ';\n\n\n'
        layer_str += f'static {dtype} {name}_Y_dram [{m}] = ' + \
            array_to_cstr(kwargs['bits_Y'], fmt='char') + ';\n\n\n'
        layer_str += f'static {dtype} {name}_alpha_dram = ' + \
            array_to_cstr(bits_alpha, fmt='char') + ';\n\n\n'

    return layer_str


def emit_conv2d_layer(name='conv2d', **kwargs):
    """Return the header text for a Conv2d layer.

    Reads the 4-D tensors 'ifmap', 'ofmap' and 'weights' from kwargs; the
    layer dimensions are taken from their shapes.
    """
    ifmap = kwargs['ifmap']
    ofmap = kwargs['ofmap']
    weights = kwargs['weights']

    _, ih, iw, ci = ifmap.shape
    _, oh, ow, co = ofmap.shape
    _, fh, fw, _ = weights.shape

    layer_str = ''
    layer_str += '#include "layer.h"\n\n'
    layer_str += f'conv_layer {name}_l = {{\n'
    layer_str += f'\t.CO = {co},\n'
    layer_str += f'\t.CI = {ci},\n'
    layer_str += f'\t.IH = {ih},\n'
    layer_str += f'\t.IW = {iw},\n'
    layer_str += f'\t.OH = {oh},\n'
    layer_str += f'\t.OW = {ow},\n'
    layer_str += f'\t.FH = {fh},\n'
    layer_str += f'\t.FW = {fw}\n'
    layer_str += '};\n\n\n'

    layer_str += f'static double {name}_result[{oh}][{ow}][{co}] __attribute__((section(".data")));\n\n'
    layer_str += f'static double {name}_checksum[{oh}][{ow}] = ' + array_to_cstr(torch.sum(ofmap, dim=-1)) + ';\n\n\n'
    layer_str += f'static double {name}_ifmap_dram[{ih}][{iw}][{ci}] = ' + array_to_cstr(ifmap) + ';\n\n\n'
    layer_str += f'static double {name}_weights_dram[{co}][{ci}][{fh}][{fw}] = ' + array_to_cstr(weights) + ';\n\n\n'
    layer_str += f'static double {name}_ofmap_dram[{oh}][{ow}][{co}] = ' + array_to_cstr(ofmap) + ';\n\n\n'

    return layer_str


def emit_linear_layer(input, weights, ofmap):
    """Placeholder: ignores its arguments and returns an empty string."""
    layer_str = ''
    return layer_str


def emit_GEMM_layer(name='gemm', **kwargs):
    """Return the header text for a GEMM problem.

    Reads 'A', 'B', 'C', 'result', 'M', 'N', 'K', 'ta', 'tb', 'alpha', 'prec'
    and 'expand' from kwargs, plus 'bits_A'/'bits_B'/'bits_C' for prec == 8.
    """
    mat_A = kwargs['A']
    mat_B = kwargs['B']
    mat_C = kwargs['C']
    result = kwargs['result']

    m = kwargs['M']
    n = kwargs['N']
    k = kwargs['K']

    layer_str = ''
    layer_str += '#include "layer.h"\n\n'
    layer_str += f'const gemm_layer {name}_l = {{\n'
    layer_str += f'\t.M = {m},\n'
    layer_str += f'\t.N = {n},\n'
    layer_str += f'\t.K = {k},\n'
    layer_str += f'\t.TA = {int(kwargs["ta"])},\n'
    layer_str += f'\t.TB = {int(kwargs["tb"])},\n'
    layer_str += f'\t.ALPHA = {kwargs["alpha"]},\n'
    layer_str += f'\t.dtype = FP{kwargs["prec"]},\n'
    layer_str += f'\t.expand = {kwargs["expand"]}\n'
    layer_str += '};\n\n\n'

    dtype = _CTYPES[str(kwargs['prec'])]
    if dtype != 'char':
        layer_str += f'static {dtype} {name}_A_dram [{m}*{k}] __attribute__((section(".data"))) = ' + array_to_cstr(mat_A) + ';\n\n\n'
        layer_str += f'static {dtype} {name}_B_dram [{k}*{n}] __attribute__((section(".data"))) = ' + array_to_cstr(mat_B) + ';\n\n\n'
        layer_str += f'static {dtype} {name}_C_dram [{m}*{n}] __attribute__((section(".data"))) = ' + array_to_cstr(mat_C) + ';\n\n\n'
        layer_str += f'static const {dtype} {name}_checksum[{m}] = ' + array_to_cstr(torch.sum(result, dim=-1)) + ';\n\n\n'
    else:
        layer_str += f'static {dtype} {name}_A_dram [{m}][{k}] = ' + \
            array_to_cstr(kwargs['bits_A'], fmt='char') + ';\n\n\n'
        layer_str += f'static {dtype} {name}_B_dram [{k}][{n}] = ' + \
            array_to_cstr(kwargs['bits_B'], fmt='char') + ';\n\n\n'
        layer_str += f'static {dtype} {name}_C_dram [{m}][{n}] = ' + \
            array_to_cstr(kwargs['bits_C'], fmt='char') + ';\n\n\n'

    return layer_str


def emit_batchnorm_layer(name='batchnorm', **kwargs):
    """Return the header text for a BatchNorm layer.

    Reads the tensors 'ifmap', 'ofmap', 'beta' and 'gamma' from kwargs.
    """
    ifmap = kwargs['ifmap']
    ofmap = kwargs['ofmap']
    beta = kwargs['beta']
    gamma = kwargs['gamma']

    _, ih, iw, ci = ifmap.shape
    _, oh, ow, co = ofmap.shape

    layer_str = ''
    layer_str += '#include "layer.h"\n\n'
    layer_str += f'conv_layer {name}_l = {{\n'
    layer_str += f'\t.CO = {co},\n'
    layer_str += f'\t.CI = {ci},\n'
    layer_str += f'\t.IH = {ih},\n'
    layer_str += f'\t.IW = {iw},\n'
    layer_str += f'\t.OH = {oh},\n'
    layer_str += f'\t.OW = {ow},\n'
    layer_str += '};\n\n\n'

    layer_str += f'static double {name}_result[{oh}][{ow}][{co}] __attribute__((section(".data")));\n\n'
    layer_str += f'static double {name}_checksum[{oh}][{ow}] = ' + array_to_cstr(torch.sum(ofmap, dim=-1)) + ';\n\n\n'
    layer_str += f'static double {name}_ifmap_dram[{ih}][{iw}][{ci}] = ' + array_to_cstr(ifmap) + ';\n\n\n'
    layer_str += f'static double {name}_beta_dram[{ci}] = ' + array_to_cstr(beta) + ';\n\n\n'
    layer_str += f'static double {name}_gamma_dram[{ci}] = ' + array_to_cstr(gamma) + ';\n\n\n'
    layer_str += f'static double {name}_ofmap_dram[{oh}][{ow}][{co}] = ' + array_to_cstr(ofmap) + ';\n\n\n'

    return layer_str


def emit_maxpool_layer(name='maxpool', **kwargs):
    """Return the header text for a MaxPool layer.

    Reads the tensors 'ifmap' and 'ofmap' and the scalar 'kernel_size'.
    """
    ifmap = kwargs['ifmap']
    ofmap = kwargs['ofmap']
    k = kwargs['kernel_size']

    _, ih, iw, ci = ifmap.shape
    _, oh, ow, co = ofmap.shape

    layer_str = ''
    layer_str += '#include "layer.h"\n\n'
    layer_str += f'conv_layer {name}_l = {{\n'
    layer_str += f'\t.CO = {co},\n'
    layer_str += f'\t.CI = {ci},\n'
    layer_str += f'\t.IH = {ih},\n'
    layer_str += f'\t.IW = {iw},\n'
    layer_str += f'\t.OH = {oh},\n'
    layer_str += f'\t.OW = {ow},\n'
    layer_str += f'\t.FH = {k},\n'
    layer_str += f'\t.FW = {k},\n'
    layer_str += '};\n\n\n'

    layer_str += f'static double {name}_result[{oh}][{ow}][{co}] __attribute__((section(".data")));\n\n'
    layer_str += f'static double {name}_checksum[{oh}][{ow}] = ' + array_to_cstr(torch.sum(ofmap, dim=-1)) + ';\n\n\n'
    layer_str += f'static double {name}_ifmap_dram[{ih}][{iw}][{ci}] = ' + array_to_cstr(ifmap) + ';\n\n\n'
    layer_str += f'static double {name}_ofmap_dram[{oh}][{ow}][{co}] = ' + array_to_cstr(ofmap) + ';\n\n\n'

    return layer_str


def emit_fusedconv(name='fusedconv', **kwargs):
    """Return the header text for a fused convolution kernel.

    Reads 'ifmap', 'kernel', 'bn_k', 'bn_l', 'ofmap', 'ofmap_before',
    'ifmap_padded', 'padding', 'stride', 'flags', 'prec', 'depthwise' and
    'chw_layer' from kwargs. The tensor layout used to unpack the shapes
    depends on 'depthwise' and 'chw_layer'.
    """
    ifmap = kwargs['ifmap']
    kernel = kwargs['kernel']
    bn_k = kwargs['bn_k']
    bn_l = kwargs['bn_l']
    ofmap = kwargs['ofmap']
    ofmap_before = kwargs['ofmap_before']
    ifmap_padded = kwargs['ifmap_padded']

    padding = kwargs['padding']

    if kwargs['depthwise']:
        ih, iw, ci = ifmap.shape
        oh, ow, co = ofmap.shape
        fh, fw, co = kernel.shape
        ci = co
        ih_pad, iw_pad, _ = ifmap_padded.shape
    elif kwargs['chw_layer']:
        ci, ih, iw = ifmap.shape
        oh, ow, co = ofmap.shape
        co, ci, fh, fw = kernel.shape
        _, ih_pad, iw_pad = ifmap_padded.shape
    else:
        ih, iw, ci = ifmap.shape
        oh, ow, co = ofmap.shape
        _, fh, fw, _ = kernel.shape
        ih_pad, iw_pad, _ = ifmap_padded.shape

    dtype = _CTYPES[str(kwargs['prec'])]

    # The header name was missing after '#include'; <stdint.h> is what the
    # uint32_t declarations emitted below require.
    layer_str = '#include <stdint.h>\n'
    layer_str += '#include "conv2d.h"\n\n'
    layer_str += 'kernel_fp32 k = {\n'
    layer_str += f'\t.ch_in = {ci},\n'
    layer_str += f'\t.ch_out = {co},\n'
    layer_str += f'\t.dim_in_x = {iw},\n'
    layer_str += f'\t.dim_in_y = {ih},\n'
    layer_str += f'\t.dim_kernel_x = {fw},\n'
    layer_str += f'\t.dim_kernel_y = {fh},\n'
    layer_str += f'\t.dim_out_x = {ow},\n'
    layer_str += f'\t.dim_out_y = {oh},\n'
    layer_str += f'\t.padding_y_top = {padding["padding_y_top"]},\n'
    layer_str += f'\t.padding_y_bottom = {padding["padding_y_bottom"]},\n'
    layer_str += f'\t.padding_x_left = {padding["padding_x_left"]},\n'
    layer_str += f'\t.padding_x_right = {padding["padding_x_right"]},\n'
    layer_str += f'\t.stride_x = {kwargs["stride"]["stride_x"]},\n'
    layer_str += f'\t.stride_y = {kwargs["stride"]["stride_y"]},\n'
    layer_str += f'\t.flag_relu = {kwargs["flags"]["flag_relu"]},\n'
    layer_str += f'\t.flag_batch_norm = {kwargs["flags"]["flag_batch_norm"]},\n'
    layer_str += f'\t.flag_y_accumulate_start = {kwargs["flags"]["flag_y_accumulate_start"]},\n'
    layer_str += f'\t.flag_y_accumulate_end = {kwargs["flags"]["flag_y_accumulate_end"]},\n'
    layer_str += '};\n\n'
    # int() so that a Python bool is emitted as 0/1 rather than True/False,
    # the same way emit_GEMM_layer treats its transpose flags.
    layer_str += f'uint32_t dw = {int(kwargs["depthwise"])};\n'
    layer_str += f'uint32_t chw_layer = {int(kwargs["chw_layer"])};\n'

    layer_str += f'static {dtype} {name}_pInBuffer_dram[{ih_pad}][{iw_pad}][{ci}] = ' + \
        array_to_cstr(ifmap_padded) + ';\n\n'
    layer_str += f'static {dtype} {name}_pWeight_dram[{co}][{fh}][{fw}][{ci}] = {array_to_cstr(kernel)};\n\n'
    layer_str += f'static {dtype} {name}_lambda_dram[{ci}] = {array_to_cstr(bn_l)};\n\n'
    layer_str += f'static {dtype} {name}_kappa_dram[{ci}] = {array_to_cstr(bn_k)};\n\n'
    layer_str += f'static {dtype} {name}_pOutBuffer_dram[{oh}][{ow}][{co}] = {array_to_cstr(ofmap_before)};\n\n'
    layer_str += f'static {dtype} {name}_pCheckOutBuffer_dram[{oh}][{ow}][{co}] = {array_to_cstr(ofmap)};\n\n'

    return layer_str


def rand_data_generator(shape, prec, alt=False):
    """Return (values, bits) of random data with the given shape.

    prec selects the element width in bits (64, 32, 16 or 8). For 16, alt=True
    selects bfloat16 instead of float16. For 8 the values are derived from
    random sign/exponent/mantissa fields which are returned in `bits`; for all
    other precisions `bits` is an empty dict.
    Raises ValueError for any other precision.
    """
    if prec == 64:
        return torch.randn(shape, requires_grad=False, dtype=torch.float64), {}
    elif prec == 32:
        return torch.randn(shape, requires_grad=False, dtype=torch.float32), {}
    elif prec == 16:
        if alt:
            return torch.randn(shape, requires_grad=False, dtype=torch.bfloat16), {}
        else:
            return torch.randn(shape, requires_grad=False, dtype=torch.float16), {}
    elif prec == 8:
        sign = torch.randint(0, 2, shape, requires_grad=False, dtype=torch.uint8)  # 0 or 1
        exponent = torch.randint(0, 16, shape, requires_grad=False, dtype=torch.uint8)  # 0..15
        mantissa = torch.randint(0, 4, shape, requires_grad=False, dtype=torch.uint8)  # 0..3
        bits = {'sign': sign, 'exponent': exponent, 'mantissa': mantissa}
        # TODO: not actually correct
        return ((-1.0)**sign.double())*(2.0**(exponent.double()-15.0))*(1.0 + mantissa.double() / (2**2)), bits
    raise ValueError(f'Unsupported precision: {prec!r}')


def conv2d(ifmap, weights, padding=1, stride=1):
    """Convolve `ifmap` with `weights` using a bias-free nn.Conv2d.

    NOTE: `padding` and `stride` are accepted but not used; the layer is
    always built with padding ((fh-1)//2, (fw-1)//2) and the default stride.
    """
    n, ci, ih, iw = ifmap.shape
    co, _, fh, fw = weights.shape

    layer = nn.Conv2d(ci, co, (fh, fw), padding=((fh-1)//2, (fw-1)//2))
    layer.weight = nn.Parameter(weights, requires_grad=False)
    layer.bias = nn.Parameter(torch.zeros_like(layer.bias, dtype=weights.dtype), requires_grad=False)
    ofmap = layer(ifmap)

    return ofmap


def max_pooling(ifmap, kernel):
    """Apply nn.MaxPool2d with kernel size `kernel` to `ifmap`."""
    n, ci, ih, iw = ifmap.shape
    max_pool = nn.MaxPool2d(kernel_size=kernel)
    ofmap = max_pool(ifmap)

    return ofmap


def batchnorm(ifmap):
    """Apply a per-channel scale and shift built from random running statistics.

    Returns (ofmap, gamma, beta) with ofmap = ifmap * gamma + beta, where
    gamma and beta are broadcast over the two trailing dimensions.
    """
    n, ci, ih, iw = ifmap.shape
    bn = torch.nn.BatchNorm2d(ci)
    bn.weight.requires_grad = False
    bn.bias.requires_grad = False
    running_mean = torch.randn_like(bn.running_mean, requires_grad=False)
    running_var = torch.rand_like(bn.running_var, requires_grad=False)
    gamma = bn.weight / torch.sqrt(running_var + bn.eps)
    beta = bn.bias - running_mean * bn.weight / torch.sqrt(running_var + bn.eps)
    ofmap = ifmap * gamma.unsqueeze(-1).unsqueeze(-1) + beta.unsqueeze(-1).unsqueeze(-1)

    return ofmap, gamma, beta


def fused_conv(ifmap, weights, bn_k, bn_l, padding, stride, bn, relu, accumulate, depthwise):
    """Reference model of the fused convolution.

    Zero-pads `ifmap` (H, W, C) according to `padding`, convolves it with
    `weights` using the strides in `stride`, adds `ofmap_before` (random when
    `accumulate` is set, zeros otherwise), then optionally applies
    ofmap * bn_k + bn_l and ReLU.
    Returns (ofmap, ofmap_before, ifmap_padded).
    """
    ih, iw, ci = ifmap.shape
    if not depthwise:
        co, fh, fw, _ = weights.shape
    else:
        fh, fw, co = weights.shape
        ci = co

    ifmap_padded = torch.zeros(ih + padding['padding_y_top'] + padding['padding_y_bottom'],
                               iw + padding['padding_x_left'] + padding['padding_x_right'],
                               ci,
                               requires_grad=False, dtype=ifmap.dtype)
    ifmap_padded[padding['padding_y_top']:ih+padding['padding_y_top'],
                 padding['padding_x_left']:iw+padding['padding_x_left']] = ifmap

    # Don't cover undefined behaviour when there are steps without a complete kernel window
    if (ifmap_padded.shape[0] - (fh - 1) - 1) % stride['stride_y'] != 0:
        print("Warning: rounding h output dimension")
    if (ifmap_padded.shape[1] - (fw - 1) - 1) % stride['stride_x'] != 0:
        print("Warning: rounding w output dimension")

    ofmap = torch.zeros((ifmap_padded.shape[0] - (fh - 1) - 1) // stride['stride_y'] + 1,
                        (ifmap_padded.shape[1] - (fw - 1) - 1) // stride['stride_x'] + 1, co)
    if accumulate:
        ofmap_before = torch.randn_like(ofmap, requires_grad=False)
    else:
        ofmap_before = torch.zeros_like(ofmap, requires_grad=False)

    if verbose:
        print(ifmap.shape, ifmap_padded.shape, ofmap.shape)

    if (depthwise):
        # depthwise Conv2d
        for h in range(0, ifmap_padded.shape[0] - (fh - 1), stride['stride_y']):
            for w in range(0, ifmap_padded.shape[1] - (fw - 1), stride['stride_x']):
                for c in range(co):
                    ofmap[h//stride['stride_y'], w//stride['stride_x'],
                          c] = torch.dot(ifmap_padded[h:h+fh, w:w+fw, c].flatten(), weights[:, :, c].flatten())
    else:
        # Conv2d
        for h in range(0, ifmap_padded.shape[0] - (fh - 1), stride['stride_y']):
            for w in range(0, ifmap_padded.shape[1] - (fw - 1), stride['stride_x']):
                for c in range(co):
                    ofmap[h//stride['stride_y'], w//stride['stride_x'],
                          c] = torch.dot(ifmap_padded[h:h+fh, w:w+fw].flatten(), weights[c].flatten())

    ofmap += ofmap_before

    # BatchNorm
    if bn:
        ofmap = ofmap * bn_k + bn_l

    # ReLU
    if relu:
        ofmap = torch.nn.functional.relu(ofmap)

    return ofmap, ofmap_before, ifmap_padded


def main():
    # hjson is only needed to read the configuration file, so it is imported
    # here; the generator functions above stay importable without it.
    import hjson

    parser = argparse.ArgumentParser(description='Generate data for kernels')
    parser.add_argument(
        "-c",
        "--cfg",
        type=pathlib.Path,
        required=True,
        help='Select param config file kernel'
    )
    parser.add_argument(
        "-v",
        "--verbose",
        action='store_true',
        help='Set verbose'
    )

    args = parser.parse_args()

    global verbose
    verbose = args.verbose

    with args.cfg.open() as f:
        param = hjson.loads(f.read())

    # torch dtype for the kernels that call torch.randn directly; None (used
    # for prec == 8) makes torch.randn fall back to its default dtype.
    if param['prec'] == 64:
        dtype = torch.float64
    elif param['prec'] == 16:
        dtype = torch.float16
    elif param['prec'] == 8:
        dtype = None
    else:
        dtype = torch.float32

    if param['kernel'] == 'Conv2d':
        ifmap = torch.randn(1, param['channels']['in'],
                            param['input_dim']['height'],
                            param['input_dim']['width'], requires_grad=False, dtype=dtype)
        weights = torch.randn(param['channels']['out'],
                              param['channels']['in'],
                              param['filter']['height'],
                              param['filter']['width'], requires_grad=False, dtype=dtype)

        ofmap = conv2d(ifmap, weights,
                       padding=param['filter']['padding'],
                       stride=param['filter']['stride'])

        # convert from CHW to HWC format
        ifmap = ifmap.permute(0, 2, 3, 1)
        ofmap = ofmap.permute(0, 2, 3, 1)
        weights = weights.permute(0, 2, 3, 1)
        kwargs = {'ifmap': ifmap, 'weights': weights, 'ofmap': ofmap}
        emit_header_file('Conv2d', **kwargs)

    elif param['kernel'] == 'GEMM':
        mat_A, bits_A = rand_data_generator((param['M'], param['K']), param['prec'])
        mat_B, bits_B = rand_data_generator((param['K'], param['N']), param['prec'])
        mat_C, bits_C = rand_data_generator((param['M'], param['N']), param['prec'])

        result = param['alpha'] * mat_C + torch.matmul(mat_A, mat_B)

        if param['transpose_A']:
            mat_A = mat_A.T
        if param['transpose_B']:
            mat_B = mat_B.T

        kwargs = {
            'A': mat_A,
            'B': mat_B,
            'C': mat_C,
            'result': result,
            'M': param['M'],
            'N': param['N'],
            'K': param['K'],
            'ta': param['transpose_A'],
            'tb': param['transpose_B'],
            'alpha': param['alpha'],
            'prec': param['prec'],
            'expand': param['expand'],
            'bits_A': bits_A,
            'bits_B': bits_B,
            'bits_C': bits_C
        }

        emit_header_file('GEMM', **kwargs)

    elif param['kernel'] == 'AXPY':
        vec_X, bits_X = rand_data_generator((param['M'], 1), param['prec'])
        vec_Y, bits_Y = rand_data_generator((param['M'], 1), param['prec'])
        alpha, bits_a = rand_data_generator((1, 1), param['prec'])

        result = alpha * vec_X + vec_Y

        kwargs = {
            'X': vec_X,
            'Y': vec_Y,
            'alpha': alpha,
            'result': result,
            'M': param['M'],
            'prec': param['prec'],
            'bits_X': bits_X,
            'bits_Y': bits_Y,
            'bits_a': bits_a
        }

        emit_header_file('AXPY', **kwargs)

    elif param['kernel'] == 'BatchNorm':
        ifmap = torch.randn(1, param['channels']['in'],
                            param['input_dim']['height'],
                            param['input_dim']['width'], requires_grad=False, dtype=dtype)

        ofmap, gamma, beta = batchnorm(ifmap)

        # convert from CHW to HWC format
        ifmap = ifmap.permute(0, 2, 3, 1)
        ofmap = ofmap.permute(0, 2, 3, 1)

        kwargs = {'ifmap': ifmap, 'beta': beta, 'gamma': gamma, 'ofmap': ofmap}
        emit_header_file('BatchNorm', **kwargs)

    elif param['kernel'] == 'MaxPool':
        ifmap = torch.randn(1, param['channels']['in'],
                            param['input_dim']['height'],
                            param['input_dim']['width'], requires_grad=False, dtype=dtype)

        ofmap = max_pooling(ifmap, param['kernel_size'])

        # convert from CHW to HWC format
        ifmap = ifmap.permute(0, 2, 3, 1)
        ofmap = ofmap.permute(0, 2, 3, 1)

        kwargs = {'ifmap': ifmap, 'ofmap': ofmap, 'kernel_size': param['kernel_size']}
        emit_header_file('MaxPool', **kwargs)

    elif param['kernel'] == 'FusedConv':
        ifmap = torch.randn(param['dim_in_y'], param['dim_in_x'], param['ch_in'], requires_grad=False, dtype=dtype)
        if not param['depthwise']:
            kernel = torch.randn(param['ch_out'], param['dim_kernel_y'], param['dim_kernel_x'],
                                 param['ch_in'], requires_grad=False, dtype=dtype)
        else:
            kernel = torch.randn(param['dim_kernel_y'], param['dim_kernel_x'],
                                 param['ch_in'], requires_grad=False, dtype=dtype)

        bn_k = torch.randn(param['ch_out'], requires_grad=False)
bn_l = torch.randn(param['ch_out'], requires_grad=False) + + ofmap, ofmap_before, ifmap_padded = fused_conv(ifmap, + kernel, + bn_k, + bn_l, + param['padding'], + param['stride'], + param['flags']['flag_batch_norm'], + param['flags']['flag_relu'], + not param['flags']['flag_y_accumulate_start'], + param['depthwise']) + + if param['chw_layer']: + ifmap = ifmap.permute(2, 0, 1) + ifmap_padded = ifmap_padded.permute(2, 0, 1) + kernel = kernel.permute(0, 3, 1, 2) + + kwargs = { + 'ifmap': ifmap, + 'ifmap_padded': ifmap_padded, + 'ofmap': ofmap, + 'ofmap_before': ofmap_before, + 'kernel': kernel, + 'bn_k': bn_k, + 'bn_l': bn_l, + 'padding': param['padding'], + 'stride': param['stride'], + 'prec': param['prec'], + 'flags': param['flags'], + 'depthwise': param['depthwise'], + 'chw_layer': param['chw_layer'] + } + emit_header_file('FusedConv', **kwargs) + + else: + print("No valid kernel selected") + + +if __name__ == '__main__': + main() diff --git a/software/apps/vector_add/main.c b/software/apps/vector_add/main.c index d3655c832..f3d292485 100644 --- a/software/apps/vector_add/main.c +++ b/software/apps/vector_add/main.c @@ -23,20 +23,16 @@ #include "runtime.h" #include "synchronization.h" -#define UNALIGNED 1 -#define VEC_LENGTH 33+UNALIGNED +// #define UNALIGNED 1 +// #define VEC_LENGTH 33+UNALIGNED +#define VEC_LENGTH 32 +#define UNALIGNED 0 -uint8_t vector1[VEC_LENGTH] = {0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0x01, 0x23}; -uint8_t vector2[VEC_LENGTH] = {0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10, 0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10, 0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10, 0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10, 0xfe, 0xdc}; +uint8_t vector1[VEC_LENGTH] = {0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 
0xcd, 0xef, 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef}; +uint8_t vector2[VEC_LENGTH] = {0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10, 0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10, 0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10, 0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10}; uint8_t vector_res[VEC_LENGTH]; -// #define SLD_LENGTH_INIT 16 -// #define SLD_LENGTH 7 - -// uint16_t vector_sld[SLD_LENGTH_INIT] = {0x0001, 0x0203, 0x0405, 0x0607, 0x0809, 0x0a0b, 0x0c0d, 0x0e0f, 0xf0e1, 0xd2c3, 0xb4a5, 0x9687, 0x7968, 0x5b4a, 0x3d2c, 0x1f0e}; -// uint16_t vector_sld_res[SLD_LENGTH]; - int main() { ////////////////// // Strip Mining // @@ -63,6 +59,9 @@ int main() { ptr_vec_res += actual_elem; } + mempool_barrier_init(cid); + mempool_barrier(num_cores); + if (cid == 0) { printf("CHECK RESULT\n"); } @@ -78,8 +77,11 @@ int main() { // return i+1; } } - // Initialize multicore barrier - mempool_barrier_init(cid); + + mempool_barrier(num_cores); + if (cid == 0 & error == 0) { + printf("CORRECT!\n"); + } mempool_barrier(num_cores); return error; diff --git a/software/runtime/runtime.mk b/software/runtime/runtime.mk index 2410f8620..9f627c054 100644 --- a/software/runtime/runtime.mk +++ b/software/runtime/runtime.mk @@ -37,8 +37,8 @@ ifeq ($(COMPILER),gcc) endif ifneq ($(n_fpu), 0) - RISCV_ARCH ?= rv$(RISCV_XLEN)imafd - RISCV_ABI := ilp32d + RISCV_ARCH ?= rv$(RISCV_XLEN)imaf + RISCV_ABI := ilp32 else RISCV_ARCH ?= rv$(RISCV_XLEN)ima endif @@ -54,8 +54,8 @@ else ifeq ($(spatz), 1) RISCV_ARCH ?= rv$(RISCV_XLEN)ima ifneq ($(n_fpu), 0) - RISCV_ARCH := $(addsuffix fd, $(RISCV_ARCH)) - RISCV_ABI := ilp32d + RISCV_ARCH := $(addsuffix f, $(RISCV_ARCH)) + RISCV_ABI := ilp32 endif RISCV_ARCH := $(addsuffix vzfh, $(RISCV_ARCH)) else