From e731b69315e3ebeb96c3cb60a7e3dc173a338710 Mon Sep 17 00:00:00 2001 From: Sergio Mazzola Date: Mon, 30 Nov 2020 11:34:45 +0100 Subject: [PATCH 01/65] [riscv-opcodes] Add Xpulpv2 load/stores --- toolchain/riscv-opcodes/Makefile | 2 +- toolchain/riscv-opcodes/encoding_out.h | 75 ++++++++++++++++++- toolchain/riscv-opcodes/inst.sverilog | 25 ++++++- .../riscv-opcodes/opcodes-xpulpimg_CUSTOM | 30 ++++++++ toolchain/riscv-opcodes/parse_opcodes | 1 + 5 files changed, 128 insertions(+), 5 deletions(-) diff --git a/toolchain/riscv-opcodes/Makefile b/toolchain/riscv-opcodes/Makefile index 5f39502b0..12d02b4aa 100644 --- a/toolchain/riscv-opcodes/Makefile +++ b/toolchain/riscv-opcodes/Makefile @@ -7,7 +7,7 @@ MY_OPCODES := opcodes-frep_CUSTOM opcodes-xpulpimg_CUSTOM opcodes-rv32d-zfh_DRAF ALL_OPCODES := opcodes-pseudo $(ALL_REAL_OPCODES) $(MY_OPCODES) opcodes-rvv-pseudo # Opcodes to be discarded -DISCARDED_OPCODES := +DISCARDED_OPCODES := opcodes-frep_CUSTOM OPCODES = $(filter-out $(sort $(DISCARDED_OPCODES)), $(sort $(ALL_OPCODES))) diff --git a/toolchain/riscv-opcodes/encoding_out.h b/toolchain/riscv-opcodes/encoding_out.h index 98660a4cb..5e5867b30 100644 --- a/toolchain/riscv-opcodes/encoding_out.h +++ b/toolchain/riscv-opcodes/encoding_out.h @@ -326,8 +326,6 @@ #define MASK_CUSTOM3_RD_RS1 0x707f #define MATCH_CUSTOM3_RD_RS1_RS2 0x707b #define MASK_CUSTOM3_RD_RS1_RS2 0x707f -#define MATCH_FREP 0xb -#define MASK_FREP 0x7f #define MATCH_SLLI_RV32 0x1013 #define MASK_SLLI_RV32 0xfe00707f #define MATCH_SRLI_RV32 0x5013 @@ -1846,6 +1844,54 @@ #define MASK_CSRRSI 0x707f #define MATCH_CSRRCI 0x7073 #define MASK_CSRRCI 0x707f +#define MATCH_P_LB_IRPOST 0xb +#define MASK_P_LB_IRPOST 0x707f +#define MATCH_P_LBU_IRPOST 0x400b +#define MASK_P_LBU_IRPOST 0x707f +#define MATCH_P_LH_IRPOST 0x100b +#define MASK_P_LH_IRPOST 0x707f +#define MATCH_P_LHU_IRPOST 0x500b +#define MASK_P_LHU_IRPOST 0x707f +#define MATCH_P_LW_IRPOST 0x200b +#define MASK_P_LW_IRPOST 0x707f +#define 
MATCH_P_LB_RRPOST 0x700b +#define MASK_P_LB_RRPOST 0xfe00707f +#define MATCH_P_LBU_RRPOST 0x4000700b +#define MASK_P_LBU_RRPOST 0xfe00707f +#define MATCH_P_LH_RRPOST 0x1000700b +#define MASK_P_LH_RRPOST 0xfe00707f +#define MATCH_P_LHU_RRPOST 0x5000700b +#define MASK_P_LHU_RRPOST 0xfe00707f +#define MATCH_P_LW_RRPOST 0x2000700b +#define MASK_P_LW_RRPOST 0xfe00707f +#define MATCH_P_LB_RR 0x7003 +#define MASK_P_LB_RR 0xfe00707f +#define MATCH_P_LBU_RR 0x40007003 +#define MASK_P_LBU_RR 0xfe00707f +#define MATCH_P_LH_RR 0x10007003 +#define MASK_P_LH_RR 0xfe00707f +#define MATCH_P_LHU_RR 0x50007003 +#define MASK_P_LHU_RR 0xfe00707f +#define MATCH_P_LW_RR 0x20007003 +#define MASK_P_LW_RR 0xfe00707f +#define MATCH_P_SB_IRPOST 0x2b +#define MASK_P_SB_IRPOST 0x707f +#define MATCH_P_SH_IRPOST 0x102b +#define MASK_P_SH_IRPOST 0x707f +#define MATCH_P_SW_IRPOST 0x202b +#define MASK_P_SW_IRPOST 0x707f +#define MATCH_P_SB_RRPOST 0x402b +#define MASK_P_SB_RRPOST 0xfe00707f +#define MATCH_P_SH_RRPOST 0x502b +#define MASK_P_SH_RRPOST 0xfe00707f +#define MATCH_P_SW_RRPOST 0x602b +#define MASK_P_SW_RRPOST 0xfe00707f +#define MATCH_P_SB_RR 0x4023 +#define MASK_P_SB_RR 0xfe00707f +#define MATCH_P_SH_RR 0x5023 +#define MASK_P_SH_RR 0xfe00707f +#define MATCH_P_SW_RR 0x6023 +#define MASK_P_SW_RR 0xfe00707f #define MATCH_P_ABS 0x4000033 #define MASK_P_ABS 0xfff0707f #define MATCH_P_SLET 0x4002033 @@ -2848,7 +2894,6 @@ DECLARE_INSN(custom3_rs1_rs2, MATCH_CUSTOM3_RS1_RS2, MASK_CUSTOM3_RS1_RS2) DECLARE_INSN(custom3_rd, MATCH_CUSTOM3_RD, MASK_CUSTOM3_RD) DECLARE_INSN(custom3_rd_rs1, MATCH_CUSTOM3_RD_RS1, MASK_CUSTOM3_RD_RS1) DECLARE_INSN(custom3_rd_rs1_rs2, MATCH_CUSTOM3_RD_RS1_RS2, MASK_CUSTOM3_RD_RS1_RS2) -DECLARE_INSN(frep, MATCH_FREP, MASK_FREP) DECLARE_INSN(slli_rv32, MATCH_SLLI_RV32, MASK_SLLI_RV32) DECLARE_INSN(srli_rv32, MATCH_SRLI_RV32, MASK_SRLI_RV32) DECLARE_INSN(srai_rv32, MATCH_SRAI_RV32, MASK_SRAI_RV32) @@ -3608,6 +3653,30 @@ DECLARE_INSN(csrrc, MATCH_CSRRC, MASK_CSRRC) 
DECLARE_INSN(csrrwi, MATCH_CSRRWI, MASK_CSRRWI) DECLARE_INSN(csrrsi, MATCH_CSRRSI, MASK_CSRRSI) DECLARE_INSN(csrrci, MATCH_CSRRCI, MASK_CSRRCI) +DECLARE_INSN(p_lb_irpost, MATCH_P_LB_IRPOST, MASK_P_LB_IRPOST) +DECLARE_INSN(p_lbu_irpost, MATCH_P_LBU_IRPOST, MASK_P_LBU_IRPOST) +DECLARE_INSN(p_lh_irpost, MATCH_P_LH_IRPOST, MASK_P_LH_IRPOST) +DECLARE_INSN(p_lhu_irpost, MATCH_P_LHU_IRPOST, MASK_P_LHU_IRPOST) +DECLARE_INSN(p_lw_irpost, MATCH_P_LW_IRPOST, MASK_P_LW_IRPOST) +DECLARE_INSN(p_lb_rrpost, MATCH_P_LB_RRPOST, MASK_P_LB_RRPOST) +DECLARE_INSN(p_lbu_rrpost, MATCH_P_LBU_RRPOST, MASK_P_LBU_RRPOST) +DECLARE_INSN(p_lh_rrpost, MATCH_P_LH_RRPOST, MASK_P_LH_RRPOST) +DECLARE_INSN(p_lhu_rrpost, MATCH_P_LHU_RRPOST, MASK_P_LHU_RRPOST) +DECLARE_INSN(p_lw_rrpost, MATCH_P_LW_RRPOST, MASK_P_LW_RRPOST) +DECLARE_INSN(p_lb_rr, MATCH_P_LB_RR, MASK_P_LB_RR) +DECLARE_INSN(p_lbu_rr, MATCH_P_LBU_RR, MASK_P_LBU_RR) +DECLARE_INSN(p_lh_rr, MATCH_P_LH_RR, MASK_P_LH_RR) +DECLARE_INSN(p_lhu_rr, MATCH_P_LHU_RR, MASK_P_LHU_RR) +DECLARE_INSN(p_lw_rr, MATCH_P_LW_RR, MASK_P_LW_RR) +DECLARE_INSN(p_sb_irpost, MATCH_P_SB_IRPOST, MASK_P_SB_IRPOST) +DECLARE_INSN(p_sh_irpost, MATCH_P_SH_IRPOST, MASK_P_SH_IRPOST) +DECLARE_INSN(p_sw_irpost, MATCH_P_SW_IRPOST, MASK_P_SW_IRPOST) +DECLARE_INSN(p_sb_rrpost, MATCH_P_SB_RRPOST, MASK_P_SB_RRPOST) +DECLARE_INSN(p_sh_rrpost, MATCH_P_SH_RRPOST, MASK_P_SH_RRPOST) +DECLARE_INSN(p_sw_rrpost, MATCH_P_SW_RRPOST, MASK_P_SW_RRPOST) +DECLARE_INSN(p_sb_rr, MATCH_P_SB_RR, MASK_P_SB_RR) +DECLARE_INSN(p_sh_rr, MATCH_P_SH_RR, MASK_P_SH_RR) +DECLARE_INSN(p_sw_rr, MATCH_P_SW_RR, MASK_P_SW_RR) DECLARE_INSN(p_abs, MATCH_P_ABS, MASK_P_ABS) DECLARE_INSN(p_slet, MATCH_P_SLET, MASK_P_SLET) DECLARE_INSN(p_sletu, MATCH_P_SLETU, MASK_P_SLETU) diff --git a/toolchain/riscv-opcodes/inst.sverilog b/toolchain/riscv-opcodes/inst.sverilog index 00ee613df..3242a203b 100644 --- a/toolchain/riscv-opcodes/inst.sverilog +++ b/toolchain/riscv-opcodes/inst.sverilog @@ -24,7 +24,6 @@ package riscv_instr; 
localparam [31:0] CUSTOM3_RD = 32'b?????????????????100?????1111011; localparam [31:0] CUSTOM3_RD_RS1 = 32'b?????????????????110?????1111011; localparam [31:0] CUSTOM3_RD_RS1_RS2 = 32'b?????????????????111?????1111011; - localparam [31:0] FREP = 32'b?????????????????????????0001011; localparam [31:0] SLLI_RV32 = 32'b0000000??????????001?????0010011; localparam [31:0] SRLI_RV32 = 32'b0000000??????????101?????0010011; localparam [31:0] SRAI_RV32 = 32'b0100000??????????101?????0010011; @@ -784,6 +783,30 @@ package riscv_instr; localparam [31:0] CSRRWI = 32'b?????????????????101?????1110011; localparam [31:0] CSRRSI = 32'b?????????????????110?????1110011; localparam [31:0] CSRRCI = 32'b?????????????????111?????1110011; + localparam [31:0] P_LB_IRPOST = 32'b?????????????????000?????0001011; + localparam [31:0] P_LBU_IRPOST = 32'b?????????????????100?????0001011; + localparam [31:0] P_LH_IRPOST = 32'b?????????????????001?????0001011; + localparam [31:0] P_LHU_IRPOST = 32'b?????????????????101?????0001011; + localparam [31:0] P_LW_IRPOST = 32'b?????????????????010?????0001011; + localparam [31:0] P_LB_RRPOST = 32'b0000000??????????111?????0001011; + localparam [31:0] P_LBU_RRPOST = 32'b0100000??????????111?????0001011; + localparam [31:0] P_LH_RRPOST = 32'b0001000??????????111?????0001011; + localparam [31:0] P_LHU_RRPOST = 32'b0101000??????????111?????0001011; + localparam [31:0] P_LW_RRPOST = 32'b0010000??????????111?????0001011; + localparam [31:0] P_LB_RR = 32'b0000000??????????111?????0000011; + localparam [31:0] P_LBU_RR = 32'b0100000??????????111?????0000011; + localparam [31:0] P_LH_RR = 32'b0001000??????????111?????0000011; + localparam [31:0] P_LHU_RR = 32'b0101000??????????111?????0000011; + localparam [31:0] P_LW_RR = 32'b0010000??????????111?????0000011; + localparam [31:0] P_SB_IRPOST = 32'b?????????????????000?????0101011; + localparam [31:0] P_SH_IRPOST = 32'b?????????????????001?????0101011; + localparam [31:0] P_SW_IRPOST = 
32'b?????????????????010?????0101011; + localparam [31:0] P_SB_RRPOST = 32'b0000000??????????100?????0101011; + localparam [31:0] P_SH_RRPOST = 32'b0000000??????????101?????0101011; + localparam [31:0] P_SW_RRPOST = 32'b0000000??????????110?????0101011; + localparam [31:0] P_SB_RR = 32'b0000000??????????100?????0100011; + localparam [31:0] P_SH_RR = 32'b0000000??????????101?????0100011; + localparam [31:0] P_SW_RR = 32'b0000000??????????110?????0100011; localparam [31:0] P_ABS = 32'b000001000000?????000?????0110011; localparam [31:0] P_SLET = 32'b0000010??????????010?????0110011; localparam [31:0] P_SLETU = 32'b0000010??????????011?????0110011; diff --git a/toolchain/riscv-opcodes/opcodes-xpulpimg_CUSTOM b/toolchain/riscv-opcodes/opcodes-xpulpimg_CUSTOM index daf000556..e80a4e859 100644 --- a/toolchain/riscv-opcodes/opcodes-xpulpimg_CUSTOM +++ b/toolchain/riscv-opcodes/opcodes-xpulpimg_CUSTOM @@ -9,6 +9,35 @@ # Xpulpimg extension +# Post-increment and reg-reg loads +p.lb_irpost rd rs1 imm12 14..12=0 6..2=0x02 1..0=3 +p.lbu_irpost rd rs1 imm12 14..12=4 6..2=0x02 1..0=3 +p.lh_irpost rd rs1 imm12 14..12=1 6..2=0x02 1..0=3 +p.lhu_irpost rd rs1 imm12 14..12=5 6..2=0x02 1..0=3 +p.lw_irpost rd rs1 imm12 14..12=2 6..2=0x02 1..0=3 +p.lb_rrpost rd rs1 rs2 31..25=0x00 14..12=7 6..2=0x02 1..0=3 +p.lbu_rrpost rd rs1 rs2 31..25=0x20 14..12=7 6..2=0x02 1..0=3 +p.lh_rrpost rd rs1 rs2 31..25=0x08 14..12=7 6..2=0x02 1..0=3 +p.lhu_rrpost rd rs1 rs2 31..25=0x28 14..12=7 6..2=0x02 1..0=3 +p.lw_rrpost rd rs1 rs2 31..25=0x10 14..12=7 6..2=0x02 1..0=3 +p.lb_rr rd rs1 rs2 31..25=0x00 14..12=7 6..2=0x00 1..0=3 +p.lbu_rr rd rs1 rs2 31..25=0x20 14..12=7 6..2=0x00 1..0=3 +p.lh_rr rd rs1 rs2 31..25=0x08 14..12=7 6..2=0x00 1..0=3 +p.lhu_rr rd rs1 rs2 31..25=0x28 14..12=7 6..2=0x00 1..0=3 +p.lw_rr rd rs1 rs2 31..25=0x10 14..12=7 6..2=0x00 1..0=3 + +# Post-increment and reg-reg stores +p.sb_irpost rs1 rs2 imm12hi imm12lo 14..12=0 6..2=0x0A 1..0=3 +p.sh_irpost rs1 rs2 imm12hi imm12lo 14..12=1 
6..2=0x0A 1..0=3 +p.sw_irpost rs1 rs2 imm12hi imm12lo 14..12=2 6..2=0x0A 1..0=3 +p.sb_rrpost rs1 rs2 prs3 31..25=0x00 14..12=4 6..2=0x0A 1..0=3 +p.sh_rrpost rs1 rs2 prs3 31..25=0x00 14..12=5 6..2=0x0A 1..0=3 +p.sw_rrpost rs1 rs2 prs3 31..25=0x00 14..12=6 6..2=0x0A 1..0=3 +p.sb_rr rs1 rs2 prs3 31..25=0x00 14..12=4 6..2=0x08 1..0=3 +p.sh_rr rs1 rs2 prs3 31..25=0x00 14..12=5 6..2=0x08 1..0=3 +p.sw_rr rs1 rs2 prs3 31..25=0x00 14..12=6 6..2=0x08 1..0=3 + +# Generic ALU operations p.abs rd rs1 31..25=2 24..20=0 14..12=0 6..2=0x0C 1..0=3 p.slet rd rs1 rs2 31..25=2 14..12=2 6..2=0x0C 1..0=3 p.sletu rd rs1 rs2 31..25=2 14..12=3 6..2=0x0C 1..0=3 @@ -25,5 +54,6 @@ p.clipu rd rs1 imm5 31..25=10 14..12=2 6..2=0x0C 1..0=3 p.clipr rd rs1 rs2 31..25=10 14..12=5 6..2=0x0C 1..0=3 p.clipur rd rs1 rs2 31..25=10 14..12=6 6..2=0x0C 1..0=3 +# Immediate branching p.beqimm rs1 imm5 bimm12hi bimm12lo 14..12=2 6..2=0x18 1..0=3 p.bneimm rs1 imm5 bimm12hi bimm12lo 14..12=3 6..2=0x18 1..0=3 diff --git a/toolchain/riscv-opcodes/parse_opcodes b/toolchain/riscv-opcodes/parse_opcodes index a33c7a43c..ad36c229f 100755 --- a/toolchain/riscv-opcodes/parse_opcodes +++ b/toolchain/riscv-opcodes/parse_opcodes @@ -38,6 +38,7 @@ arglut['shamtw'] = (24,20) # for xpulpimg arglut['imm5'] = (24,20) +arglut['prs3'] = (11,7) # for vectors arglut['vd'] = (11,7) From 3617fe47b44c9dcdc31a2ba3b473c9a440a07e61 Mon Sep 17 00:00:00 2001 From: Sergio Mazzola Date: Mon, 30 Nov 2020 18:33:28 +0100 Subject: [PATCH 02/65] [riscv-isa-sim] Add support to Xpulpv2 load/stores --- toolchain/riscv-isa-sim/disasm/disasm.cc | 67 +++++++++++++++++++ toolchain/riscv-isa-sim/riscv/decode.h | 4 ++ .../riscv-isa-sim/riscv/insns/p_lb_irpost.h | 2 + toolchain/riscv-isa-sim/riscv/insns/p_lb_rr.h | 1 + .../riscv-isa-sim/riscv/insns/p_lb_rrpost.h | 2 + .../riscv-isa-sim/riscv/insns/p_lbu_irpost.h | 2 + .../riscv-isa-sim/riscv/insns/p_lbu_rr.h | 1 + .../riscv-isa-sim/riscv/insns/p_lbu_rrpost.h | 2 + .../riscv-isa-sim/riscv/insns/p_lh_irpost.h 
| 2 + toolchain/riscv-isa-sim/riscv/insns/p_lh_rr.h | 1 + .../riscv-isa-sim/riscv/insns/p_lh_rrpost.h | 2 + .../riscv-isa-sim/riscv/insns/p_lhu_irpost.h | 2 + .../riscv-isa-sim/riscv/insns/p_lhu_rr.h | 1 + .../riscv-isa-sim/riscv/insns/p_lhu_rrpost.h | 2 + .../riscv-isa-sim/riscv/insns/p_lw_irpost.h | 2 + toolchain/riscv-isa-sim/riscv/insns/p_lw_rr.h | 1 + .../riscv-isa-sim/riscv/insns/p_lw_rrpost.h | 2 + .../riscv-isa-sim/riscv/insns/p_sb_irpost.h | 2 + toolchain/riscv-isa-sim/riscv/insns/p_sb_rr.h | 1 + .../riscv-isa-sim/riscv/insns/p_sb_rrpost.h | 2 + .../riscv-isa-sim/riscv/insns/p_sh_irpost.h | 2 + toolchain/riscv-isa-sim/riscv/insns/p_sh_rr.h | 1 + .../riscv-isa-sim/riscv/insns/p_sh_rrpost.h | 2 + .../riscv-isa-sim/riscv/insns/p_sw_irpost.h | 2 + toolchain/riscv-isa-sim/riscv/insns/p_sw_rr.h | 1 + .../riscv-isa-sim/riscv/insns/p_sw_rrpost.h | 2 + toolchain/riscv-isa-sim/riscv/riscv.mk.in | 24 +++++++ 27 files changed, 135 insertions(+) create mode 100644 toolchain/riscv-isa-sim/riscv/insns/p_lb_irpost.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/p_lb_rr.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/p_lb_rrpost.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/p_lbu_irpost.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/p_lbu_rr.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/p_lbu_rrpost.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/p_lh_irpost.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/p_lh_rr.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/p_lh_rrpost.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/p_lhu_irpost.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/p_lhu_rr.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/p_lhu_rrpost.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/p_lw_irpost.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/p_lw_rr.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/p_lw_rrpost.h create mode 100644 
toolchain/riscv-isa-sim/riscv/insns/p_sb_irpost.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/p_sb_rr.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/p_sb_rrpost.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/p_sh_irpost.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/p_sh_rr.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/p_sh_rrpost.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/p_sw_irpost.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/p_sw_rr.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/p_sw_rrpost.h diff --git a/toolchain/riscv-isa-sim/disasm/disasm.cc b/toolchain/riscv-isa-sim/disasm/disasm.cc index 9ee83b575..e3b5e7a74 100644 --- a/toolchain/riscv-isa-sim/disasm/disasm.cc +++ b/toolchain/riscv-isa-sim/disasm/disasm.cc @@ -367,6 +367,43 @@ struct : public arg_t { } } p_simm5; +struct : public arg_t { + std::string to_string(insn_t insn) const { + return std::to_string((int)insn.i_imm()) + '(' + xpr_name[insn.rs1()] + "!)"; + } +} load_address_irpost; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return std::string(xpr_name[insn.rs2()]) + '(' + xpr_name[insn.rs1()] + "!)"; + } +} load_address_rrpost; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return std::string(xpr_name[insn.rs2()]) + '(' + xpr_name[insn.rs1()] + ')'; + } +} load_address_rr; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return std::to_string((int)insn.s_imm()) + '(' + xpr_name[insn.rs1()] + "!)"; + } +} store_address_irpost; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return std::string(xpr_name[insn.p_rs3()]) + '(' + xpr_name[insn.rs1()] + "!)"; + } +} store_address_rrpost; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return std::string(xpr_name[insn.p_rs3()]) + '(' + xpr_name[insn.rs1()] + ')'; + } +} store_address_rr; + + typedef struct { reg_t match; reg_t mask; @@ 
-434,6 +471,12 @@ disassembler_t::disassembler_t(int xlen) #define DEFINE_XFTYPE(code) DISASM_INSN(#code, code, 0, {&frd, &xrs1}) #define DEFINE_SFENCE_TYPE(code) DISASM_INSN(#code, code, 0, {&xrs1, &xrs2}) // Xpulpimg + #define DEFINE_PLOAD_IRPOST(code) DISASM_INSN(#code, code, 0, {&xrd, &load_address_irpost}) + #define DEFINE_PLOAD_RRPOST(code) DISASM_INSN(#code, code, 0, {&xrd, &load_address_rrpost}) + #define DEFINE_PLOAD_RR(code) DISASM_INSN(#code, code, 0, {&xrd, &load_address_rr}) + #define DEFINE_PSTORE_IRPOST(code) DISASM_INSN(#code, code, 0, {&xrs2, &store_address_irpost}) + #define DEFINE_PSTORE_RRPOST(code) DISASM_INSN(#code, code, 0, {&xrs2, &store_address_rrpost}) + #define DEFINE_PSTORE_RR(code) DISASM_INSN(#code, code, 0, {&xrs2, &store_address_rr}) #define DEFINE_PITYPE(code) DISASM_INSN(#code, code, 0, {&xrd, &xrs1, &p_zimm5}) #define DEFINE_PBTYPE(code) DISASM_INSN(#code, code, 0, {&xrd, &xrs1, &p_simm5, &branch_target}) @@ -1277,6 +1320,30 @@ disassembler_t::disassembler_t(int xlen) } // Xpulpimg extension + DEFINE_PLOAD_IRPOST(p_lb_irpost); + DEFINE_PLOAD_IRPOST(p_lbu_irpost); + DEFINE_PLOAD_IRPOST(p_lh_irpost); + DEFINE_PLOAD_IRPOST(p_lhu_irpost); + DEFINE_PLOAD_IRPOST(p_lw_irpost); + DEFINE_PLOAD_RRPOST(p_lb_rrpost); + DEFINE_PLOAD_RRPOST(p_lbu_rrpost); + DEFINE_PLOAD_RRPOST(p_lh_rrpost); + DEFINE_PLOAD_RRPOST(p_lhu_rrpost); + DEFINE_PLOAD_RRPOST(p_lw_rrpost); + DEFINE_PLOAD_RR(p_lb_rr); + DEFINE_PLOAD_RR(p_lbu_rr); + DEFINE_PLOAD_RR(p_lh_rr); + DEFINE_PLOAD_RR(p_lhu_rr); + DEFINE_PLOAD_RR(p_lw_rr); + DEFINE_PSTORE_IRPOST(p_sb_irpost); + DEFINE_PSTORE_IRPOST(p_sh_irpost); + DEFINE_PSTORE_IRPOST(p_sw_irpost); + DEFINE_PSTORE_RRPOST(p_sb_rrpost); + DEFINE_PSTORE_RRPOST(p_sh_rrpost); + DEFINE_PSTORE_RRPOST(p_sw_rrpost); + DEFINE_PSTORE_RR(p_sb_rr); + DEFINE_PSTORE_RR(p_sh_rr); + DEFINE_PSTORE_RR(p_sw_rr); DEFINE_R1TYPE(p_abs); DEFINE_RTYPE(p_slet); DEFINE_RTYPE(p_sletu); diff --git a/toolchain/riscv-isa-sim/riscv/decode.h 
b/toolchain/riscv-isa-sim/riscv/decode.h index 63b1e2676..f0dcfa10e 100644 --- a/toolchain/riscv-isa-sim/riscv/decode.h +++ b/toolchain/riscv-isa-sim/riscv/decode.h @@ -131,6 +131,7 @@ class insn_t // Xpulpimg uint64_t p_zimm5() { return x(20, 5); } int64_t p_simm5() { return xs(20, 5); } + uint64_t p_rs3() { return x(7, 5); } private: insn_bits_t b; @@ -284,6 +285,9 @@ class regfile_t #define sext8(x) ((sreg_t)(int8_t)(x)) #define zext8(x) ((reg_t)(uint8_t)(x)) +#define P_RS3 READ_REG(insn.p_rs3()) /* same as RD, just different semantical value */ +#define WRITE_RS1(value) WRITE_REG(insn.rs1(), value) + #define sext32(x) ((sreg_t)(int32_t)(x)) #define zext32(x) ((reg_t)(uint32_t)(x)) diff --git a/toolchain/riscv-isa-sim/riscv/insns/p_lb_irpost.h b/toolchain/riscv-isa-sim/riscv/insns/p_lb_irpost.h new file mode 100644 index 000000000..ed17db162 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/p_lb_irpost.h @@ -0,0 +1,2 @@ +WRITE_RD(MMU.load_int8(RS1)); +WRITE_RS1(RS1 + insn.i_imm()); diff --git a/toolchain/riscv-isa-sim/riscv/insns/p_lb_rr.h b/toolchain/riscv-isa-sim/riscv/insns/p_lb_rr.h new file mode 100644 index 000000000..c32237fe1 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/p_lb_rr.h @@ -0,0 +1 @@ +WRITE_RD(MMU.load_int8(RS1 + sreg_t(RS2))); diff --git a/toolchain/riscv-isa-sim/riscv/insns/p_lb_rrpost.h b/toolchain/riscv-isa-sim/riscv/insns/p_lb_rrpost.h new file mode 100644 index 000000000..9dc2bd93d --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/p_lb_rrpost.h @@ -0,0 +1,2 @@ +WRITE_RD(MMU.load_int8(RS1)); +WRITE_RS1(RS1 + sreg_t(RS2)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/p_lbu_irpost.h b/toolchain/riscv-isa-sim/riscv/insns/p_lbu_irpost.h new file mode 100644 index 000000000..0f015c376 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/p_lbu_irpost.h @@ -0,0 +1,2 @@ +WRITE_RD(MMU.load_uint8(RS1)); +WRITE_RS1(RS1 + insn.i_imm()); diff --git a/toolchain/riscv-isa-sim/riscv/insns/p_lbu_rr.h 
b/toolchain/riscv-isa-sim/riscv/insns/p_lbu_rr.h new file mode 100644 index 000000000..a95ca2a9a --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/p_lbu_rr.h @@ -0,0 +1 @@ +WRITE_RD(MMU.load_uint8(RS1 + sreg_t(RS2))); diff --git a/toolchain/riscv-isa-sim/riscv/insns/p_lbu_rrpost.h b/toolchain/riscv-isa-sim/riscv/insns/p_lbu_rrpost.h new file mode 100644 index 000000000..3456c8aec --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/p_lbu_rrpost.h @@ -0,0 +1,2 @@ +WRITE_RD(MMU.load_uint8(RS1)); +WRITE_RS1(RS1 + sreg_t(RS2)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/p_lh_irpost.h b/toolchain/riscv-isa-sim/riscv/insns/p_lh_irpost.h new file mode 100644 index 000000000..3fea47c18 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/p_lh_irpost.h @@ -0,0 +1,2 @@ +WRITE_RD(MMU.load_int16(RS1)); +WRITE_RS1(RS1 + insn.i_imm()); diff --git a/toolchain/riscv-isa-sim/riscv/insns/p_lh_rr.h b/toolchain/riscv-isa-sim/riscv/insns/p_lh_rr.h new file mode 100644 index 000000000..cd5bf8219 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/p_lh_rr.h @@ -0,0 +1 @@ +WRITE_RD(MMU.load_int16(RS1 + sreg_t(RS2))); diff --git a/toolchain/riscv-isa-sim/riscv/insns/p_lh_rrpost.h b/toolchain/riscv-isa-sim/riscv/insns/p_lh_rrpost.h new file mode 100644 index 000000000..60353fd3e --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/p_lh_rrpost.h @@ -0,0 +1,2 @@ +WRITE_RD(MMU.load_int16(RS1)); +WRITE_RS1(RS1 + sreg_t(RS2)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/p_lhu_irpost.h b/toolchain/riscv-isa-sim/riscv/insns/p_lhu_irpost.h new file mode 100644 index 000000000..8e7cfb6be --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/p_lhu_irpost.h @@ -0,0 +1,2 @@ +WRITE_RD(MMU.load_uint16(RS1)); +WRITE_RS1(RS1 + insn.i_imm()); diff --git a/toolchain/riscv-isa-sim/riscv/insns/p_lhu_rr.h b/toolchain/riscv-isa-sim/riscv/insns/p_lhu_rr.h new file mode 100644 index 000000000..6568736a7 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/p_lhu_rr.h @@ -0,0 +1 @@ 
+WRITE_RD(MMU.load_uint16(RS1 + sreg_t(RS2))); diff --git a/toolchain/riscv-isa-sim/riscv/insns/p_lhu_rrpost.h b/toolchain/riscv-isa-sim/riscv/insns/p_lhu_rrpost.h new file mode 100644 index 000000000..195222ac0 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/p_lhu_rrpost.h @@ -0,0 +1,2 @@ +WRITE_RD(MMU.load_uint16(RS1)); +WRITE_RS1(RS1 + sreg_t(RS2)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/p_lw_irpost.h b/toolchain/riscv-isa-sim/riscv/insns/p_lw_irpost.h new file mode 100644 index 000000000..fb77d8723 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/p_lw_irpost.h @@ -0,0 +1,2 @@ +WRITE_RD(MMU.load_int32(RS1)); +WRITE_RS1(RS1 + insn.i_imm()); diff --git a/toolchain/riscv-isa-sim/riscv/insns/p_lw_rr.h b/toolchain/riscv-isa-sim/riscv/insns/p_lw_rr.h new file mode 100644 index 000000000..78fa33231 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/p_lw_rr.h @@ -0,0 +1 @@ +WRITE_RD(MMU.load_int32(RS1 + sreg_t(RS2))); diff --git a/toolchain/riscv-isa-sim/riscv/insns/p_lw_rrpost.h b/toolchain/riscv-isa-sim/riscv/insns/p_lw_rrpost.h new file mode 100644 index 000000000..e315c5dfe --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/p_lw_rrpost.h @@ -0,0 +1,2 @@ +WRITE_RD(MMU.load_int32(RS1)); +WRITE_RS1(RS1 + sreg_t(RS2)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/p_sb_irpost.h b/toolchain/riscv-isa-sim/riscv/insns/p_sb_irpost.h new file mode 100644 index 000000000..9339bc9ca --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/p_sb_irpost.h @@ -0,0 +1,2 @@ +MMU.store_uint8(RS1, RS2); +WRITE_RS1(RS1 + insn.s_imm()); diff --git a/toolchain/riscv-isa-sim/riscv/insns/p_sb_rr.h b/toolchain/riscv-isa-sim/riscv/insns/p_sb_rr.h new file mode 100644 index 000000000..73e49727c --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/p_sb_rr.h @@ -0,0 +1 @@ +MMU.store_uint8(RS1 + sreg_t(P_RS3), RS2); diff --git a/toolchain/riscv-isa-sim/riscv/insns/p_sb_rrpost.h b/toolchain/riscv-isa-sim/riscv/insns/p_sb_rrpost.h new file mode 100644 index 
000000000..044255174 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/p_sb_rrpost.h @@ -0,0 +1,2 @@ +MMU.store_uint8(RS1, RS2); +WRITE_RS1(RS1 + sreg_t(P_RS3)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/p_sh_irpost.h b/toolchain/riscv-isa-sim/riscv/insns/p_sh_irpost.h new file mode 100644 index 000000000..f915c518d --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/p_sh_irpost.h @@ -0,0 +1,2 @@ +MMU.store_uint16(RS1, RS2); +WRITE_RS1(RS1 + insn.s_imm()); diff --git a/toolchain/riscv-isa-sim/riscv/insns/p_sh_rr.h b/toolchain/riscv-isa-sim/riscv/insns/p_sh_rr.h new file mode 100644 index 000000000..f3270bd56 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/p_sh_rr.h @@ -0,0 +1 @@ +MMU.store_uint16(RS1 + sreg_t(P_RS3), RS2); diff --git a/toolchain/riscv-isa-sim/riscv/insns/p_sh_rrpost.h b/toolchain/riscv-isa-sim/riscv/insns/p_sh_rrpost.h new file mode 100644 index 000000000..5043c6287 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/p_sh_rrpost.h @@ -0,0 +1,2 @@ +MMU.store_uint16(RS1, RS2); +WRITE_RS1(RS1 + sreg_t(P_RS3)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/p_sw_irpost.h b/toolchain/riscv-isa-sim/riscv/insns/p_sw_irpost.h new file mode 100644 index 000000000..7ff0406fe --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/p_sw_irpost.h @@ -0,0 +1,2 @@ +MMU.store_uint32(RS1, RS2); +WRITE_RS1(RS1 + insn.s_imm()); diff --git a/toolchain/riscv-isa-sim/riscv/insns/p_sw_rr.h b/toolchain/riscv-isa-sim/riscv/insns/p_sw_rr.h new file mode 100644 index 000000000..6bef97f73 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/p_sw_rr.h @@ -0,0 +1 @@ +MMU.store_uint32(RS1 + sreg_t(P_RS3), RS2); diff --git a/toolchain/riscv-isa-sim/riscv/insns/p_sw_rrpost.h b/toolchain/riscv-isa-sim/riscv/insns/p_sw_rrpost.h new file mode 100644 index 000000000..6382d6d80 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/p_sw_rrpost.h @@ -0,0 +1,2 @@ +MMU.store_uint32(RS1, RS2); +WRITE_RS1(RS1 + sreg_t(P_RS3)); diff --git 
a/toolchain/riscv-isa-sim/riscv/riscv.mk.in b/toolchain/riscv-isa-sim/riscv/riscv.mk.in index af69ae8f0..8ec957ab1 100644 --- a/toolchain/riscv-isa-sim/riscv/riscv.mk.in +++ b/toolchain/riscv-isa-sim/riscv/riscv.mk.in @@ -753,6 +753,30 @@ riscv_insn_ext_v = \ $(riscv_insn_ext_v_ldst) \ riscv_insn_ext_xpulpimg = \ + p_lb_irpost \ + p_lbu_irpost \ + p_lh_irpost \ + p_lhu_irpost \ + p_lw_irpost \ + p_lb_rrpost \ + p_lbu_rrpost \ + p_lh_rrpost \ + p_lhu_rrpost \ + p_lw_rrpost \ + p_lb_rr \ + p_lbu_rr \ + p_lh_rr \ + p_lhu_rr \ + p_lw_rr \ + p_sb_irpost \ + p_sh_irpost \ + p_sw_irpost \ + p_sb_rrpost \ + p_sh_rrpost \ + p_sw_rrpost \ + p_sb_rr \ + p_sh_rr \ + p_sw_rr \ p_abs \ p_slet \ p_sletu \ From f647c27b02f83010c711eaedc6fc7189a254258c Mon Sep 17 00:00:00 2001 From: Sergio Mazzola Date: Tue, 1 Dec 2020 11:17:31 +0100 Subject: [PATCH 03/65] [riscv-gnu-toolchain] Add Xpulpv2 load/store support to Xpulpimg --- toolchain/riscv-gnu-toolchain | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/toolchain/riscv-gnu-toolchain b/toolchain/riscv-gnu-toolchain index 42e484f35..0689d1be3 160000 --- a/toolchain/riscv-gnu-toolchain +++ b/toolchain/riscv-gnu-toolchain @@ -1 +1 @@ -Subproject commit 42e484f35b7832ae0f67eb85bf12c7844f64f089 +Subproject commit 0689d1be314161a1467566105b6abde7a770bf75 From cf2fa3a8279e2961523c575533203e8937425337 Mon Sep 17 00:00:00 2001 From: Sergio Mazzola Date: Wed, 2 Dec 2020 11:47:14 +0100 Subject: [PATCH 04/65] [apps] Set GCC as default compiler --- apps/common/runtime.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/common/runtime.mk b/apps/common/runtime.mk index 07e8b96a5..a7639e288 100644 --- a/apps/common/runtime.mk +++ b/apps/common/runtime.mk @@ -28,7 +28,7 @@ GCC_INSTALL_DIR ?= $(INSTALL_DIR)/riscv-gcc LLVM_INSTALL_DIR ?= $(INSTALL_DIR)/llvm HALIDE_INSTALL_DIR ?= $(INSTALL_DIR)/halide -COMPILER ?= llvm +COMPILER ?= gcc XPULPIMG ?= $(xpulpimg) RISCV_XLEN ?= 32 From 
9fb4faa2b0dc136fabb3084b8841d8113ef593f6 Mon Sep 17 00:00:00 2001 From: Sergio Mazzola Date: Wed, 2 Dec 2020 11:49:53 +0100 Subject: [PATCH 05/65] [snitch] Implement post-increment and reg-reg load/stores in Snitch core --- hardware/deps/snitch/src/snitch.sv | 373 ++++++++++++++++++++++++++++- 1 file changed, 360 insertions(+), 13 deletions(-) diff --git a/hardware/deps/snitch/src/snitch.sv b/hardware/deps/snitch/src/snitch.sv index f71786d1f..7413a5586 100644 --- a/hardware/deps/snitch/src/snitch.sv +++ b/hardware/deps/snitch/src/snitch.sv @@ -81,7 +81,7 @@ module snitch #( ); localparam int RegWidth = RVE ? 4 : 5; - localparam int RegNrReadPorts = 2; + localparam int RegNrReadPorts = snitch_pkg::XPULPIMG ? 3 : 2; logic illegal_inst; logic zero_lsb; @@ -107,7 +107,7 @@ module snitch #( logic [32:0] adder_result; logic [31:0] alu_result; - logic [RegWidth-1:0] rd, rs1, rs2; + logic [RegWidth-1:0] rd, rs1, rs2, rs3; logic stall, lsu_stall; // Register connections logic [RegNrReadPorts-1:0][RegWidth-1:0] gpr_raddr; @@ -118,7 +118,7 @@ module snitch #( logic [2**RegWidth-1:0] sb_d, sb_q; // Load/Store Defines - logic is_load, is_store, is_signed; + logic is_load, is_store, is_signed, is_postincr; logic is_fp_load, is_fp_store; logic ls_misaligned; logic ld_addr_misaligned; @@ -150,9 +150,11 @@ module snitch #( logic lsu_qready, lsu_qvalid; logic lsu_pvalid, lsu_pready; logic [RegWidth-1:0] lsu_rd; + logic [31:0] lsu_qaddr; logic retire_load; // retire a load instruction logic retire_i; // retire the rest of the base instruction set + logic retire_i_rd, retire_i_rs1; // when retire_i = 1, write-back can be on rd or on rs1 logic retire_acc; // retire an instruction we offloaded logic acc_stall; @@ -175,11 +177,13 @@ module snitch #( } alu_op; enum logic [3:0] { - None, Reg, IImmediate, UImmediate, JImmediate, SImmediate, SFImmediate, PC, CSR, CSRImmediate, PBImmediate + None, Reg, IImmediate, UImmediate, JImmediate, SImmediate, SFImmediate, PC, CSR, CSRImmediate, 
PBImmediate, RegRs3 } opa_select, opb_select; - logic write_rd; // write desitnation this cycle + logic write_rd; // write rd desitnation this cycle logic uses_rd; + logic write_rs1; // write rs1 destination this cycle + logic uses_rs1; // useless for now, rs1 always written this cycle enum logic [1:0] {Consec, Alu, Exception} next_pc; enum logic [1:0] {RdAlu, RdConsecPC, RdBypass} rd_select; @@ -229,7 +233,9 @@ module snitch #( // Scoreboard: Keep track of rd dependencies (only loads at the moment) logic operands_ready; logic dst_ready; + logic rs2_ready, rs3_ready; logic opa_ready, opb_ready; + logic dstrd_ready, dstrs1_ready; always_comb begin sb_d = sb_q; @@ -239,13 +245,18 @@ module snitch #( if (retire_acc) sb_d[acc_pid_i[RegWidth-1:0]] = 1'b0; sb_d[0] = 1'b0; end + // rediness of registers connected to opb + assign rs2_ready = (opb_select != Reg & opb_select != SImmediate) | ~sb_q[rs2]; + assign rs3_ready = (opb_select != RegRs3) | ~sb_q[rs3]; // TODO(zarubaf): This can probably be described a bit more efficient assign opa_ready = (opa_select != Reg) | ~sb_q[rs1]; - assign opb_ready = (opb_select != Reg & opb_select != SImmediate) | ~sb_q[rs2]; + assign opb_ready = rs2_ready & rs3_ready; assign operands_ready = opa_ready & opb_ready; // either we are not using the destination register or we need to make // sure that its destination operand is not marked busy in the scoreboard. 
- assign dst_ready = ~uses_rd | (uses_rd & ~sb_q[rd]); + assign dstrd_ready = ~uses_rd | (uses_rd & ~sb_q[rd]); + assign dstrs1_ready = ~uses_rs1 | (uses_rs1 & ~sb_q[rs1]); + assign dst_ready = dstrd_ready & dstrs1_ready; assign valid_instr = (inst_ready_i & inst_valid_o) & operands_ready & dst_ready; // the accelerator interface stalled us @@ -278,6 +289,7 @@ module snitch #( assign rd = inst_data_i[7 + RegWidth - 1:7]; assign rs1 = inst_data_i[15 + RegWidth - 1:15]; assign rs2 = inst_data_i[20 + RegWidth - 1:20]; + assign rs3 = inst_data_i[7 + RegWidth - 1:7]; always_comb begin illegal_inst = 1'b0; @@ -287,11 +299,14 @@ module snitch #( next_pc = Consec; + // set up rd destination rd_select = RdAlu; write_rd = 1'b1; - // if we are writing the field this cycle we need - // an int destination register + // if we are writing the field this cycle we need an int destination register uses_rd = write_rd; + // set up rs1 destination + write_rs1 = 1'b0; + uses_rs1 = write_rs1; rd_bypass = '0; zero_lsb = 1'b0; @@ -299,6 +314,7 @@ module snitch #( // LSU interface is_load = 1'b0; is_store = 1'b0; + is_postincr = 1'b0; is_fp_load = 1'b0; is_fp_store = 1'b0; is_signed = 1'b0; @@ -748,6 +764,320 @@ module snitch #( end /* Xpulpimg extension */ + // Post-increment loads/stores + riscv_instr::P_LB_IRPOST: begin // Xpulpimg: p.lb rd,iimm(rs1!) 
+ if (snitch_pkg::XPULPIMG) begin + write_rd = 1'b0; + uses_rd = 1'b1; + write_rs1 = 1'b1; + is_load = 1'b1; + is_postincr = 1'b1; + is_signed = 1'b1; + opa_select = Reg; + opb_select = IImmediate; + end else begin + illegal_inst = 1'b1; + end + end + riscv_instr::P_LBU_IRPOST: begin // Xpulpimg: p.lbu + if (snitch_pkg::XPULPIMG) begin + write_rd = 1'b0; + uses_rd = 1'b1; + write_rs1 = 1'b1; + is_load = 1'b1; + is_postincr = 1'b1; + opa_select = Reg; + opb_select = IImmediate; + end else begin + illegal_inst = 1'b1; + end + end + riscv_instr::P_LH_IRPOST: begin // Xpulpimg: p.lh + if (snitch_pkg::XPULPIMG) begin + write_rd = 1'b0; + uses_rd = 1'b1; + write_rs1 = 1'b1; + is_load = 1'b1; + is_postincr = 1'b1; + is_signed = 1'b1; + ls_size = HalfWord; + opa_select = Reg; + opb_select = IImmediate; + end else begin + illegal_inst = 1'b1; + end + end + riscv_instr::P_LHU_IRPOST: begin // Xpulpimg: p.lhu + if (snitch_pkg::XPULPIMG) begin + write_rd = 1'b0; + uses_rd = 1'b1; + write_rs1 = 1'b1; + is_load = 1'b1; + is_postincr = 1'b1; + ls_size = HalfWord; + opa_select = Reg; + opb_select = IImmediate; + end else begin + illegal_inst = 1'b1; + end + end + riscv_instr::P_LW_IRPOST: begin // Xpulpimg: p.lw + if (snitch_pkg::XPULPIMG) begin + write_rd = 1'b0; + uses_rd = 1'b1; + write_rs1 = 1'b1; + is_load = 1'b1; + is_postincr = 1'b1; + is_signed = 1'b1; + ls_size = Word; + opa_select = Reg; + opb_select = IImmediate; + end else begin + illegal_inst = 1'b1; + end + end + riscv_instr::P_LB_RRPOST: begin // Xpulpimg: p.lb rd,rs2(rs1!) 
+ if (snitch_pkg::XPULPIMG) begin + write_rd = 1'b0; + uses_rd = 1'b1; + write_rs1 = 1'b1; + is_load = 1'b1; + is_postincr = 1'b1; + is_signed = 1'b1; + opa_select = Reg; + opb_select = Reg; + end else begin + illegal_inst = 1'b1; + end + end + riscv_instr::P_LBU_RRPOST: begin // Xpulpimg: p.lbu + if (snitch_pkg::XPULPIMG) begin + write_rd = 1'b0; + uses_rd = 1'b1; + write_rs1 = 1'b1; + is_load = 1'b1; + is_postincr = 1'b1; + opa_select = Reg; + opb_select = Reg; + end else begin + illegal_inst = 1'b1; + end + end + riscv_instr::P_LH_RRPOST: begin // Xpulpimg: p.lh + if (snitch_pkg::XPULPIMG) begin + write_rd = 1'b0; + uses_rd = 1'b1; + write_rs1 = 1'b1; + is_load = 1'b1; + is_postincr = 1'b1; + is_signed = 1'b1; + ls_size = HalfWord; + opa_select = Reg; + opb_select = Reg; + end else begin + illegal_inst = 1'b1; + end + end + riscv_instr::P_LHU_RRPOST: begin // Xpulpimg: p.lhu + if (snitch_pkg::XPULPIMG) begin + write_rd = 1'b0; + uses_rd = 1'b1; + write_rs1 = 1'b1; + is_load = 1'b1; + is_postincr = 1'b1; + ls_size = HalfWord; + opa_select = Reg; + opb_select = Reg; + end else begin + illegal_inst = 1'b1; + end + end + riscv_instr::P_LW_RRPOST: begin // Xpulpimg: p.lw + if (snitch_pkg::XPULPIMG) begin + write_rd = 1'b0; + uses_rd = 1'b1; + write_rs1 = 1'b1; + is_load = 1'b1; + is_postincr = 1'b1; + is_signed = 1'b1; + ls_size = Word; + opa_select = Reg; + opb_select = Reg; + end else begin + illegal_inst = 1'b1; + end + end + riscv_instr::P_LB_RR: begin // Xpulpimg: p.lb rd,rs2(rs1) + if (snitch_pkg::XPULPIMG) begin + write_rd = 1'b0; + uses_rd = 1'b1; + is_load = 1'b1; + is_signed = 1'b1; + opa_select = Reg; + opb_select = Reg; + end else begin + illegal_inst = 1'b1; + end + end + riscv_instr::P_LBU_RR: begin // Xpulpimg: p.lbu + if (snitch_pkg::XPULPIMG) begin + write_rd = 1'b0; + uses_rd = 1'b1; + is_load = 1'b1; + opa_select = Reg; + opb_select = Reg; + end else begin + illegal_inst = 1'b1; + end + end + riscv_instr::P_LH_RR: begin // Xpulpimg: p.lh + if 
(snitch_pkg::XPULPIMG) begin + write_rd = 1'b0; + uses_rd = 1'b1; + is_load = 1'b1; + is_signed = 1'b1; + ls_size = HalfWord; + opa_select = Reg; + opb_select = Reg; + end else begin + illegal_inst = 1'b1; + end + end + riscv_instr::P_LHU_RR: begin // Xpulpimg: p.lhu + if (snitch_pkg::XPULPIMG) begin + write_rd = 1'b0; + uses_rd = 1'b1; + is_load = 1'b1; + ls_size = HalfWord; + opa_select = Reg; + opb_select = Reg; + end else begin + illegal_inst = 1'b1; + end + end + riscv_instr::P_LW_RR: begin // Xpulpimg: p.lw + if (snitch_pkg::XPULPIMG) begin + write_rd = 1'b0; + uses_rd = 1'b1; + is_load = 1'b1; + is_signed = 1'b1; + ls_size = Word; + opa_select = Reg; + opb_select = Reg; + end else begin + illegal_inst = 1'b1; + end + end + riscv_instr::P_SB_IRPOST: begin // Xpulpimg: p.sb rs2,simm(rs1!) + if (snitch_pkg::XPULPIMG) begin + write_rd = 1'b0; + write_rs1 = 1'b1; + is_store = 1'b1; + is_postincr = 1'b1; + opa_select = Reg; + opb_select = SImmediate; + end else begin + illegal_inst = 1'b1; + end + end + riscv_instr::P_SH_IRPOST: begin // Xpulpimg: p.sh + if (snitch_pkg::XPULPIMG) begin + write_rd = 1'b0; + write_rs1 = 1'b1; + is_store = 1'b1; + is_postincr = 1'b1; + ls_size = HalfWord; + opa_select = Reg; + opb_select = SImmediate; + end else begin + illegal_inst = 1'b1; + end + end + riscv_instr::P_SW_IRPOST: begin // Xpulpimg: p.sw + if (snitch_pkg::XPULPIMG) begin + write_rd = 1'b0; + write_rs1 = 1'b1; + is_store = 1'b1; + is_postincr = 1'b1; + ls_size = Word; + opa_select = Reg; + opb_select = SImmediate; + end else begin + illegal_inst = 1'b1; + end + end + riscv_instr::P_SB_RRPOST: begin // Xpulpimg: p.sb rs2,rs3(rs1!) 
+ if (snitch_pkg::XPULPIMG) begin + write_rd = 1'b0; + write_rs1 = 1'b1; + is_store = 1'b1; + is_postincr = 1'b1; + opa_select = Reg; + opb_select = RegRs3; + end else begin + illegal_inst = 1'b1; + end + end + riscv_instr::P_SH_RRPOST: begin // Xpulpimg: p.sh + if (snitch_pkg::XPULPIMG) begin + write_rd = 1'b0; + write_rs1 = 1'b1; + is_store = 1'b1; + is_postincr = 1'b1; + ls_size = HalfWord; + opa_select = Reg; + opb_select = RegRs3; + end else begin + illegal_inst = 1'b1; + end + end + riscv_instr::P_SW_RRPOST: begin // Xpulpimg: p.sw + if (snitch_pkg::XPULPIMG) begin + write_rd = 1'b0; + write_rs1 = 1'b1; + is_store = 1'b1; + is_postincr = 1'b1; + ls_size = Word; + opa_select = Reg; + opb_select = RegRs3; + end else begin + illegal_inst = 1'b1; + end + end + riscv_instr::P_SB_RR: begin // Xpulpimg: p.sb rs2,rs3(rs1) + if (snitch_pkg::XPULPIMG) begin + write_rd = 1'b0; + is_store = 1'b1; + opa_select = Reg; + opb_select = RegRs3; + end else begin + illegal_inst = 1'b1; + end + end + riscv_instr::P_SH_RR: begin // Xpulpimg: p.sh + if (snitch_pkg::XPULPIMG) begin + write_rd = 1'b0; + is_store = 1'b1; + ls_size = HalfWord; + opa_select = Reg; + opb_select = RegRs3; + end else begin + illegal_inst = 1'b1; + end + end + riscv_instr::P_SW_RR: begin // Xpulpimg: p.sw + if (snitch_pkg::XPULPIMG) begin + write_rd = 1'b0; + is_store = 1'b1; + ls_size = Word; + opa_select = Reg; + opb_select = RegRs3; + end else begin + illegal_inst = 1'b1; + end + end + + // Generic ALU operations // Off-load to IPU coprocessor riscv_instr::P_ABS, // Xpulpimg: p.abs riscv_instr::P_SLET, // Xpulpimg: p.slet @@ -810,6 +1140,8 @@ module snitch #( if (exception) begin write_rd = 1'b0; uses_rd = 1'b0; + write_rs1 = 1'b0; + uses_rs1 = 1'b0; acc_qvalid_o = 1'b0; next_pc = Exception; end @@ -893,12 +1225,17 @@ module snitch #( PC: opb = pc_q; CSR: opb = csr_rvalue; PBImmediate: opb = pbimm; + RegRs3: opb = gpr_rdata[2]; default: opb = '0; endcase end assign gpr_raddr[0] = rs1; assign gpr_raddr[1] 
= rs2; + // connect third read port only if present + if (RegNrReadPorts >= 3) begin : gpr_rs3 + assign gpr_raddr[2] = rs3; + end // -------------------- // ALU @@ -998,7 +1335,7 @@ module snitch #( .lsu_qtag_i ( rd ), .lsu_qwrite ( is_store ), .lsu_qsigned ( is_signed ), - .lsu_qaddr_i ( alu_result ), + .lsu_qaddr_i ( lsu_qaddr ), .lsu_qdata_i ( gpr_rdata[1] ), .lsu_qsize_i ( ls_size ), .lsu_qamo_i ( ls_amo ), @@ -1022,10 +1359,16 @@ module snitch #( .data_pready_o ); + // address can be alu_result (i.e. rs1 + iimm/simm) or rs1 (for post-increment load/stores) + assign lsu_qaddr = is_postincr ? gpr_rdata[0] : alu_result; + assign lsu_qvalid = valid_instr & (is_load | is_store) & ~(ld_addr_misaligned | st_addr_misaligned); // we can retire if we are not stalling and if the instruction is writing a register - assign retire_i = write_rd & valid_instr & (rd != 0); + assign retire_i_rd = write_rd & valid_instr & (rd != 0); + assign retire_i_rs1 = write_rs1 & valid_instr & (rs1 != 0); + // NOTE(smazzola): write-backs on rd and rs1 in the same cycle should be mutually exclusive + assign retire_i = retire_i_rd | retire_i_rs1; // ----------------------- // Unaligned Address Check @@ -1071,7 +1414,9 @@ module snitch #( if (RegNrWritePorts == 1) begin always_comb begin gpr_we[0] = 1'b0; - gpr_waddr[0] = rd; + // NOTE(smazzola): this works because write-backs on rd and rs1 in the same cycle are mutually + // exclusive; if this should change, the following statement has to be written in another form + gpr_waddr[0] = write_rs1 ? 
rs1 : rd; // choose whether to writeback at RF[rs1] for post-increment load/stores gpr_wdata[0] = alu_writeback; // external interfaces lsu_pready = 1'b0; @@ -1099,7 +1444,9 @@ module snitch #( end else if (RegNrWritePorts == 2) begin always_comb begin gpr_we[0] = 1'b0; - gpr_waddr[0] = rd; + // NOTE(smazzola): this works because write-backs on rd and rs1 in the same cycle are mutually + // exclusive; if this should change, the following statement has to be written in another form + gpr_waddr[0] = write_rs1 ? rs1 : rd; // choose whether to writeback at RF[rs1] for post-increment load/stores gpr_wdata[0] = alu_writeback; gpr_we[1] = 1'b0; gpr_waddr[1] = lsu_rd; From a443a2b38fd08681c6c0b606731159f4f26985a7 Mon Sep 17 00:00:00 2001 From: Sergio Mazzola Date: Wed, 2 Dec 2020 12:01:46 +0100 Subject: [PATCH 06/65] [snitch] Remove FPU traces generation from MemPool CC --- hardware/deps/snitch/src/mempool_cc.sv | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hardware/deps/snitch/src/mempool_cc.sv b/hardware/deps/snitch/src/mempool_cc.sv index b6244db67..f0d393d3e 100644 --- a/hardware/deps/snitch/src/mempool_cc.sv +++ b/hardware/deps/snitch/src/mempool_cc.sv @@ -255,8 +255,8 @@ module mempool_cc #( "acc_pid": i_snitch.acc_pid_i, "acc_pdata_32": i_snitch.acc_pdata_i[31:0], // FPU offload - "fpu_offload": (i_snitch.acc_qready_i && i_snitch.acc_qvalid_o && !snitch_pkg::shared_offload(i_snitch.acc_qdata_op_o)), - "is_seq_insn": (i_snitch.inst_data_i ==? 
riscv_instr::FREP) + "fpu_offload": 1'b0, + "is_seq_insn": 1'b0 }; task fmt_extras ( From 1329ba25b3d29c49c91bd918a44d56a7a7324848 Mon Sep 17 00:00:00 2001 From: Sergio Mazzola Date: Tue, 8 Dec 2020 09:45:27 +0100 Subject: [PATCH 07/65] [riscv-tests] Extend test suit to Xpulpv2 load/stores --- .../isa/macros/scalar/test_macros.h | 352 +++++++++++++++++- apps/riscv-tests/isa/rv32uxpulpimg/Makefrag | 6 + .../isa/rv32uxpulpimg/p_lb_irpost.S | 70 ++++ apps/riscv-tests/isa/rv32uxpulpimg/p_lb_rr.S | 76 ++++ .../isa/rv32uxpulpimg/p_lb_rrpost.S | 76 ++++ .../isa/rv32uxpulpimg/p_lbu_irpost.S | 70 ++++ apps/riscv-tests/isa/rv32uxpulpimg/p_lbu_rr.S | 77 ++++ .../isa/rv32uxpulpimg/p_lbu_rrpost.S | 76 ++++ .../isa/rv32uxpulpimg/p_lh_irpost.S | 70 ++++ apps/riscv-tests/isa/rv32uxpulpimg/p_lh_rr.S | 76 ++++ .../isa/rv32uxpulpimg/p_lh_rrpost.S | 76 ++++ .../isa/rv32uxpulpimg/p_lhu_irpost.S | 70 ++++ apps/riscv-tests/isa/rv32uxpulpimg/p_lhu_rr.S | 76 ++++ .../isa/rv32uxpulpimg/p_lhu_rrpost.S | 76 ++++ .../isa/rv32uxpulpimg/p_lw_irpost.S | 70 ++++ apps/riscv-tests/isa/rv32uxpulpimg/p_lw_rr.S | 76 ++++ .../isa/rv32uxpulpimg/p_lw_rrpost.S | 76 ++++ .../isa/rv32uxpulpimg/p_sb_irpost.S | 71 ++++ apps/riscv-tests/isa/rv32uxpulpimg/p_sb_rr.S | 71 ++++ .../isa/rv32uxpulpimg/p_sb_rrpost.S | 71 ++++ .../isa/rv32uxpulpimg/p_sh_irpost.S | 71 ++++ apps/riscv-tests/isa/rv32uxpulpimg/p_sh_rr.S | 71 ++++ .../isa/rv32uxpulpimg/p_sh_rrpost.S | 71 ++++ .../isa/rv32uxpulpimg/p_sw_irpost.S | 71 ++++ apps/riscv-tests/isa/rv32uxpulpimg/p_sw_rr.S | 72 ++++ .../isa/rv32uxpulpimg/p_sw_rrpost.S | 71 ++++ apps/riscv-tests/isa/snitch_isa.mk | 6 + 27 files changed, 2114 insertions(+), 1 deletion(-) create mode 100644 apps/riscv-tests/isa/rv32uxpulpimg/p_lb_irpost.S create mode 100644 apps/riscv-tests/isa/rv32uxpulpimg/p_lb_rr.S create mode 100644 apps/riscv-tests/isa/rv32uxpulpimg/p_lb_rrpost.S create mode 100644 apps/riscv-tests/isa/rv32uxpulpimg/p_lbu_irpost.S create mode 100644 
apps/riscv-tests/isa/rv32uxpulpimg/p_lbu_rr.S create mode 100644 apps/riscv-tests/isa/rv32uxpulpimg/p_lbu_rrpost.S create mode 100644 apps/riscv-tests/isa/rv32uxpulpimg/p_lh_irpost.S create mode 100644 apps/riscv-tests/isa/rv32uxpulpimg/p_lh_rr.S create mode 100644 apps/riscv-tests/isa/rv32uxpulpimg/p_lh_rrpost.S create mode 100644 apps/riscv-tests/isa/rv32uxpulpimg/p_lhu_irpost.S create mode 100644 apps/riscv-tests/isa/rv32uxpulpimg/p_lhu_rr.S create mode 100644 apps/riscv-tests/isa/rv32uxpulpimg/p_lhu_rrpost.S create mode 100644 apps/riscv-tests/isa/rv32uxpulpimg/p_lw_irpost.S create mode 100644 apps/riscv-tests/isa/rv32uxpulpimg/p_lw_rr.S create mode 100644 apps/riscv-tests/isa/rv32uxpulpimg/p_lw_rrpost.S create mode 100644 apps/riscv-tests/isa/rv32uxpulpimg/p_sb_irpost.S create mode 100644 apps/riscv-tests/isa/rv32uxpulpimg/p_sb_rr.S create mode 100644 apps/riscv-tests/isa/rv32uxpulpimg/p_sb_rrpost.S create mode 100644 apps/riscv-tests/isa/rv32uxpulpimg/p_sh_irpost.S create mode 100644 apps/riscv-tests/isa/rv32uxpulpimg/p_sh_rr.S create mode 100644 apps/riscv-tests/isa/rv32uxpulpimg/p_sh_rrpost.S create mode 100644 apps/riscv-tests/isa/rv32uxpulpimg/p_sw_irpost.S create mode 100644 apps/riscv-tests/isa/rv32uxpulpimg/p_sw_rr.S create mode 100644 apps/riscv-tests/isa/rv32uxpulpimg/p_sw_rrpost.S diff --git a/apps/riscv-tests/isa/macros/scalar/test_macros.h b/apps/riscv-tests/isa/macros/scalar/test_macros.h index 0eacde614..41366922e 100644 --- a/apps/riscv-tests/isa/macros/scalar/test_macros.h +++ b/apps/riscv-tests/isa/macros/scalar/test_macros.h @@ -264,7 +264,7 @@ test_ ## testnum: \ ) #----------------------------------------------------------------------- -# Test memory instructions +# Test memory instructions (immediate offset) #----------------------------------------------------------------------- #define TEST_LD_OP( testnum, inst, result, offset, base ) \ @@ -340,6 +340,356 @@ test_ ## testnum: \ li x5, 2; \ bne x4, x5, 1b \ 
+#----------------------------------------------------------------------- +# Test post-increment memory instructions (immediate offset) +#----------------------------------------------------------------------- + +#define TEST_LD_POST_OP( testnum, inst, load_result, offset, base ) \ + TEST_CASE( testnum, x14, load_result, \ + la x1, base; \ + addi x15, x1, offset; \ + inst x14, offset(x1!); \ + ) \ + bne x1, x15, fail; + +#define TEST_ST_POST_OP( testnum, store_inst, load_inst, store_result, offset, base ) \ + TEST_CASE( testnum, x14, store_result, \ + la x1, base; \ + la x15, base; \ + li x2, store_result; \ + store_inst x2, offset(x1!); \ + load_inst x14, 0(x15); \ + ) \ + addi x15, x15, offset; \ + bne x1, x15, fail; + +#define TEST_LD_POST_DEST1_BYPASS( testnum, nop_cycles, inst, load_result, offset, base ) \ +test_ ## testnum: \ + li TESTNUM, testnum; \ + li x4, 0; \ +1: la x1, base; \ + inst x14, offset(x1!); \ + TEST_INSERT_NOPS_ ## nop_cycles \ + addi x6, x14, 0; \ + li x7, load_result; \ + bne x6, x7, fail; \ + addi x4, x4, 1; \ + li x5, 2; \ + bne x4, x5, 1b; \ + +#define TEST_LD_POST_DEST2_BYPASS( testnum, nop_cycles, inst, offset, base ) \ +test_ ## testnum: \ + li TESTNUM, testnum; \ + li x4, 0; \ +1: la x1, base; \ + addi x7, x1, offset; \ + inst x14, offset(x1!); \ + TEST_INSERT_NOPS_ ## nop_cycles \ + addi x6, x1, 0; \ + bne x6, x7, fail; \ + addi x4, x4, 1; \ + li x5, 2; \ + bne x4, x5, 1b; \ + +#define TEST_LD_POST_SRC1_BYPASS( testnum, nop_cycles, inst, load_result, offset, base ) \ +test_ ## testnum: \ + li TESTNUM, testnum; \ + li x4, 0; \ +1: la x1, base; \ + TEST_INSERT_NOPS_ ## nop_cycles \ + inst x14, offset(x1!); \ + li x7, load_result; \ + bne x14, x7, fail; \ + la x15, base; \ + addi x15, x15, offset; \ + bne x1, x15, fail; \ + addi x4, x4, 1; \ + li x5, 2; \ + bne x4, x5, 1b \ + +#define TEST_ST_POST_SRC12_BYPASS( testnum, src1_nops, src2_nops, store_inst, load_inst, store_result, offset, base ) \ +test_ ## testnum: \ + li TESTNUM, 
testnum; \ + li x4, 0; \ +1: li x1, store_result; \ + TEST_INSERT_NOPS_ ## src1_nops \ + la x2, base; \ + TEST_INSERT_NOPS_ ## src2_nops \ + store_inst x1, offset(x2!); \ + la x15, base; \ + load_inst x14, 0(x15); \ + li x7, store_result; \ + bne x14, x7, fail; \ + addi x15, x15, offset; \ + bne x2, x15, fail; \ + addi x4, x4, 1; \ + li x5, 2; \ + bne x4, x5, 1b \ + +#define TEST_ST_POST_SRC21_BYPASS( testnum, src1_nops, src2_nops, store_inst, load_inst, store_result, offset, base ) \ +test_ ## testnum: \ + li TESTNUM, testnum; \ + li x4, 0; \ +1: la x2, base; \ + TEST_INSERT_NOPS_ ## src1_nops \ + li x1, store_result; \ + TEST_INSERT_NOPS_ ## src2_nops \ + store_inst x1, offset(x2!); \ + la x15, base; \ + load_inst x14, 0(x15); \ + li x7, store_result; \ + bne x14, x7, fail; \ + addi x15, x15, offset; \ + bne x2, x15, fail; \ + addi x4, x4, 1; \ + li x5, 2; \ + bne x4, x5, 1b \ + + +# You have to make sure Mem[base] != value, otherwise this test might wrongly succeed +#define TEST_LD_POST_DEST1_WAW( testnum, inst, value, base ) \ + TEST_CASE( testnum, x14, value, \ + la x1, base; \ + inst x14, 0(x1!); \ + li x14, value; \ + ) + +#----------------------------------------------------------------------- +# Test memory instructions (register offset) +#----------------------------------------------------------------------- + +#define TEST_LD_RR_OP( testnum, inst, load_result, offset, base ) \ + TEST_CASE( testnum, x14, load_result, \ + la x1, base; \ + li x16, offset; \ + inst x14, x16(x1); \ + ) \ + la x15, base; \ + bne x1, x15, fail; + +#define TEST_ST_RR_OP( testnum, store_inst, load_inst, store_result, offset, base ) \ + TEST_CASE( testnum, x14, store_result, \ + la x1, base; \ + la x15, base; \ + li x16, offset; \ + li x2, store_result; \ + store_inst x2, x16(x1); \ + load_inst x14, offset(x15); \ + ) \ + bne x1, x15, fail; + +#define TEST_LD_RR_DEST_BYPASS( testnum, nop_cycles, inst, load_result, offset, base ) \ +test_ ## testnum: \ + li TESTNUM, testnum; \ +
li x4, 0; \ +1: la x1, base; \ + li x16, offset; \ + inst x14, x16(x1); \ + TEST_INSERT_NOPS_ ## nop_cycles \ + addi x6, x14, 0; \ + li x7, load_result; \ + bne x6, x7, fail; \ + addi x4, x4, 1; \ + li x5, 2; \ + bne x4, x5, 1b; \ + +#define TEST_LD_RR_SRC12_BYPASS( testnum, src1_nops, src2_nops, inst, load_result, offset, base ) \ +test_ ## testnum: \ + li TESTNUM, testnum; \ + li x4, 0; \ +1: la x1, base; \ + TEST_INSERT_NOPS_ ## src1_nops \ + li x16, offset; \ + TEST_INSERT_NOPS_ ## src2_nops \ + inst x14, x16(x1); \ + li x7, load_result; \ + bne x14, x7, fail; \ + addi x4, x4, 1; \ + li x5, 2; \ + bne x4, x5, 1b \ + +#define TEST_LD_RR_SRC21_BYPASS( testnum, src1_nops, src2_nops, inst, load_result, offset, base ) \ +test_ ## testnum: \ + li TESTNUM, testnum; \ + li x4, 0; \ +1: li x16, offset; \ + TEST_INSERT_NOPS_ ## src1_nops \ + la x1, base; \ + TEST_INSERT_NOPS_ ## src2_nops \ + inst x14, x16(x1); \ + li x7, load_result; \ + bne x14, x7, fail; \ + addi x4, x4, 1; \ + li x5, 2; \ + bne x4, x5, 1b \ + +# Actually reg-reg stores have 3 sources; to avoid too many tests we +# only test rs1 and rs3 bypass (rs2 bypass already tested by others) +#define TEST_ST_RR_SRC12_BYPASS( testnum, src1_nops, src2_nops, store_inst, load_inst, store_result, offset, base ) \ +test_ ## testnum: \ + li TESTNUM, testnum; \ + li x4, 0; \ +1: la x2, base; \ + li x1, store_result; \ + TEST_INSERT_NOPS_ ## src1_nops \ + li x16, offset; \ + TEST_INSERT_NOPS_ ## src2_nops \ + store_inst x1, x16(x2); \ + la x15, base; \ + load_inst x14, offset(x15); \ + li x7, store_result; \ + bne x14, x7, fail; \ + addi x4, x4, 1; \ + li x5, 2; \ + bne x4, x5, 1b \ + +#define TEST_ST_RR_SRC21_BYPASS( testnum, src1_nops, src2_nops, store_inst, load_inst, store_result, offset, base ) \ +test_ ## testnum: \ + li TESTNUM, testnum; \ + li x4, 0; \ +1: la x2, base; \ + li x16, offset; \ + TEST_INSERT_NOPS_ ## src1_nops \ + li x1, store_result; \ + TEST_INSERT_NOPS_ ## src2_nops \ + store_inst x1, x16(x2); \ + 
la x15, base; \ + load_inst x14, offset(x15); \ + li x7, store_result; \ + bne x14, x7, fail; \ + addi x4, x4, 1; \ + li x5, 2; \ + bne x4, x5, 1b \ + +# You have to make sure Mem[base] != value, otherwise this test might wrongly succeed +#define TEST_LD_RR_DEST1_WAW( testnum, inst, value, base ) \ + TEST_CASE( testnum, x14, value, \ + la x1, base; \ + inst x14, x0(x1); \ + li x14, value; \ + ) \ + +#----------------------------------------------------------------------- +# Test post-increment memory instructions (register offset) +#----------------------------------------------------------------------- + +#define TEST_LD_RR_POST_OP( testnum, inst, load_result, offset, base ) \ + TEST_CASE( testnum, x14, load_result, \ + la x1, base; \ + li x16, offset; \ + inst x14, x16(x1!); \ + ) \ + la x15, base; \ + addi x15, x15, offset; \ + bne x1, x15, fail; + +#define TEST_ST_RR_POST_OP( testnum, store_inst, load_inst, store_result, offset, base ) \ + TEST_CASE( testnum, x14, store_result, \ + la x1, base; \ + la x15, base; \ + li x16, offset; \ + li x2, store_result; \ + store_inst x2, x16(x1!); \ + load_inst x14, 0(x15); \ + ) \ + addi x15, x15, offset; \ + bne x1, x15, fail; + +#define TEST_LD_RR_POST_DEST_BYPASS( testnum, nop_cycles, inst, load_result, offset, base ) \ +test_ ## testnum: \ + li TESTNUM, testnum; \ + li x4, 0; \ +1: la x1, base; \ + li x16, offset; \ + inst x14, x16(x1!); \ + TEST_INSERT_NOPS_ ## nop_cycles \ + addi x6, x14, 0; \ + li x7, load_result; \ + bne x6, x7, fail; \ + addi x4, x4, 1; \ + li x5, 2; \ + bne x4, x5, 1b; \ + +#define TEST_LD_RR_POST_SRC12_BYPASS( testnum, src1_nops, src2_nops, inst, load_result, offset, base ) \ +test_ ## testnum: \ + li TESTNUM, testnum; \ + li x4, 0; \ +1: la x1, base; \ + TEST_INSERT_NOPS_ ## src1_nops \ + li x16, offset; \ + TEST_INSERT_NOPS_ ## src2_nops \ + inst x14, x16(x1!); \ + li x7, load_result; \ + bne x14, x7, fail; \ + addi x4, x4, 1; \ + li x5, 2; \ + bne x4, x5, 1b \ + +#define
TEST_LD_RR_POST_SRC21_BYPASS( testnum, src1_nops, src2_nops, inst, load_result, offset, base ) \ +test_ ## testnum: \ + li TESTNUM, testnum; \ + li x4, 0; \ +1: li x16, offset; \ + TEST_INSERT_NOPS_ ## src1_nops \ + la x1, base; \ + TEST_INSERT_NOPS_ ## src2_nops \ + inst x14, x16(x1!); \ + li x7, load_result; \ + bne x14, x7, fail; \ + addi x4, x4, 1; \ + li x5, 2; \ + bne x4, x5, 1b \ + +# Actually reg-reg stores have 3 sources; to avoid too many tests we +# only test rs1 and rs3 bypass (rs2 bypass already tested by others) +#define TEST_ST_RR_POST_SRC12_BYPASS( testnum, src1_nops, src2_nops, store_inst, load_inst, store_result, offset, base ) \ +test_ ## testnum: \ + li TESTNUM, testnum; \ + li x4, 0; \ +1: la x2, base; \ + li x1, store_result; \ + TEST_INSERT_NOPS_ ## src1_nops \ + li x16, offset; \ + TEST_INSERT_NOPS_ ## src2_nops \ + store_inst x1, x16(x2!); \ + la x15, base; \ + load_inst x14, 0 (x15); \ + li x7, store_result; \ + bne x14, x7, fail; \ + addi x4, x4, 1; \ + li x5, 2; \ + bne x4, x5, 1b \ + +#define TEST_ST_RR_POST_SRC21_BYPASS( testnum, src1_nops, src2_nops, store_inst, load_inst, store_result, offset, base ) \ +test_ ## testnum: \ + li TESTNUM, testnum; \ + li x4, 0; \ +1: la x2, base; \ + li x16, offset; \ + TEST_INSERT_NOPS_ ## src1_nops \ + li x1, store_result; \ + TEST_INSERT_NOPS_ ## src2_nops \ + store_inst x1, x16(x2!); \ + la x15, base; \ + load_inst x14, 0(x15); \ + li x7, store_result; \ + bne x14, x7, fail; \ + addi x4, x4, 1; \ + li x5, 2; \ + bne x4, x5, 1b \ + +# You have to make sure Mem[base] !=
value, otherwise this test might wrongly succeed +#define TEST_LD_RR_POST_DEST1_WAW( testnum, inst, value, base ) \ + TEST_CASE( testnum, x14, value, \ + la x1, base; \ + inst x14, x0(x1!); \ + li x14, value; \ + ) \ + +#----------------------------------------------------------------------- +# Test branch instructions +#----------------------------------------------------------------------- + #define TEST_BR2_OP_TAKEN( testnum, inst, val1, val2 ) \ test_ ## testnum: \ li TESTNUM, testnum; \ diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/Makefrag b/apps/riscv-tests/isa/rv32uxpulpimg/Makefrag index f34ab11dd..aa362808d 100644 --- a/apps/riscv-tests/isa/rv32uxpulpimg/Makefrag +++ b/apps/riscv-tests/isa/rv32uxpulpimg/Makefrag @@ -3,6 +3,12 @@ #----------------------------------------------------------------------- rv32uxpulpimg_sc_tests = \ + p_lb_irpost p_lbu_irpost p_lh_irpost p_lhu_irpost p_lw_irpost \ + p_lb_rrpost p_lbu_rrpost p_lh_rrpost p_lhu_rrpost p_lw_rrpost \ + p_lb_rr p_lbu_rr p_lh_rr p_lhu_rr p_lw_rr \ + p_sb_irpost p_sh_irpost p_sw_irpost \ + p_sb_rrpost p_sh_rrpost p_sw_rrpost \ + p_sb_rr p_sh_rr p_sw_rr \ p_abs \ p_slet p_sletu \ p_min p_minu \ diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/p_lb_irpost.S b/apps/riscv-tests/isa/rv32uxpulpimg/p_lb_irpost.S new file mode 100644 index 000000000..2322ca2d6 --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/p_lb_irpost.S @@ -0,0 +1,70 @@ +# See LICENSE for license details. + +#***************************************************************************** +# p_lb_irpost.S +#----------------------------------------------------------------------------- +# +# Test p.lb (immediate-register post-increment) instruction. 
+# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Basic tests + #------------------------------------------------------------- + + # Positive offset + TEST_LD_POST_OP( 2, p.lb, 0xffffffff, 0, tdat ) + TEST_LD_POST_OP( 3, p.lb, 0xffffffff, 1, tdat ) + TEST_LD_POST_OP( 4, p.lb, 0x0000000f, 2, tdat3 ) + TEST_LD_POST_OP( 5, p.lb, 0x0000000f, 3, tdat3 ) + + # Negative offset + TEST_LD_POST_OP( 6, p.lb, 0x00000000, -3, tdat1 ) + TEST_LD_POST_OP( 7, p.lb, 0x00000000, -2, tdat1 ) + TEST_LD_POST_OP( 8, p.lb, 0xfffffff0, -1, tdat2 ) + TEST_LD_POST_OP( 9, p.lb, 0xfffffff0, 0, tdat2 ) + + #------------------------------------------------------------- + # Bypassing tests + #------------------------------------------------------------- + + TEST_LD_POST_DEST1_BYPASS( 10, 0, p.lb, 0xffffffff, 1, tdat0 ) + TEST_LD_POST_DEST1_BYPASS( 11, 1, p.lb, 0x00000000, 1, tdat1 ) + TEST_LD_POST_DEST1_BYPASS( 12, 2, p.lb, 0xfffffff0, 1, tdat2 ) + + TEST_LD_POST_DEST2_BYPASS( 13, 0, p.lb, 2, tdat0 ) + TEST_LD_POST_DEST2_BYPASS( 14, 1, p.lb, 1, tdat1 ) + TEST_LD_POST_DEST2_BYPASS( 15, 2, p.lb, -3, tdat3 ) + + TEST_LD_POST_SRC1_BYPASS( 16, 0, p.lb, 0xffffffff, 1, tdat0 ) + TEST_LD_POST_SRC1_BYPASS( 17, 1, p.lb, 0x0000000f, -1, tdat3 ) + TEST_LD_POST_SRC1_BYPASS( 18, 2, p.lb, 0xfffffff0, 1, tdat2 ) + + #------------------------------------------------------------- + # Test write-after-write hazard + #------------------------------------------------------------- + + TEST_LD_POST_DEST1_WAW( 19, p.lb, 25, tdat ) + TEST_LD_POST_DEST1_WAW( 20, p.lb, -76, tdat ) + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +tdat: +tdat0: .byte 0xff +tdat1: .byte 0x00 +tdat2: .byte 0xf0 +tdat3: .byte 0x0f + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/p_lb_rr.S b/apps/riscv-tests/isa/rv32uxpulpimg/p_lb_rr.S new file mode 100644 index 000000000..6938e133d 
--- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/p_lb_rr.S @@ -0,0 +1,76 @@ +# See LICENSE for license details. + +#***************************************************************************** +# p_lb_rr.S +#----------------------------------------------------------------------------- +# +# Test p.lb (register-register) instruction. +# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Basic tests + #------------------------------------------------------------- + + # Positive offset + TEST_LD_RR_OP( 2, p.lb, 0xffffffff, 0, tdat ) + TEST_LD_RR_OP( 3, p.lb, 0x00000000, 1, tdat ) + TEST_LD_RR_OP( 4, p.lb, 0xfffffff0, 2, tdat ) + TEST_LD_RR_OP( 5, p.lb, 0x0000000f, 3, tdat ) + + # Negative offset + TEST_LD_RR_OP( 6, p.lb, 0xffffffff, -3, tdat3 ) + TEST_LD_RR_OP( 7, p.lb, 0x00000000, -2, tdat3 ) + TEST_LD_RR_OP( 8, p.lb, 0xfffffff0, -1, tdat3 ) + TEST_LD_RR_OP( 9, p.lb, 0x0000000f, 0, tdat3 ) + + #------------------------------------------------------------- + # Bypassing tests + #------------------------------------------------------------- + + TEST_LD_RR_DEST_BYPASS( 10, 0, p.lb, 0x00000000, 1, tdat0 ) + TEST_LD_RR_DEST_BYPASS( 11, 1, p.lb, 0xfffffff0, 1, tdat1 ) + TEST_LD_RR_DEST_BYPASS( 12, 2, p.lb, 0x0000000f, 1, tdat2 ) + + TEST_LD_RR_SRC12_BYPASS( 13, 0, 0, p.lb, 0x00000000, 1, tdat0 ) + TEST_LD_RR_SRC12_BYPASS( 14, 0, 1, p.lb, 0xfffffff0, -1, tdat3 ) + TEST_LD_RR_SRC12_BYPASS( 15, 0, 2, p.lb, 0x0000000f, 1, tdat2 ) + TEST_LD_RR_SRC12_BYPASS( 16, 1, 0, p.lb, 0x00000000, 1, tdat0 ) + TEST_LD_RR_SRC12_BYPASS( 17, 1, 1, p.lb, 0xfffffff0, -1, tdat3 ) + TEST_LD_RR_SRC12_BYPASS( 18, 2, 0, p.lb, 0x0000000f, 1, tdat2 ) + + TEST_LD_RR_SRC21_BYPASS( 19, 0, 0, p.lb, 0x00000000, 1, tdat0 ) + TEST_LD_RR_SRC21_BYPASS( 20, 0, 1, p.lb, 0xfffffff0, -1, tdat3 ) + TEST_LD_RR_SRC21_BYPASS( 21, 0, 2, p.lb, 0x0000000f, 1, tdat2 ) + TEST_LD_RR_SRC21_BYPASS( 22, 1, 0, p.lb, 
0x00000000, 1, tdat0 ) + TEST_LD_RR_SRC21_BYPASS( 23, 1, 1, p.lb, 0xfffffff0, -1, tdat3 ) + TEST_LD_RR_SRC21_BYPASS( 24, 2, 0, p.lb, 0x0000000f, 1, tdat2 ) + + #------------------------------------------------------------- + # Test write-after-write hazard + #------------------------------------------------------------- + + TEST_LD_RR_DEST1_WAW( 25, p.lb, 25, tdat ) + TEST_LD_RR_DEST1_WAW( 26, p.lb, -76, tdat ) + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +tdat: +tdat0: .byte 0xff +tdat1: .byte 0x00 +tdat2: .byte 0xf0 +tdat3: .byte 0x0f + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/p_lb_rrpost.S b/apps/riscv-tests/isa/rv32uxpulpimg/p_lb_rrpost.S new file mode 100644 index 000000000..afa33f659 --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/p_lb_rrpost.S @@ -0,0 +1,76 @@ +# See LICENSE for license details. + +#***************************************************************************** +# p_lb_rrpost.S +#----------------------------------------------------------------------------- +# +# Test p.lb (register-register post-increment) instruction. 
+# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Basic tests + #------------------------------------------------------------- + + # Positive offset + TEST_LD_RR_POST_OP( 2, p.lb, 0xffffffff, 0, tdat ) + TEST_LD_RR_POST_OP( 3, p.lb, 0xffffffff, 1, tdat ) + TEST_LD_RR_POST_OP( 4, p.lb, 0x0000000f, 2, tdat3 ) + TEST_LD_RR_POST_OP( 5, p.lb, 0x0000000f, 3, tdat3 ) + + # Negative offset + TEST_LD_RR_POST_OP( 6, p.lb, 0x00000000, -3, tdat1 ) + TEST_LD_RR_POST_OP( 7, p.lb, 0x00000000, -2, tdat1 ) + TEST_LD_RR_POST_OP( 8, p.lb, 0xfffffff0, -1, tdat2 ) + TEST_LD_RR_POST_OP( 9, p.lb, 0xfffffff0, 0, tdat2 ) + + #------------------------------------------------------------- + # Bypassing tests + #------------------------------------------------------------- + + TEST_LD_RR_POST_DEST_BYPASS( 10, 0, p.lb, 0xffffffff, 1, tdat0 ) + TEST_LD_RR_POST_DEST_BYPASS( 11, 1, p.lb, 0x00000000, 1, tdat1 ) + TEST_LD_RR_POST_DEST_BYPASS( 12, 2, p.lb, 0xfffffff0, 1, tdat2 ) + + TEST_LD_RR_POST_SRC12_BYPASS( 13, 0, 0, p.lb, 0xffffffff, 1, tdat0 ) + TEST_LD_RR_POST_SRC12_BYPASS( 14, 0, 1, p.lb, 0x0000000f, -1, tdat3 ) + TEST_LD_RR_POST_SRC12_BYPASS( 15, 0, 2, p.lb, 0xfffffff0, 1, tdat2 ) + TEST_LD_RR_POST_SRC12_BYPASS( 16, 1, 0, p.lb, 0xffffffff, 1, tdat0 ) + TEST_LD_RR_POST_SRC12_BYPASS( 17, 1, 1, p.lb, 0x0000000f, -1, tdat3 ) + TEST_LD_RR_POST_SRC12_BYPASS( 18, 2, 0, p.lb, 0xfffffff0, 1, tdat2 ) + + TEST_LD_RR_POST_SRC21_BYPASS( 19, 0, 0, p.lb, 0xffffffff, 1, tdat0 ) + TEST_LD_RR_POST_SRC21_BYPASS( 20, 0, 1, p.lb, 0x0000000f, -1, tdat3 ) + TEST_LD_RR_POST_SRC21_BYPASS( 21, 0, 2, p.lb, 0xfffffff0, 1, tdat2 ) + TEST_LD_RR_POST_SRC21_BYPASS( 22, 1, 0, p.lb, 0xffffffff, 1, tdat0 ) + TEST_LD_RR_POST_SRC21_BYPASS( 23, 1, 1, p.lb, 0x0000000f, -1, tdat3 ) + TEST_LD_RR_POST_SRC21_BYPASS( 24, 2, 0, p.lb, 0xfffffff0, 1, tdat2 ) + + #------------------------------------------------------------- + 
# Test write-after-write hazard + #------------------------------------------------------------- + + TEST_LD_RR_POST_DEST1_WAW( 25, p.lb, 25, tdat ) + TEST_LD_RR_POST_DEST1_WAW( 26, p.lb, -76, tdat ) + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +tdat: +tdat0: .byte 0xff +tdat1: .byte 0x00 +tdat2: .byte 0xf0 +tdat3: .byte 0x0f + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/p_lbu_irpost.S b/apps/riscv-tests/isa/rv32uxpulpimg/p_lbu_irpost.S new file mode 100644 index 000000000..a8c72a965 --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/p_lbu_irpost.S @@ -0,0 +1,70 @@ +# See LICENSE for license details. + +#***************************************************************************** +# p_lbu_irpost.S +#----------------------------------------------------------------------------- +# +# Test p.lbu (immediate-register post-increment) instruction. +# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Basic tests + #------------------------------------------------------------- + + # Positive offset + TEST_LD_POST_OP( 2, p.lbu, 0x000000ff, 0, tdat ) + TEST_LD_POST_OP( 3, p.lbu, 0x000000ff, 1, tdat ) + TEST_LD_POST_OP( 4, p.lbu, 0x0000000f, 2, tdat3 ) + TEST_LD_POST_OP( 5, p.lbu, 0x0000000f, 3, tdat3 ) + + # Negative offset + TEST_LD_POST_OP( 6, p.lbu, 0x00000000, -3, tdat1 ) + TEST_LD_POST_OP( 7, p.lbu, 0x00000000, -2, tdat1 ) + TEST_LD_POST_OP( 8, p.lbu, 0x000000f0, -1, tdat2 ) + TEST_LD_POST_OP( 9, p.lbu, 0x000000f0, 0, tdat2 ) + + #------------------------------------------------------------- + # Bypassing tests + #------------------------------------------------------------- + + TEST_LD_POST_DEST1_BYPASS( 10, 0, p.lbu, 0x000000ff, 1, tdat0 ) + TEST_LD_POST_DEST1_BYPASS( 11, 1, p.lbu, 0x00000000, 1, tdat1 ) + TEST_LD_POST_DEST1_BYPASS( 12, 2, p.lbu, 0x000000f0, 1, tdat2 ) + + TEST_LD_POST_DEST2_BYPASS( 
13, 0, p.lbu, 2, tdat0 ) + TEST_LD_POST_DEST2_BYPASS( 14, 1, p.lbu, 1, tdat1 ) + TEST_LD_POST_DEST2_BYPASS( 15, 2, p.lbu, -3, tdat3 ) + + TEST_LD_POST_SRC1_BYPASS( 16, 0, p.lbu, 0x000000ff, 1, tdat0 ) + TEST_LD_POST_SRC1_BYPASS( 17, 1, p.lbu, 0x0000000f, -1, tdat3 ) + TEST_LD_POST_SRC1_BYPASS( 18, 2, p.lbu, 0x000000f0, 1, tdat2 ) + + #------------------------------------------------------------- + # Test write-after-write hazard + #------------------------------------------------------------- + + TEST_LD_POST_DEST1_WAW( 19, p.lbu, 25, tdat ) + TEST_LD_POST_DEST1_WAW( 20, p.lbu, -76, tdat ) + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +tdat: +tdat0: .byte 0xff +tdat1: .byte 0x00 +tdat2: .byte 0xf0 +tdat3: .byte 0x0f + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/p_lbu_rr.S b/apps/riscv-tests/isa/rv32uxpulpimg/p_lbu_rr.S new file mode 100644 index 000000000..ba0b33235 --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/p_lbu_rr.S @@ -0,0 +1,77 @@ +# See LICENSE for license details. + +#***************************************************************************** +# p_lbu_rr.S +#----------------------------------------------------------------------------- +# +# Test p.lbu (register-register) instruction. 
+# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Basic tests + #------------------------------------------------------------- + + # Positive offset + TEST_LD_RR_OP( 2, p.lbu, 0x000000ff, 0, tdat ) + TEST_LD_RR_OP( 3, p.lbu, 0x00000000, 1, tdat ) + TEST_LD_RR_OP( 4, p.lbu, 0x000000f0, 2, tdat ) + TEST_LD_RR_OP( 5, p.lbu, 0x0000000f, 3, tdat ) + + # Negative offset + TEST_LD_RR_OP( 6, p.lbu, 0x000000ff, -3, tdat3 ) + TEST_LD_RR_OP( 7, p.lbu, 0x00000000, -2, tdat3 ) + TEST_LD_RR_OP( 8, p.lbu, 0x000000f0, -1, tdat3 ) + TEST_LD_RR_OP( 9, p.lbu, 0x0000000f, 0, tdat3 ) + + #------------------------------------------------------------- + # Bypassing tests + #------------------------------------------------------------- + + TEST_LD_RR_DEST_BYPASS( 10, 0, p.lbu, 0x00000000, 1, tdat0 ) + TEST_LD_RR_DEST_BYPASS( 11, 1, p.lbu, 0x000000f0, 1, tdat1 ) + TEST_LD_RR_DEST_BYPASS( 12, 2, p.lbu, 0x0000000f, 1, tdat2 ) + + TEST_LD_RR_SRC12_BYPASS( 13, 0, 0, p.lbu, 0x00000000, 1, tdat0 ) + TEST_LD_RR_SRC12_BYPASS( 14, 0, 1, p.lbu, 0x000000f0, -1, tdat3 ) + TEST_LD_RR_SRC12_BYPASS( 15, 0, 2, p.lbu, 0x0000000f, 1, tdat2 ) + TEST_LD_RR_SRC12_BYPASS( 16, 1, 0, p.lbu, 0x00000000, 1, tdat0 ) + TEST_LD_RR_SRC12_BYPASS( 17, 1, 1, p.lbu, 0x000000f0, -1, tdat3 ) + TEST_LD_RR_SRC12_BYPASS( 18, 2, 0, p.lbu, 0x0000000f, 1, tdat2 ) + + TEST_LD_RR_SRC21_BYPASS( 19, 0, 0, p.lbu, 0x00000000, 1, tdat0 ) + TEST_LD_RR_SRC21_BYPASS( 20, 0, 1, p.lbu, 0x000000f0, -1, tdat3 ) + TEST_LD_RR_SRC21_BYPASS( 21, 0, 2, p.lbu, 0x0000000f, 1, tdat2 ) + TEST_LD_RR_SRC21_BYPASS( 22, 1, 0, p.lbu, 0x00000000, 1, tdat0 ) + TEST_LD_RR_SRC21_BYPASS( 23, 1, 1, p.lbu, 0x000000f0, -1, tdat3 ) + TEST_LD_RR_SRC21_BYPASS( 24, 2, 0, p.lbu, 0x0000000f, 1, tdat2 ) + + #------------------------------------------------------------- + # Test write-after-write hazard + 
#------------------------------------------------------------- + + TEST_LD_RR_DEST1_WAW( 25, p.lbu, 25, tdat ) + TEST_LD_RR_DEST1_WAW( 26, p.lbu, -76, tdat ) + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +tdat: +tdat0: .byte 0xff +tdat1: .byte 0x00 +tdat2: .byte 0xf0 +tdat3: .byte 0x0f + +RVTEST_DATA_END + diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/p_lbu_rrpost.S b/apps/riscv-tests/isa/rv32uxpulpimg/p_lbu_rrpost.S new file mode 100644 index 000000000..9582ca6f7 --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/p_lbu_rrpost.S @@ -0,0 +1,76 @@ +# See LICENSE for license details. + +#***************************************************************************** +# p_lbu_rrpost.S +#----------------------------------------------------------------------------- +# +# Test p.lbu (register-register post-increment) instruction. +# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Basic tests + #------------------------------------------------------------- + + # Positive offset + TEST_LD_RR_POST_OP( 2, p.lbu, 0x000000ff, 0, tdat ) + TEST_LD_RR_POST_OP( 3, p.lbu, 0x000000ff, 1, tdat ) + TEST_LD_RR_POST_OP( 4, p.lbu, 0x0000000f, 2, tdat3 ) + TEST_LD_RR_POST_OP( 5, p.lbu, 0x0000000f, 3, tdat3 ) + + # Negative offset + TEST_LD_RR_POST_OP( 6, p.lbu, 0x00000000, -3, tdat1 ) + TEST_LD_RR_POST_OP( 7, p.lbu, 0x00000000, -2, tdat1 ) + TEST_LD_RR_POST_OP( 8, p.lbu, 0x000000f0, -1, tdat2 ) + TEST_LD_RR_POST_OP( 9, p.lbu, 0x000000f0, 0, tdat2 ) + + #------------------------------------------------------------- + # Bypassing tests + #------------------------------------------------------------- + + TEST_LD_RR_POST_DEST_BYPASS( 10, 0, p.lbu, 0x000000ff, 1, tdat0 ) + TEST_LD_RR_POST_DEST_BYPASS( 11, 1, p.lbu, 0x00000000, 1, tdat1 ) + TEST_LD_RR_POST_DEST_BYPASS( 12, 2, p.lbu, 0x000000f0, 1, tdat2 ) + + TEST_LD_RR_POST_SRC12_BYPASS( 13, 0, 
0, p.lbu, 0x000000ff, 1, tdat0 ) + TEST_LD_RR_POST_SRC12_BYPASS( 14, 0, 1, p.lbu, 0x0000000f, -1, tdat3 ) + TEST_LD_RR_POST_SRC12_BYPASS( 15, 0, 2, p.lbu, 0x000000f0, 1, tdat2 ) + TEST_LD_RR_POST_SRC12_BYPASS( 16, 1, 0, p.lbu, 0x000000ff, 1, tdat0 ) + TEST_LD_RR_POST_SRC12_BYPASS( 17, 1, 1, p.lbu, 0x0000000f, -1, tdat3 ) + TEST_LD_RR_POST_SRC12_BYPASS( 18, 2, 0, p.lbu, 0x000000f0, 1, tdat2 ) + + TEST_LD_RR_POST_SRC21_BYPASS( 19, 0, 0, p.lbu, 0x000000ff, 1, tdat0 ) + TEST_LD_RR_POST_SRC21_BYPASS( 20, 0, 1, p.lbu, 0x0000000f, -1, tdat3 ) + TEST_LD_RR_POST_SRC21_BYPASS( 21, 0, 2, p.lbu, 0x000000f0, 1, tdat2 ) + TEST_LD_RR_POST_SRC21_BYPASS( 22, 1, 0, p.lbu, 0x000000ff, 1, tdat0 ) + TEST_LD_RR_POST_SRC21_BYPASS( 23, 1, 1, p.lbu, 0x0000000f, -1, tdat3 ) + TEST_LD_RR_POST_SRC21_BYPASS( 24, 2, 0, p.lbu, 0x000000f0, 1, tdat2 ) + + #------------------------------------------------------------- + # Test write-after-write hazard + #------------------------------------------------------------- + + TEST_LD_RR_POST_DEST1_WAW( 25, p.lbu, 25, tdat ) + TEST_LD_RR_POST_DEST1_WAW( 26, p.lbu, -76, tdat ) + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +tdat: +tdat0: .byte 0xff +tdat1: .byte 0x00 +tdat2: .byte 0xf0 +tdat3: .byte 0x0f + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/p_lh_irpost.S b/apps/riscv-tests/isa/rv32uxpulpimg/p_lh_irpost.S new file mode 100644 index 000000000..ca376a2a6 --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/p_lh_irpost.S @@ -0,0 +1,70 @@ +# See LICENSE for license details. + +#***************************************************************************** +# p_lh_irpost.S +#----------------------------------------------------------------------------- +# +# Test p.lh (immediate-register post-increment) instruction. 
+# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Basic tests + #------------------------------------------------------------- + + # Positive offset + TEST_LD_POST_OP( 2, p.lh, 0x000000ff, 0, tdat ) + TEST_LD_POST_OP( 3, p.lh, 0x000000ff, 2, tdat ) + TEST_LD_POST_OP( 4, p.lh, 0xfffff00f, 4, tdat3 ) + TEST_LD_POST_OP( 5, p.lh, 0xfffff00f, 6, tdat3 ) + + # Negative offset + TEST_LD_POST_OP( 6, p.lh, 0xffffff00, -6, tdat1 ) + TEST_LD_POST_OP( 7, p.lh, 0xffffff00, -4, tdat1 ) + TEST_LD_POST_OP( 8, p.lh, 0x00000ff0, -2, tdat2 ) + TEST_LD_POST_OP( 9, p.lh, 0x00000ff0, 0, tdat2 ) + + #------------------------------------------------------------- + # Bypassing tests + #------------------------------------------------------------- + + TEST_LD_POST_DEST1_BYPASS( 10, 0, p.lh, 0x000000ff, 2, tdat0 ) + TEST_LD_POST_DEST1_BYPASS( 11, 1, p.lh, 0xffffff00, 2, tdat1 ) + TEST_LD_POST_DEST1_BYPASS( 12, 2, p.lh, 0x00000ff0, 2, tdat2 ) + + TEST_LD_POST_DEST2_BYPASS( 13, 0, p.lh, 4, tdat0 ) + TEST_LD_POST_DEST2_BYPASS( 14, 1, p.lh, 2, tdat1 ) + TEST_LD_POST_DEST2_BYPASS( 15, 2, p.lh, -6, tdat3 ) + + TEST_LD_POST_SRC1_BYPASS( 16, 0, p.lh, 0x000000ff, 2, tdat0 ) + TEST_LD_POST_SRC1_BYPASS( 17, 1, p.lh, 0xfffff00f, -2, tdat3 ) + TEST_LD_POST_SRC1_BYPASS( 18, 2, p.lh, 0x00000ff0, 2, tdat2 ) + + #------------------------------------------------------------- + # Test write-after-write hazard + #------------------------------------------------------------- + + TEST_LD_POST_DEST1_WAW( 19, p.lh, 25, tdat ) + TEST_LD_POST_DEST1_WAW( 20, p.lh, -76, tdat ) + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +tdat: +tdat0: .half 0x00ff +tdat1: .half 0xff00 +tdat2: .half 0x0ff0 +tdat3: .half 0xf00f + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/p_lh_rr.S b/apps/riscv-tests/isa/rv32uxpulpimg/p_lh_rr.S new file mode 100644 index 
000000000..ebc5aabd9 --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/p_lh_rr.S @@ -0,0 +1,76 @@ +# See LICENSE for license details. + +#***************************************************************************** +# p_lh_rr.S +#----------------------------------------------------------------------------- +# +# Test p.lh (register-register) instruction. +# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Basic tests + #------------------------------------------------------------- + + # Positive offset + TEST_LD_RR_OP( 2, p.lh, 0x000000ff, 0, tdat ) + TEST_LD_RR_OP( 3, p.lh, 0xffffff00, 2, tdat ) + TEST_LD_RR_OP( 4, p.lh, 0x00000ff0, 4, tdat ) + TEST_LD_RR_OP( 5, p.lh, 0xfffff00f, 6, tdat ) + + # Negative offset + TEST_LD_RR_OP( 6, p.lh, 0x000000ff, -6, tdat3 ) + TEST_LD_RR_OP( 7, p.lh, 0xffffff00, -4, tdat3 ) + TEST_LD_RR_OP( 8, p.lh, 0x00000ff0, -2, tdat3 ) + TEST_LD_RR_OP( 9, p.lh, 0xfffff00f, 0, tdat3 ) + + #------------------------------------------------------------- + # Bypassing tests + #------------------------------------------------------------- + + TEST_LD_RR_DEST_BYPASS( 10, 0, p.lh, 0xffffff00, 2, tdat0 ) + TEST_LD_RR_DEST_BYPASS( 11, 1, p.lh, 0x00000ff0, 2, tdat1 ) + TEST_LD_RR_DEST_BYPASS( 12, 2, p.lh, 0xfffff00f, 2, tdat2 ) + + TEST_LD_RR_SRC12_BYPASS( 13, 0, 0, p.lh, 0xffffff00, 2, tdat0 ) + TEST_LD_RR_SRC12_BYPASS( 14, 0, 1, p.lh, 0x00000ff0, -2, tdat3 ) + TEST_LD_RR_SRC12_BYPASS( 15, 0, 2, p.lh, 0xfffff00f, 2, tdat2 ) + TEST_LD_RR_SRC12_BYPASS( 16, 1, 0, p.lh, 0xffffff00, 2, tdat0 ) + TEST_LD_RR_SRC12_BYPASS( 17, 1, 1, p.lh, 0x00000ff0, -2, tdat3 ) + TEST_LD_RR_SRC12_BYPASS( 18, 2, 0, p.lh, 0xfffff00f, 2, tdat2 ) + + TEST_LD_RR_SRC21_BYPASS( 19, 0, 0, p.lh, 0xffffff00, 2, tdat0 ) + TEST_LD_RR_SRC21_BYPASS( 20, 0, 1, p.lh, 0x00000ff0, -2, tdat3 ) + TEST_LD_RR_SRC21_BYPASS( 21, 0, 2, p.lh, 0xfffff00f, 2, tdat2 ) + 
TEST_LD_RR_SRC21_BYPASS( 22, 1, 0, p.lh, 0xffffff00, 2, tdat0 ) + TEST_LD_RR_SRC21_BYPASS( 23, 1, 1, p.lh, 0x00000ff0, -2, tdat3 ) + TEST_LD_RR_SRC21_BYPASS( 24, 2, 0, p.lh, 0xfffff00f, 2, tdat2 ) + + #------------------------------------------------------------- + # Test write-after-write hazard + #------------------------------------------------------------- + + TEST_LD_RR_DEST1_WAW( 25, p.lh, 25, tdat ) + TEST_LD_RR_DEST1_WAW( 26, p.lh, -76, tdat ) + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +tdat: +tdat0: .half 0x00ff +tdat1: .half 0xff00 +tdat2: .half 0x0ff0 +tdat3: .half 0xf00f + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/p_lh_rrpost.S b/apps/riscv-tests/isa/rv32uxpulpimg/p_lh_rrpost.S new file mode 100644 index 000000000..64a6281aa --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/p_lh_rrpost.S @@ -0,0 +1,76 @@ +# See LICENSE for license details. + +#***************************************************************************** +# p_lh_rrpost.S +#----------------------------------------------------------------------------- +# +# Test p.lh (register-register post-increment) instruction. 
+# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Basic tests + #------------------------------------------------------------- + + # Positive offset + TEST_LD_RR_POST_OP( 2, p.lh, 0x000000ff, 0, tdat ) + TEST_LD_RR_POST_OP( 3, p.lh, 0x000000ff, 2, tdat ) + TEST_LD_RR_POST_OP( 4, p.lh, 0xffffff00, 4, tdat1 ) + TEST_LD_RR_POST_OP( 5, p.lh, 0xffffff00, 6, tdat1 ) + + # Negative offset + TEST_LD_RR_POST_OP( 6, p.lh, 0x00000ff0, -6, tdat2 ) + TEST_LD_RR_POST_OP( 7, p.lh, 0x00000ff0, -4, tdat2 ) + TEST_LD_RR_POST_OP( 8, p.lh, 0xfffff00f, -2, tdat3 ) + TEST_LD_RR_POST_OP( 9, p.lh, 0xfffff00f, 0, tdat3 ) + + #------------------------------------------------------------- + # Bypassing tests + #------------------------------------------------------------- + + TEST_LD_RR_POST_DEST_BYPASS( 10, 0, p.lh, 0x000000ff, 2, tdat0 ) + TEST_LD_RR_POST_DEST_BYPASS( 11, 1, p.lh, 0xffffff00, 2, tdat1 ) + TEST_LD_RR_POST_DEST_BYPASS( 12, 2, p.lh, 0x00000ff0, 2, tdat2 ) + + TEST_LD_RR_POST_SRC12_BYPASS( 13, 0, 0, p.lh, 0x000000ff, 2, tdat0 ) + TEST_LD_RR_POST_SRC12_BYPASS( 14, 0, 1, p.lh, 0xfffff00f, -2, tdat3 ) + TEST_LD_RR_POST_SRC12_BYPASS( 15, 0, 2, p.lh, 0x00000ff0, 2, tdat2 ) + TEST_LD_RR_POST_SRC12_BYPASS( 16, 1, 0, p.lh, 0x000000ff, 2, tdat0 ) + TEST_LD_RR_POST_SRC12_BYPASS( 17, 1, 1, p.lh, 0xfffff00f, -2, tdat3 ) + TEST_LD_RR_POST_SRC12_BYPASS( 18, 2, 0, p.lh, 0x00000ff0, 2, tdat2 ) + + TEST_LD_RR_POST_SRC21_BYPASS( 19, 0, 0, p.lh, 0x000000ff, 2, tdat0 ) + TEST_LD_RR_POST_SRC21_BYPASS( 20, 0, 1, p.lh, 0xfffff00f, -2, tdat3 ) + TEST_LD_RR_POST_SRC21_BYPASS( 21, 0, 2, p.lh, 0x00000ff0, 2, tdat2 ) + TEST_LD_RR_POST_SRC21_BYPASS( 22, 1, 0, p.lh, 0x000000ff, 2, tdat0 ) + TEST_LD_RR_POST_SRC21_BYPASS( 23, 1, 1, p.lh, 0xfffff00f, -2, tdat3 ) + TEST_LD_RR_POST_SRC21_BYPASS( 24, 2, 0, p.lh, 0x00000ff0, 2, tdat2 ) + + #------------------------------------------------------------- + 
# Test write-after-write hazard + #------------------------------------------------------------- + + TEST_LD_RR_POST_DEST1_WAW( 25, p.lh, 25, tdat ) + TEST_LD_RR_POST_DEST1_WAW( 26, p.lh, -76, tdat ) + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +tdat: +tdat0: .half 0x00ff +tdat1: .half 0xff00 +tdat2: .half 0x0ff0 +tdat3: .half 0xf00f + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/p_lhu_irpost.S b/apps/riscv-tests/isa/rv32uxpulpimg/p_lhu_irpost.S new file mode 100644 index 000000000..72a3d86b8 --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/p_lhu_irpost.S @@ -0,0 +1,70 @@ +# See LICENSE for license details. + +#***************************************************************************** +# p_lhu_irpost.S +#----------------------------------------------------------------------------- +# +# Test p.lhu (immediate-register post-increment) instruction. +# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Basic tests + #------------------------------------------------------------- + + # Positive offset + TEST_LD_POST_OP( 2, p.lhu, 0x000000ff, 0, tdat ) + TEST_LD_POST_OP( 3, p.lhu, 0x000000ff, 2, tdat ) + TEST_LD_POST_OP( 4, p.lhu, 0x0000f00f, 4, tdat3 ) + TEST_LD_POST_OP( 5, p.lhu, 0x0000f00f, 6, tdat3 ) + + # Negative offset + TEST_LD_POST_OP( 6, p.lhu, 0x0000ff00, -6, tdat1 ) + TEST_LD_POST_OP( 7, p.lhu, 0x0000ff00, -4, tdat1 ) + TEST_LD_POST_OP( 8, p.lhu, 0x00000ff0, -2, tdat2 ) + TEST_LD_POST_OP( 9, p.lhu, 0x00000ff0, 0, tdat2 ) + + #------------------------------------------------------------- + # Bypassing tests + #------------------------------------------------------------- + + TEST_LD_POST_DEST1_BYPASS( 10, 0, p.lhu, 0x000000ff, 2, tdat0 ) + TEST_LD_POST_DEST1_BYPASS( 11, 1, p.lhu, 0x0000ff00, 2, tdat1 ) + TEST_LD_POST_DEST1_BYPASS( 12, 2, p.lhu, 0x00000ff0, 2, tdat2 ) + + 
TEST_LD_POST_DEST2_BYPASS( 13, 0, p.lhu, 4, tdat0 ) + TEST_LD_POST_DEST2_BYPASS( 14, 1, p.lhu, 2, tdat1 ) + TEST_LD_POST_DEST2_BYPASS( 15, 2, p.lhu, -6, tdat3 ) + + TEST_LD_POST_SRC1_BYPASS( 16, 0, p.lhu, 0x000000ff, 2, tdat0 ) + TEST_LD_POST_SRC1_BYPASS( 17, 1, p.lhu, 0x0000f00f, -2, tdat3 ) + TEST_LD_POST_SRC1_BYPASS( 18, 2, p.lhu, 0x00000ff0, 2, tdat2 ) + + #------------------------------------------------------------- + # Test write-after-write hazard + #------------------------------------------------------------- + + TEST_LD_POST_DEST1_WAW( 19, p.lhu, 25, tdat ) + TEST_LD_POST_DEST1_WAW( 20, p.lhu, -76, tdat ) + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +tdat: +tdat0: .half 0x00ff +tdat1: .half 0xff00 +tdat2: .half 0x0ff0 +tdat3: .half 0xf00f + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/p_lhu_rr.S b/apps/riscv-tests/isa/rv32uxpulpimg/p_lhu_rr.S new file mode 100644 index 000000000..a8c54fff9 --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/p_lhu_rr.S @@ -0,0 +1,76 @@ +# See LICENSE for license details. + +#***************************************************************************** +# p_lhu_rr.S +#----------------------------------------------------------------------------- +# +# Test p.lhu (register-register) instruction. 
+# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Basic tests + #------------------------------------------------------------- + + # Positive offset + TEST_LD_RR_OP( 2, p.lhu, 0x000000ff, 0, tdat ) + TEST_LD_RR_OP( 3, p.lhu, 0x0000ff00, 2, tdat ) + TEST_LD_RR_OP( 4, p.lhu, 0x00000ff0, 4, tdat ) + TEST_LD_RR_OP( 5, p.lhu, 0x0000f00f, 6, tdat ) + + # Negative offset + TEST_LD_RR_OP( 6, p.lhu, 0x000000ff, -6, tdat3 ) + TEST_LD_RR_OP( 7, p.lhu, 0x0000ff00, -4, tdat3 ) + TEST_LD_RR_OP( 8, p.lhu, 0x00000ff0, -2, tdat3 ) + TEST_LD_RR_OP( 9, p.lhu, 0x0000f00f, 0, tdat3 ) + + #------------------------------------------------------------- + # Bypassing tests + #------------------------------------------------------------- + + TEST_LD_RR_DEST_BYPASS( 10, 0, p.lhu, 0x0000ff00, 2, tdat0 ) + TEST_LD_RR_DEST_BYPASS( 11, 1, p.lhu, 0x00000ff0, 2, tdat1 ) + TEST_LD_RR_DEST_BYPASS( 12, 2, p.lhu, 0x0000f00f, 2, tdat2 ) + + TEST_LD_RR_SRC12_BYPASS( 13, 0, 0, p.lhu, 0x0000ff00, 2, tdat0 ) + TEST_LD_RR_SRC12_BYPASS( 14, 0, 1, p.lhu, 0x00000ff0, -2, tdat3 ) + TEST_LD_RR_SRC12_BYPASS( 15, 0, 2, p.lhu, 0x0000f00f, 2, tdat2 ) + TEST_LD_RR_SRC12_BYPASS( 16, 1, 0, p.lhu, 0x0000ff00, 2, tdat0 ) + TEST_LD_RR_SRC12_BYPASS( 17, 1, 1, p.lhu, 0x00000ff0, -2, tdat3 ) + TEST_LD_RR_SRC12_BYPASS( 18, 2, 0, p.lhu, 0x0000f00f, 2, tdat2 ) + + TEST_LD_RR_SRC21_BYPASS( 19, 0, 0, p.lhu, 0x0000ff00, 2, tdat0 ) + TEST_LD_RR_SRC21_BYPASS( 20, 0, 1, p.lhu, 0x00000ff0, -2, tdat3 ) + TEST_LD_RR_SRC21_BYPASS( 21, 0, 2, p.lhu, 0x0000f00f, 2, tdat2 ) + TEST_LD_RR_SRC21_BYPASS( 22, 1, 0, p.lhu, 0x0000ff00, 2, tdat0 ) + TEST_LD_RR_SRC21_BYPASS( 23, 1, 1, p.lhu, 0x00000ff0, -2, tdat3 ) + TEST_LD_RR_SRC21_BYPASS( 24, 2, 0, p.lhu, 0x0000f00f, 2, tdat2 ) + + #------------------------------------------------------------- + # Test write-after-write hazard + 
#------------------------------------------------------------- + + TEST_LD_RR_DEST1_WAW( 25, p.lhu, 25, tdat ) + TEST_LD_RR_DEST1_WAW( 26, p.lhu, -76, tdat ) + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +tdat: +tdat0: .half 0x00ff +tdat1: .half 0xff00 +tdat2: .half 0x0ff0 +tdat3: .half 0xf00f + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/p_lhu_rrpost.S b/apps/riscv-tests/isa/rv32uxpulpimg/p_lhu_rrpost.S new file mode 100644 index 000000000..aee16032b --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/p_lhu_rrpost.S @@ -0,0 +1,76 @@ +# See LICENSE for license details. + +#***************************************************************************** +# p_lhu_rrpost.S +#----------------------------------------------------------------------------- +# +# Test p.lhu (register-register post-increment) instruction. +# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Basic tests + #------------------------------------------------------------- + + # Positive offset + TEST_LD_RR_POST_OP( 2, p.lhu, 0x000000ff, 0, tdat ) + TEST_LD_RR_POST_OP( 3, p.lhu, 0x000000ff, 2, tdat ) + TEST_LD_RR_POST_OP( 4, p.lhu, 0x0000ff00, 4, tdat1 ) + TEST_LD_RR_POST_OP( 5, p.lhu, 0x0000ff00, 6, tdat1 ) + + # Negative offset + TEST_LD_RR_POST_OP( 6, p.lhu, 0x00000ff0, -6, tdat2 ) + TEST_LD_RR_POST_OP( 7, p.lhu, 0x00000ff0, -4, tdat2 ) + TEST_LD_RR_POST_OP( 8, p.lhu, 0x0000f00f, -2, tdat3 ) + TEST_LD_RR_POST_OP( 9, p.lhu, 0x0000f00f, 0, tdat3 ) + + #------------------------------------------------------------- + # Bypassing tests + #------------------------------------------------------------- + + TEST_LD_RR_POST_DEST_BYPASS( 10, 0, p.lhu, 0x000000ff, 2, tdat0 ) + TEST_LD_RR_POST_DEST_BYPASS( 11, 1, p.lhu, 0x0000ff00, 2, tdat1 ) + TEST_LD_RR_POST_DEST_BYPASS( 12, 2, p.lhu, 0x00000ff0, 2, tdat2 ) + + TEST_LD_RR_POST_SRC12_BYPASS( 
13, 0, 0, p.lhu, 0x000000ff, 2, tdat0 ) + TEST_LD_RR_POST_SRC12_BYPASS( 14, 0, 1, p.lhu, 0x0000f00f, -2, tdat3 ) + TEST_LD_RR_POST_SRC12_BYPASS( 15, 0, 2, p.lhu, 0x00000ff0, 2, tdat2 ) + TEST_LD_RR_POST_SRC12_BYPASS( 16, 1, 0, p.lhu, 0x000000ff, 2, tdat0 ) + TEST_LD_RR_POST_SRC12_BYPASS( 17, 1, 1, p.lhu, 0x0000f00f, -2, tdat3 ) + TEST_LD_RR_POST_SRC12_BYPASS( 18, 2, 0, p.lhu, 0x00000ff0, 2, tdat2 ) + + TEST_LD_RR_POST_SRC21_BYPASS( 19, 0, 0, p.lhu, 0x000000ff, 2, tdat0 ) + TEST_LD_RR_POST_SRC21_BYPASS( 20, 0, 1, p.lhu, 0x0000f00f, -2, tdat3 ) + TEST_LD_RR_POST_SRC21_BYPASS( 21, 0, 2, p.lhu, 0x00000ff0, 2, tdat2 ) + TEST_LD_RR_POST_SRC21_BYPASS( 22, 1, 0, p.lhu, 0x000000ff, 2, tdat0 ) + TEST_LD_RR_POST_SRC21_BYPASS( 23, 1, 1, p.lhu, 0x0000f00f, -2, tdat3 ) + TEST_LD_RR_POST_SRC21_BYPASS( 24, 2, 0, p.lhu, 0x00000ff0, 2, tdat2 ) + + #------------------------------------------------------------- + # Test write-after-write hazard + #------------------------------------------------------------- + + TEST_LD_RR_POST_DEST1_WAW( 25, p.lhu, 25, tdat ) + TEST_LD_RR_POST_DEST1_WAW( 26, p.lhu, -76, tdat ) + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +tdat: +tdat0: .half 0x00ff +tdat1: .half 0xff00 +tdat2: .half 0x0ff0 +tdat3: .half 0xf00f + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/p_lw_irpost.S b/apps/riscv-tests/isa/rv32uxpulpimg/p_lw_irpost.S new file mode 100644 index 000000000..ec62744a2 --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/p_lw_irpost.S @@ -0,0 +1,70 @@ +# See LICENSE for license details. + +#***************************************************************************** +# p_lw_irpost.S +#----------------------------------------------------------------------------- +# +# Test p.lw (immediate-register post-increment) instruction. 
+# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Basic tests + #------------------------------------------------------------- + + # Positive offset + TEST_LD_POST_OP( 2, p.lw, 0x00ff00ff, 0, tdat ) + TEST_LD_POST_OP( 3, p.lw, 0x00ff00ff, 4, tdat ) + TEST_LD_POST_OP( 4, p.lw, 0xf00ff00f, 8, tdat3 ) + TEST_LD_POST_OP( 5, p.lw, 0xf00ff00f, 12, tdat3 ) + + # Negative offset + TEST_LD_POST_OP( 6, p.lw, 0xff00ff00, -12, tdat1 ) + TEST_LD_POST_OP( 7, p.lw, 0xff00ff00, -8, tdat1 ) + TEST_LD_POST_OP( 8, p.lw, 0x0ff00ff0, -4, tdat2 ) + TEST_LD_POST_OP( 9, p.lw, 0x0ff00ff0, 0, tdat2 ) + + #------------------------------------------------------------- + # Bypassing tests + #------------------------------------------------------------- + + TEST_LD_POST_DEST1_BYPASS( 10, 0, p.lw, 0x00ff00ff, 4, tdat0 ) + TEST_LD_POST_DEST1_BYPASS( 11, 1, p.lw, 0xff00ff00, 4, tdat1 ) + TEST_LD_POST_DEST1_BYPASS( 12, 2, p.lw, 0x0ff00ff0, 4, tdat2 ) + + TEST_LD_POST_DEST2_BYPASS( 13, 0, p.lw, 8, tdat0 ) + TEST_LD_POST_DEST2_BYPASS( 14, 1, p.lw, 4, tdat1 ) + TEST_LD_POST_DEST2_BYPASS( 15, 2, p.lw, -12, tdat3 ) + + TEST_LD_POST_SRC1_BYPASS( 16, 0, p.lw, 0x00ff00ff, 4, tdat0 ) + TEST_LD_POST_SRC1_BYPASS( 17, 1, p.lw, 0xf00ff00f, -4, tdat3 ) + TEST_LD_POST_SRC1_BYPASS( 18, 2, p.lw, 0x0ff00ff0, 4, tdat2 ) + + #------------------------------------------------------------- + # Test write-after-write hazard + #------------------------------------------------------------- + + TEST_LD_POST_DEST1_WAW( 19, p.lw, 25, tdat ) + TEST_LD_POST_DEST1_WAW( 20, p.lw, -76, tdat ) + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +tdat: +tdat0: .word 0x00ff00ff +tdat1: .word 0xff00ff00 +tdat2: .word 0x0ff00ff0 +tdat3: .word 0xf00ff00f + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/p_lw_rr.S b/apps/riscv-tests/isa/rv32uxpulpimg/p_lw_rr.S new file mode 100644 
index 000000000..f6000fd78 --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/p_lw_rr.S @@ -0,0 +1,76 @@ +# See LICENSE for license details. + +#***************************************************************************** +# p_lw_rr.S +#----------------------------------------------------------------------------- +# +# Test p.lw (register-register) instruction. +# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Basic tests + #------------------------------------------------------------- + + # Positive offset + TEST_LD_RR_OP( 2, p.lw, 0x00ff00ff, 0, tdat ) + TEST_LD_RR_OP( 3, p.lw, 0xff00ff00, 4, tdat ) + TEST_LD_RR_OP( 4, p.lw, 0x0ff00ff0, 8, tdat ) + TEST_LD_RR_OP( 5, p.lw, 0xf00ff00f, 12, tdat ) + + # Negative offset + TEST_LD_RR_OP( 6, p.lw, 0x00ff00ff, -12, tdat3 ) + TEST_LD_RR_OP( 7, p.lw, 0xff00ff00, -8, tdat3 ) + TEST_LD_RR_OP( 8, p.lw, 0x0ff00ff0, -4, tdat3 ) + TEST_LD_RR_OP( 9, p.lw, 0xf00ff00f, 0, tdat3 ) + + #------------------------------------------------------------- + # Bypassing tests + #------------------------------------------------------------- + + TEST_LD_RR_DEST_BYPASS( 10, 0, p.lw, 0xff00ff00, 4, tdat0 ) + TEST_LD_RR_DEST_BYPASS( 11, 1, p.lw, 0x0ff00ff0, 4, tdat1 ) + TEST_LD_RR_DEST_BYPASS( 12, 2, p.lw, 0xf00ff00f, 4, tdat2 ) + + TEST_LD_RR_SRC12_BYPASS( 13, 0, 0, p.lw, 0xff00ff00, 4, tdat0 ) + TEST_LD_RR_SRC12_BYPASS( 14, 0, 1, p.lw, 0x0ff00ff0, -4, tdat3 ) + TEST_LD_RR_SRC12_BYPASS( 15, 0, 2, p.lw, 0xf00ff00f, 4, tdat2 ) + TEST_LD_RR_SRC12_BYPASS( 16, 1, 0, p.lw, 0xff00ff00, 4, tdat0 ) + TEST_LD_RR_SRC12_BYPASS( 17, 1, 1, p.lw, 0x0ff00ff0, -4, tdat3 ) + TEST_LD_RR_SRC12_BYPASS( 18, 2, 0, p.lw, 0xf00ff00f, 4, tdat2 ) + + TEST_LD_RR_SRC21_BYPASS( 19, 0, 0, p.lw, 0xff00ff00, 4, tdat0 ) + TEST_LD_RR_SRC21_BYPASS( 20, 0, 1, p.lw, 0x0ff00ff0, -4, tdat3 ) + TEST_LD_RR_SRC21_BYPASS( 21, 0, 2, p.lw, 0xf00ff00f, 4, tdat2 ) + 
TEST_LD_RR_SRC21_BYPASS( 22, 1, 0, p.lw, 0xff00ff00, 4, tdat0 ) + TEST_LD_RR_SRC21_BYPASS( 23, 1, 1, p.lw, 0x0ff00ff0, -4, tdat3 ) + TEST_LD_RR_SRC21_BYPASS( 24, 2, 0, p.lw, 0xf00ff00f, 4, tdat2 ) + + #------------------------------------------------------------- + # Test write-after-write hazard + #------------------------------------------------------------- + + TEST_LD_RR_DEST1_WAW( 25, p.lw, 25, tdat ) + TEST_LD_RR_DEST1_WAW( 26, p.lw, -76, tdat ) + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +tdat: +tdat0: .word 0x00ff00ff +tdat1: .word 0xff00ff00 +tdat2: .word 0x0ff00ff0 +tdat3: .word 0xf00ff00f + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/p_lw_rrpost.S b/apps/riscv-tests/isa/rv32uxpulpimg/p_lw_rrpost.S new file mode 100644 index 000000000..bdec214d1 --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/p_lw_rrpost.S @@ -0,0 +1,76 @@ +# See LICENSE for license details. + +#***************************************************************************** +# p_lw_rrpost.S +#----------------------------------------------------------------------------- +# +# Test p.lw (register-register post-increment) instruction. 
+# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Basic tests + #------------------------------------------------------------- + + # Positive offset + TEST_LD_RR_POST_OP( 2, p.lw, 0x00ff00ff, 0, tdat ) + TEST_LD_RR_POST_OP( 3, p.lw, 0x00ff00ff, 4, tdat ) + TEST_LD_RR_POST_OP( 4, p.lw, 0xff00ff00, 8, tdat1 ) + TEST_LD_RR_POST_OP( 5, p.lw, 0xff00ff00, 12, tdat1 ) + + # Negative offset + TEST_LD_RR_POST_OP( 6, p.lw, 0x0ff00ff0, -12, tdat2 ) + TEST_LD_RR_POST_OP( 7, p.lw, 0x0ff00ff0, -8, tdat2 ) + TEST_LD_RR_POST_OP( 8, p.lw, 0xf00ff00f, -4, tdat3 ) + TEST_LD_RR_POST_OP( 9, p.lw, 0xf00ff00f, 0, tdat3 ) + + #------------------------------------------------------------- + # Bypassing tests + #------------------------------------------------------------- + + TEST_LD_RR_POST_DEST_BYPASS( 10, 0, p.lw, 0x00ff00ff, 4, tdat0 ) + TEST_LD_RR_POST_DEST_BYPASS( 11, 1, p.lw, 0xff00ff00, 4, tdat1 ) + TEST_LD_RR_POST_DEST_BYPASS( 12, 2, p.lw, 0x0ff00ff0, 4, tdat2 ) + + TEST_LD_RR_POST_SRC12_BYPASS( 13, 0, 0, p.lw, 0x00ff00ff, 4, tdat0 ) + TEST_LD_RR_POST_SRC12_BYPASS( 14, 0, 1, p.lw, 0xf00ff00f, -4, tdat3 ) + TEST_LD_RR_POST_SRC12_BYPASS( 15, 0, 2, p.lw, 0x0ff00ff0, 4, tdat2 ) + TEST_LD_RR_POST_SRC12_BYPASS( 16, 1, 0, p.lw, 0x00ff00ff, 4, tdat0 ) + TEST_LD_RR_POST_SRC12_BYPASS( 17, 1, 1, p.lw, 0xf00ff00f, -4, tdat3 ) + TEST_LD_RR_POST_SRC12_BYPASS( 18, 2, 0, p.lw, 0x0ff00ff0, 4, tdat2 ) + + TEST_LD_RR_POST_SRC21_BYPASS( 19, 0, 0, p.lw, 0x00ff00ff, 4, tdat0 ) + TEST_LD_RR_POST_SRC21_BYPASS( 20, 0, 1, p.lw, 0xf00ff00f, -4, tdat3 ) + TEST_LD_RR_POST_SRC21_BYPASS( 21, 0, 2, p.lw, 0x0ff00ff0, 4, tdat2 ) + TEST_LD_RR_POST_SRC21_BYPASS( 22, 1, 0, p.lw, 0x00ff00ff, 4, tdat0 ) + TEST_LD_RR_POST_SRC21_BYPASS( 23, 1, 1, p.lw, 0xf00ff00f, -4, tdat3 ) + TEST_LD_RR_POST_SRC21_BYPASS( 24, 2, 0, p.lw, 0x0ff00ff0, 4, tdat2 ) + + #------------------------------------------------------------- 
+ # Test write-after-write hazard + #------------------------------------------------------------- + + TEST_LD_RR_POST_DEST1_WAW( 25, p.lw, 25, tdat ) + TEST_LD_RR_POST_DEST1_WAW( 26, p.lw, -76, tdat ) + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +tdat: +tdat0: .word 0x00ff00ff +tdat1: .word 0xff00ff00 +tdat2: .word 0x0ff00ff0 +tdat3: .word 0xf00ff00f + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/p_sb_irpost.S b/apps/riscv-tests/isa/rv32uxpulpimg/p_sb_irpost.S new file mode 100644 index 000000000..d5ddf3a03 --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/p_sb_irpost.S @@ -0,0 +1,71 @@ +# See LICENSE for license details. + +#***************************************************************************** +# p_sb_irpost.S +#----------------------------------------------------------------------------- +# +# Test p.sb (immediate-register post-increment) instruction. +# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Basic tests + #------------------------------------------------------------- + + # Positive offset + TEST_ST_POST_OP( 2, p.sb, lb, 0xffffffaa, 0, tdat ) + TEST_ST_POST_OP( 3, p.sb, lb, 0x00000000, 1, tdat ) + TEST_ST_POST_OP( 4, p.sb, lh, 0xffffefa0, 2, tdat ) + TEST_ST_POST_OP( 5, p.sb, lb, 0x0000000a, 3, tdat ) + + # Negative offset + TEST_ST_POST_OP( 6, p.sb, lb, 0xffffffaa, -6, tdat8 ) + TEST_ST_POST_OP( 7, p.sb, lb, 0x00000000, -5, tdat8 ) + TEST_ST_POST_OP( 8, p.sb, lb, 0xffffffa0, -3, tdat8 ) + TEST_ST_POST_OP( 9, p.sb, lb, 0x0000000a, -1, tdat8 ) + + #------------------------------------------------------------- + # Bypassing tests + #------------------------------------------------------------- + + TEST_ST_POST_SRC12_BYPASS( 10, 0, 0, p.sb, lb, 0xffffffdd, 0, tdat ); + TEST_ST_POST_SRC12_BYPASS( 11, 0, 1, p.sb, lb, 0xffffffcd, 1, tdat ); + TEST_ST_POST_SRC12_BYPASS( 12, 0, 2, 
p.sb, lb, 0xffffffcc, 2, tdat ); + TEST_ST_POST_SRC12_BYPASS( 13, 1, 0, p.sb, lb, 0xffffffbc, 3, tdat ); + TEST_ST_POST_SRC12_BYPASS( 14, 1, 1, p.sb, lb, 0xffffffbb, 4, tdat ); + TEST_ST_POST_SRC12_BYPASS( 15, 2, 0, p.sb, lb, 0xffffffab, 5, tdat ); + + TEST_ST_POST_SRC21_BYPASS( 16, 0, 0, p.sb, lb, 0x00000033, 0, tdat ); + TEST_ST_POST_SRC21_BYPASS( 17, 0, 1, p.sb, lb, 0x00000023, 1, tdat ); + TEST_ST_POST_SRC21_BYPASS( 18, 0, 2, p.sb, lb, 0x00000022, 2, tdat ); + TEST_ST_POST_SRC21_BYPASS( 19, 1, 0, p.sb, lb, 0x00000012, 3, tdat ); + TEST_ST_POST_SRC21_BYPASS( 20, 1, 1, p.sb, lb, 0x00000011, 4, tdat ); + TEST_ST_POST_SRC21_BYPASS( 21, 2, 0, p.sb, lb, 0x00000001, 5, tdat ); + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +tdat: +tdat0: .byte 0xef +tdat1: .byte 0xef +tdat2: .byte 0xef +tdat3: .byte 0xef +tdat4: .byte 0xef +tdat5: .byte 0xef +tdat6: .byte 0xef +tdat7: .byte 0xef +tdat8: .byte 0xef +tdat9: .byte 0xef + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/p_sb_rr.S b/apps/riscv-tests/isa/rv32uxpulpimg/p_sb_rr.S new file mode 100644 index 000000000..6b501b487 --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/p_sb_rr.S @@ -0,0 +1,71 @@ +# See LICENSE for license details. + +#***************************************************************************** +# p_sb_rr.S +#----------------------------------------------------------------------------- +# +# Test p.sb (register-register) instruction. 
+# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Basic tests + #------------------------------------------------------------- + + # Positive offset + TEST_ST_RR_OP( 2, p.sb, lb, 0xffffffaa, 0, tdat ) + TEST_ST_RR_OP( 3, p.sb, lb, 0x00000000, 1, tdat ) + TEST_ST_RR_OP( 4, p.sb, lh, 0xffffefa0, 2, tdat ) + TEST_ST_RR_OP( 5, p.sb, lb, 0x0000000a, 3, tdat ) + + # Negative offset + TEST_ST_RR_OP( 6, p.sb, lb, 0xffffffaa, -6, tdat8 ) + TEST_ST_RR_OP( 7, p.sb, lb, 0x00000000, -5, tdat8 ) + TEST_ST_RR_OP( 8, p.sb, lb, 0xffffffa0, -3, tdat8 ) + TEST_ST_RR_OP( 9, p.sb, lb, 0x0000000a, -1, tdat8 ) + + #------------------------------------------------------------- + # Bypassing tests + #------------------------------------------------------------- + + TEST_ST_RR_SRC12_BYPASS( 10, 0, 0, p.sb, lb, 0xffffffdd, 0, tdat ); + TEST_ST_RR_SRC12_BYPASS( 11, 0, 1, p.sb, lb, 0xffffffcd, 1, tdat ); + TEST_ST_RR_SRC12_BYPASS( 12, 0, 2, p.sb, lb, 0xffffffcc, 2, tdat ); + TEST_ST_RR_SRC12_BYPASS( 13, 1, 0, p.sb, lb, 0xffffffbc, 3, tdat ); + TEST_ST_RR_SRC12_BYPASS( 14, 1, 1, p.sb, lb, 0xffffffbb, 4, tdat ); + TEST_ST_RR_SRC12_BYPASS( 15, 2, 0, p.sb, lb, 0xffffffab, 5, tdat ); + + TEST_ST_RR_SRC21_BYPASS( 16, 0, 0, p.sb, lb, 0x00000033, 0, tdat ); + TEST_ST_RR_SRC21_BYPASS( 17, 0, 1, p.sb, lb, 0x00000023, 1, tdat ); + TEST_ST_RR_SRC21_BYPASS( 18, 0, 2, p.sb, lb, 0x00000022, 2, tdat ); + TEST_ST_RR_SRC21_BYPASS( 19, 1, 0, p.sb, lb, 0x00000012, 3, tdat ); + TEST_ST_RR_SRC21_BYPASS( 20, 1, 1, p.sb, lb, 0x00000011, 4, tdat ); + TEST_ST_RR_SRC21_BYPASS( 21, 2, 0, p.sb, lb, 0x00000001, 5, tdat ); + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +tdat: +tdat0: .byte 0xef +tdat1: .byte 0xef +tdat2: .byte 0xef +tdat3: .byte 0xef +tdat4: .byte 0xef +tdat5: .byte 0xef +tdat6: .byte 0xef +tdat7: .byte 0xef +tdat8: .byte 0xef +tdat9: .byte 0xef + 
+RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/p_sb_rrpost.S b/apps/riscv-tests/isa/rv32uxpulpimg/p_sb_rrpost.S new file mode 100644 index 000000000..3ed706fde --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/p_sb_rrpost.S @@ -0,0 +1,71 @@ +# See LICENSE for license details. + +#***************************************************************************** +# p_sb_rrpost.S +#----------------------------------------------------------------------------- +# +# Test p.sb (register-register post-increment) instruction. +# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Basic tests + #------------------------------------------------------------- + + # Positive offset + TEST_ST_RR_POST_OP( 2, p.sb, lb, 0xffffffaa, 0, tdat1 ) + TEST_ST_RR_POST_OP( 3, p.sb, lb, 0x00000000, 1, tdat1 ) + TEST_ST_RR_POST_OP( 4, p.sb, lh, 0xffffefa0, 2, tdat2 ) + TEST_ST_RR_POST_OP( 5, p.sb, lb, 0x0000000a, 3, tdat2 ) + + # Negative offset + TEST_ST_RR_POST_OP( 6, p.sb, lb, 0xffffffaa, -6, tdat8 ) + TEST_ST_RR_POST_OP( 7, p.sb, lb, 0x00000000, -5, tdat8 ) + TEST_ST_RR_POST_OP( 8, p.sb, lb, 0xffffffa0, -3, tdat8 ) + TEST_ST_RR_POST_OP( 9, p.sb, lb, 0x0000000a, -1, tdat8 ) + + #------------------------------------------------------------- + # Bypassing tests + #------------------------------------------------------------- + + TEST_ST_RR_POST_SRC12_BYPASS( 10, 0, 0, p.sb, lb, 0xffffffdd, 0, tdat ); + TEST_ST_RR_POST_SRC12_BYPASS( 11, 0, 1, p.sb, lb, 0xffffffcd, 1, tdat ); + TEST_ST_RR_POST_SRC12_BYPASS( 12, 0, 2, p.sb, lb, 0xffffffcc, 2, tdat ); + TEST_ST_RR_POST_SRC12_BYPASS( 13, 1, 0, p.sb, lb, 0xffffffbc, 3, tdat ); + TEST_ST_RR_POST_SRC12_BYPASS( 14, 1, 1, p.sb, lb, 0xffffffbb, 4, tdat ); + TEST_ST_RR_POST_SRC12_BYPASS( 15, 2, 0, p.sb, lb, 0xffffffab, 5, tdat ); + + TEST_ST_RR_POST_SRC21_BYPASS( 16, 0, 0, p.sb, lb, 0x00000033, 0, tdat ); + 
TEST_ST_RR_POST_SRC21_BYPASS( 17, 0, 1, p.sb, lb, 0x00000023, 1, tdat ); + TEST_ST_RR_POST_SRC21_BYPASS( 18, 0, 2, p.sb, lb, 0x00000022, 2, tdat ); + TEST_ST_RR_POST_SRC21_BYPASS( 19, 1, 0, p.sb, lb, 0x00000012, 3, tdat ); + TEST_ST_RR_POST_SRC21_BYPASS( 20, 1, 1, p.sb, lb, 0x00000011, 4, tdat ); + TEST_ST_RR_POST_SRC21_BYPASS( 21, 2, 0, p.sb, lb, 0x00000001, 5, tdat ); + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +tdat: +tdat0: .byte 0xef +tdat1: .byte 0xef +tdat2: .byte 0xef +tdat3: .byte 0xef +tdat4: .byte 0xef +tdat5: .byte 0xef +tdat6: .byte 0xef +tdat7: .byte 0xef +tdat8: .byte 0xef +tdat9: .byte 0xef + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/p_sh_irpost.S b/apps/riscv-tests/isa/rv32uxpulpimg/p_sh_irpost.S new file mode 100644 index 000000000..32c0b376b --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/p_sh_irpost.S @@ -0,0 +1,71 @@ +# See LICENSE for license details. + +#***************************************************************************** +# p_sh_irpost.S +#----------------------------------------------------------------------------- +# +# Test p.sh (immediate-register post-increment) instruction. 
+# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Basic tests + #------------------------------------------------------------- + + # Positive offset + TEST_ST_POST_OP( 2, p.sh, lh, 0x000000aa, 0, tdat ); + TEST_ST_POST_OP( 3, p.sh, lh, 0xffffaa00, 2, tdat ); + TEST_ST_POST_OP( 4, p.sh, lw, 0xbeef0aa0, 4, tdat ); + TEST_ST_POST_OP( 5, p.sh, lh, 0xffffa00a, 6, tdat ); + + # Negative offset + TEST_ST_POST_OP( 6, p.sh, lh, 0x000000aa, -6, tdat8 ); + TEST_ST_POST_OP( 7, p.sh, lh, 0xffffaa00, -4, tdat8 ); + TEST_ST_POST_OP( 8, p.sh, lh, 0x00000aa0, -2, tdat8 ); + TEST_ST_POST_OP( 9, p.sh, lh, 0xffffa00a, 0, tdat8 ); + + #------------------------------------------------------------- + # Bypassing tests + #------------------------------------------------------------- + + TEST_ST_POST_SRC12_BYPASS( 10, 0, 0, p.sh, lh, 0xffffccdd, 0, tdat ); + TEST_ST_POST_SRC12_BYPASS( 11, 0, 1, p.sh, lh, 0xffffbccd, 2, tdat ); + TEST_ST_POST_SRC12_BYPASS( 12, 0, 2, p.sh, lh, 0xffffbbcc, 4, tdat ); + TEST_ST_POST_SRC12_BYPASS( 13, 1, 0, p.sh, lh, 0xffffabbc, 6, tdat ); + TEST_ST_POST_SRC12_BYPASS( 14, 1, 1, p.sh, lh, 0xffffaabb, 8, tdat ); + TEST_ST_POST_SRC12_BYPASS( 15, 2, 0, p.sh, lh, 0xffffdaab, 10, tdat ); + + TEST_ST_POST_SRC21_BYPASS( 16, 0, 0, p.sh, lh, 0x00002233, 0, tdat ); + TEST_ST_POST_SRC21_BYPASS( 17, 0, 1, p.sh, lh, 0x00001223, 2, tdat ); + TEST_ST_POST_SRC21_BYPASS( 18, 0, 2, p.sh, lh, 0x00001122, 4, tdat ); + TEST_ST_POST_SRC21_BYPASS( 19, 1, 0, p.sh, lh, 0x00000112, 6, tdat ); + TEST_ST_POST_SRC21_BYPASS( 20, 1, 1, p.sh, lh, 0x00000011, 8, tdat ); + TEST_ST_POST_SRC21_BYPASS( 21, 2, 0, p.sh, lh, 0x00003001, 10, tdat ); + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +tdat: +tdat0: .half 0xbeef +tdat1: .half 0xbeef +tdat2: .half 0xbeef +tdat3: .half 0xbeef +tdat4: .half 0xbeef +tdat5: .half 0xbeef +tdat6: .half 0xbeef +tdat7: 
.half 0xbeef +tdat8: .half 0xbeef +tdat9: .half 0xbeef + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/p_sh_rr.S b/apps/riscv-tests/isa/rv32uxpulpimg/p_sh_rr.S new file mode 100644 index 000000000..0c5f4cbcb --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/p_sh_rr.S @@ -0,0 +1,71 @@ +# See LICENSE for license details. + +#***************************************************************************** +# p_sh_rr.S +#----------------------------------------------------------------------------- +# +# Test p.sh (register-register) instruction. +# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Basic tests + #------------------------------------------------------------- + + # Positive offset + TEST_ST_RR_OP( 2, p.sh, lh, 0x000000aa, 0, tdat ); + TEST_ST_RR_OP( 3, p.sh, lh, 0xffffaa00, 2, tdat ); + TEST_ST_RR_OP( 4, p.sh, lw, 0xbeef0aa0, 4, tdat ); + TEST_ST_RR_OP( 5, p.sh, lh, 0xffffa00a, 6, tdat ); + + # Negative offset + TEST_ST_RR_OP( 6, p.sh, lh, 0x000000aa, -6, tdat8 ); + TEST_ST_RR_OP( 7, p.sh, lh, 0xffffaa00, -4, tdat8 ); + TEST_ST_RR_OP( 8, p.sh, lh, 0x00000aa0, -2, tdat8 ); + TEST_ST_RR_OP( 9, p.sh, lh, 0xffffa00a, 0, tdat8 ); + + #------------------------------------------------------------- + # Bypassing tests + #------------------------------------------------------------- + + TEST_ST_RR_SRC12_BYPASS( 10, 0, 0, p.sh, lh, 0xffffccdd, 0, tdat ); + TEST_ST_RR_SRC12_BYPASS( 11, 0, 1, p.sh, lh, 0xffffbccd, 2, tdat ); + TEST_ST_RR_SRC12_BYPASS( 12, 0, 2, p.sh, lh, 0xffffbbcc, 4, tdat ); + TEST_ST_RR_SRC12_BYPASS( 13, 1, 0, p.sh, lh, 0xffffabbc, 6, tdat ); + TEST_ST_RR_SRC12_BYPASS( 14, 1, 1, p.sh, lh, 0xffffaabb, 8, tdat ); + TEST_ST_RR_SRC12_BYPASS( 15, 2, 0, p.sh, lh, 0xffffdaab, 10, tdat ); + + TEST_ST_RR_SRC21_BYPASS( 16, 0, 0, p.sh, lh, 0x00002233, 0, tdat ); + TEST_ST_RR_SRC21_BYPASS( 17, 0, 1, p.sh, lh, 0x00001223, 2, tdat ); 
+ TEST_ST_RR_SRC21_BYPASS( 18, 0, 2, p.sh, lh, 0x00001122, 4, tdat ); + TEST_ST_RR_SRC21_BYPASS( 19, 1, 0, p.sh, lh, 0x00000112, 6, tdat ); + TEST_ST_RR_SRC21_BYPASS( 20, 1, 1, p.sh, lh, 0x00000011, 8, tdat ); + TEST_ST_RR_SRC21_BYPASS( 21, 2, 0, p.sh, lh, 0x00003001, 10, tdat ); + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +tdat: +tdat0: .half 0xbeef +tdat1: .half 0xbeef +tdat2: .half 0xbeef +tdat3: .half 0xbeef +tdat4: .half 0xbeef +tdat5: .half 0xbeef +tdat6: .half 0xbeef +tdat7: .half 0xbeef +tdat8: .half 0xbeef +tdat9: .half 0xbeef + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/p_sh_rrpost.S b/apps/riscv-tests/isa/rv32uxpulpimg/p_sh_rrpost.S new file mode 100644 index 000000000..5dafda6d9 --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/p_sh_rrpost.S @@ -0,0 +1,71 @@ +# See LICENSE for license details. + +#***************************************************************************** +# p_sh_rrpost.S +#----------------------------------------------------------------------------- +# +# Test p.sh (register-register post-increment) instruction. 
+# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Basic tests + #------------------------------------------------------------- + + # Positive offset + TEST_ST_RR_POST_OP( 2, p.sh, lh, 0x000000aa, 0, tdat ); + TEST_ST_RR_POST_OP( 3, p.sh, lh, 0xffffaa00, 2, tdat ); + TEST_ST_RR_POST_OP( 4, p.sh, lw, 0xbeef0aa0, 4, tdat ); + TEST_ST_RR_POST_OP( 5, p.sh, lh, 0xffffa00a, 6, tdat ); + + # Negative offset + TEST_ST_RR_POST_OP( 6, p.sh, lh, 0x000000aa, -6, tdat8 ); + TEST_ST_RR_POST_OP( 7, p.sh, lh, 0xffffaa00, -4, tdat8 ); + TEST_ST_RR_POST_OP( 8, p.sh, lh, 0x00000aa0, -2, tdat8 ); + TEST_ST_RR_POST_OP( 9, p.sh, lh, 0xffffa00a, 0, tdat8 ); + + #------------------------------------------------------------- + # Bypassing tests + #------------------------------------------------------------- + + TEST_ST_RR_POST_SRC12_BYPASS( 10, 0, 0, p.sh, lh, 0xffffccdd, 0, tdat ); + TEST_ST_RR_POST_SRC12_BYPASS( 11, 0, 1, p.sh, lh, 0xffffbccd, 2, tdat ); + TEST_ST_RR_POST_SRC12_BYPASS( 12, 0, 2, p.sh, lh, 0xffffbbcc, 4, tdat ); + TEST_ST_RR_POST_SRC12_BYPASS( 13, 1, 0, p.sh, lh, 0xffffabbc, 6, tdat ); + TEST_ST_RR_POST_SRC12_BYPASS( 14, 1, 1, p.sh, lh, 0xffffaabb, 8, tdat ); + TEST_ST_RR_POST_SRC12_BYPASS( 15, 2, 0, p.sh, lh, 0xffffdaab, 10, tdat ); + + TEST_ST_RR_POST_SRC21_BYPASS( 16, 0, 0, p.sh, lh, 0x00002233, 0, tdat ); + TEST_ST_RR_POST_SRC21_BYPASS( 17, 0, 1, p.sh, lh, 0x00001223, 2, tdat ); + TEST_ST_RR_POST_SRC21_BYPASS( 18, 0, 2, p.sh, lh, 0x00001122, 4, tdat ); + TEST_ST_RR_POST_SRC21_BYPASS( 19, 1, 0, p.sh, lh, 0x00000112, 6, tdat ); + TEST_ST_RR_POST_SRC21_BYPASS( 20, 1, 1, p.sh, lh, 0x00000011, 8, tdat ); + TEST_ST_RR_POST_SRC21_BYPASS( 21, 2, 0, p.sh, lh, 0x00003001, 10, tdat ); + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +tdat: +tdat0: .half 0xbeef +tdat1: .half 0xbeef +tdat2: .half 0xbeef +tdat3: .half 0xbeef +tdat4: .half 
0xbeef +tdat5: .half 0xbeef +tdat6: .half 0xbeef +tdat7: .half 0xbeef +tdat8: .half 0xbeef +tdat9: .half 0xbeef + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/p_sw_irpost.S b/apps/riscv-tests/isa/rv32uxpulpimg/p_sw_irpost.S new file mode 100644 index 000000000..bd8c174d4 --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/p_sw_irpost.S @@ -0,0 +1,71 @@ +# See LICENSE for license details. + +#***************************************************************************** +# p_sw_irpost.S +#----------------------------------------------------------------------------- +# +# Test p.sw (immediate-register post-increment) instruction. +# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Basic tests + #------------------------------------------------------------- + + # Positive offset + TEST_ST_POST_OP( 2, p.sw, lw, 0x00aa00aa, 0, tdat ); + TEST_ST_POST_OP( 3, p.sw, lw, 0xaa00aa00, 4, tdat ); + TEST_ST_POST_OP( 4, p.sw, lw, 0x0aa00aa0, 8, tdat ); + TEST_ST_POST_OP( 5, p.sw, lw, 0xa00aa00a, 12, tdat ); + + # Negative offset + TEST_ST_POST_OP( 6, p.sw, lw, 0x00aa00aa, -12, tdat8 ); + TEST_ST_POST_OP( 7, p.sw, lw, 0xaa00aa00, -8, tdat8 ); + TEST_ST_POST_OP( 8, p.sw, lw, 0x0aa00aa0, -4, tdat8 ); + TEST_ST_POST_OP( 9, p.sw, lw, 0xa00aa00a, 0, tdat8 ); + + #------------------------------------------------------------- + # Bypassing tests + #------------------------------------------------------------- + + TEST_ST_POST_SRC12_BYPASS( 10, 0, 0, p.sw, lw, 0xaabbccdd, 0, tdat ); + TEST_ST_POST_SRC12_BYPASS( 11, 0, 1, p.sw, lw, 0xdaabbccd, 4, tdat ); + TEST_ST_POST_SRC12_BYPASS( 12, 0, 2, p.sw, lw, 0xddaabbcc, 8, tdat ); + TEST_ST_POST_SRC12_BYPASS( 13, 1, 0, p.sw, lw, 0xcddaabbc, 12, tdat ); + TEST_ST_POST_SRC12_BYPASS( 14, 1, 1, p.sw, lw, 0xccddaabb, 16, tdat ); + TEST_ST_POST_SRC12_BYPASS( 15, 2, 0, p.sw, lw, 0xbccddaab, 20, tdat ); + + 
TEST_ST_POST_SRC21_BYPASS( 16, 0, 0, p.sw, lw, 0x00112233, 0, tdat ); + TEST_ST_POST_SRC21_BYPASS( 17, 0, 1, p.sw, lw, 0x30011223, 4, tdat ); + TEST_ST_POST_SRC21_BYPASS( 18, 0, 2, p.sw, lw, 0x33001122, 8, tdat ); + TEST_ST_POST_SRC21_BYPASS( 19, 1, 0, p.sw, lw, 0x23300112, 12, tdat ); + TEST_ST_POST_SRC21_BYPASS( 20, 1, 1, p.sw, lw, 0x22330011, 16, tdat ); + TEST_ST_POST_SRC21_BYPASS( 21, 2, 0, p.sw, lw, 0x12233001, 20, tdat ); + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +tdat: +tdat0: .word 0xdeadbeef +tdat1: .word 0xdeadbeef +tdat2: .word 0xdeadbeef +tdat3: .word 0xdeadbeef +tdat4: .word 0xdeadbeef +tdat5: .word 0xdeadbeef +tdat6: .word 0xdeadbeef +tdat7: .word 0xdeadbeef +tdat8: .word 0xdeadbeef +tdat9: .word 0xdeadbeef + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/p_sw_rr.S b/apps/riscv-tests/isa/rv32uxpulpimg/p_sw_rr.S new file mode 100644 index 000000000..6a6a53e38 --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/p_sw_rr.S @@ -0,0 +1,72 @@ +# See LICENSE for license details. + +#***************************************************************************** +# p_sw_rr.S +#----------------------------------------------------------------------------- +# +# Test p.sw (register-register) instruction. 
+# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Basic tests + #------------------------------------------------------------- + + # Positive offset + TEST_ST_RR_OP( 2, p.sw, lw, 0x00aa00aa, 0, tdat ); + TEST_ST_RR_OP( 3, p.sw, lw, 0xaa00aa00, 4, tdat ); + TEST_ST_RR_OP( 4, p.sw, lw, 0x0aa00aa0, 8, tdat ); + TEST_ST_RR_OP( 5, p.sw, lw, 0xa00aa00a, 12, tdat ); + + # Negative offset + TEST_ST_RR_OP( 6, p.sw, lw, 0x00aa00aa, -12, tdat8 ); + TEST_ST_RR_OP( 7, p.sw, lw, 0xaa00aa00, -8, tdat8 ); + TEST_ST_RR_OP( 8, p.sw, lw, 0x0aa00aa0, -4, tdat8 ); + TEST_ST_RR_OP( 9, p.sw, lw, 0xa00aa00a, 0, tdat8 ); + + #------------------------------------------------------------- + # Bypassing tests + #------------------------------------------------------------- + + TEST_ST_RR_SRC12_BYPASS( 10, 0, 0, p.sw, lw, 0xaabbccdd, 0, tdat ); + TEST_ST_RR_SRC12_BYPASS( 11, 0, 1, p.sw, lw, 0xdaabbccd, 4, tdat ); + TEST_ST_RR_SRC12_BYPASS( 12, 0, 2, p.sw, lw, 0xddaabbcc, 8, tdat ); + TEST_ST_RR_SRC12_BYPASS( 13, 1, 0, p.sw, lw, 0xcddaabbc, 12, tdat ); + TEST_ST_RR_SRC12_BYPASS( 14, 1, 1, p.sw, lw, 0xccddaabb, 16, tdat ); + TEST_ST_RR_SRC12_BYPASS( 15, 2, 0, p.sw, lw, 0xbccddaab, 20, tdat ); + + TEST_ST_RR_SRC21_BYPASS( 16, 0, 0, p.sw, lw, 0x00112233, 0, tdat ); + TEST_ST_RR_SRC21_BYPASS( 17, 0, 1, p.sw, lw, 0x30011223, 4, tdat ); + TEST_ST_RR_SRC21_BYPASS( 18, 0, 2, p.sw, lw, 0x33001122, 8, tdat ); + TEST_ST_RR_SRC21_BYPASS( 19, 1, 0, p.sw, lw, 0x23300112, 12, tdat ); + TEST_ST_RR_SRC21_BYPASS( 20, 1, 1, p.sw, lw, 0x22330011, 16, tdat ); + TEST_ST_RR_SRC21_BYPASS( 21, 2, 0, p.sw, lw, 0x12233001, 20, tdat ); + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +tdat: +tdat0: .word 0xdeadbeef +tdat1: .word 0xdeadbeef +tdat2: .word 0xdeadbeef +tdat3: .word 0xdeadbeef +tdat4: .word 0xdeadbeef +tdat5: .word 0xdeadbeef +tdat6: .word 0xdeadbeef +tdat7: .word 
0xdeadbeef +tdat8: .word 0xdeadbeef +tdat9: .word 0xdeadbeef + +RVTEST_DATA_END + diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/p_sw_rrpost.S b/apps/riscv-tests/isa/rv32uxpulpimg/p_sw_rrpost.S new file mode 100644 index 000000000..ce9c58d1a --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/p_sw_rrpost.S @@ -0,0 +1,71 @@ +# See LICENSE for license details. + +#***************************************************************************** +# p_sw_rrpost.S +#----------------------------------------------------------------------------- +# +# Test p.sw (register-register post-increment) instruction. +# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Basic tests + #------------------------------------------------------------- + + # Positive offset + TEST_ST_RR_POST_OP( 2, p.sw, lw, 0x00aa00aa, 0, tdat ); + TEST_ST_RR_POST_OP( 3, p.sw, lw, 0xaa00aa00, 4, tdat ); + TEST_ST_RR_POST_OP( 4, p.sw, lw, 0x0aa00aa0, 8, tdat ); + TEST_ST_RR_POST_OP( 5, p.sw, lw, 0xa00aa00a, 12, tdat ); + + # Negative offset + TEST_ST_RR_POST_OP( 6, p.sw, lw, 0x00aa00aa, -12, tdat8 ); + TEST_ST_RR_POST_OP( 7, p.sw, lw, 0xaa00aa00, -8, tdat8 ); + TEST_ST_RR_POST_OP( 8, p.sw, lw, 0x0aa00aa0, -4, tdat8 ); + TEST_ST_RR_POST_OP( 9, p.sw, lw, 0xa00aa00a, 0, tdat8 ); + + #------------------------------------------------------------- + # Bypassing tests + #------------------------------------------------------------- + + TEST_ST_RR_POST_SRC12_BYPASS( 10, 0, 0, p.sw, lw, 0xaabbccdd, 0, tdat ); + TEST_ST_RR_POST_SRC12_BYPASS( 11, 0, 1, p.sw, lw, 0xdaabbccd, 4, tdat ); + TEST_ST_RR_POST_SRC12_BYPASS( 12, 0, 2, p.sw, lw, 0xddaabbcc, 8, tdat ); + TEST_ST_RR_POST_SRC12_BYPASS( 13, 1, 0, p.sw, lw, 0xcddaabbc, 12, tdat ); + TEST_ST_RR_POST_SRC12_BYPASS( 14, 1, 1, p.sw, lw, 0xccddaabb, 16, tdat ); + TEST_ST_RR_POST_SRC12_BYPASS( 15, 2, 0, p.sw, lw, 0xbccddaab, 20, tdat ); + + 
TEST_ST_RR_POST_SRC21_BYPASS( 16, 0, 0, p.sw, lw, 0x00112233, 0, tdat ); + TEST_ST_RR_POST_SRC21_BYPASS( 17, 0, 1, p.sw, lw, 0x30011223, 4, tdat ); + TEST_ST_RR_POST_SRC21_BYPASS( 18, 0, 2, p.sw, lw, 0x33001122, 8, tdat ); + TEST_ST_RR_POST_SRC21_BYPASS( 19, 1, 0, p.sw, lw, 0x23300112, 12, tdat ); + TEST_ST_RR_POST_SRC21_BYPASS( 20, 1, 1, p.sw, lw, 0x22330011, 16, tdat ); + TEST_ST_RR_POST_SRC21_BYPASS( 21, 2, 0, p.sw, lw, 0x12233001, 20, tdat ); + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +tdat: +tdat0: .word 0xdeadbeef +tdat1: .word 0xdeadbeef +tdat2: .word 0xdeadbeef +tdat3: .word 0xdeadbeef +tdat4: .word 0xdeadbeef +tdat5: .word 0xdeadbeef +tdat6: .word 0xdeadbeef +tdat7: .word 0xdeadbeef +tdat8: .word 0xdeadbeef +tdat9: .word 0xdeadbeef + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/snitch_isa.mk b/apps/riscv-tests/isa/snitch_isa.mk index 3a571c2e1..f4d7c072e 100644 --- a/apps/riscv-tests/isa/snitch_isa.mk +++ b/apps/riscv-tests/isa/snitch_isa.mk @@ -33,6 +33,12 @@ rv32um_snitch_sc_tests = \ ifeq ($(xpulpimg),1) rv32uxpulpimg_snitch_sc_tests = \ + p_lb_irpost p_lbu_irpost p_lh_irpost p_lhu_irpost p_lw_irpost \ + p_lb_rrpost p_lbu_rrpost p_lh_rrpost p_lhu_rrpost p_lw_rrpost \ + p_lb_rr p_lbu_rr p_lh_rr p_lhu_rr p_lw_rr \ + p_sb_irpost p_sh_irpost p_sw_irpost \ + p_sb_rrpost p_sh_rrpost p_sw_rrpost \ + p_sb_rr p_sh_rr p_sw_rr \ p_abs \ p_slet p_sletu \ p_min p_minu \ From 916942d493a3448060bc1987423bc72bcc3b2b7b Mon Sep 17 00:00:00 2001 From: Sergio Mazzola Date: Tue, 8 Dec 2020 09:46:15 +0100 Subject: [PATCH 08/65] [riscv-tests] Set dtc path before running Spike in makefile --- apps/riscv-tests/isa/Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/apps/riscv-tests/isa/Makefile b/apps/riscv-tests/isa/Makefile index c7f9b5607..542e05167 100644 --- a/apps/riscv-tests/isa/Makefile +++ b/apps/riscv-tests/isa/Makefile @@ -52,9 +52,11 @@ vpath %.S $(src_dir) $(RISCV_OBJDUMP) $< > $@ %.out: % + 
PATH="$(MEMPOOL_DIR)/install/riscv-isa-sim/bin:$$PATH"; \ $(RISCV_SIM) --isa=rv64gc $< 2> $@ %.out32: % + PATH="$(MEMPOOL_DIR)/install/riscv-isa-sim/bin:$$PATH"; \ $(RISCV_SIM) --isa=rv32gc $< 2> $@ define compile_template From 3aad79e760d1f80d13aff9ee358e7acc98fd1a75 Mon Sep 17 00:00:00 2001 From: Sergio Mazzola Date: Mon, 14 Dec 2020 14:49:17 +0100 Subject: [PATCH 09/65] [riscv-opcodes] Add support to 32-bit MAC operations Added instructions: p.mac, p.msu --- toolchain/riscv-opcodes/encoding_out.h | 6 ++++++ toolchain/riscv-opcodes/inst.sverilog | 2 ++ toolchain/riscv-opcodes/opcodes-xpulpimg_CUSTOM | 4 ++++ 3 files changed, 12 insertions(+) diff --git a/toolchain/riscv-opcodes/encoding_out.h b/toolchain/riscv-opcodes/encoding_out.h index 5e5867b30..44369735c 100644 --- a/toolchain/riscv-opcodes/encoding_out.h +++ b/toolchain/riscv-opcodes/encoding_out.h @@ -1926,6 +1926,10 @@ #define MASK_P_BEQIMM 0x707f #define MATCH_P_BNEIMM 0x3063 #define MASK_P_BNEIMM 0x707f +#define MATCH_P_MAC 0x42000033 +#define MASK_P_MAC 0xfe00707f +#define MATCH_P_MSU 0x42001033 +#define MASK_P_MSU 0xfe00707f #define MATCH_FLAH 0x1007 #define MASK_FLAH 0x707f #define MATCH_FSAH 0x1027 @@ -3694,6 +3698,8 @@ DECLARE_INSN(p_clipr, MATCH_P_CLIPR, MASK_P_CLIPR) DECLARE_INSN(p_clipur, MATCH_P_CLIPUR, MASK_P_CLIPUR) DECLARE_INSN(p_beqimm, MATCH_P_BEQIMM, MASK_P_BEQIMM) DECLARE_INSN(p_bneimm, MATCH_P_BNEIMM, MASK_P_BNEIMM) +DECLARE_INSN(p_mac, MATCH_P_MAC, MASK_P_MAC) +DECLARE_INSN(p_msu, MATCH_P_MSU, MASK_P_MSU) DECLARE_INSN(flah, MATCH_FLAH, MASK_FLAH) DECLARE_INSN(fsah, MATCH_FSAH, MASK_FSAH) DECLARE_INSN(fmadd_ah, MATCH_FMADD_AH, MASK_FMADD_AH) diff --git a/toolchain/riscv-opcodes/inst.sverilog b/toolchain/riscv-opcodes/inst.sverilog index 3242a203b..f8879a42d 100644 --- a/toolchain/riscv-opcodes/inst.sverilog +++ b/toolchain/riscv-opcodes/inst.sverilog @@ -824,6 +824,8 @@ package riscv_instr; localparam [31:0] P_CLIPUR = 32'b0001010??????????110?????0110011; localparam [31:0] P_BEQIMM = 
32'b?????????????????010?????1100011; localparam [31:0] P_BNEIMM = 32'b?????????????????011?????1100011; + localparam [31:0] P_MAC = 32'b0100001??????????000?????0110011; + localparam [31:0] P_MSU = 32'b0100001??????????001?????0110011; localparam [31:0] FLAH = 32'b?????????????????001?????0000111; localparam [31:0] FSAH = 32'b?????????????????001?????0100111; localparam [31:0] FMADD_AH = 32'b?????10??????????101?????1000011; diff --git a/toolchain/riscv-opcodes/opcodes-xpulpimg_CUSTOM b/toolchain/riscv-opcodes/opcodes-xpulpimg_CUSTOM index e80a4e859..ab4f6134e 100644 --- a/toolchain/riscv-opcodes/opcodes-xpulpimg_CUSTOM +++ b/toolchain/riscv-opcodes/opcodes-xpulpimg_CUSTOM @@ -57,3 +57,7 @@ p.clipur rd rs1 rs2 31..25=10 14..12=6 6..2=0x0C 1..0=3 # Immediate branching p.beqimm rs1 imm5 bimm12hi bimm12lo 14..12=2 6..2=0x18 1..0=3 p.bneimm rs1 imm5 bimm12hi bimm12lo 14..12=3 6..2=0x18 1..0=3 + +# MAC operations +p.mac rd rs1 rs2 31..25=33 14..12=0 6..2=0x0C 1..0=3 +p.msu rd rs1 rs2 31..25=33 14..12=1 6..2=0x0C 1..0=3 From 0d9573965b4404e28711cc3f79b333268d1431a3 Mon Sep 17 00:00:00 2001 From: Sergio Mazzola Date: Mon, 14 Dec 2020 16:57:30 +0100 Subject: [PATCH 10/65] [riscv-isa-sim] Add support to 32-bit MAC operations Added instructions: p.mac, p.msu --- toolchain/riscv-isa-sim/disasm/disasm.cc | 2 ++ toolchain/riscv-isa-sim/riscv/insns/p_mac.h | 1 + toolchain/riscv-isa-sim/riscv/insns/p_msu.h | 1 + toolchain/riscv-isa-sim/riscv/riscv.mk.in | 2 ++ 4 files changed, 6 insertions(+) create mode 100644 toolchain/riscv-isa-sim/riscv/insns/p_mac.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/p_msu.h diff --git a/toolchain/riscv-isa-sim/disasm/disasm.cc b/toolchain/riscv-isa-sim/disasm/disasm.cc index e3b5e7a74..61b44e3c6 100644 --- a/toolchain/riscv-isa-sim/disasm/disasm.cc +++ b/toolchain/riscv-isa-sim/disasm/disasm.cc @@ -1361,6 +1361,8 @@ disassembler_t::disassembler_t(int xlen) DEFINE_RTYPE(p_clipur); DEFINE_PBTYPE(p_beqimm); DEFINE_PBTYPE(p_bneimm); + 
DEFINE_RTYPE(p_mac); + DEFINE_RTYPE(p_msu); // provide a default disassembly for all instructions as a fallback #define DECLARE_INSN(code, match, mask) \ diff --git a/toolchain/riscv-isa-sim/riscv/insns/p_mac.h b/toolchain/riscv-isa-sim/riscv/insns/p_mac.h new file mode 100644 index 000000000..bf5c77a14 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/p_mac.h @@ -0,0 +1 @@ +WRITE_RD(sext_xlen(sreg_t(RD) + sext_xlen(sreg_t(RS1) * sreg_t(RS2)))); diff --git a/toolchain/riscv-isa-sim/riscv/insns/p_msu.h b/toolchain/riscv-isa-sim/riscv/insns/p_msu.h new file mode 100644 index 000000000..2a42cf05e --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/p_msu.h @@ -0,0 +1 @@ +WRITE_RD(sext_xlen(sreg_t(RD) - sext_xlen(sreg_t(RS1) * sreg_t(RS2)))); diff --git a/toolchain/riscv-isa-sim/riscv/riscv.mk.in b/toolchain/riscv-isa-sim/riscv/riscv.mk.in index 8ec957ab1..4acb19170 100644 --- a/toolchain/riscv-isa-sim/riscv/riscv.mk.in +++ b/toolchain/riscv-isa-sim/riscv/riscv.mk.in @@ -794,6 +794,8 @@ riscv_insn_ext_xpulpimg = \ p_clipur \ p_beqimm \ p_bneimm \ + p_mac \ + p_msu \ riscv_insn_ext_h = \ hfence_gvma \ From a40a064af8c465d7603086b9e2e9d1ee606b81a1 Mon Sep 17 00:00:00 2001 From: Sergio Mazzola Date: Mon, 14 Dec 2020 17:14:48 +0100 Subject: [PATCH 11/65] [riscv-gnu-toolchain] Add Xpulpv2 MAC operations to Xpulpimg --- toolchain/riscv-gnu-toolchain | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/toolchain/riscv-gnu-toolchain b/toolchain/riscv-gnu-toolchain index 0689d1be3..24f16160f 160000 --- a/toolchain/riscv-gnu-toolchain +++ b/toolchain/riscv-gnu-toolchain @@ -1 +1 @@ -Subproject commit 0689d1be314161a1467566105b6abde7a770bf75 +Subproject commit 24f16160f9da4c04ddda37003b026ad0e98a8623 From d87f98edea5e7634a18b46bf5fc9bb7d59f90012 Mon Sep 17 00:00:00 2001 From: Sergio Mazzola Date: Tue, 15 Dec 2020 10:37:04 +0100 Subject: [PATCH 12/65] [snitch] Add None value to DSPU enum types for muxes --- hardware/deps/snitch/src/snitch_ipu.sv | 18 
++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/hardware/deps/snitch/src/snitch_ipu.sv b/hardware/deps/snitch/src/snitch_ipu.sv index 6f6dc2bcf..ecebd743b 100644 --- a/hardware/deps/snitch/src/snitch_ipu.sv +++ b/hardware/deps/snitch/src/snitch_ipu.sv @@ -208,12 +208,12 @@ module dspu #( // Internal control signals logic cmp_signed; // comparator operation is signed enum logic [1:0] { - Reg, Zero, ClipBound + None, Reg, Zero, ClipBound } cmp_op_b_sel; // selection of shared comparator operands logic clip_unsigned; // clip operation has "0" as lower bound logic clip_register; // if 1 clip operation uses rs2, else ximm enum logic [3:0] { - Abs, Sle, Min, Max, Exths, Exthz, Extbs, Extbz, Clip + Nop, Abs, Sle, Min, Max, Exths, Exthz, Extbs, Extbz, Clip } res_sel; // result selection // -------------------- @@ -222,46 +222,56 @@ module dspu #( always_comb begin cmp_signed = 1'b1; - cmp_op_b_sel = Reg; + cmp_op_b_sel = None; clip_unsigned = 1'b0; clip_register = 1'b0; - res_sel = Abs; + res_sel = Nop; unique casez (operator_i) riscv_instr::P_ABS: begin cmp_op_b_sel = Zero; res_sel = Abs; end riscv_instr::P_SLET: begin + cmp_op_b_sel = Reg; res_sel = Sle; end riscv_instr::P_SLETU: begin cmp_signed = 1'b0; + cmp_op_b_sel = Reg; res_sel = Sle; end riscv_instr::P_MIN: begin + cmp_op_b_sel = Reg; res_sel = Min; end riscv_instr::P_MINU: begin cmp_signed = 1'b0; + cmp_op_b_sel = Reg; res_sel = Min; end riscv_instr::P_MAX: begin + cmp_op_b_sel = Reg; res_sel = Max; end riscv_instr::P_MAXU: begin cmp_signed = 1'b0; + cmp_op_b_sel = Reg; res_sel = Max; end riscv_instr::P_EXTHS: begin + cmp_op_b_sel = Reg; res_sel = Exths; end riscv_instr::P_EXTHZ: begin + cmp_op_b_sel = Reg; res_sel = Exthz; end riscv_instr::P_EXTBS: begin + cmp_op_b_sel = Reg; res_sel = Extbs; end riscv_instr::P_EXTBZ: begin + cmp_op_b_sel = Reg; res_sel = Extbz; end riscv_instr::P_CLIP: begin From f8df0f1f9261d0a681637c1dfc84b559ebc8cf3b Mon Sep 17 00:00:00 2001 From: Sergio 
Mazzola Date: Tue, 15 Dec 2020 11:17:42 +0100 Subject: [PATCH 13/65] [snitch] Implement MAC operations in IPU Added instructions: p.mac, p.msu --- hardware/deps/snitch/src/snitch.sv | 65 +++++++++++++++++--------- hardware/deps/snitch/src/snitch_ipu.sv | 35 ++++++++++++-- 2 files changed, 75 insertions(+), 25 deletions(-) diff --git a/hardware/deps/snitch/src/snitch.sv b/hardware/deps/snitch/src/snitch.sv index 7413a5586..d29442e9a 100644 --- a/hardware/deps/snitch/src/snitch.sv +++ b/hardware/deps/snitch/src/snitch.sv @@ -104,10 +104,11 @@ module snitch #( /* verilator lint_on WIDTH */ logic [31:0] opa, opb; +//logic [31:0] opa, opb, opc; logic [32:0] adder_result; logic [31:0] alu_result; - logic [RegWidth-1:0] rd, rs1, rs2, rs3; + logic [RegWidth-1:0] rd, rs1, rs2; logic stall, lsu_stall; // Register connections logic [RegNrReadPorts-1:0][RegWidth-1:0] gpr_raddr; @@ -177,8 +178,8 @@ module snitch #( } alu_op; enum logic [3:0] { - None, Reg, IImmediate, UImmediate, JImmediate, SImmediate, SFImmediate, PC, CSR, CSRImmediate, PBImmediate, RegRs3 - } opa_select, opb_select; + None, Reg, IImmediate, UImmediate, JImmediate, SImmediate, SFImmediate, PC, CSR, CSRImmediate, PBImmediate, RegRd + } opa_select, opb_select, opc_select; logic write_rd; // write rd desitnation this cycle logic uses_rd; @@ -221,7 +222,8 @@ module snitch #( assign acc_qdata_op_o = inst_data_i; assign acc_qdata_arga_o = {{32{gpr_rdata[0][31]}}, gpr_rdata[0]}; assign acc_qdata_argb_o = {{32{gpr_rdata[1][31]}}, gpr_rdata[1]}; - assign acc_qdata_argc_o = {32'b0, alu_result}; +//assign acc_qdata_argc_o = {32'b0, alu_result}; + assign acc_qdata_argc_o = {{32{gpr_rdata[2][31]}}, gpr_rdata[2]}; // instruction fetch interface assign inst_addr_o = pc_q; @@ -233,8 +235,7 @@ module snitch #( // Scoreboard: Keep track of rd dependencies (only loads at the moment) logic operands_ready; logic dst_ready; - logic rs2_ready, rs3_ready; - logic opa_ready, opb_ready; + logic opa_ready, opb_ready, opc_ready; logic 
dstrd_ready, dstrs1_ready; always_comb begin @@ -245,13 +246,11 @@ module snitch #( if (retire_acc) sb_d[acc_pid_i[RegWidth-1:0]] = 1'b0; sb_d[0] = 1'b0; end - // rediness of registers connected to opb - assign rs2_ready = (opb_select != Reg & opb_select != SImmediate) | ~sb_q[rs2]; - assign rs3_ready = (opb_select != RegRs3) | ~sb_q[rs3]; // TODO(zarubaf): This can probably be described a bit more efficient assign opa_ready = (opa_select != Reg) | ~sb_q[rs1]; - assign opb_ready = rs2_ready & rs3_ready; - assign operands_ready = opa_ready & opb_ready; + assign opb_ready = ((opb_select != Reg & opb_select != SImmediate) | ~sb_q[rs2]) & ((opb_select != RegRd) | ~sb_q[rd]); + assign opc_ready = (opc_select != Reg) | ~sb_q[rd]; + assign operands_ready = opa_ready & opb_ready & opc_ready; // either we are not using the destination register or we need to make // sure that its destination operand is not marked busy in the scoreboard. assign dstrd_ready = ~uses_rd | (uses_rd & ~sb_q[rd]); @@ -289,13 +288,13 @@ module snitch #( assign rd = inst_data_i[7 + RegWidth - 1:7]; assign rs1 = inst_data_i[15 + RegWidth - 1:15]; assign rs2 = inst_data_i[20 + RegWidth - 1:20]; - assign rs3 = inst_data_i[7 + RegWidth - 1:7]; always_comb begin illegal_inst = 1'b0; alu_op = Add; opa_select = None; opb_select = None; + opc_select = None; next_pc = Consec; @@ -1013,7 +1012,7 @@ module snitch #( is_store = 1'b1; is_postincr = 1'b1; opa_select = Reg; - opb_select = RegRs3; + opb_select = RegRd; end else begin illegal_inst = 1'b1; end @@ -1026,7 +1025,7 @@ module snitch #( is_postincr = 1'b1; ls_size = HalfWord; opa_select = Reg; - opb_select = RegRs3; + opb_select = RegRd; end else begin illegal_inst = 1'b1; end @@ -1039,7 +1038,7 @@ module snitch #( is_postincr = 1'b1; ls_size = Word; opa_select = Reg; - opb_select = RegRs3; + opb_select = RegRd; end else begin illegal_inst = 1'b1; end @@ -1049,7 +1048,7 @@ module snitch #( write_rd = 1'b0; is_store = 1'b1; opa_select = Reg; - opb_select = 
RegRs3; + opb_select = RegRd; end else begin illegal_inst = 1'b1; end @@ -1060,7 +1059,7 @@ module snitch #( is_store = 1'b1; ls_size = HalfWord; opa_select = Reg; - opb_select = RegRs3; + opb_select = RegRd; end else begin illegal_inst = 1'b1; end @@ -1071,7 +1070,7 @@ module snitch #( is_store = 1'b1; ls_size = Word; opa_select = Reg; - opb_select = RegRs3; + opb_select = RegRd; end else begin illegal_inst = 1'b1; end @@ -1128,6 +1127,22 @@ module snitch #( illegal_inst = 1'b1; end end + // MAC operations + // Off-load to IPU coprocessor + riscv_instr::P_MAC, // Xpulpimg: p.mac + riscv_instr::P_MSU: begin // Xpulpimg: p.msu + if (snitch_pkg::XPULPIMG) begin + write_rd = 1'b0; + uses_rd = 1'b1; + acc_qvalid_o = valid_instr; + opa_select = Reg; + opb_select = Reg; + opc_select = Reg; + acc_register_rd = 1'b1; + end else begin + illegal_inst = 1'b1; + end + end /* end of Xpulpimg extension */ // TODO(zarubaf): Illegal Instructions @@ -1225,16 +1240,24 @@ module snitch #( PC: opb = pc_q; CSR: opb = csr_rvalue; PBImmediate: opb = pbimm; - RegRs3: opb = gpr_rdata[2]; + RegRd: opb = gpr_rdata[2]; default: opb = '0; endcase end +// always_comb begin +// unique case (opc_select) +// None: opc = '0; +// Reg: opc = gpr_rdata[2]; +// default: opc = '0; +// endcase +// end + assign gpr_raddr[0] = rs1; assign gpr_raddr[1] = rs2; // connect third read port only if present - if (RegNrReadPorts >= 3) begin : gpr_rs3 - assign gpr_raddr[2] = rs3; + if (RegNrReadPorts >= 3) begin : gpr_raddr_2 + assign gpr_raddr[2] = rd; end // -------------------- diff --git a/hardware/deps/snitch/src/snitch_ipu.sv b/hardware/deps/snitch/src/snitch_ipu.sv index ecebd743b..e8da53c65 100644 --- a/hardware/deps/snitch/src/snitch_ipu.sv +++ b/hardware/deps/snitch/src/snitch_ipu.sv @@ -75,7 +75,9 @@ module snitch_ipu #( riscv_instr::P_CLIP, // Xpulpimg: p.clip riscv_instr::P_CLIPU, // Xpulpimg: p.clipu riscv_instr::P_CLIPR, // Xpulpimg: p.clipr - riscv_instr::P_CLIPUR: begin // Xpulpimg: p.clipur + 
riscv_instr::P_CLIPUR, // Xpulpimg: p.clipur + riscv_instr::P_MAC, // Xpulpimg: p.mac + riscv_instr::P_MSU: begin // Xpulpimg: p.msu if (snitch_pkg::XPULPIMG) begin dsp_valid_op = acc_qvalid_i; acc_qready_o = dsp_ready_op; @@ -135,6 +137,7 @@ module snitch_ipu #( .operator_i ( acc_qdata_op_i ), .op_a_i ( acc_qdata_arga_i ), .op_b_i ( acc_qdata_argb_i ), + .op_c_i ( acc_qdata_argc_i ), .in_valid_i ( dsp_valid_op ), .in_ready_o ( dsp_ready_op ), .out_valid_o ( dsp_valid ), @@ -188,6 +191,7 @@ module dspu #( input logic [31:0] operator_i, input logic [Width-1:0] op_a_i, input logic [Width-1:0] op_b_i, + input logic [Width-1:0] op_c_i, input logic in_valid_i, output logic in_ready_o, output logic out_valid_o, @@ -212,8 +216,9 @@ module dspu #( } cmp_op_b_sel; // selection of shared comparator operands logic clip_unsigned; // clip operation has "0" as lower bound logic clip_register; // if 1 clip operation uses rs2, else ximm + logic mul_msu; // multiplication operation is msu enum logic [3:0] { - Nop, Abs, Sle, Min, Max, Exths, Exthz, Extbs, Extbz, Clip + Nop, Abs, Sle, Min, Max, Exths, Exthz, Extbs, Extbz, Clip, Mul } res_sel; // result selection // -------------------- @@ -225,6 +230,7 @@ module dspu #( cmp_op_b_sel = None; clip_unsigned = 1'b0; clip_register = 1'b0; + mul_msu = 1'b0; res_sel = Nop; unique casez (operator_i) riscv_instr::P_ABS: begin @@ -294,6 +300,13 @@ module dspu #( cmp_op_b_sel = ClipBound; res_sel = Clip; end + riscv_instr::P_MAC: begin + res_sel = Mul; + end + riscv_instr::P_MSU: begin + mul_msu = 1'b1; + res_sel = Mul; + end default: ; endcase end @@ -304,8 +317,6 @@ module dspu #( // |___//_/ \_\|_|/_/ \_\|_| /_/ \_\|_| |_||_| // - logic cmp_result; - // -------------------- // Clips // -------------------- @@ -332,6 +343,7 @@ module dspu #( // Shared comparator // -------------------- logic [Width-1:0] cmp_op_a, cmp_op_b; + logic cmp_result; // Comparator operand A assignment assign cmp_op_a = op_a_i; @@ -348,6 +360,20 @@ module dspu #( // 
Instantiate comparator assign cmp_result = $signed({cmp_op_a[Width-1] & cmp_signed, cmp_op_a}) <= $signed({cmp_op_b[Width-1] & cmp_signed, cmp_op_b}); + // -------------------- + // Multiplier + // -------------------- + + // 32x32 into 32 bits multiplier & accumulator + logic [Width-1:0] mul_op_a, mul_op_b; + logic [Width-1:0] mul_result; + + assign mul_op_a = op_a_i ^ {Width{mul_msu}}; + assign mul_op_b = op_b_i & {Width{mul_msu}}; + + // perform either accumulation or subtraction with respect to op_c_i basing on mul_msu + assign mul_result = $signed(op_c_i) + $signed(mul_op_b) + $signed(mul_op_a) * $signed(op_b_i); + // -------------------- // Result generation // -------------------- @@ -378,6 +404,7 @@ module dspu #( // + if clip_op_b >= 0: clip_comp=clip_op_b (i.e. rs1>=0 and clip_op_b>=0) and the result must // be clipped to the upper bound since rs1 > clip_op_b Clip: result_o = cmp_result ? (clip_use_n_bound ? clip_op_b_n : op_a_i) : (op_a_i[Width-1] ? op_a_i : clip_op_b); + Mul: result_o = mul_result; default: result_o = '0; endcase end From d07cbfd1c1460d36aba685087d237cdb14de65b7 Mon Sep 17 00:00:00 2001 From: Sergio Mazzola Date: Thu, 17 Dec 2020 12:31:10 +0100 Subject: [PATCH 14/65] [riscv-tests] Add unit tests for MAC operations Added tests for instructions: p.mac, p.msu --- .../isa/macros/scalar/test_macros.h | 129 ++++++++++++++++++ apps/riscv-tests/isa/rv32uxpulpimg/Makefrag | 1 + apps/riscv-tests/isa/rv32uxpulpimg/p_mac.S | 88 ++++++++++++ apps/riscv-tests/isa/rv32uxpulpimg/p_msu.S | 88 ++++++++++++ apps/riscv-tests/isa/snitch_isa.mk | 1 + 5 files changed, 307 insertions(+) create mode 100644 apps/riscv-tests/isa/rv32uxpulpimg/p_mac.S create mode 100644 apps/riscv-tests/isa/rv32uxpulpimg/p_msu.S diff --git a/apps/riscv-tests/isa/macros/scalar/test_macros.h b/apps/riscv-tests/isa/macros/scalar/test_macros.h index 41366922e..356ede1c3 100644 --- a/apps/riscv-tests/isa/macros/scalar/test_macros.h +++ b/apps/riscv-tests/isa/macros/scalar/test_macros.h 
@@ -263,6 +263,135 @@ test_ ## testnum: \ inst x0, x1, x2; \ ) +#----------------------------------------------------------------------- +# Tests for instructions with 3 register operands +#----------------------------------------------------------------------- + +#define TEST_RRR_OP( testnum, inst, result, val1, val2, val3 ) \ + TEST_CASE( testnum, x14, result, \ + li x1, MASK_XLEN(val1); \ + li x2, MASK_XLEN(val2); \ + li x14, MASK_XLEN(val3); \ + inst x14, x1, x2; \ + ) + +#define TEST_RRR_SRC1_EQ_DEST( testnum, inst, result, val1, val2 ) \ + TEST_CASE( testnum, x1, result, \ + li x1, MASK_XLEN(val1); \ + li x2, MASK_XLEN(val2); \ + inst x1, x1, x2; \ + ) + +#define TEST_RRR_SRC2_EQ_DEST( testnum, inst, result, val1, val2 ) \ + TEST_CASE( testnum, x2, result, \ + li x1, MASK_XLEN(val1); \ + li x2, MASK_XLEN(val2); \ + inst x2, x1, x2; \ + ) + +#define TEST_RRR_SRC12_EQ_DEST( testnum, inst, result, val1 ) \ + TEST_CASE( testnum, x1, result, \ + li x1, MASK_XLEN(val1); \ + inst x1, x1, x1; \ + ) + +#define TEST_RRR_DEST_BYPASS( testnum, nop_cycles, inst, result, val1, val2, val3 ) \ + TEST_CASE( testnum, x6, result, \ + li x4, 0; \ +1: li x1, MASK_XLEN(val1); \ + li x2, MASK_XLEN(val2); \ + li x14, MASK_XLEN(val3); \ + inst x14, x1, x2; \ + TEST_INSERT_NOPS_ ## nop_cycles \ + addi x6, x14, 0; \ + addi x4, x4, 1; \ + li x5, 2; \ + bne x4, x5, 1b \ + ) + +#define TEST_RRR_SRC12_BYPASS( testnum, src1_nops, src2_nops, inst, result, val1, val2, val3 ) \ + TEST_CASE( testnum, x14, result, \ + li x4, 0; \ +1: li x14, MASK_XLEN(val3); \ + li x1, MASK_XLEN(val1); \ + TEST_INSERT_NOPS_ ## src1_nops \ + li x2, MASK_XLEN(val2); \ + TEST_INSERT_NOPS_ ## src2_nops \ + inst x14, x1, x2; \ + addi x4, x4, 1; \ + li x5, 2; \ + bne x4, x5, 1b \ + ) + +#define TEST_RRR_SRC21_BYPASS( testnum, src1_nops, src2_nops, inst, result, val1, val2, val3 ) \ + TEST_CASE( testnum, x14, result, \ + li x4, 0; \ +1: li x14, MASK_XLEN(val3); \ + li x2, MASK_XLEN(val2); \ + TEST_INSERT_NOPS_ ## 
src1_nops \ + li x1, MASK_XLEN(val1); \ + TEST_INSERT_NOPS_ ## src2_nops \ + inst x14, x1, x2; \ + addi x4, x4, 1; \ + li x5, 2; \ + bne x4, x5, 1b \ + ) + +# Actually here we have 3 sources; to avoid too many tests we test rD source bypass only on its own +#define TEST_RRR_SRC3_BYPASS( testnum, nop_cycles, inst, result, val1, val2, val3 ) \ + TEST_CASE( testnum, x14, result, \ + li x4, 0; \ +1: li x1, MASK_XLEN(val1); \ + li x2, MASK_XLEN(val2); \ + li x14, MASK_XLEN(val3); \ + TEST_INSERT_NOPS_ ## nop_cycles \ + inst x14, x1, x2; \ + addi x4, x4, 1; \ + li x5, 2; \ + bne x4, x5, 1b \ + ) + +#define TEST_RRR_ZEROSRC1( testnum, inst, result, val1, val2 ) \ + TEST_CASE( testnum, x2, result, \ + li x1, MASK_XLEN(val1); \ + li x2, MASK_XLEN(val2); \ + inst x2, x0, x1; \ + ) + +#define TEST_RRR_ZEROSRC2( testnum, inst, result, val1, val2 ) \ + TEST_CASE( testnum, x2, result, \ + li x1, MASK_XLEN(val1); \ + li x2, MASK_XLEN(val2); \ + inst x2, x1, x0; \ + ) + +#define TEST_RRR_ZEROSRC3( testnum, inst, result, val1, val2 ) \ + TEST_CASE( testnum, x14, result, \ + li x1, MASK_XLEN(val1); \ + li x2, MASK_XLEN(val2); \ + li x14, 0; \ + inst x14, x1, x2; \ + ) + +#define TEST_RRR_ZEROSRC12( testnum, inst, result, val1 ) \ + TEST_CASE( testnum, x1, result, \ + li x1, MASK_XLEN(val1); \ + inst x1, x0, x0; \ + ) + +#define TEST_RRR_ZEROSRC123( testnum, inst, result ) \ + TEST_CASE( testnum, x1, result, \ + li x1, 0; \ + inst x1, x0, x0; \ + ) + +#define TEST_RRR_ZERODEST( testnum, inst, val1, val2 ) \ + TEST_CASE( testnum, x0, 0, \ + li x1, MASK_XLEN(val1); \ + li x2, MASK_XLEN(val2); \ + inst x0, x1, x2; \ + ) + #----------------------------------------------------------------------- # Test memory instructions (immediate offset) #----------------------------------------------------------------------- diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/Makefrag b/apps/riscv-tests/isa/rv32uxpulpimg/Makefrag index aa362808d..471502857 100644 --- 
a/apps/riscv-tests/isa/rv32uxpulpimg/Makefrag +++ b/apps/riscv-tests/isa/rv32uxpulpimg/Makefrag @@ -18,6 +18,7 @@ rv32uxpulpimg_sc_tests = \ p_clip p_clipu \ p_clipr p_clipur \ p_beqimm p_bneimm \ + p_mac p_msu \ rv32uxpulpimg_p_tests = $(addprefix rv32uxpulpimg-p-, $(rv32uxpulpimg_sc_tests)) rv32uxpulpimg_v_tests = $(addprefix rv32uxpulpimg-v-, $(rv32uxpulpimg_sc_tests)) diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/p_mac.S b/apps/riscv-tests/isa/rv32uxpulpimg/p_mac.S new file mode 100644 index 000000000..3ccd7f41b --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/p_mac.S @@ -0,0 +1,88 @@ +# See LICENSE for license details. + +#***************************************************************************** +# p_mac.S +#----------------------------------------------------------------------------- +# +# Test p.mac instruction. +# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Arithmetic tests + #------------------------------------------------------------- + + TEST_RRR_OP( 2, p.mac, 0x00000000, 0x00000000, 0x00000000, 0x00000000 ) + TEST_RRR_OP( 3, p.mac, 0xFFFFFFFF, 0x00000000, 0x00000000, 0xFFFFFFFF ) + TEST_RRR_OP( 4, p.mac, 0x00000000, 0x00000000, 0xFFFFFFFF, 0x00000000 ) + TEST_RRR_OP( 5, p.mac, 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF ) + TEST_RRR_OP( 6, p.mac, 0x00000000, 0xFFFFFFFF, 0x00000000, 0x00000000 ) + TEST_RRR_OP( 7, p.mac, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF ) + TEST_RRR_OP( 8, p.mac, 0x00000001, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000 ) + TEST_RRR_OP( 9, p.mac, 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF ) + + TEST_RRR_OP( 10, p.mac, 0x4CF50B3F, 0xB463DADE, 0x1C2D45F3, 0x67287485 ) + TEST_RRR_OP( 11, p.mac, 0x01C8425D, 0x5DE547E9, 0xBE923643, 0x20B94A62 ) + TEST_RRR_OP( 12, p.mac, 0x125000F5, 0xD5042C35, 0x113E2192, 0xD265F5BB ) + TEST_RRR_OP( 13, p.mac, 0x117DE9BB, 0x0762A9A5, 0xAB420127, 0x9B426C98 ) + + 
#------------------------------------------------------------- + # Source/Destination tests + #------------------------------------------------------------- + + TEST_RRR_SRC1_EQ_DEST( 14, p.mac, 0x0CEE5928, 0x113E2192, 0x1C2D45F3 ) + TEST_RRR_SRC2_EQ_DEST( 15, p.mac, 0x1F280E82, 0x7E139C55, 0xBE923643 ) + + TEST_RRR_SRC12_EQ_DEST( 16, p.mac, 0x22EE857E, 0x84BB8025 ) + + #------------------------------------------------------------- + # Bypassing tests + #------------------------------------------------------------- + + TEST_RRR_DEST_BYPASS( 17, 0, p.mac, 5, 1, 2, 3 ) + TEST_RRR_DEST_BYPASS( 18, 1, p.mac, 26, 4, 5, 6 ) + TEST_RRR_DEST_BYPASS( 19, 2, p.mac, 65, 7, 8, 9 ) + + TEST_RRR_SRC12_BYPASS( 20, 0, 0, p.mac, 5, 1, 2, 3 ) + TEST_RRR_SRC12_BYPASS( 21, 0, 1, p.mac, 26, 4, 5, 6 ) + TEST_RRR_SRC12_BYPASS( 22, 0, 2, p.mac, 65, 7, 8, 9 ) + TEST_RRR_SRC12_BYPASS( 23, 1, 0, p.mac, 5, -1, -2, 3 ) + TEST_RRR_SRC12_BYPASS( 24, 1, 1, p.mac, 14, 4, 5, -6 ) + TEST_RRR_SRC12_BYPASS( 25, 2, 0, p.mac, -47, -7, 8, 9 ) + + TEST_RRR_SRC21_BYPASS( 26, 0, 0, p.mac, 5, 1, 2, 3 ) + TEST_RRR_SRC21_BYPASS( 27, 0, 1, p.mac, 26, 4, 5, 6 ) + TEST_RRR_SRC21_BYPASS( 28, 0, 2, p.mac, 65, 7, 8, 9 ) + TEST_RRR_SRC21_BYPASS( 29, 1, 0, p.mac, 5, -1, -2, 3 ) + TEST_RRR_SRC21_BYPASS( 30, 1, 1, p.mac, 14, 4, 5, -6 ) + TEST_RRR_SRC21_BYPASS( 31, 2, 0, p.mac, -47, -7, 8, 9 ) + + TEST_RRR_SRC3_BYPASS( 32, 0, p.mac, 26, 4, 5, 6 ) + TEST_RRR_SRC3_BYPASS( 33, 1, p.mac, 65, 7, 8, 9 ) + TEST_RRR_SRC3_BYPASS( 34, 2, p.mac, 5, -1, -2, 3 ) + + TEST_RRR_ZEROSRC1( 35, p.mac, 10, -5, 10 ) + TEST_RRR_ZEROSRC2( 36, p.mac, 7, 32, 7 ) + TEST_RRR_ZEROSRC3( 37, p.mac, -8, -1, 8 ) + + TEST_RRR_ZEROSRC12( 38, p.mac, -3, -3 ) + + TEST_RRR_ZEROSRC123( 39, p.mac, 0 ) + + TEST_RRR_ZERODEST( 40, p.mac, 34, -10 ) + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/p_msu.S b/apps/riscv-tests/isa/rv32uxpulpimg/p_msu.S new file mode 
100644 index 000000000..46f7b5866 --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/p_msu.S @@ -0,0 +1,88 @@ +# See LICENSE for license details. + +#***************************************************************************** +# p_msu.S +#----------------------------------------------------------------------------- +# +# Test p.msu instruction. +# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Arithmetic tests + #------------------------------------------------------------- + + TEST_RRR_OP( 2, p.msu, 0x00000000, 0x00000000, 0x00000000, 0x00000000 ) + TEST_RRR_OP( 3, p.msu, 0xFFFFFFFF, 0x00000000, 0x00000000, 0xFFFFFFFF ) + TEST_RRR_OP( 4, p.msu, 0x00000000, 0x00000000, 0xFFFFFFFF, 0x00000000 ) + TEST_RRR_OP( 5, p.msu, 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF ) + TEST_RRR_OP( 6, p.msu, 0x00000000, 0xFFFFFFFF, 0x00000000, 0x00000000 ) + TEST_RRR_OP( 7, p.msu, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF ) + TEST_RRR_OP( 8, p.msu, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000 ) + TEST_RRR_OP( 9, p.msu, 0xFFFFFFFE, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF ) + + TEST_RRR_OP( 10, p.msu, 0x815BDDCB, 0xB463DADE, 0x1C2D45F3, 0x67287485 ) + TEST_RRR_OP( 11, p.msu, 0x3FAA5267, 0x5DE547E9, 0xBE923643, 0x20B94A62 ) + TEST_RRR_OP( 12, p.msu, 0x927BEA81, 0xD5042C35, 0x113E2192, 0xD265F5BB ) + TEST_RRR_OP( 13, p.msu, 0x2506EF75, 0x0762A9A5, 0xAB420127, 0x9B426C98 ) + + #------------------------------------------------------------- + # Source/Destination tests + #------------------------------------------------------------- + + TEST_RRR_SRC1_EQ_DEST( 14, p.msu, 0x158DE9FC, 0x113E2192, 0x1C2D45F3 ) + TEST_RRR_SRC2_EQ_DEST( 15, p.msu, 0x5DFC5E04, 0x7E139C55, 0xBE923643 ) + + TEST_RRR_SRC12_EQ_DEST( 16, p.msu, 0xE6887ACC, 0x84BB8025 ) + + #------------------------------------------------------------- + # Bypassing tests + 
#------------------------------------------------------------- + + TEST_RRR_DEST_BYPASS( 17, 0, p.msu, 1, 1, 2, 3 ) + TEST_RRR_DEST_BYPASS( 18, 1, p.msu, -14, 4, 5, 6 ) + TEST_RRR_DEST_BYPASS( 19, 2, p.msu, -47, 7, 8, 9 ) + + TEST_RRR_SRC12_BYPASS( 20, 0, 0, p.msu, 1, 1, 2, 3 ) + TEST_RRR_SRC12_BYPASS( 21, 0, 1, p.msu, -14, 4, 5, 6 ) + TEST_RRR_SRC12_BYPASS( 22, 0, 2, p.msu, -47, 7, 8, 9 ) + TEST_RRR_SRC12_BYPASS( 23, 1, 0, p.msu, 1, -1, -2, 3 ) + TEST_RRR_SRC12_BYPASS( 24, 1, 1, p.msu, -26, 4, 5, -6 ) + TEST_RRR_SRC12_BYPASS( 25, 2, 0, p.msu, 65, -7, 8, 9 ) + + TEST_RRR_SRC21_BYPASS( 26, 0, 0, p.msu, 1, 1, 2, 3 ) + TEST_RRR_SRC21_BYPASS( 27, 0, 1, p.msu, -14, 4, 5, 6 ) + TEST_RRR_SRC21_BYPASS( 28, 0, 2, p.msu, -47, 7, 8, 9 ) + TEST_RRR_SRC21_BYPASS( 29, 1, 0, p.msu, 1, -1, -2, 3 ) + TEST_RRR_SRC21_BYPASS( 30, 1, 1, p.msu, -26, 4, 5, -6 ) + TEST_RRR_SRC21_BYPASS( 31, 2, 0, p.msu, 65, -7, 8, 9 ) + + TEST_RRR_SRC3_BYPASS( 32, 0, p.msu, -14, 4, 5, 6 ) + TEST_RRR_SRC3_BYPASS( 33, 1, p.msu, -47, 7, 8, 9 ) + TEST_RRR_SRC3_BYPASS( 34, 2, p.msu, 1, -1, -2, 3 ) + + TEST_RRR_ZEROSRC1( 35, p.msu, 10, -5, 10 ) + TEST_RRR_ZEROSRC2( 36, p.msu, 7, 32, 7 ) + TEST_RRR_ZEROSRC3( 37, p.msu, 8, -1, 8 ) + + TEST_RRR_ZEROSRC12( 38, p.msu, -3, -3 ) + + TEST_RRR_ZEROSRC123( 39, p.msu, 0 ) + + TEST_RRR_ZERODEST( 40, p.msu, 34, -10 ) + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/snitch_isa.mk b/apps/riscv-tests/isa/snitch_isa.mk index f4d7c072e..4b66b2d53 100644 --- a/apps/riscv-tests/isa/snitch_isa.mk +++ b/apps/riscv-tests/isa/snitch_isa.mk @@ -48,6 +48,7 @@ ifeq ($(xpulpimg),1) p_clip p_clipu \ p_clipr p_clipur \ p_beqimm p_bneimm \ + p_mac p_msu \ endif From 55802cbdca27620058b064fb6165a259e1a0e5d2 Mon Sep 17 00:00:00 2001 From: Sergio Mazzola Date: Thu, 17 Dec 2020 12:33:05 +0100 Subject: [PATCH 15/65] [snitch] Merge multiplier module and MAC in DSP unit --- hardware/deps/snitch/src/snitch_ipu.sv | 
172 ++++++++++++++++--------- 1 file changed, 112 insertions(+), 60 deletions(-) diff --git a/hardware/deps/snitch/src/snitch_ipu.sv b/hardware/deps/snitch/src/snitch_ipu.sv index e8da53c65..6192513ba 100644 --- a/hardware/deps/snitch/src/snitch_ipu.sv +++ b/hardware/deps/snitch/src/snitch_ipu.sv @@ -51,8 +51,13 @@ module snitch_ipu #( riscv_instr::MULH, riscv_instr::MULHSU, riscv_instr::MULHU: begin - mul_valid_op = acc_qvalid_i; - acc_qready_o = mul_ready_op; + if (snitch_pkg::XPULPIMG) begin + dsp_valid_op = acc_qvalid_i; + acc_qready_o = dsp_ready_op; + end else begin + mul_valid_op = acc_qvalid_i; + acc_qready_o = mul_ready_op; + end end riscv_instr::DIV, riscv_instr::DIVU, @@ -89,24 +94,6 @@ module snitch_ipu #( endcase end - // Multiplication - multiplier #( - .Width ( 32 ), - .IdWidth ( IdWidth ) - ) i_multiplier ( - .clk_i, - .rst_i, - .id_i ( acc_qid_i ), - .operator_i ( acc_qdata_op_i ), - .operand_a_i ( acc_qdata_arga_i ), - .operand_b_i ( acc_qdata_argb_i ), - .valid_i ( mul_valid_op ), - .ready_o ( mul_ready_op ), - .result_o ( mul.result ), - .valid_o ( mul_valid ), - .ready_i ( mul_ready ), - .id_o ( mul.id ) - ); // Serial Divider serdiv #( .WIDTH ( 32 ), @@ -125,43 +112,61 @@ module snitch_ipu #( .id_o ( div.id ), .res_o ( div.result ) ); - if (snitch_pkg::XPULPIMG) begin : gen_dspu + + if (snitch_pkg::XPULPIMG) begin : gen_xpulpimg // DSP Unit dspu #( .Width ( 32 ), .IdWidth ( IdWidth ) ) i_dspu ( - .clk_i ( clk_i ), - .rst_i ( rst_i ), - .id_i ( acc_qid_i ), - .operator_i ( acc_qdata_op_i ), - .op_a_i ( acc_qdata_arga_i ), - .op_b_i ( acc_qdata_argb_i ), - .op_c_i ( acc_qdata_argc_i ), - .in_valid_i ( dsp_valid_op ), - .in_ready_o ( dsp_ready_op ), - .out_valid_o ( dsp_valid ), - .out_ready_i ( dsp_ready ), - .id_o ( dsp.id ), - .result_o ( dsp.result ) + .clk_i ( clk_i ), + .rst_i ( rst_i ), + .id_i ( acc_qid_i ), + .operator_i ( acc_qdata_op_i ), + .op_a_i ( acc_qdata_arga_i ), + .op_b_i ( acc_qdata_argb_i ), + .op_c_i ( acc_qdata_argc_i ), + 
.in_valid_i ( dsp_valid_op ), + .in_ready_o ( dsp_ready_op ), + .out_valid_o ( dsp_valid ), + .out_ready_i ( dsp_ready ), + .id_o ( dsp.id ), + .result_o ( dsp.result ) ); - end - // Output Arbitration - if (snitch_pkg::XPULPIMG) begin : gen_3inputs + // Output Arbitration stream_arbiter #( .DATA_T ( result_t ), - .N_INP ( 3 ) + .N_INP ( 2 ) ) i_stream_arbiter ( .clk_i, - .rst_ni ( ~rst_i ), - .inp_data_i ( {div, mul, dsp} ), - .inp_valid_i ( {div_valid, mul_valid, dsp_valid} ), - .inp_ready_o ( {div_ready, mul_ready, dsp_ready} ), - .oup_data_o ( oup ), - .oup_valid_o ( acc_pvalid_o ), - .oup_ready_i ( acc_pready_i ) + .rst_ni ( ~rst_i ), + .inp_data_i ( {div, dsp} ), + .inp_valid_i ( {div_valid, dsp_valid} ), + .inp_ready_o ( {div_ready, dsp_ready} ), + .oup_data_o ( oup ), + .oup_valid_o ( acc_pvalid_o ), + .oup_ready_i ( acc_pready_i ) + ); + end else begin : gen_vanilla + // Multiplication + multiplier #( + .Width ( 32 ), + .IdWidth ( IdWidth ) + ) i_multiplier ( + .clk_i, + .rst_i, + .id_i ( acc_qid_i ), + .operator_i ( acc_qdata_op_i ), + .operand_a_i ( acc_qdata_arga_i ), + .operand_b_i ( acc_qdata_argb_i ), + .valid_i ( mul_valid_op ), + .ready_o ( mul_ready_op ), + .result_o ( mul.result ), + .valid_o ( mul_valid ), + .ready_i ( mul_ready ), + .id_o ( mul.id ) ); - end else begin : gen_2inputs + // Output Arbitration stream_arbiter #( .DATA_T ( result_t ), .N_INP ( 2 ) @@ -176,6 +181,7 @@ module snitch_ipu #( .oup_ready_i ( acc_pready_i ) ); end + assign acc_pdata_o = oup.result; assign acc_pid_o = oup.id; endmodule @@ -216,9 +222,14 @@ module dspu #( } cmp_op_b_sel; // selection of shared comparator operands logic clip_unsigned; // clip operation has "0" as lower bound logic clip_register; // if 1 clip operation uses rs2, else ximm - logic mul_msu; // multiplication operation is msu + enum logic [1:0] { + NoMul, MulLow, MulHigh, MulMac + } mul_op; // type of multiplication operation + logic mac_msu; // multiplication operation is MSU + logic 
mul_op_a_sign; // sign of multiplier operand a + logic mac_op_b_sign; // sign of multiplier operand b enum logic [3:0] { - Nop, Abs, Sle, Min, Max, Exths, Exthz, Extbs, Extbz, Clip, Mul + Nop, Abs, Sle, Min, Max, Exths, Exthz, Extbs, Extbz, Clip, Mac } res_sel; // result selection // -------------------- @@ -230,9 +241,35 @@ module dspu #( cmp_op_b_sel = None; clip_unsigned = 1'b0; clip_register = 1'b0; - mul_msu = 1'b0; + mul_op = NoMul; + mac_msu = 1'b0; + mul_op_a_sign = 1'b0; + mac_op_b_sign = 1'b0; res_sel = Nop; unique casez (operator_i) + // Multiplications from M extension + riscv_instr::MUL: begin + mul_op = MulLow; + mul_op_a_sign = 1'b1; + mac_op_b_sign = 1'b1; + res_sel = Mac; + end + riscv_instr::MULH: begin + mul_op = MulHigh; + mul_op_a_sign = 1'b1; + mac_op_b_sign = 1'b1; + res_sel = Mac; + end + riscv_instr::MULHSU: begin + mul_op = MulHigh; + mul_op_a_sign = 1'b1; + res_sel = Mac; + end + riscv_instr::MULHU: begin + mul_op = MulHigh; + res_sel = Mac; + end + // Instructions from Xpulpimg riscv_instr::P_ABS: begin cmp_op_b_sel = Zero; res_sel = Abs; @@ -301,11 +338,17 @@ module dspu #( res_sel = Clip; end riscv_instr::P_MAC: begin - res_sel = Mul; + mul_op = MulMac; + mul_op_a_sign = 1'b1; + mac_op_b_sign = 1'b1; + res_sel = Mac; end riscv_instr::P_MSU: begin - mul_msu = 1'b1; - res_sel = Mul; + mul_op = MulMac; + mac_msu = 1'b1; + mul_op_a_sign = 1'b1; + mac_op_b_sign = 1'b1; + res_sel = Mac; end default: ; endcase @@ -361,18 +404,27 @@ module dspu #( assign cmp_result = $signed({cmp_op_a[Width-1] & cmp_signed, cmp_op_a}) <= $signed({cmp_op_b[Width-1] & cmp_signed, cmp_op_b}); // -------------------- - // Multiplier + // Multiplier & acc // -------------------- // 32x32 into 32 bits multiplier & accumulator - logic [Width-1:0] mul_op_a, mul_op_b; - logic [Width-1:0] mul_result; + logic [Width-1:0] mul_op_a; + logic [2*Width-1:0] mul_result; + logic [Width-1:0] mac_result; + + assign mul_op_a = mac_msu ? 
-op_a_i : op_a_i; // op_a_i is sign-inverted if mac_msu=1, to have -op_a*op_b - assign mul_op_a = op_a_i ^ {Width{mul_msu}}; - assign mul_op_b = op_b_i & {Width{mul_msu}}; + // 32-bits input, 64-bits output multiplier + assign mul_result = $signed({mul_op_a[Width-1] & mul_op_a_sign, mul_op_a}) * $signed({op_b_i[Width-1] & mac_op_b_sign, op_b_i}); - // perform either accumulation or subtraction with respect to op_c_i basing on mul_msu - assign mul_result = $signed(op_c_i) + $signed(mul_op_b) + $signed(mul_op_a) * $signed(op_b_i); + always_comb begin + unique case (mul_op) + MulLow: mac_result = mul_result[Width-1:0]; // mul, take lowest 32 bits + MulHigh: mac_result = mul_result[2*Width-1:Width]; // mul high, take highest 32 bits + MulMac: mac_result = op_c_i + mul_result[Width-1:0]; // accumulate + default: mac_result = '0; + endcase + end // -------------------- // Result generation @@ -404,7 +456,7 @@ module dspu #( // + if clip_op_b >= 0: clip_comp=clip_op_b (i.e. rs1>=0 and clip_op_b>=0) and the result must // be clipped to the upper bound since rs1 > clip_op_b Clip: result_o = cmp_result ? (clip_use_n_bound ? clip_op_b_n : op_a_i) : (op_a_i[Width-1] ? 
op_a_i : clip_op_b); - Mul: result_o = mul_result; + Mac: result_o = mac_result; default: result_o = '0; endcase end From 82d33ec75f01755adc3fd44da6cdb4275a5f7458 Mon Sep 17 00:00:00 2001 From: Sergio Mazzola Date: Tue, 5 Jan 2021 12:21:03 +0100 Subject: [PATCH 16/65] [riscv-opcodes] Add Xpulpv2 SIMD arithmetical instructions Added instructions: SIMD add, sub, avg, min, max, shifts, logicals, abs, extract, insert --- toolchain/riscv-opcodes/Makefile | 2 +- toolchain/riscv-opcodes/encoding_out.h | 1599 +++-------------- toolchain/riscv-opcodes/inst.sverilog | 533 +----- .../riscv-opcodes/opcodes-xpulpimg_CUSTOM | 101 ++ toolchain/riscv-opcodes/parse_opcodes | 1 + 5 files changed, 471 insertions(+), 1765 deletions(-) diff --git a/toolchain/riscv-opcodes/Makefile b/toolchain/riscv-opcodes/Makefile index 12d02b4aa..9afe3b7eb 100644 --- a/toolchain/riscv-opcodes/Makefile +++ b/toolchain/riscv-opcodes/Makefile @@ -7,7 +7,7 @@ MY_OPCODES := opcodes-frep_CUSTOM opcodes-xpulpimg_CUSTOM opcodes-rv32d-zfh_DRAF ALL_OPCODES := opcodes-pseudo $(ALL_REAL_OPCODES) $(MY_OPCODES) opcodes-rvv-pseudo # Opcodes to be discarded -DISCARDED_OPCODES := opcodes-frep_CUSTOM +DISCARDED_OPCODES := opcodes-frep_CUSTOM opcodes-rvv OPCODES = $(filter-out $(sort $(DISCARDED_OPCODES)), $(sort $(ALL_OPCODES))) diff --git a/toolchain/riscv-opcodes/encoding_out.h b/toolchain/riscv-opcodes/encoding_out.h index 44369735c..bb0afda67 100644 --- a/toolchain/riscv-opcodes/encoding_out.h +++ b/toolchain/riscv-opcodes/encoding_out.h @@ -924,888 +924,6 @@ #define MASK_C_SWSP 0xe003 #define MATCH_C_FSWSP 0xe002 #define MASK_C_FSWSP 0xe003 -#define MATCH_VSETVLI 0x7057 -#define MASK_VSETVLI 0x8000707f -#define MATCH_VSETVL 0x80007057 -#define MASK_VSETVL 0xfe00707f -#define MATCH_VLE8_V 0x7 -#define MASK_VLE8_V 0x1df0707f -#define MATCH_VLE16_V 0x5007 -#define MASK_VLE16_V 0x1df0707f -#define MATCH_VLE32_V 0x6007 -#define MASK_VLE32_V 0x1df0707f -#define MATCH_VLE64_V 0x7007 -#define MASK_VLE64_V 0x1df0707f 
-#define MATCH_VLE128_V 0x10000007 -#define MASK_VLE128_V 0x1df0707f -#define MATCH_VLE256_V 0x10005007 -#define MASK_VLE256_V 0x1df0707f -#define MATCH_VLE512_V 0x10006007 -#define MASK_VLE512_V 0x1df0707f -#define MATCH_VLE1024_V 0x10007007 -#define MASK_VLE1024_V 0x1df0707f -#define MATCH_VSE8_V 0x27 -#define MASK_VSE8_V 0x1df0707f -#define MATCH_VSE16_V 0x5027 -#define MASK_VSE16_V 0x1df0707f -#define MATCH_VSE32_V 0x6027 -#define MASK_VSE32_V 0x1df0707f -#define MATCH_VSE64_V 0x7027 -#define MASK_VSE64_V 0x1df0707f -#define MATCH_VSE128_V 0x10000027 -#define MASK_VSE128_V 0x1df0707f -#define MATCH_VSE256_V 0x10005027 -#define MASK_VSE256_V 0x1df0707f -#define MATCH_VSE512_V 0x10006027 -#define MASK_VSE512_V 0x1df0707f -#define MATCH_VSE1024_V 0x10007027 -#define MASK_VSE1024_V 0x1df0707f -#define MATCH_VLSE8_V 0x8000007 -#define MASK_VLSE8_V 0x1c00707f -#define MATCH_VLSE16_V 0x8005007 -#define MASK_VLSE16_V 0x1c00707f -#define MATCH_VLSE32_V 0x8006007 -#define MASK_VLSE32_V 0x1c00707f -#define MATCH_VLSE64_V 0x8007007 -#define MASK_VLSE64_V 0x1c00707f -#define MATCH_VLSE128_V 0x18000007 -#define MASK_VLSE128_V 0x1c00707f -#define MATCH_VLSE256_V 0x18005007 -#define MASK_VLSE256_V 0x1c00707f -#define MATCH_VLSE512_V 0x18006007 -#define MASK_VLSE512_V 0x1c00707f -#define MATCH_VLSE1024_V 0x18007007 -#define MASK_VLSE1024_V 0x1c00707f -#define MATCH_VSSE8_V 0x8000027 -#define MASK_VSSE8_V 0x1c00707f -#define MATCH_VSSE16_V 0x8005027 -#define MASK_VSSE16_V 0x1c00707f -#define MATCH_VSSE32_V 0x8006027 -#define MASK_VSSE32_V 0x1c00707f -#define MATCH_VSSE64_V 0x8007027 -#define MASK_VSSE64_V 0x1c00707f -#define MATCH_VSSE128_V 0x18000027 -#define MASK_VSSE128_V 0x1c00707f -#define MATCH_VSSE256_V 0x18005027 -#define MASK_VSSE256_V 0x1c00707f -#define MATCH_VSSE512_V 0x18006027 -#define MASK_VSSE512_V 0x1c00707f -#define MATCH_VSSE1024_V 0x18007027 -#define MASK_VSSE1024_V 0x1c00707f -#define MATCH_VLXEI8_V 0xc000007 -#define MASK_VLXEI8_V 0x1c00707f -#define 
MATCH_VLXEI16_V 0xc005007 -#define MASK_VLXEI16_V 0x1c00707f -#define MATCH_VLXEI32_V 0xc006007 -#define MASK_VLXEI32_V 0x1c00707f -#define MATCH_VLXEI64_V 0xc007007 -#define MASK_VLXEI64_V 0x1c00707f -#define MATCH_VLXEI128_V 0x1c000007 -#define MASK_VLXEI128_V 0x1c00707f -#define MATCH_VLXEI256_V 0x1c005007 -#define MASK_VLXEI256_V 0x1c00707f -#define MATCH_VLXEI512_V 0x1c006007 -#define MASK_VLXEI512_V 0x1c00707f -#define MATCH_VLXEI1024_V 0x1c007007 -#define MASK_VLXEI1024_V 0x1c00707f -#define MATCH_VSXEI8_V 0xc000027 -#define MASK_VSXEI8_V 0x1c00707f -#define MATCH_VSXEI16_V 0xc005027 -#define MASK_VSXEI16_V 0x1c00707f -#define MATCH_VSXEI32_V 0xc006027 -#define MASK_VSXEI32_V 0x1c00707f -#define MATCH_VSXEI64_V 0xc007027 -#define MASK_VSXEI64_V 0x1c00707f -#define MATCH_VSXEI128_V 0x1c000027 -#define MASK_VSXEI128_V 0x1c00707f -#define MATCH_VSXEI256_V 0x1c005027 -#define MASK_VSXEI256_V 0x1c00707f -#define MATCH_VSXEI512_V 0x1c006027 -#define MASK_VSXEI512_V 0x1c00707f -#define MATCH_VSXEI1024_V 0x1c007027 -#define MASK_VSXEI1024_V 0x1c00707f -#define MATCH_VSUXEI8_V 0x4000027 -#define MASK_VSUXEI8_V 0x1c00707f -#define MATCH_VSUXEI16_V 0x4005027 -#define MASK_VSUXEI16_V 0x1c00707f -#define MATCH_VSUXEI32_V 0x4006027 -#define MASK_VSUXEI32_V 0x1c00707f -#define MATCH_VSUXEI64_V 0x4007027 -#define MASK_VSUXEI64_V 0x1c00707f -#define MATCH_VSUXEI128_V 0x14000027 -#define MASK_VSUXEI128_V 0x1c00707f -#define MATCH_VSUXEI256_V 0x14005027 -#define MASK_VSUXEI256_V 0x1c00707f -#define MATCH_VSUXEI512_V 0x14006027 -#define MASK_VSUXEI512_V 0x1c00707f -#define MATCH_VSUXEI1024_V 0x14007027 -#define MASK_VSUXEI1024_V 0x1c00707f -#define MATCH_VLE8FF_V 0x1000007 -#define MASK_VLE8FF_V 0x1df0707f -#define MATCH_VLE16FF_V 0x1005007 -#define MASK_VLE16FF_V 0x1df0707f -#define MATCH_VLE32FF_V 0x1006007 -#define MASK_VLE32FF_V 0x1df0707f -#define MATCH_VLE64FF_V 0x1007007 -#define MASK_VLE64FF_V 0x1df0707f -#define MATCH_VLE128FF_V 0x11000007 -#define MASK_VLE128FF_V 
0x1df0707f -#define MATCH_VLE256FF_V 0x11005007 -#define MASK_VLE256FF_V 0x1df0707f -#define MATCH_VLE512FF_V 0x11006007 -#define MASK_VLE512FF_V 0x1df0707f -#define MATCH_VLE1024FF_V 0x11007007 -#define MASK_VLE1024FF_V 0x1df0707f -#define MATCH_VL1RE8_V 0x2800007 -#define MASK_VL1RE8_V 0xfff0707f -#define MATCH_VL1RE16_V 0x2805007 -#define MASK_VL1RE16_V 0xfff0707f -#define MATCH_VL1RE32_V 0x2806007 -#define MASK_VL1RE32_V 0xfff0707f -#define MATCH_VL1RE64_V 0x2807007 -#define MASK_VL1RE64_V 0xfff0707f -#define MATCH_VL2RE8_V 0x22800007 -#define MASK_VL2RE8_V 0xfff0707f -#define MATCH_VL2RE16_V 0x22805007 -#define MASK_VL2RE16_V 0xfff0707f -#define MATCH_VL2RE32_V 0x22806007 -#define MASK_VL2RE32_V 0xfff0707f -#define MATCH_VL2RE64_V 0x22807007 -#define MASK_VL2RE64_V 0xfff0707f -#define MATCH_VL4RE8_V 0x62800007 -#define MASK_VL4RE8_V 0xfff0707f -#define MATCH_VL4RE16_V 0x62805007 -#define MASK_VL4RE16_V 0xfff0707f -#define MATCH_VL4RE32_V 0x62806007 -#define MASK_VL4RE32_V 0xfff0707f -#define MATCH_VL4RE64_V 0x62807007 -#define MASK_VL4RE64_V 0xfff0707f -#define MATCH_VL8RE8_V 0xe2800007 -#define MASK_VL8RE8_V 0xfff0707f -#define MATCH_VL8RE16_V 0xe2805007 -#define MASK_VL8RE16_V 0xfff0707f -#define MATCH_VL8RE32_V 0xe2806007 -#define MASK_VL8RE32_V 0xfff0707f -#define MATCH_VL8RE64_V 0xe2807007 -#define MASK_VL8RE64_V 0xfff0707f -#define MATCH_VS1R_V 0x2800027 -#define MASK_VS1R_V 0xfff0707f -#define MATCH_VS2R_V 0x22800027 -#define MASK_VS2R_V 0xfff0707f -#define MATCH_VS4R_V 0x62800027 -#define MASK_VS4R_V 0xfff0707f -#define MATCH_VS8R_V 0xe2800027 -#define MASK_VS8R_V 0xfff0707f -#define MATCH_VFADD_VF 0x5057 -#define MASK_VFADD_VF 0xfc00707f -#define MATCH_VFSUB_VF 0x8005057 -#define MASK_VFSUB_VF 0xfc00707f -#define MATCH_VFMIN_VF 0x10005057 -#define MASK_VFMIN_VF 0xfc00707f -#define MATCH_VFMAX_VF 0x18005057 -#define MASK_VFMAX_VF 0xfc00707f -#define MATCH_VFSGNJ_VF 0x20005057 -#define MASK_VFSGNJ_VF 0xfc00707f -#define MATCH_VFSGNJN_VF 0x24005057 
-#define MASK_VFSGNJN_VF 0xfc00707f -#define MATCH_VFSGNJX_VF 0x28005057 -#define MASK_VFSGNJX_VF 0xfc00707f -#define MATCH_VFSLIDE1UP_VF 0x38005057 -#define MASK_VFSLIDE1UP_VF 0xfc00707f -#define MATCH_VFSLIDE1DOWN_VF 0x3c005057 -#define MASK_VFSLIDE1DOWN_VF 0xfc00707f -#define MATCH_VFMV_S_F 0x42005057 -#define MASK_VFMV_S_F 0xfff0707f -#define MATCH_VFMERGE_VFM 0x5c005057 -#define MASK_VFMERGE_VFM 0xfe00707f -#define MATCH_VFMV_V_F 0x5e005057 -#define MASK_VFMV_V_F 0xfff0707f -#define MATCH_VMFEQ_VF 0x60005057 -#define MASK_VMFEQ_VF 0xfc00707f -#define MATCH_VMFLE_VF 0x64005057 -#define MASK_VMFLE_VF 0xfc00707f -#define MATCH_VMFLT_VF 0x6c005057 -#define MASK_VMFLT_VF 0xfc00707f -#define MATCH_VMFNE_VF 0x70005057 -#define MASK_VMFNE_VF 0xfc00707f -#define MATCH_VMFGT_VF 0x74005057 -#define MASK_VMFGT_VF 0xfc00707f -#define MATCH_VMFGE_VF 0x7c005057 -#define MASK_VMFGE_VF 0xfc00707f -#define MATCH_VFDIV_VF 0x80005057 -#define MASK_VFDIV_VF 0xfc00707f -#define MATCH_VFRDIV_VF 0x84005057 -#define MASK_VFRDIV_VF 0xfc00707f -#define MATCH_VFMUL_VF 0x90005057 -#define MASK_VFMUL_VF 0xfc00707f -#define MATCH_VFRSUB_VF 0x9c005057 -#define MASK_VFRSUB_VF 0xfc00707f -#define MATCH_VFMADD_VF 0xa0005057 -#define MASK_VFMADD_VF 0xfc00707f -#define MATCH_VFNMADD_VF 0xa4005057 -#define MASK_VFNMADD_VF 0xfc00707f -#define MATCH_VFMSUB_VF 0xa8005057 -#define MASK_VFMSUB_VF 0xfc00707f -#define MATCH_VFNMSUB_VF 0xac005057 -#define MASK_VFNMSUB_VF 0xfc00707f -#define MATCH_VFMACC_VF 0xb0005057 -#define MASK_VFMACC_VF 0xfc00707f -#define MATCH_VFNMACC_VF 0xb4005057 -#define MASK_VFNMACC_VF 0xfc00707f -#define MATCH_VFMSAC_VF 0xb8005057 -#define MASK_VFMSAC_VF 0xfc00707f -#define MATCH_VFNMSAC_VF 0xbc005057 -#define MASK_VFNMSAC_VF 0xfc00707f -#define MATCH_VFWADD_VF 0xc0005057 -#define MASK_VFWADD_VF 0xfc00707f -#define MATCH_VFWSUB_VF 0xc8005057 -#define MASK_VFWSUB_VF 0xfc00707f -#define MATCH_VFWADD_WF 0xd0005057 -#define MASK_VFWADD_WF 0xfc00707f -#define MATCH_VFWSUB_WF 
0xd8005057 -#define MASK_VFWSUB_WF 0xfc00707f -#define MATCH_VFWMUL_VF 0xe0005057 -#define MASK_VFWMUL_VF 0xfc00707f -#define MATCH_VFWMACC_VF 0xf0005057 -#define MASK_VFWMACC_VF 0xfc00707f -#define MATCH_VFWNMACC_VF 0xf4005057 -#define MASK_VFWNMACC_VF 0xfc00707f -#define MATCH_VFWMSAC_VF 0xf8005057 -#define MASK_VFWMSAC_VF 0xfc00707f -#define MATCH_VFWNMSAC_VF 0xfc005057 -#define MASK_VFWNMSAC_VF 0xfc00707f -#define MATCH_VFADD_VV 0x1057 -#define MASK_VFADD_VV 0xfc00707f -#define MATCH_VFREDSUM_VS 0x4001057 -#define MASK_VFREDSUM_VS 0xfc00707f -#define MATCH_VFSUB_VV 0x8001057 -#define MASK_VFSUB_VV 0xfc00707f -#define MATCH_VFREDOSUM_VS 0xc001057 -#define MASK_VFREDOSUM_VS 0xfc00707f -#define MATCH_VFMIN_VV 0x10001057 -#define MASK_VFMIN_VV 0xfc00707f -#define MATCH_VFREDMIN_VS 0x14001057 -#define MASK_VFREDMIN_VS 0xfc00707f -#define MATCH_VFMAX_VV 0x18001057 -#define MASK_VFMAX_VV 0xfc00707f -#define MATCH_VFREDMAX_VS 0x1c001057 -#define MASK_VFREDMAX_VS 0xfc00707f -#define MATCH_VFSGNJ_VV 0x20001057 -#define MASK_VFSGNJ_VV 0xfc00707f -#define MATCH_VFSGNJN_VV 0x24001057 -#define MASK_VFSGNJN_VV 0xfc00707f -#define MATCH_VFSGNJX_VV 0x28001057 -#define MASK_VFSGNJX_VV 0xfc00707f -#define MATCH_VFMV_F_S 0x42001057 -#define MASK_VFMV_F_S 0xfe0ff07f -#define MATCH_VMFEQ_VV 0x60001057 -#define MASK_VMFEQ_VV 0xfc00707f -#define MATCH_VMFLE_VV 0x64001057 -#define MASK_VMFLE_VV 0xfc00707f -#define MATCH_VMFLT_VV 0x6c001057 -#define MASK_VMFLT_VV 0xfc00707f -#define MATCH_VMFNE_VV 0x70001057 -#define MASK_VMFNE_VV 0xfc00707f -#define MATCH_VFDIV_VV 0x80001057 -#define MASK_VFDIV_VV 0xfc00707f -#define MATCH_VFMUL_VV 0x90001057 -#define MASK_VFMUL_VV 0xfc00707f -#define MATCH_VFMADD_VV 0xa0001057 -#define MASK_VFMADD_VV 0xfc00707f -#define MATCH_VFNMADD_VV 0xa4001057 -#define MASK_VFNMADD_VV 0xfc00707f -#define MATCH_VFMSUB_VV 0xa8001057 -#define MASK_VFMSUB_VV 0xfc00707f -#define MATCH_VFNMSUB_VV 0xac001057 -#define MASK_VFNMSUB_VV 0xfc00707f -#define MATCH_VFMACC_VV 
0xb0001057 -#define MASK_VFMACC_VV 0xfc00707f -#define MATCH_VFNMACC_VV 0xb4001057 -#define MASK_VFNMACC_VV 0xfc00707f -#define MATCH_VFMSAC_VV 0xb8001057 -#define MASK_VFMSAC_VV 0xfc00707f -#define MATCH_VFNMSAC_VV 0xbc001057 -#define MASK_VFNMSAC_VV 0xfc00707f -#define MATCH_VFCVT_XU_F_V 0x48001057 -#define MASK_VFCVT_XU_F_V 0xfc0ff07f -#define MATCH_VFCVT_X_F_V 0x48009057 -#define MASK_VFCVT_X_F_V 0xfc0ff07f -#define MATCH_VFCVT_F_XU_V 0x48011057 -#define MASK_VFCVT_F_XU_V 0xfc0ff07f -#define MATCH_VFCVT_F_X_V 0x48019057 -#define MASK_VFCVT_F_X_V 0xfc0ff07f -#define MATCH_VFCVT_RTZ_XU_F_V 0x48031057 -#define MASK_VFCVT_RTZ_XU_F_V 0xfc0ff07f -#define MATCH_VFCVT_RTZ_X_F_V 0x48039057 -#define MASK_VFCVT_RTZ_X_F_V 0xfc0ff07f -#define MATCH_VFWCVT_XU_F_V 0x48041057 -#define MASK_VFWCVT_XU_F_V 0xfc0ff07f -#define MATCH_VFWCVT_X_F_V 0x48049057 -#define MASK_VFWCVT_X_F_V 0xfc0ff07f -#define MATCH_VFWCVT_F_XU_V 0x48051057 -#define MASK_VFWCVT_F_XU_V 0xfc0ff07f -#define MATCH_VFWCVT_F_X_V 0x48059057 -#define MASK_VFWCVT_F_X_V 0xfc0ff07f -#define MATCH_VFWCVT_F_F_V 0x48061057 -#define MASK_VFWCVT_F_F_V 0xfc0ff07f -#define MATCH_VFWCVT_RTZ_XU_F_V 0x48071057 -#define MASK_VFWCVT_RTZ_XU_F_V 0xfc0ff07f -#define MATCH_VFWCVT_RTZ_X_F_V 0x48079057 -#define MASK_VFWCVT_RTZ_X_F_V 0xfc0ff07f -#define MATCH_VFNCVT_XU_F_W 0x48081057 -#define MASK_VFNCVT_XU_F_W 0xfc0ff07f -#define MATCH_VFNCVT_X_F_W 0x48089057 -#define MASK_VFNCVT_X_F_W 0xfc0ff07f -#define MATCH_VFNCVT_F_XU_W 0x48091057 -#define MASK_VFNCVT_F_XU_W 0xfc0ff07f -#define MATCH_VFNCVT_F_X_W 0x48099057 -#define MASK_VFNCVT_F_X_W 0xfc0ff07f -#define MATCH_VFNCVT_F_F_W 0x480a1057 -#define MASK_VFNCVT_F_F_W 0xfc0ff07f -#define MATCH_VFNCVT_ROD_F_F_W 0x480a9057 -#define MASK_VFNCVT_ROD_F_F_W 0xfc0ff07f -#define MATCH_VFNCVT_RTZ_XU_F_W 0x480b1057 -#define MASK_VFNCVT_RTZ_XU_F_W 0xfc0ff07f -#define MATCH_VFNCVT_RTZ_X_F_W 0x480b9057 -#define MASK_VFNCVT_RTZ_X_F_W 0xfc0ff07f -#define MATCH_VFSQRT_V 0x4c001057 -#define MASK_VFSQRT_V 
0xfc0ff07f -#define MATCH_VFRSQRTE7_V 0x4c021057 -#define MASK_VFRSQRTE7_V 0xfc0ff07f -#define MATCH_VFRECE7_V 0x4c029057 -#define MASK_VFRECE7_V 0xfc0ff07f -#define MATCH_VFCLASS_V 0x4c081057 -#define MASK_VFCLASS_V 0xfc0ff07f -#define MATCH_VFWADD_VV 0xc0001057 -#define MASK_VFWADD_VV 0xfc00707f -#define MATCH_VFWREDSUM_VS 0xc4001057 -#define MASK_VFWREDSUM_VS 0xfc00707f -#define MATCH_VFWSUB_VV 0xc8001057 -#define MASK_VFWSUB_VV 0xfc00707f -#define MATCH_VFWREDOSUM_VS 0xcc001057 -#define MASK_VFWREDOSUM_VS 0xfc00707f -#define MATCH_VFWADD_WV 0xd0001057 -#define MASK_VFWADD_WV 0xfc00707f -#define MATCH_VFWSUB_WV 0xd8001057 -#define MASK_VFWSUB_WV 0xfc00707f -#define MATCH_VFWMUL_VV 0xe0001057 -#define MASK_VFWMUL_VV 0xfc00707f -#define MATCH_VFDOT_VV 0xe4001057 -#define MASK_VFDOT_VV 0xfc00707f -#define MATCH_VFWMACC_VV 0xf0001057 -#define MASK_VFWMACC_VV 0xfc00707f -#define MATCH_VFWNMACC_VV 0xf4001057 -#define MASK_VFWNMACC_VV 0xfc00707f -#define MATCH_VFWMSAC_VV 0xf8001057 -#define MASK_VFWMSAC_VV 0xfc00707f -#define MATCH_VFWNMSAC_VV 0xfc001057 -#define MASK_VFWNMSAC_VV 0xfc00707f -#define MATCH_VADD_VX 0x4057 -#define MASK_VADD_VX 0xfc00707f -#define MATCH_VSUB_VX 0x8004057 -#define MASK_VSUB_VX 0xfc00707f -#define MATCH_VRSUB_VX 0xc004057 -#define MASK_VRSUB_VX 0xfc00707f -#define MATCH_VMINU_VX 0x10004057 -#define MASK_VMINU_VX 0xfc00707f -#define MATCH_VMIN_VX 0x14004057 -#define MASK_VMIN_VX 0xfc00707f -#define MATCH_VMAXU_VX 0x18004057 -#define MASK_VMAXU_VX 0xfc00707f -#define MATCH_VMAX_VX 0x1c004057 -#define MASK_VMAX_VX 0xfc00707f -#define MATCH_VAND_VX 0x24004057 -#define MASK_VAND_VX 0xfc00707f -#define MATCH_VOR_VX 0x28004057 -#define MASK_VOR_VX 0xfc00707f -#define MATCH_VXOR_VX 0x2c004057 -#define MASK_VXOR_VX 0xfc00707f -#define MATCH_VRGATHER_VX 0x30004057 -#define MASK_VRGATHER_VX 0xfc00707f -#define MATCH_VSLIDEUP_VX 0x38004057 -#define MASK_VSLIDEUP_VX 0xfc00707f -#define MATCH_VSLIDEDOWN_VX 0x3c004057 -#define MASK_VSLIDEDOWN_VX 
0xfc00707f -#define MATCH_VADC_VXM 0x40004057 -#define MASK_VADC_VXM 0xfe00707f -#define MATCH_VMADC_VXM 0x44004057 -#define MASK_VMADC_VXM 0xfc00707f -#define MATCH_VSBC_VXM 0x48004057 -#define MASK_VSBC_VXM 0xfe00707f -#define MATCH_VMSBC_VXM 0x4c004057 -#define MASK_VMSBC_VXM 0xfc00707f -#define MATCH_VMERGE_VXM 0x5c004057 -#define MASK_VMERGE_VXM 0xfe00707f -#define MATCH_VMV_V_X 0x5e004057 -#define MASK_VMV_V_X 0xfff0707f -#define MATCH_VMSEQ_VX 0x60004057 -#define MASK_VMSEQ_VX 0xfc00707f -#define MATCH_VMSNE_VX 0x64004057 -#define MASK_VMSNE_VX 0xfc00707f -#define MATCH_VMSLTU_VX 0x68004057 -#define MASK_VMSLTU_VX 0xfc00707f -#define MATCH_VMSLT_VX 0x6c004057 -#define MASK_VMSLT_VX 0xfc00707f -#define MATCH_VMSLEU_VX 0x70004057 -#define MASK_VMSLEU_VX 0xfc00707f -#define MATCH_VMSLE_VX 0x74004057 -#define MASK_VMSLE_VX 0xfc00707f -#define MATCH_VMSGTU_VX 0x78004057 -#define MASK_VMSGTU_VX 0xfc00707f -#define MATCH_VMSGT_VX 0x7c004057 -#define MASK_VMSGT_VX 0xfc00707f -#define MATCH_VSADDU_VX 0x80004057 -#define MASK_VSADDU_VX 0xfc00707f -#define MATCH_VSADD_VX 0x84004057 -#define MASK_VSADD_VX 0xfc00707f -#define MATCH_VSSUBU_VX 0x88004057 -#define MASK_VSSUBU_VX 0xfc00707f -#define MATCH_VSSUB_VX 0x8c004057 -#define MASK_VSSUB_VX 0xfc00707f -#define MATCH_VSLL_VX 0x94004057 -#define MASK_VSLL_VX 0xfc00707f -#define MATCH_VSMUL_VX 0x9c004057 -#define MASK_VSMUL_VX 0xfc00707f -#define MATCH_VSRL_VX 0xa0004057 -#define MASK_VSRL_VX 0xfc00707f -#define MATCH_VSRA_VX 0xa4004057 -#define MASK_VSRA_VX 0xfc00707f -#define MATCH_VSSRL_VX 0xa8004057 -#define MASK_VSSRL_VX 0xfc00707f -#define MATCH_VSSRA_VX 0xac004057 -#define MASK_VSSRA_VX 0xfc00707f -#define MATCH_VNSRL_WX 0xb0004057 -#define MASK_VNSRL_WX 0xfc00707f -#define MATCH_VNSRA_WX 0xb4004057 -#define MASK_VNSRA_WX 0xfc00707f -#define MATCH_VNCLIPU_WX 0xb8004057 -#define MASK_VNCLIPU_WX 0xfc00707f -#define MATCH_VNCLIP_WX 0xbc004057 -#define MASK_VNCLIP_WX 0xfc00707f -#define MATCH_VQMACCU_VX 0xf0004057 
-#define MASK_VQMACCU_VX 0xfc00707f -#define MATCH_VQMACC_VX 0xf4004057 -#define MASK_VQMACC_VX 0xfc00707f -#define MATCH_VQMACCUS_VX 0xf8004057 -#define MASK_VQMACCUS_VX 0xfc00707f -#define MATCH_VQMACCSU_VX 0xfc004057 -#define MASK_VQMACCSU_VX 0xfc00707f -#define MATCH_VADD_VV 0x57 -#define MASK_VADD_VV 0xfc00707f -#define MATCH_VSUB_VV 0x8000057 -#define MASK_VSUB_VV 0xfc00707f -#define MATCH_VMINU_VV 0x10000057 -#define MASK_VMINU_VV 0xfc00707f -#define MATCH_VMIN_VV 0x14000057 -#define MASK_VMIN_VV 0xfc00707f -#define MATCH_VMAXU_VV 0x18000057 -#define MASK_VMAXU_VV 0xfc00707f -#define MATCH_VMAX_VV 0x1c000057 -#define MASK_VMAX_VV 0xfc00707f -#define MATCH_VAND_VV 0x24000057 -#define MASK_VAND_VV 0xfc00707f -#define MATCH_VOR_VV 0x28000057 -#define MASK_VOR_VV 0xfc00707f -#define MATCH_VXOR_VV 0x2c000057 -#define MASK_VXOR_VV 0xfc00707f -#define MATCH_VRGATHER_VV 0x30000057 -#define MASK_VRGATHER_VV 0xfc00707f -#define MATCH_VRGATHEREI16_VV 0x38000057 -#define MASK_VRGATHEREI16_VV 0xfc00707f -#define MATCH_VADC_VVM 0x40000057 -#define MASK_VADC_VVM 0xfe00707f -#define MATCH_VMADC_VVM 0x44000057 -#define MASK_VMADC_VVM 0xfc00707f -#define MATCH_VSBC_VVM 0x48000057 -#define MASK_VSBC_VVM 0xfe00707f -#define MATCH_VMSBC_VVM 0x4c000057 -#define MASK_VMSBC_VVM 0xfc00707f -#define MATCH_VMERGE_VVM 0x5c000057 -#define MASK_VMERGE_VVM 0xfe00707f -#define MATCH_VMV_V_V 0x5e000057 -#define MASK_VMV_V_V 0xfff0707f -#define MATCH_VMSEQ_VV 0x60000057 -#define MASK_VMSEQ_VV 0xfc00707f -#define MATCH_VMSNE_VV 0x64000057 -#define MASK_VMSNE_VV 0xfc00707f -#define MATCH_VMSLTU_VV 0x68000057 -#define MASK_VMSLTU_VV 0xfc00707f -#define MATCH_VMSLT_VV 0x6c000057 -#define MASK_VMSLT_VV 0xfc00707f -#define MATCH_VMSLEU_VV 0x70000057 -#define MASK_VMSLEU_VV 0xfc00707f -#define MATCH_VMSLE_VV 0x74000057 -#define MASK_VMSLE_VV 0xfc00707f -#define MATCH_VSADDU_VV 0x80000057 -#define MASK_VSADDU_VV 0xfc00707f -#define MATCH_VSADD_VV 0x84000057 -#define MASK_VSADD_VV 0xfc00707f -#define 
MATCH_VSSUBU_VV 0x88000057 -#define MASK_VSSUBU_VV 0xfc00707f -#define MATCH_VSSUB_VV 0x8c000057 -#define MASK_VSSUB_VV 0xfc00707f -#define MATCH_VSLL_VV 0x94000057 -#define MASK_VSLL_VV 0xfc00707f -#define MATCH_VSMUL_VV 0x9c000057 -#define MASK_VSMUL_VV 0xfc00707f -#define MATCH_VSRL_VV 0xa0000057 -#define MASK_VSRL_VV 0xfc00707f -#define MATCH_VSRA_VV 0xa4000057 -#define MASK_VSRA_VV 0xfc00707f -#define MATCH_VSSRL_VV 0xa8000057 -#define MASK_VSSRL_VV 0xfc00707f -#define MATCH_VSSRA_VV 0xac000057 -#define MASK_VSSRA_VV 0xfc00707f -#define MATCH_VNSRL_WV 0xb0000057 -#define MASK_VNSRL_WV 0xfc00707f -#define MATCH_VNSRA_WV 0xb4000057 -#define MASK_VNSRA_WV 0xfc00707f -#define MATCH_VNCLIPU_WV 0xb8000057 -#define MASK_VNCLIPU_WV 0xfc00707f -#define MATCH_VNCLIP_WV 0xbc000057 -#define MASK_VNCLIP_WV 0xfc00707f -#define MATCH_VWREDSUMU_VS 0xc0000057 -#define MASK_VWREDSUMU_VS 0xfc00707f -#define MATCH_VWREDSUM_VS 0xc4000057 -#define MASK_VWREDSUM_VS 0xfc00707f -#define MATCH_VDOTU_VV 0xe0000057 -#define MASK_VDOTU_VV 0xfc00707f -#define MATCH_VDOT_VV 0xe4000057 -#define MASK_VDOT_VV 0xfc00707f -#define MATCH_VQMACCU_VV 0xf0000057 -#define MASK_VQMACCU_VV 0xfc00707f -#define MATCH_VQMACC_VV 0xf4000057 -#define MASK_VQMACC_VV 0xfc00707f -#define MATCH_VQMACCSU_VV 0xfc000057 -#define MASK_VQMACCSU_VV 0xfc00707f -#define MATCH_VADD_VI 0x3057 -#define MASK_VADD_VI 0xfc00707f -#define MATCH_VRSUB_VI 0xc003057 -#define MASK_VRSUB_VI 0xfc00707f -#define MATCH_VAND_VI 0x24003057 -#define MASK_VAND_VI 0xfc00707f -#define MATCH_VOR_VI 0x28003057 -#define MASK_VOR_VI 0xfc00707f -#define MATCH_VXOR_VI 0x2c003057 -#define MASK_VXOR_VI 0xfc00707f -#define MATCH_VRGATHER_VI 0x30003057 -#define MASK_VRGATHER_VI 0xfc00707f -#define MATCH_VSLIDEUP_VI 0x38003057 -#define MASK_VSLIDEUP_VI 0xfc00707f -#define MATCH_VSLIDEDOWN_VI 0x3c003057 -#define MASK_VSLIDEDOWN_VI 0xfc00707f -#define MATCH_VADC_VIM 0x40003057 -#define MASK_VADC_VIM 0xfe00707f -#define MATCH_VMADC_VIM 0x44003057 
-#define MASK_VMADC_VIM 0xfc00707f -#define MATCH_VMERGE_VIM 0x5c003057 -#define MASK_VMERGE_VIM 0xfe00707f -#define MATCH_VMV_V_I 0x5e003057 -#define MASK_VMV_V_I 0xfff0707f -#define MATCH_VMSEQ_VI 0x60003057 -#define MASK_VMSEQ_VI 0xfc00707f -#define MATCH_VMSNE_VI 0x64003057 -#define MASK_VMSNE_VI 0xfc00707f -#define MATCH_VMSLEU_VI 0x70003057 -#define MASK_VMSLEU_VI 0xfc00707f -#define MATCH_VMSLE_VI 0x74003057 -#define MASK_VMSLE_VI 0xfc00707f -#define MATCH_VMSGTU_VI 0x78003057 -#define MASK_VMSGTU_VI 0xfc00707f -#define MATCH_VMSGT_VI 0x7c003057 -#define MASK_VMSGT_VI 0xfc00707f -#define MATCH_VSADDU_VI 0x80003057 -#define MASK_VSADDU_VI 0xfc00707f -#define MATCH_VSADD_VI 0x84003057 -#define MASK_VSADD_VI 0xfc00707f -#define MATCH_VSLL_VI 0x94003057 -#define MASK_VSLL_VI 0xfc00707f -#define MATCH_VMV1R_V 0x9e003057 -#define MASK_VMV1R_V 0xfe0ff07f -#define MATCH_VMV2R_V 0x9e00b057 -#define MASK_VMV2R_V 0xfe0ff07f -#define MATCH_VMV4R_V 0x9e01b057 -#define MASK_VMV4R_V 0xfe0ff07f -#define MATCH_VMV8R_V 0x9e03b057 -#define MASK_VMV8R_V 0xfe0ff07f -#define MATCH_VSRL_VI 0xa0003057 -#define MASK_VSRL_VI 0xfc00707f -#define MATCH_VSRA_VI 0xa4003057 -#define MASK_VSRA_VI 0xfc00707f -#define MATCH_VSSRL_VI 0xa8003057 -#define MASK_VSSRL_VI 0xfc00707f -#define MATCH_VSSRA_VI 0xac003057 -#define MASK_VSSRA_VI 0xfc00707f -#define MATCH_VNSRL_WI 0xb0003057 -#define MASK_VNSRL_WI 0xfc00707f -#define MATCH_VNSRA_WI 0xb4003057 -#define MASK_VNSRA_WI 0xfc00707f -#define MATCH_VNCLIPU_WI 0xb8003057 -#define MASK_VNCLIPU_WI 0xfc00707f -#define MATCH_VNCLIP_WI 0xbc003057 -#define MASK_VNCLIP_WI 0xfc00707f -#define MATCH_VREDSUM_VS 0x2057 -#define MASK_VREDSUM_VS 0xfc00707f -#define MATCH_VREDAND_VS 0x4002057 -#define MASK_VREDAND_VS 0xfc00707f -#define MATCH_VREDOR_VS 0x8002057 -#define MASK_VREDOR_VS 0xfc00707f -#define MATCH_VREDXOR_VS 0xc002057 -#define MASK_VREDXOR_VS 0xfc00707f -#define MATCH_VREDMINU_VS 0x10002057 -#define MASK_VREDMINU_VS 0xfc00707f -#define 
MATCH_VREDMIN_VS 0x14002057 -#define MASK_VREDMIN_VS 0xfc00707f -#define MATCH_VREDMAXU_VS 0x18002057 -#define MASK_VREDMAXU_VS 0xfc00707f -#define MATCH_VREDMAX_VS 0x1c002057 -#define MASK_VREDMAX_VS 0xfc00707f -#define MATCH_VAADDU_VV 0x20002057 -#define MASK_VAADDU_VV 0xfc00707f -#define MATCH_VAADD_VV 0x24002057 -#define MASK_VAADD_VV 0xfc00707f -#define MATCH_VASUBU_VV 0x28002057 -#define MASK_VASUBU_VV 0xfc00707f -#define MATCH_VASUB_VV 0x2c002057 -#define MASK_VASUB_VV 0xfc00707f -#define MATCH_VMV_X_S 0x42002057 -#define MASK_VMV_X_S 0xfe0ff07f -#define MATCH_VZEXT_VF8 0x48012057 -#define MASK_VZEXT_VF8 0xfc0ff07f -#define MATCH_VSEXT_VF8 0x4801a057 -#define MASK_VSEXT_VF8 0xfc0ff07f -#define MATCH_VZEXT_VF4 0x48022057 -#define MASK_VZEXT_VF4 0xfc0ff07f -#define MATCH_VSEXT_VF4 0x4802a057 -#define MASK_VSEXT_VF4 0xfc0ff07f -#define MATCH_VZEXT_VF2 0x48032057 -#define MASK_VZEXT_VF2 0xfc0ff07f -#define MATCH_VSEXT_VF2 0x4803a057 -#define MASK_VSEXT_VF2 0xfc0ff07f -#define MATCH_VCOMPRESS_VM 0x5e002057 -#define MASK_VCOMPRESS_VM 0xfe00707f -#define MATCH_VMANDNOT_MM 0x60002057 -#define MASK_VMANDNOT_MM 0xfc00707f -#define MATCH_VMAND_MM 0x64002057 -#define MASK_VMAND_MM 0xfc00707f -#define MATCH_VMOR_MM 0x68002057 -#define MASK_VMOR_MM 0xfc00707f -#define MATCH_VMXOR_MM 0x6c002057 -#define MASK_VMXOR_MM 0xfc00707f -#define MATCH_VMORNOT_MM 0x70002057 -#define MASK_VMORNOT_MM 0xfc00707f -#define MATCH_VMNAND_MM 0x74002057 -#define MASK_VMNAND_MM 0xfc00707f -#define MATCH_VMNOR_MM 0x78002057 -#define MASK_VMNOR_MM 0xfc00707f -#define MATCH_VMXNOR_MM 0x7c002057 -#define MASK_VMXNOR_MM 0xfc00707f -#define MATCH_VMSBF_M 0x5000a057 -#define MASK_VMSBF_M 0xfc0ff07f -#define MATCH_VMSOF_M 0x50012057 -#define MASK_VMSOF_M 0xfc0ff07f -#define MATCH_VMSIF_M 0x5001a057 -#define MASK_VMSIF_M 0xfc0ff07f -#define MATCH_VIOTA_M 0x50082057 -#define MASK_VIOTA_M 0xfc0ff07f -#define MATCH_VID_V 0x5008a057 -#define MASK_VID_V 0xfdfff07f -#define MATCH_VPOPC_M 0x40082057 -#define 
MASK_VPOPC_M 0xfc0ff07f -#define MATCH_VFIRST_M 0x4008a057 -#define MASK_VFIRST_M 0xfc0ff07f -#define MATCH_VDIVU_VV 0x80002057 -#define MASK_VDIVU_VV 0xfc00707f -#define MATCH_VDIV_VV 0x84002057 -#define MASK_VDIV_VV 0xfc00707f -#define MATCH_VREMU_VV 0x88002057 -#define MASK_VREMU_VV 0xfc00707f -#define MATCH_VREM_VV 0x8c002057 -#define MASK_VREM_VV 0xfc00707f -#define MATCH_VMULHU_VV 0x90002057 -#define MASK_VMULHU_VV 0xfc00707f -#define MATCH_VMUL_VV 0x94002057 -#define MASK_VMUL_VV 0xfc00707f -#define MATCH_VMULHSU_VV 0x98002057 -#define MASK_VMULHSU_VV 0xfc00707f -#define MATCH_VMULH_VV 0x9c002057 -#define MASK_VMULH_VV 0xfc00707f -#define MATCH_VMADD_VV 0xa4002057 -#define MASK_VMADD_VV 0xfc00707f -#define MATCH_VNMSUB_VV 0xac002057 -#define MASK_VNMSUB_VV 0xfc00707f -#define MATCH_VMACC_VV 0xb4002057 -#define MASK_VMACC_VV 0xfc00707f -#define MATCH_VNMSAC_VV 0xbc002057 -#define MASK_VNMSAC_VV 0xfc00707f -#define MATCH_VWADDU_VV 0xc0002057 -#define MASK_VWADDU_VV 0xfc00707f -#define MATCH_VWADD_VV 0xc4002057 -#define MASK_VWADD_VV 0xfc00707f -#define MATCH_VWSUBU_VV 0xc8002057 -#define MASK_VWSUBU_VV 0xfc00707f -#define MATCH_VWSUB_VV 0xcc002057 -#define MASK_VWSUB_VV 0xfc00707f -#define MATCH_VWADDU_WV 0xd0002057 -#define MASK_VWADDU_WV 0xfc00707f -#define MATCH_VWADD_WV 0xd4002057 -#define MASK_VWADD_WV 0xfc00707f -#define MATCH_VWSUBU_WV 0xd8002057 -#define MASK_VWSUBU_WV 0xfc00707f -#define MATCH_VWSUB_WV 0xdc002057 -#define MASK_VWSUB_WV 0xfc00707f -#define MATCH_VWMULU_VV 0xe0002057 -#define MASK_VWMULU_VV 0xfc00707f -#define MATCH_VWMULSU_VV 0xe8002057 -#define MASK_VWMULSU_VV 0xfc00707f -#define MATCH_VWMUL_VV 0xec002057 -#define MASK_VWMUL_VV 0xfc00707f -#define MATCH_VWMACCU_VV 0xf0002057 -#define MASK_VWMACCU_VV 0xfc00707f -#define MATCH_VWMACC_VV 0xf4002057 -#define MASK_VWMACC_VV 0xfc00707f -#define MATCH_VWMACCSU_VV 0xfc002057 -#define MASK_VWMACCSU_VV 0xfc00707f -#define MATCH_VAADDU_VX 0x20006057 -#define MASK_VAADDU_VX 0xfc00707f -#define 
MATCH_VAADD_VX 0x24006057 -#define MASK_VAADD_VX 0xfc00707f -#define MATCH_VASUBU_VX 0x28006057 -#define MASK_VASUBU_VX 0xfc00707f -#define MATCH_VASUB_VX 0x2c006057 -#define MASK_VASUB_VX 0xfc00707f -#define MATCH_VMV_S_X 0x42006057 -#define MASK_VMV_S_X 0xfff0707f -#define MATCH_VSLIDE1UP_VX 0x38006057 -#define MASK_VSLIDE1UP_VX 0xfc00707f -#define MATCH_VSLIDE1DOWN_VX 0x3c006057 -#define MASK_VSLIDE1DOWN_VX 0xfc00707f -#define MATCH_VDIVU_VX 0x80006057 -#define MASK_VDIVU_VX 0xfc00707f -#define MATCH_VDIV_VX 0x84006057 -#define MASK_VDIV_VX 0xfc00707f -#define MATCH_VREMU_VX 0x88006057 -#define MASK_VREMU_VX 0xfc00707f -#define MATCH_VREM_VX 0x8c006057 -#define MASK_VREM_VX 0xfc00707f -#define MATCH_VMULHU_VX 0x90006057 -#define MASK_VMULHU_VX 0xfc00707f -#define MATCH_VMUL_VX 0x94006057 -#define MASK_VMUL_VX 0xfc00707f -#define MATCH_VMULHSU_VX 0x98006057 -#define MASK_VMULHSU_VX 0xfc00707f -#define MATCH_VMULH_VX 0x9c006057 -#define MASK_VMULH_VX 0xfc00707f -#define MATCH_VMADD_VX 0xa4006057 -#define MASK_VMADD_VX 0xfc00707f -#define MATCH_VNMSUB_VX 0xac006057 -#define MASK_VNMSUB_VX 0xfc00707f -#define MATCH_VMACC_VX 0xb4006057 -#define MASK_VMACC_VX 0xfc00707f -#define MATCH_VNMSAC_VX 0xbc006057 -#define MASK_VNMSAC_VX 0xfc00707f -#define MATCH_VWADDU_VX 0xc0006057 -#define MASK_VWADDU_VX 0xfc00707f -#define MATCH_VWADD_VX 0xc4006057 -#define MASK_VWADD_VX 0xfc00707f -#define MATCH_VWSUBU_VX 0xc8006057 -#define MASK_VWSUBU_VX 0xfc00707f -#define MATCH_VWSUB_VX 0xcc006057 -#define MASK_VWSUB_VX 0xfc00707f -#define MATCH_VWADDU_WX 0xd0006057 -#define MASK_VWADDU_WX 0xfc00707f -#define MATCH_VWADD_WX 0xd4006057 -#define MASK_VWADD_WX 0xfc00707f -#define MATCH_VWSUBU_WX 0xd8006057 -#define MASK_VWSUBU_WX 0xfc00707f -#define MATCH_VWSUB_WX 0xdc006057 -#define MASK_VWSUB_WX 0xfc00707f -#define MATCH_VWMULU_VX 0xe0006057 -#define MASK_VWMULU_VX 0xfc00707f -#define MATCH_VWMULSU_VX 0xe8006057 -#define MASK_VWMULSU_VX 0xfc00707f -#define MATCH_VWMUL_VX 0xec006057 
-#define MASK_VWMUL_VX 0xfc00707f -#define MATCH_VWMACCU_VX 0xf0006057 -#define MASK_VWMACCU_VX 0xfc00707f -#define MATCH_VWMACC_VX 0xf4006057 -#define MASK_VWMACC_VX 0xfc00707f -#define MATCH_VWMACCUS_VX 0xf8006057 -#define MASK_VWMACCUS_VX 0xfc00707f -#define MATCH_VWMACCSU_VX 0xfc006057 -#define MASK_VWMACCSU_VX 0xfc00707f -#define MATCH_VAMOSWAPEI8_V 0x800002f -#define MASK_VAMOSWAPEI8_V 0xf800707f -#define MATCH_VAMOADDEI8_V 0x2f -#define MASK_VAMOADDEI8_V 0xf800707f -#define MATCH_VAMOXOREI8_V 0x2000002f -#define MASK_VAMOXOREI8_V 0xf800707f -#define MATCH_VAMOANDEI8_V 0x6000002f -#define MASK_VAMOANDEI8_V 0xf800707f -#define MATCH_VAMOOREI8_V 0x4000002f -#define MASK_VAMOOREI8_V 0xf800707f -#define MATCH_VAMOMINEI8_V 0x8000002f -#define MASK_VAMOMINEI8_V 0xf800707f -#define MATCH_VAMOMAXEI8_V 0xa000002f -#define MASK_VAMOMAXEI8_V 0xf800707f -#define MATCH_VAMOMINUEI8_V 0xc000002f -#define MASK_VAMOMINUEI8_V 0xf800707f -#define MATCH_VAMOMAXUEI8_V 0xe000002f -#define MASK_VAMOMAXUEI8_V 0xf800707f -#define MATCH_VAMOSWAPEI16_V 0x800502f -#define MASK_VAMOSWAPEI16_V 0xf800707f -#define MATCH_VAMOADDEI16_V 0x502f -#define MASK_VAMOADDEI16_V 0xf800707f -#define MATCH_VAMOXOREI16_V 0x2000502f -#define MASK_VAMOXOREI16_V 0xf800707f -#define MATCH_VAMOANDEI16_V 0x6000502f -#define MASK_VAMOANDEI16_V 0xf800707f -#define MATCH_VAMOOREI16_V 0x4000502f -#define MASK_VAMOOREI16_V 0xf800707f -#define MATCH_VAMOMINEI16_V 0x8000502f -#define MASK_VAMOMINEI16_V 0xf800707f -#define MATCH_VAMOMAXEI16_V 0xa000502f -#define MASK_VAMOMAXEI16_V 0xf800707f -#define MATCH_VAMOMINUEI16_V 0xc000502f -#define MASK_VAMOMINUEI16_V 0xf800707f -#define MATCH_VAMOMAXUEI16_V 0xe000502f -#define MASK_VAMOMAXUEI16_V 0xf800707f -#define MATCH_VAMOSWAPEI32_V 0x800602f -#define MASK_VAMOSWAPEI32_V 0xf800707f -#define MATCH_VAMOADDEI32_V 0x602f -#define MASK_VAMOADDEI32_V 0xf800707f -#define MATCH_VAMOXOREI32_V 0x2000602f -#define MASK_VAMOXOREI32_V 0xf800707f -#define MATCH_VAMOANDEI32_V 
0x6000602f -#define MASK_VAMOANDEI32_V 0xf800707f -#define MATCH_VAMOOREI32_V 0x4000602f -#define MASK_VAMOOREI32_V 0xf800707f -#define MATCH_VAMOMINEI32_V 0x8000602f -#define MASK_VAMOMINEI32_V 0xf800707f -#define MATCH_VAMOMAXEI32_V 0xa000602f -#define MASK_VAMOMAXEI32_V 0xf800707f -#define MATCH_VAMOMINUEI32_V 0xc000602f -#define MASK_VAMOMINUEI32_V 0xf800707f -#define MATCH_VAMOMAXUEI32_V 0xe000602f -#define MASK_VAMOMAXUEI32_V 0xf800707f -#define MATCH_VAMOSWAPEI64_V 0x800702f -#define MASK_VAMOSWAPEI64_V 0xf800707f -#define MATCH_VAMOADDEI64_V 0x702f -#define MASK_VAMOADDEI64_V 0xf800707f -#define MATCH_VAMOXOREI64_V 0x2000702f -#define MASK_VAMOXOREI64_V 0xf800707f -#define MATCH_VAMOANDEI64_V 0x6000702f -#define MASK_VAMOANDEI64_V 0xf800707f -#define MATCH_VAMOOREI64_V 0x4000702f -#define MASK_VAMOOREI64_V 0xf800707f -#define MATCH_VAMOMINEI64_V 0x8000702f -#define MASK_VAMOMINEI64_V 0xf800707f -#define MATCH_VAMOMAXEI64_V 0xa000702f -#define MASK_VAMOMAXEI64_V 0xf800707f -#define MATCH_VAMOMINUEI64_V 0xc000702f -#define MASK_VAMOMINUEI64_V 0xf800707f -#define MATCH_VAMOMAXUEI64_V 0xe000702f -#define MASK_VAMOMAXUEI64_V 0xf800707f #define MATCH_VMVNFR_V 0x9e003057 #define MASK_VMVNFR_V 0xfe00707f #define MATCH_VL1R_V 0x2800007 @@ -1930,6 +1048,190 @@ #define MASK_P_MAC 0xfe00707f #define MATCH_P_MSU 0x42001033 #define MASK_P_MSU 0xfe00707f +#define MATCH_PV_ADD_H 0x57 +#define MASK_PV_ADD_H 0xfe00707f +#define MATCH_PV_ADD_SC_H 0x4057 +#define MASK_PV_ADD_SC_H 0xfe00707f +#define MATCH_PV_ADD_SCI_H 0x6057 +#define MASK_PV_ADD_SCI_H 0xfc00707f +#define MATCH_PV_ADD_B 0x1057 +#define MASK_PV_ADD_B 0xfe00707f +#define MATCH_PV_ADD_SC_B 0x5057 +#define MASK_PV_ADD_SC_B 0xfe00707f +#define MATCH_PV_ADD_SCI_B 0x7057 +#define MASK_PV_ADD_SCI_B 0xfc00707f +#define MATCH_PV_SUB_H 0x8000057 +#define MASK_PV_SUB_H 0xfe00707f +#define MATCH_PV_SUB_SC_H 0x8004057 +#define MASK_PV_SUB_SC_H 0xfe00707f +#define MATCH_PV_SUB_SCI_H 0x8006057 +#define MASK_PV_SUB_SCI_H 
0xfc00707f +#define MATCH_PV_SUB_B 0x8001057 +#define MASK_PV_SUB_B 0xfe00707f +#define MATCH_PV_SUB_SC_B 0x8005057 +#define MASK_PV_SUB_SC_B 0xfe00707f +#define MATCH_PV_SUB_SCI_B 0x8007057 +#define MASK_PV_SUB_SCI_B 0xfc00707f +#define MATCH_PV_AVG_H 0x10000057 +#define MASK_PV_AVG_H 0xfe00707f +#define MATCH_PV_AVG_SC_H 0x10004057 +#define MASK_PV_AVG_SC_H 0xfe00707f +#define MATCH_PV_AVG_SCI_H 0x10006057 +#define MASK_PV_AVG_SCI_H 0xfc00707f +#define MATCH_PV_AVG_B 0x10001057 +#define MASK_PV_AVG_B 0xfe00707f +#define MATCH_PV_AVG_SC_B 0x10005057 +#define MASK_PV_AVG_SC_B 0xfe00707f +#define MATCH_PV_AVG_SCI_B 0x10007057 +#define MASK_PV_AVG_SCI_B 0xfc00707f +#define MATCH_PV_AVGU_H 0x18000057 +#define MASK_PV_AVGU_H 0xfe00707f +#define MATCH_PV_AVGU_SC_H 0x18004057 +#define MASK_PV_AVGU_SC_H 0xfe00707f +#define MATCH_PV_AVGU_SCI_H 0x18006057 +#define MASK_PV_AVGU_SCI_H 0xfc00707f +#define MATCH_PV_AVGU_B 0x18001057 +#define MASK_PV_AVGU_B 0xfe00707f +#define MATCH_PV_AVGU_SC_B 0x18005057 +#define MASK_PV_AVGU_SC_B 0xfe00707f +#define MATCH_PV_AVGU_SCI_B 0x18007057 +#define MASK_PV_AVGU_SCI_B 0xfc00707f +#define MATCH_PV_MIN_H 0x20000057 +#define MASK_PV_MIN_H 0xfe00707f +#define MATCH_PV_MIN_SC_H 0x20004057 +#define MASK_PV_MIN_SC_H 0xfe00707f +#define MATCH_PV_MIN_SCI_H 0x20006057 +#define MASK_PV_MIN_SCI_H 0xfc00707f +#define MATCH_PV_MIN_B 0x20001057 +#define MASK_PV_MIN_B 0xfe00707f +#define MATCH_PV_MIN_SC_B 0x20005057 +#define MASK_PV_MIN_SC_B 0xfe00707f +#define MATCH_PV_MIN_SCI_B 0x20007057 +#define MASK_PV_MIN_SCI_B 0xfc00707f +#define MATCH_PV_MINU_H 0x28000057 +#define MASK_PV_MINU_H 0xfe00707f +#define MATCH_PV_MINU_SC_H 0x28004057 +#define MASK_PV_MINU_SC_H 0xfe00707f +#define MATCH_PV_MINU_SCI_H 0x28006057 +#define MASK_PV_MINU_SCI_H 0xfc00707f +#define MATCH_PV_MINU_B 0x28001057 +#define MASK_PV_MINU_B 0xfe00707f +#define MATCH_PV_MINU_SC_B 0x28005057 +#define MASK_PV_MINU_SC_B 0xfe00707f +#define MATCH_PV_MINU_SCI_B 0x28007057 +#define 
MASK_PV_MINU_SCI_B 0xfc00707f +#define MATCH_PV_MAX_H 0x30000057 +#define MASK_PV_MAX_H 0xfe00707f +#define MATCH_PV_MAX_SC_H 0x30004057 +#define MASK_PV_MAX_SC_H 0xfe00707f +#define MATCH_PV_MAX_SCI_H 0x30006057 +#define MASK_PV_MAX_SCI_H 0xfc00707f +#define MATCH_PV_MAX_B 0x30001057 +#define MASK_PV_MAX_B 0xfe00707f +#define MATCH_PV_MAX_SC_B 0x30005057 +#define MASK_PV_MAX_SC_B 0xfe00707f +#define MATCH_PV_MAX_SCI_B 0x30007057 +#define MASK_PV_MAX_SCI_B 0xfc00707f +#define MATCH_PV_MAXU_H 0x38000057 +#define MASK_PV_MAXU_H 0xfe00707f +#define MATCH_PV_MAXU_SC_H 0x38004057 +#define MASK_PV_MAXU_SC_H 0xfe00707f +#define MATCH_PV_MAXU_SCI_H 0x38006057 +#define MASK_PV_MAXU_SCI_H 0xfc00707f +#define MATCH_PV_MAXU_B 0x38001057 +#define MASK_PV_MAXU_B 0xfe00707f +#define MATCH_PV_MAXU_SC_B 0x38005057 +#define MASK_PV_MAXU_SC_B 0xfe00707f +#define MATCH_PV_MAXU_SCI_B 0x38007057 +#define MASK_PV_MAXU_SCI_B 0xfc00707f +#define MATCH_PV_SRL_H 0x40000057 +#define MASK_PV_SRL_H 0xfe00707f +#define MATCH_PV_SRL_SC_H 0x40004057 +#define MASK_PV_SRL_SC_H 0xfe00707f +#define MATCH_PV_SRL_SCI_H 0x40006057 +#define MASK_PV_SRL_SCI_H 0xfc00707f +#define MATCH_PV_SRL_B 0x40001057 +#define MASK_PV_SRL_B 0xfe00707f +#define MATCH_PV_SRL_SC_B 0x40005057 +#define MASK_PV_SRL_SC_B 0xfe00707f +#define MATCH_PV_SRL_SCI_B 0x40007057 +#define MASK_PV_SRL_SCI_B 0xfc00707f +#define MATCH_PV_SRA_H 0x48000057 +#define MASK_PV_SRA_H 0xfe00707f +#define MATCH_PV_SRA_SC_H 0x48004057 +#define MASK_PV_SRA_SC_H 0xfe00707f +#define MATCH_PV_SRA_SCI_H 0x48006057 +#define MASK_PV_SRA_SCI_H 0xfc00707f +#define MATCH_PV_SRA_B 0x48001057 +#define MASK_PV_SRA_B 0xfe00707f +#define MATCH_PV_SRA_SC_B 0x48005057 +#define MASK_PV_SRA_SC_B 0xfe00707f +#define MATCH_PV_SRA_SCI_B 0x48007057 +#define MASK_PV_SRA_SCI_B 0xfc00707f +#define MATCH_PV_SLL_H 0x50000057 +#define MASK_PV_SLL_H 0xfe00707f +#define MATCH_PV_SLL_SC_H 0x50004057 +#define MASK_PV_SLL_SC_H 0xfe00707f +#define MATCH_PV_SLL_SCI_H 0x50006057 
+#define MASK_PV_SLL_SCI_H 0xfc00707f +#define MATCH_PV_SLL_B 0x50001057 +#define MASK_PV_SLL_B 0xfe00707f +#define MATCH_PV_SLL_SC_B 0x50005057 +#define MASK_PV_SLL_SC_B 0xfe00707f +#define MATCH_PV_SLL_SCI_B 0x50007057 +#define MASK_PV_SLL_SCI_B 0xfc00707f +#define MATCH_PV_OR_H 0x58000057 +#define MASK_PV_OR_H 0xfe00707f +#define MATCH_PV_OR_SC_H 0x58004057 +#define MASK_PV_OR_SC_H 0xfe00707f +#define MATCH_PV_OR_SCI_H 0x58006057 +#define MASK_PV_OR_SCI_H 0xfc00707f +#define MATCH_PV_OR_B 0x58001057 +#define MASK_PV_OR_B 0xfe00707f +#define MATCH_PV_OR_SC_B 0x58005057 +#define MASK_PV_OR_SC_B 0xfe00707f +#define MATCH_PV_OR_SCI_B 0x58007057 +#define MASK_PV_OR_SCI_B 0xfc00707f +#define MATCH_PV_XOR_H 0x60000057 +#define MASK_PV_XOR_H 0xfe00707f +#define MATCH_PV_XOR_SC_H 0x60004057 +#define MASK_PV_XOR_SC_H 0xfe00707f +#define MATCH_PV_XOR_SCI_H 0x60006057 +#define MASK_PV_XOR_SCI_H 0xfc00707f +#define MATCH_PV_XOR_B 0x60001057 +#define MASK_PV_XOR_B 0xfe00707f +#define MATCH_PV_XOR_SC_B 0x60005057 +#define MASK_PV_XOR_SC_B 0xfe00707f +#define MATCH_PV_XOR_SCI_B 0x60007057 +#define MASK_PV_XOR_SCI_B 0xfc00707f +#define MATCH_PV_AND_H 0x68000057 +#define MASK_PV_AND_H 0xfe00707f +#define MATCH_PV_AND_SC_H 0x68004057 +#define MASK_PV_AND_SC_H 0xfe00707f +#define MATCH_PV_AND_SCI_H 0x68006057 +#define MASK_PV_AND_SCI_H 0xfc00707f +#define MATCH_PV_AND_B 0x68001057 +#define MASK_PV_AND_B 0xfe00707f +#define MATCH_PV_AND_SC_B 0x68005057 +#define MASK_PV_AND_SC_B 0xfe00707f +#define MATCH_PV_AND_SCI_B 0x68007057 +#define MASK_PV_AND_SCI_B 0xfc00707f +#define MATCH_PV_ABS_H 0x70000057 +#define MASK_PV_ABS_H 0xfff0707f +#define MATCH_PV_ABS_B 0x70001057 +#define MASK_PV_ABS_B 0xfff0707f +#define MATCH_PV_EXTRACT_H 0x78006057 +#define MASK_PV_EXTRACT_H 0xfc00707f +#define MATCH_PV_EXTRACT_B 0x78007057 +#define MASK_PV_EXTRACT_B 0xfc00707f +#define MATCH_PV_EXTRACTU_H 0x90006057 +#define MASK_PV_EXTRACTU_H 0xfc00707f +#define MATCH_PV_EXTRACTU_B 0x90007057 +#define 
MASK_PV_EXTRACTU_B 0xfc00707f +#define MATCH_PV_INSERT_H 0xb0006057 +#define MASK_PV_INSERT_H 0xfc00707f +#define MATCH_PV_INSERT_B 0xb0007057 +#define MASK_PV_INSERT_B 0xfc00707f #define MATCH_FLAH 0x1007 #define MASK_FLAH 0x707f #define MATCH_FSAH 0x1027 @@ -3197,447 +2499,6 @@ DECLARE_INSN(c_add, MATCH_C_ADD, MASK_C_ADD) DECLARE_INSN(c_fsdsp, MATCH_C_FSDSP, MASK_C_FSDSP) DECLARE_INSN(c_swsp, MATCH_C_SWSP, MASK_C_SWSP) DECLARE_INSN(c_fswsp, MATCH_C_FSWSP, MASK_C_FSWSP) -DECLARE_INSN(vsetvli, MATCH_VSETVLI, MASK_VSETVLI) -DECLARE_INSN(vsetvl, MATCH_VSETVL, MASK_VSETVL) -DECLARE_INSN(vle8_v, MATCH_VLE8_V, MASK_VLE8_V) -DECLARE_INSN(vle16_v, MATCH_VLE16_V, MASK_VLE16_V) -DECLARE_INSN(vle32_v, MATCH_VLE32_V, MASK_VLE32_V) -DECLARE_INSN(vle64_v, MATCH_VLE64_V, MASK_VLE64_V) -DECLARE_INSN(vle128_v, MATCH_VLE128_V, MASK_VLE128_V) -DECLARE_INSN(vle256_v, MATCH_VLE256_V, MASK_VLE256_V) -DECLARE_INSN(vle512_v, MATCH_VLE512_V, MASK_VLE512_V) -DECLARE_INSN(vle1024_v, MATCH_VLE1024_V, MASK_VLE1024_V) -DECLARE_INSN(vse8_v, MATCH_VSE8_V, MASK_VSE8_V) -DECLARE_INSN(vse16_v, MATCH_VSE16_V, MASK_VSE16_V) -DECLARE_INSN(vse32_v, MATCH_VSE32_V, MASK_VSE32_V) -DECLARE_INSN(vse64_v, MATCH_VSE64_V, MASK_VSE64_V) -DECLARE_INSN(vse128_v, MATCH_VSE128_V, MASK_VSE128_V) -DECLARE_INSN(vse256_v, MATCH_VSE256_V, MASK_VSE256_V) -DECLARE_INSN(vse512_v, MATCH_VSE512_V, MASK_VSE512_V) -DECLARE_INSN(vse1024_v, MATCH_VSE1024_V, MASK_VSE1024_V) -DECLARE_INSN(vlse8_v, MATCH_VLSE8_V, MASK_VLSE8_V) -DECLARE_INSN(vlse16_v, MATCH_VLSE16_V, MASK_VLSE16_V) -DECLARE_INSN(vlse32_v, MATCH_VLSE32_V, MASK_VLSE32_V) -DECLARE_INSN(vlse64_v, MATCH_VLSE64_V, MASK_VLSE64_V) -DECLARE_INSN(vlse128_v, MATCH_VLSE128_V, MASK_VLSE128_V) -DECLARE_INSN(vlse256_v, MATCH_VLSE256_V, MASK_VLSE256_V) -DECLARE_INSN(vlse512_v, MATCH_VLSE512_V, MASK_VLSE512_V) -DECLARE_INSN(vlse1024_v, MATCH_VLSE1024_V, MASK_VLSE1024_V) -DECLARE_INSN(vsse8_v, MATCH_VSSE8_V, MASK_VSSE8_V) -DECLARE_INSN(vsse16_v, MATCH_VSSE16_V, MASK_VSSE16_V) 
-DECLARE_INSN(vsse32_v, MATCH_VSSE32_V, MASK_VSSE32_V) -DECLARE_INSN(vsse64_v, MATCH_VSSE64_V, MASK_VSSE64_V) -DECLARE_INSN(vsse128_v, MATCH_VSSE128_V, MASK_VSSE128_V) -DECLARE_INSN(vsse256_v, MATCH_VSSE256_V, MASK_VSSE256_V) -DECLARE_INSN(vsse512_v, MATCH_VSSE512_V, MASK_VSSE512_V) -DECLARE_INSN(vsse1024_v, MATCH_VSSE1024_V, MASK_VSSE1024_V) -DECLARE_INSN(vlxei8_v, MATCH_VLXEI8_V, MASK_VLXEI8_V) -DECLARE_INSN(vlxei16_v, MATCH_VLXEI16_V, MASK_VLXEI16_V) -DECLARE_INSN(vlxei32_v, MATCH_VLXEI32_V, MASK_VLXEI32_V) -DECLARE_INSN(vlxei64_v, MATCH_VLXEI64_V, MASK_VLXEI64_V) -DECLARE_INSN(vlxei128_v, MATCH_VLXEI128_V, MASK_VLXEI128_V) -DECLARE_INSN(vlxei256_v, MATCH_VLXEI256_V, MASK_VLXEI256_V) -DECLARE_INSN(vlxei512_v, MATCH_VLXEI512_V, MASK_VLXEI512_V) -DECLARE_INSN(vlxei1024_v, MATCH_VLXEI1024_V, MASK_VLXEI1024_V) -DECLARE_INSN(vsxei8_v, MATCH_VSXEI8_V, MASK_VSXEI8_V) -DECLARE_INSN(vsxei16_v, MATCH_VSXEI16_V, MASK_VSXEI16_V) -DECLARE_INSN(vsxei32_v, MATCH_VSXEI32_V, MASK_VSXEI32_V) -DECLARE_INSN(vsxei64_v, MATCH_VSXEI64_V, MASK_VSXEI64_V) -DECLARE_INSN(vsxei128_v, MATCH_VSXEI128_V, MASK_VSXEI128_V) -DECLARE_INSN(vsxei256_v, MATCH_VSXEI256_V, MASK_VSXEI256_V) -DECLARE_INSN(vsxei512_v, MATCH_VSXEI512_V, MASK_VSXEI512_V) -DECLARE_INSN(vsxei1024_v, MATCH_VSXEI1024_V, MASK_VSXEI1024_V) -DECLARE_INSN(vsuxei8_v, MATCH_VSUXEI8_V, MASK_VSUXEI8_V) -DECLARE_INSN(vsuxei16_v, MATCH_VSUXEI16_V, MASK_VSUXEI16_V) -DECLARE_INSN(vsuxei32_v, MATCH_VSUXEI32_V, MASK_VSUXEI32_V) -DECLARE_INSN(vsuxei64_v, MATCH_VSUXEI64_V, MASK_VSUXEI64_V) -DECLARE_INSN(vsuxei128_v, MATCH_VSUXEI128_V, MASK_VSUXEI128_V) -DECLARE_INSN(vsuxei256_v, MATCH_VSUXEI256_V, MASK_VSUXEI256_V) -DECLARE_INSN(vsuxei512_v, MATCH_VSUXEI512_V, MASK_VSUXEI512_V) -DECLARE_INSN(vsuxei1024_v, MATCH_VSUXEI1024_V, MASK_VSUXEI1024_V) -DECLARE_INSN(vle8ff_v, MATCH_VLE8FF_V, MASK_VLE8FF_V) -DECLARE_INSN(vle16ff_v, MATCH_VLE16FF_V, MASK_VLE16FF_V) -DECLARE_INSN(vle32ff_v, MATCH_VLE32FF_V, MASK_VLE32FF_V) -DECLARE_INSN(vle64ff_v, 
MATCH_VLE64FF_V, MASK_VLE64FF_V) -DECLARE_INSN(vle128ff_v, MATCH_VLE128FF_V, MASK_VLE128FF_V) -DECLARE_INSN(vle256ff_v, MATCH_VLE256FF_V, MASK_VLE256FF_V) -DECLARE_INSN(vle512ff_v, MATCH_VLE512FF_V, MASK_VLE512FF_V) -DECLARE_INSN(vle1024ff_v, MATCH_VLE1024FF_V, MASK_VLE1024FF_V) -DECLARE_INSN(vl1re8_v, MATCH_VL1RE8_V, MASK_VL1RE8_V) -DECLARE_INSN(vl1re16_v, MATCH_VL1RE16_V, MASK_VL1RE16_V) -DECLARE_INSN(vl1re32_v, MATCH_VL1RE32_V, MASK_VL1RE32_V) -DECLARE_INSN(vl1re64_v, MATCH_VL1RE64_V, MASK_VL1RE64_V) -DECLARE_INSN(vl2re8_v, MATCH_VL2RE8_V, MASK_VL2RE8_V) -DECLARE_INSN(vl2re16_v, MATCH_VL2RE16_V, MASK_VL2RE16_V) -DECLARE_INSN(vl2re32_v, MATCH_VL2RE32_V, MASK_VL2RE32_V) -DECLARE_INSN(vl2re64_v, MATCH_VL2RE64_V, MASK_VL2RE64_V) -DECLARE_INSN(vl4re8_v, MATCH_VL4RE8_V, MASK_VL4RE8_V) -DECLARE_INSN(vl4re16_v, MATCH_VL4RE16_V, MASK_VL4RE16_V) -DECLARE_INSN(vl4re32_v, MATCH_VL4RE32_V, MASK_VL4RE32_V) -DECLARE_INSN(vl4re64_v, MATCH_VL4RE64_V, MASK_VL4RE64_V) -DECLARE_INSN(vl8re8_v, MATCH_VL8RE8_V, MASK_VL8RE8_V) -DECLARE_INSN(vl8re16_v, MATCH_VL8RE16_V, MASK_VL8RE16_V) -DECLARE_INSN(vl8re32_v, MATCH_VL8RE32_V, MASK_VL8RE32_V) -DECLARE_INSN(vl8re64_v, MATCH_VL8RE64_V, MASK_VL8RE64_V) -DECLARE_INSN(vs1r_v, MATCH_VS1R_V, MASK_VS1R_V) -DECLARE_INSN(vs2r_v, MATCH_VS2R_V, MASK_VS2R_V) -DECLARE_INSN(vs4r_v, MATCH_VS4R_V, MASK_VS4R_V) -DECLARE_INSN(vs8r_v, MATCH_VS8R_V, MASK_VS8R_V) -DECLARE_INSN(vfadd_vf, MATCH_VFADD_VF, MASK_VFADD_VF) -DECLARE_INSN(vfsub_vf, MATCH_VFSUB_VF, MASK_VFSUB_VF) -DECLARE_INSN(vfmin_vf, MATCH_VFMIN_VF, MASK_VFMIN_VF) -DECLARE_INSN(vfmax_vf, MATCH_VFMAX_VF, MASK_VFMAX_VF) -DECLARE_INSN(vfsgnj_vf, MATCH_VFSGNJ_VF, MASK_VFSGNJ_VF) -DECLARE_INSN(vfsgnjn_vf, MATCH_VFSGNJN_VF, MASK_VFSGNJN_VF) -DECLARE_INSN(vfsgnjx_vf, MATCH_VFSGNJX_VF, MASK_VFSGNJX_VF) -DECLARE_INSN(vfslide1up_vf, MATCH_VFSLIDE1UP_VF, MASK_VFSLIDE1UP_VF) -DECLARE_INSN(vfslide1down_vf, MATCH_VFSLIDE1DOWN_VF, MASK_VFSLIDE1DOWN_VF) -DECLARE_INSN(vfmv_s_f, MATCH_VFMV_S_F, MASK_VFMV_S_F) 
-DECLARE_INSN(vfmerge_vfm, MATCH_VFMERGE_VFM, MASK_VFMERGE_VFM) -DECLARE_INSN(vfmv_v_f, MATCH_VFMV_V_F, MASK_VFMV_V_F) -DECLARE_INSN(vmfeq_vf, MATCH_VMFEQ_VF, MASK_VMFEQ_VF) -DECLARE_INSN(vmfle_vf, MATCH_VMFLE_VF, MASK_VMFLE_VF) -DECLARE_INSN(vmflt_vf, MATCH_VMFLT_VF, MASK_VMFLT_VF) -DECLARE_INSN(vmfne_vf, MATCH_VMFNE_VF, MASK_VMFNE_VF) -DECLARE_INSN(vmfgt_vf, MATCH_VMFGT_VF, MASK_VMFGT_VF) -DECLARE_INSN(vmfge_vf, MATCH_VMFGE_VF, MASK_VMFGE_VF) -DECLARE_INSN(vfdiv_vf, MATCH_VFDIV_VF, MASK_VFDIV_VF) -DECLARE_INSN(vfrdiv_vf, MATCH_VFRDIV_VF, MASK_VFRDIV_VF) -DECLARE_INSN(vfmul_vf, MATCH_VFMUL_VF, MASK_VFMUL_VF) -DECLARE_INSN(vfrsub_vf, MATCH_VFRSUB_VF, MASK_VFRSUB_VF) -DECLARE_INSN(vfmadd_vf, MATCH_VFMADD_VF, MASK_VFMADD_VF) -DECLARE_INSN(vfnmadd_vf, MATCH_VFNMADD_VF, MASK_VFNMADD_VF) -DECLARE_INSN(vfmsub_vf, MATCH_VFMSUB_VF, MASK_VFMSUB_VF) -DECLARE_INSN(vfnmsub_vf, MATCH_VFNMSUB_VF, MASK_VFNMSUB_VF) -DECLARE_INSN(vfmacc_vf, MATCH_VFMACC_VF, MASK_VFMACC_VF) -DECLARE_INSN(vfnmacc_vf, MATCH_VFNMACC_VF, MASK_VFNMACC_VF) -DECLARE_INSN(vfmsac_vf, MATCH_VFMSAC_VF, MASK_VFMSAC_VF) -DECLARE_INSN(vfnmsac_vf, MATCH_VFNMSAC_VF, MASK_VFNMSAC_VF) -DECLARE_INSN(vfwadd_vf, MATCH_VFWADD_VF, MASK_VFWADD_VF) -DECLARE_INSN(vfwsub_vf, MATCH_VFWSUB_VF, MASK_VFWSUB_VF) -DECLARE_INSN(vfwadd_wf, MATCH_VFWADD_WF, MASK_VFWADD_WF) -DECLARE_INSN(vfwsub_wf, MATCH_VFWSUB_WF, MASK_VFWSUB_WF) -DECLARE_INSN(vfwmul_vf, MATCH_VFWMUL_VF, MASK_VFWMUL_VF) -DECLARE_INSN(vfwmacc_vf, MATCH_VFWMACC_VF, MASK_VFWMACC_VF) -DECLARE_INSN(vfwnmacc_vf, MATCH_VFWNMACC_VF, MASK_VFWNMACC_VF) -DECLARE_INSN(vfwmsac_vf, MATCH_VFWMSAC_VF, MASK_VFWMSAC_VF) -DECLARE_INSN(vfwnmsac_vf, MATCH_VFWNMSAC_VF, MASK_VFWNMSAC_VF) -DECLARE_INSN(vfadd_vv, MATCH_VFADD_VV, MASK_VFADD_VV) -DECLARE_INSN(vfredsum_vs, MATCH_VFREDSUM_VS, MASK_VFREDSUM_VS) -DECLARE_INSN(vfsub_vv, MATCH_VFSUB_VV, MASK_VFSUB_VV) -DECLARE_INSN(vfredosum_vs, MATCH_VFREDOSUM_VS, MASK_VFREDOSUM_VS) -DECLARE_INSN(vfmin_vv, MATCH_VFMIN_VV, MASK_VFMIN_VV) 
-DECLARE_INSN(vfredmin_vs, MATCH_VFREDMIN_VS, MASK_VFREDMIN_VS) -DECLARE_INSN(vfmax_vv, MATCH_VFMAX_VV, MASK_VFMAX_VV) -DECLARE_INSN(vfredmax_vs, MATCH_VFREDMAX_VS, MASK_VFREDMAX_VS) -DECLARE_INSN(vfsgnj_vv, MATCH_VFSGNJ_VV, MASK_VFSGNJ_VV) -DECLARE_INSN(vfsgnjn_vv, MATCH_VFSGNJN_VV, MASK_VFSGNJN_VV) -DECLARE_INSN(vfsgnjx_vv, MATCH_VFSGNJX_VV, MASK_VFSGNJX_VV) -DECLARE_INSN(vfmv_f_s, MATCH_VFMV_F_S, MASK_VFMV_F_S) -DECLARE_INSN(vmfeq_vv, MATCH_VMFEQ_VV, MASK_VMFEQ_VV) -DECLARE_INSN(vmfle_vv, MATCH_VMFLE_VV, MASK_VMFLE_VV) -DECLARE_INSN(vmflt_vv, MATCH_VMFLT_VV, MASK_VMFLT_VV) -DECLARE_INSN(vmfne_vv, MATCH_VMFNE_VV, MASK_VMFNE_VV) -DECLARE_INSN(vfdiv_vv, MATCH_VFDIV_VV, MASK_VFDIV_VV) -DECLARE_INSN(vfmul_vv, MATCH_VFMUL_VV, MASK_VFMUL_VV) -DECLARE_INSN(vfmadd_vv, MATCH_VFMADD_VV, MASK_VFMADD_VV) -DECLARE_INSN(vfnmadd_vv, MATCH_VFNMADD_VV, MASK_VFNMADD_VV) -DECLARE_INSN(vfmsub_vv, MATCH_VFMSUB_VV, MASK_VFMSUB_VV) -DECLARE_INSN(vfnmsub_vv, MATCH_VFNMSUB_VV, MASK_VFNMSUB_VV) -DECLARE_INSN(vfmacc_vv, MATCH_VFMACC_VV, MASK_VFMACC_VV) -DECLARE_INSN(vfnmacc_vv, MATCH_VFNMACC_VV, MASK_VFNMACC_VV) -DECLARE_INSN(vfmsac_vv, MATCH_VFMSAC_VV, MASK_VFMSAC_VV) -DECLARE_INSN(vfnmsac_vv, MATCH_VFNMSAC_VV, MASK_VFNMSAC_VV) -DECLARE_INSN(vfcvt_xu_f_v, MATCH_VFCVT_XU_F_V, MASK_VFCVT_XU_F_V) -DECLARE_INSN(vfcvt_x_f_v, MATCH_VFCVT_X_F_V, MASK_VFCVT_X_F_V) -DECLARE_INSN(vfcvt_f_xu_v, MATCH_VFCVT_F_XU_V, MASK_VFCVT_F_XU_V) -DECLARE_INSN(vfcvt_f_x_v, MATCH_VFCVT_F_X_V, MASK_VFCVT_F_X_V) -DECLARE_INSN(vfcvt_rtz_xu_f_v, MATCH_VFCVT_RTZ_XU_F_V, MASK_VFCVT_RTZ_XU_F_V) -DECLARE_INSN(vfcvt_rtz_x_f_v, MATCH_VFCVT_RTZ_X_F_V, MASK_VFCVT_RTZ_X_F_V) -DECLARE_INSN(vfwcvt_xu_f_v, MATCH_VFWCVT_XU_F_V, MASK_VFWCVT_XU_F_V) -DECLARE_INSN(vfwcvt_x_f_v, MATCH_VFWCVT_X_F_V, MASK_VFWCVT_X_F_V) -DECLARE_INSN(vfwcvt_f_xu_v, MATCH_VFWCVT_F_XU_V, MASK_VFWCVT_F_XU_V) -DECLARE_INSN(vfwcvt_f_x_v, MATCH_VFWCVT_F_X_V, MASK_VFWCVT_F_X_V) -DECLARE_INSN(vfwcvt_f_f_v, MATCH_VFWCVT_F_F_V, MASK_VFWCVT_F_F_V) 
-DECLARE_INSN(vfwcvt_rtz_xu_f_v, MATCH_VFWCVT_RTZ_XU_F_V, MASK_VFWCVT_RTZ_XU_F_V) -DECLARE_INSN(vfwcvt_rtz_x_f_v, MATCH_VFWCVT_RTZ_X_F_V, MASK_VFWCVT_RTZ_X_F_V) -DECLARE_INSN(vfncvt_xu_f_w, MATCH_VFNCVT_XU_F_W, MASK_VFNCVT_XU_F_W) -DECLARE_INSN(vfncvt_x_f_w, MATCH_VFNCVT_X_F_W, MASK_VFNCVT_X_F_W) -DECLARE_INSN(vfncvt_f_xu_w, MATCH_VFNCVT_F_XU_W, MASK_VFNCVT_F_XU_W) -DECLARE_INSN(vfncvt_f_x_w, MATCH_VFNCVT_F_X_W, MASK_VFNCVT_F_X_W) -DECLARE_INSN(vfncvt_f_f_w, MATCH_VFNCVT_F_F_W, MASK_VFNCVT_F_F_W) -DECLARE_INSN(vfncvt_rod_f_f_w, MATCH_VFNCVT_ROD_F_F_W, MASK_VFNCVT_ROD_F_F_W) -DECLARE_INSN(vfncvt_rtz_xu_f_w, MATCH_VFNCVT_RTZ_XU_F_W, MASK_VFNCVT_RTZ_XU_F_W) -DECLARE_INSN(vfncvt_rtz_x_f_w, MATCH_VFNCVT_RTZ_X_F_W, MASK_VFNCVT_RTZ_X_F_W) -DECLARE_INSN(vfsqrt_v, MATCH_VFSQRT_V, MASK_VFSQRT_V) -DECLARE_INSN(vfrsqrte7_v, MATCH_VFRSQRTE7_V, MASK_VFRSQRTE7_V) -DECLARE_INSN(vfrece7_v, MATCH_VFRECE7_V, MASK_VFRECE7_V) -DECLARE_INSN(vfclass_v, MATCH_VFCLASS_V, MASK_VFCLASS_V) -DECLARE_INSN(vfwadd_vv, MATCH_VFWADD_VV, MASK_VFWADD_VV) -DECLARE_INSN(vfwredsum_vs, MATCH_VFWREDSUM_VS, MASK_VFWREDSUM_VS) -DECLARE_INSN(vfwsub_vv, MATCH_VFWSUB_VV, MASK_VFWSUB_VV) -DECLARE_INSN(vfwredosum_vs, MATCH_VFWREDOSUM_VS, MASK_VFWREDOSUM_VS) -DECLARE_INSN(vfwadd_wv, MATCH_VFWADD_WV, MASK_VFWADD_WV) -DECLARE_INSN(vfwsub_wv, MATCH_VFWSUB_WV, MASK_VFWSUB_WV) -DECLARE_INSN(vfwmul_vv, MATCH_VFWMUL_VV, MASK_VFWMUL_VV) -DECLARE_INSN(vfdot_vv, MATCH_VFDOT_VV, MASK_VFDOT_VV) -DECLARE_INSN(vfwmacc_vv, MATCH_VFWMACC_VV, MASK_VFWMACC_VV) -DECLARE_INSN(vfwnmacc_vv, MATCH_VFWNMACC_VV, MASK_VFWNMACC_VV) -DECLARE_INSN(vfwmsac_vv, MATCH_VFWMSAC_VV, MASK_VFWMSAC_VV) -DECLARE_INSN(vfwnmsac_vv, MATCH_VFWNMSAC_VV, MASK_VFWNMSAC_VV) -DECLARE_INSN(vadd_vx, MATCH_VADD_VX, MASK_VADD_VX) -DECLARE_INSN(vsub_vx, MATCH_VSUB_VX, MASK_VSUB_VX) -DECLARE_INSN(vrsub_vx, MATCH_VRSUB_VX, MASK_VRSUB_VX) -DECLARE_INSN(vminu_vx, MATCH_VMINU_VX, MASK_VMINU_VX) -DECLARE_INSN(vmin_vx, MATCH_VMIN_VX, MASK_VMIN_VX) -DECLARE_INSN(vmaxu_vx, 
MATCH_VMAXU_VX, MASK_VMAXU_VX) -DECLARE_INSN(vmax_vx, MATCH_VMAX_VX, MASK_VMAX_VX) -DECLARE_INSN(vand_vx, MATCH_VAND_VX, MASK_VAND_VX) -DECLARE_INSN(vor_vx, MATCH_VOR_VX, MASK_VOR_VX) -DECLARE_INSN(vxor_vx, MATCH_VXOR_VX, MASK_VXOR_VX) -DECLARE_INSN(vrgather_vx, MATCH_VRGATHER_VX, MASK_VRGATHER_VX) -DECLARE_INSN(vslideup_vx, MATCH_VSLIDEUP_VX, MASK_VSLIDEUP_VX) -DECLARE_INSN(vslidedown_vx, MATCH_VSLIDEDOWN_VX, MASK_VSLIDEDOWN_VX) -DECLARE_INSN(vadc_vxm, MATCH_VADC_VXM, MASK_VADC_VXM) -DECLARE_INSN(vmadc_vxm, MATCH_VMADC_VXM, MASK_VMADC_VXM) -DECLARE_INSN(vsbc_vxm, MATCH_VSBC_VXM, MASK_VSBC_VXM) -DECLARE_INSN(vmsbc_vxm, MATCH_VMSBC_VXM, MASK_VMSBC_VXM) -DECLARE_INSN(vmerge_vxm, MATCH_VMERGE_VXM, MASK_VMERGE_VXM) -DECLARE_INSN(vmv_v_x, MATCH_VMV_V_X, MASK_VMV_V_X) -DECLARE_INSN(vmseq_vx, MATCH_VMSEQ_VX, MASK_VMSEQ_VX) -DECLARE_INSN(vmsne_vx, MATCH_VMSNE_VX, MASK_VMSNE_VX) -DECLARE_INSN(vmsltu_vx, MATCH_VMSLTU_VX, MASK_VMSLTU_VX) -DECLARE_INSN(vmslt_vx, MATCH_VMSLT_VX, MASK_VMSLT_VX) -DECLARE_INSN(vmsleu_vx, MATCH_VMSLEU_VX, MASK_VMSLEU_VX) -DECLARE_INSN(vmsle_vx, MATCH_VMSLE_VX, MASK_VMSLE_VX) -DECLARE_INSN(vmsgtu_vx, MATCH_VMSGTU_VX, MASK_VMSGTU_VX) -DECLARE_INSN(vmsgt_vx, MATCH_VMSGT_VX, MASK_VMSGT_VX) -DECLARE_INSN(vsaddu_vx, MATCH_VSADDU_VX, MASK_VSADDU_VX) -DECLARE_INSN(vsadd_vx, MATCH_VSADD_VX, MASK_VSADD_VX) -DECLARE_INSN(vssubu_vx, MATCH_VSSUBU_VX, MASK_VSSUBU_VX) -DECLARE_INSN(vssub_vx, MATCH_VSSUB_VX, MASK_VSSUB_VX) -DECLARE_INSN(vsll_vx, MATCH_VSLL_VX, MASK_VSLL_VX) -DECLARE_INSN(vsmul_vx, MATCH_VSMUL_VX, MASK_VSMUL_VX) -DECLARE_INSN(vsrl_vx, MATCH_VSRL_VX, MASK_VSRL_VX) -DECLARE_INSN(vsra_vx, MATCH_VSRA_VX, MASK_VSRA_VX) -DECLARE_INSN(vssrl_vx, MATCH_VSSRL_VX, MASK_VSSRL_VX) -DECLARE_INSN(vssra_vx, MATCH_VSSRA_VX, MASK_VSSRA_VX) -DECLARE_INSN(vnsrl_wx, MATCH_VNSRL_WX, MASK_VNSRL_WX) -DECLARE_INSN(vnsra_wx, MATCH_VNSRA_WX, MASK_VNSRA_WX) -DECLARE_INSN(vnclipu_wx, MATCH_VNCLIPU_WX, MASK_VNCLIPU_WX) -DECLARE_INSN(vnclip_wx, MATCH_VNCLIP_WX, MASK_VNCLIP_WX) 
-DECLARE_INSN(vqmaccu_vx, MATCH_VQMACCU_VX, MASK_VQMACCU_VX) -DECLARE_INSN(vqmacc_vx, MATCH_VQMACC_VX, MASK_VQMACC_VX) -DECLARE_INSN(vqmaccus_vx, MATCH_VQMACCUS_VX, MASK_VQMACCUS_VX) -DECLARE_INSN(vqmaccsu_vx, MATCH_VQMACCSU_VX, MASK_VQMACCSU_VX) -DECLARE_INSN(vadd_vv, MATCH_VADD_VV, MASK_VADD_VV) -DECLARE_INSN(vsub_vv, MATCH_VSUB_VV, MASK_VSUB_VV) -DECLARE_INSN(vminu_vv, MATCH_VMINU_VV, MASK_VMINU_VV) -DECLARE_INSN(vmin_vv, MATCH_VMIN_VV, MASK_VMIN_VV) -DECLARE_INSN(vmaxu_vv, MATCH_VMAXU_VV, MASK_VMAXU_VV) -DECLARE_INSN(vmax_vv, MATCH_VMAX_VV, MASK_VMAX_VV) -DECLARE_INSN(vand_vv, MATCH_VAND_VV, MASK_VAND_VV) -DECLARE_INSN(vor_vv, MATCH_VOR_VV, MASK_VOR_VV) -DECLARE_INSN(vxor_vv, MATCH_VXOR_VV, MASK_VXOR_VV) -DECLARE_INSN(vrgather_vv, MATCH_VRGATHER_VV, MASK_VRGATHER_VV) -DECLARE_INSN(vrgatherei16_vv, MATCH_VRGATHEREI16_VV, MASK_VRGATHEREI16_VV) -DECLARE_INSN(vadc_vvm, MATCH_VADC_VVM, MASK_VADC_VVM) -DECLARE_INSN(vmadc_vvm, MATCH_VMADC_VVM, MASK_VMADC_VVM) -DECLARE_INSN(vsbc_vvm, MATCH_VSBC_VVM, MASK_VSBC_VVM) -DECLARE_INSN(vmsbc_vvm, MATCH_VMSBC_VVM, MASK_VMSBC_VVM) -DECLARE_INSN(vmerge_vvm, MATCH_VMERGE_VVM, MASK_VMERGE_VVM) -DECLARE_INSN(vmv_v_v, MATCH_VMV_V_V, MASK_VMV_V_V) -DECLARE_INSN(vmseq_vv, MATCH_VMSEQ_VV, MASK_VMSEQ_VV) -DECLARE_INSN(vmsne_vv, MATCH_VMSNE_VV, MASK_VMSNE_VV) -DECLARE_INSN(vmsltu_vv, MATCH_VMSLTU_VV, MASK_VMSLTU_VV) -DECLARE_INSN(vmslt_vv, MATCH_VMSLT_VV, MASK_VMSLT_VV) -DECLARE_INSN(vmsleu_vv, MATCH_VMSLEU_VV, MASK_VMSLEU_VV) -DECLARE_INSN(vmsle_vv, MATCH_VMSLE_VV, MASK_VMSLE_VV) -DECLARE_INSN(vsaddu_vv, MATCH_VSADDU_VV, MASK_VSADDU_VV) -DECLARE_INSN(vsadd_vv, MATCH_VSADD_VV, MASK_VSADD_VV) -DECLARE_INSN(vssubu_vv, MATCH_VSSUBU_VV, MASK_VSSUBU_VV) -DECLARE_INSN(vssub_vv, MATCH_VSSUB_VV, MASK_VSSUB_VV) -DECLARE_INSN(vsll_vv, MATCH_VSLL_VV, MASK_VSLL_VV) -DECLARE_INSN(vsmul_vv, MATCH_VSMUL_VV, MASK_VSMUL_VV) -DECLARE_INSN(vsrl_vv, MATCH_VSRL_VV, MASK_VSRL_VV) -DECLARE_INSN(vsra_vv, MATCH_VSRA_VV, MASK_VSRA_VV) -DECLARE_INSN(vssrl_vv, 
MATCH_VSSRL_VV, MASK_VSSRL_VV) -DECLARE_INSN(vssra_vv, MATCH_VSSRA_VV, MASK_VSSRA_VV) -DECLARE_INSN(vnsrl_wv, MATCH_VNSRL_WV, MASK_VNSRL_WV) -DECLARE_INSN(vnsra_wv, MATCH_VNSRA_WV, MASK_VNSRA_WV) -DECLARE_INSN(vnclipu_wv, MATCH_VNCLIPU_WV, MASK_VNCLIPU_WV) -DECLARE_INSN(vnclip_wv, MATCH_VNCLIP_WV, MASK_VNCLIP_WV) -DECLARE_INSN(vwredsumu_vs, MATCH_VWREDSUMU_VS, MASK_VWREDSUMU_VS) -DECLARE_INSN(vwredsum_vs, MATCH_VWREDSUM_VS, MASK_VWREDSUM_VS) -DECLARE_INSN(vdotu_vv, MATCH_VDOTU_VV, MASK_VDOTU_VV) -DECLARE_INSN(vdot_vv, MATCH_VDOT_VV, MASK_VDOT_VV) -DECLARE_INSN(vqmaccu_vv, MATCH_VQMACCU_VV, MASK_VQMACCU_VV) -DECLARE_INSN(vqmacc_vv, MATCH_VQMACC_VV, MASK_VQMACC_VV) -DECLARE_INSN(vqmaccsu_vv, MATCH_VQMACCSU_VV, MASK_VQMACCSU_VV) -DECLARE_INSN(vadd_vi, MATCH_VADD_VI, MASK_VADD_VI) -DECLARE_INSN(vrsub_vi, MATCH_VRSUB_VI, MASK_VRSUB_VI) -DECLARE_INSN(vand_vi, MATCH_VAND_VI, MASK_VAND_VI) -DECLARE_INSN(vor_vi, MATCH_VOR_VI, MASK_VOR_VI) -DECLARE_INSN(vxor_vi, MATCH_VXOR_VI, MASK_VXOR_VI) -DECLARE_INSN(vrgather_vi, MATCH_VRGATHER_VI, MASK_VRGATHER_VI) -DECLARE_INSN(vslideup_vi, MATCH_VSLIDEUP_VI, MASK_VSLIDEUP_VI) -DECLARE_INSN(vslidedown_vi, MATCH_VSLIDEDOWN_VI, MASK_VSLIDEDOWN_VI) -DECLARE_INSN(vadc_vim, MATCH_VADC_VIM, MASK_VADC_VIM) -DECLARE_INSN(vmadc_vim, MATCH_VMADC_VIM, MASK_VMADC_VIM) -DECLARE_INSN(vmerge_vim, MATCH_VMERGE_VIM, MASK_VMERGE_VIM) -DECLARE_INSN(vmv_v_i, MATCH_VMV_V_I, MASK_VMV_V_I) -DECLARE_INSN(vmseq_vi, MATCH_VMSEQ_VI, MASK_VMSEQ_VI) -DECLARE_INSN(vmsne_vi, MATCH_VMSNE_VI, MASK_VMSNE_VI) -DECLARE_INSN(vmsleu_vi, MATCH_VMSLEU_VI, MASK_VMSLEU_VI) -DECLARE_INSN(vmsle_vi, MATCH_VMSLE_VI, MASK_VMSLE_VI) -DECLARE_INSN(vmsgtu_vi, MATCH_VMSGTU_VI, MASK_VMSGTU_VI) -DECLARE_INSN(vmsgt_vi, MATCH_VMSGT_VI, MASK_VMSGT_VI) -DECLARE_INSN(vsaddu_vi, MATCH_VSADDU_VI, MASK_VSADDU_VI) -DECLARE_INSN(vsadd_vi, MATCH_VSADD_VI, MASK_VSADD_VI) -DECLARE_INSN(vsll_vi, MATCH_VSLL_VI, MASK_VSLL_VI) -DECLARE_INSN(vmv1r_v, MATCH_VMV1R_V, MASK_VMV1R_V) -DECLARE_INSN(vmv2r_v, 
MATCH_VMV2R_V, MASK_VMV2R_V) -DECLARE_INSN(vmv4r_v, MATCH_VMV4R_V, MASK_VMV4R_V) -DECLARE_INSN(vmv8r_v, MATCH_VMV8R_V, MASK_VMV8R_V) -DECLARE_INSN(vsrl_vi, MATCH_VSRL_VI, MASK_VSRL_VI) -DECLARE_INSN(vsra_vi, MATCH_VSRA_VI, MASK_VSRA_VI) -DECLARE_INSN(vssrl_vi, MATCH_VSSRL_VI, MASK_VSSRL_VI) -DECLARE_INSN(vssra_vi, MATCH_VSSRA_VI, MASK_VSSRA_VI) -DECLARE_INSN(vnsrl_wi, MATCH_VNSRL_WI, MASK_VNSRL_WI) -DECLARE_INSN(vnsra_wi, MATCH_VNSRA_WI, MASK_VNSRA_WI) -DECLARE_INSN(vnclipu_wi, MATCH_VNCLIPU_WI, MASK_VNCLIPU_WI) -DECLARE_INSN(vnclip_wi, MATCH_VNCLIP_WI, MASK_VNCLIP_WI) -DECLARE_INSN(vredsum_vs, MATCH_VREDSUM_VS, MASK_VREDSUM_VS) -DECLARE_INSN(vredand_vs, MATCH_VREDAND_VS, MASK_VREDAND_VS) -DECLARE_INSN(vredor_vs, MATCH_VREDOR_VS, MASK_VREDOR_VS) -DECLARE_INSN(vredxor_vs, MATCH_VREDXOR_VS, MASK_VREDXOR_VS) -DECLARE_INSN(vredminu_vs, MATCH_VREDMINU_VS, MASK_VREDMINU_VS) -DECLARE_INSN(vredmin_vs, MATCH_VREDMIN_VS, MASK_VREDMIN_VS) -DECLARE_INSN(vredmaxu_vs, MATCH_VREDMAXU_VS, MASK_VREDMAXU_VS) -DECLARE_INSN(vredmax_vs, MATCH_VREDMAX_VS, MASK_VREDMAX_VS) -DECLARE_INSN(vaaddu_vv, MATCH_VAADDU_VV, MASK_VAADDU_VV) -DECLARE_INSN(vaadd_vv, MATCH_VAADD_VV, MASK_VAADD_VV) -DECLARE_INSN(vasubu_vv, MATCH_VASUBU_VV, MASK_VASUBU_VV) -DECLARE_INSN(vasub_vv, MATCH_VASUB_VV, MASK_VASUB_VV) -DECLARE_INSN(vmv_x_s, MATCH_VMV_X_S, MASK_VMV_X_S) -DECLARE_INSN(vzext_vf8, MATCH_VZEXT_VF8, MASK_VZEXT_VF8) -DECLARE_INSN(vsext_vf8, MATCH_VSEXT_VF8, MASK_VSEXT_VF8) -DECLARE_INSN(vzext_vf4, MATCH_VZEXT_VF4, MASK_VZEXT_VF4) -DECLARE_INSN(vsext_vf4, MATCH_VSEXT_VF4, MASK_VSEXT_VF4) -DECLARE_INSN(vzext_vf2, MATCH_VZEXT_VF2, MASK_VZEXT_VF2) -DECLARE_INSN(vsext_vf2, MATCH_VSEXT_VF2, MASK_VSEXT_VF2) -DECLARE_INSN(vcompress_vm, MATCH_VCOMPRESS_VM, MASK_VCOMPRESS_VM) -DECLARE_INSN(vmandnot_mm, MATCH_VMANDNOT_MM, MASK_VMANDNOT_MM) -DECLARE_INSN(vmand_mm, MATCH_VMAND_MM, MASK_VMAND_MM) -DECLARE_INSN(vmor_mm, MATCH_VMOR_MM, MASK_VMOR_MM) -DECLARE_INSN(vmxor_mm, MATCH_VMXOR_MM, MASK_VMXOR_MM) 
-DECLARE_INSN(vmornot_mm, MATCH_VMORNOT_MM, MASK_VMORNOT_MM) -DECLARE_INSN(vmnand_mm, MATCH_VMNAND_MM, MASK_VMNAND_MM) -DECLARE_INSN(vmnor_mm, MATCH_VMNOR_MM, MASK_VMNOR_MM) -DECLARE_INSN(vmxnor_mm, MATCH_VMXNOR_MM, MASK_VMXNOR_MM) -DECLARE_INSN(vmsbf_m, MATCH_VMSBF_M, MASK_VMSBF_M) -DECLARE_INSN(vmsof_m, MATCH_VMSOF_M, MASK_VMSOF_M) -DECLARE_INSN(vmsif_m, MATCH_VMSIF_M, MASK_VMSIF_M) -DECLARE_INSN(viota_m, MATCH_VIOTA_M, MASK_VIOTA_M) -DECLARE_INSN(vid_v, MATCH_VID_V, MASK_VID_V) -DECLARE_INSN(vpopc_m, MATCH_VPOPC_M, MASK_VPOPC_M) -DECLARE_INSN(vfirst_m, MATCH_VFIRST_M, MASK_VFIRST_M) -DECLARE_INSN(vdivu_vv, MATCH_VDIVU_VV, MASK_VDIVU_VV) -DECLARE_INSN(vdiv_vv, MATCH_VDIV_VV, MASK_VDIV_VV) -DECLARE_INSN(vremu_vv, MATCH_VREMU_VV, MASK_VREMU_VV) -DECLARE_INSN(vrem_vv, MATCH_VREM_VV, MASK_VREM_VV) -DECLARE_INSN(vmulhu_vv, MATCH_VMULHU_VV, MASK_VMULHU_VV) -DECLARE_INSN(vmul_vv, MATCH_VMUL_VV, MASK_VMUL_VV) -DECLARE_INSN(vmulhsu_vv, MATCH_VMULHSU_VV, MASK_VMULHSU_VV) -DECLARE_INSN(vmulh_vv, MATCH_VMULH_VV, MASK_VMULH_VV) -DECLARE_INSN(vmadd_vv, MATCH_VMADD_VV, MASK_VMADD_VV) -DECLARE_INSN(vnmsub_vv, MATCH_VNMSUB_VV, MASK_VNMSUB_VV) -DECLARE_INSN(vmacc_vv, MATCH_VMACC_VV, MASK_VMACC_VV) -DECLARE_INSN(vnmsac_vv, MATCH_VNMSAC_VV, MASK_VNMSAC_VV) -DECLARE_INSN(vwaddu_vv, MATCH_VWADDU_VV, MASK_VWADDU_VV) -DECLARE_INSN(vwadd_vv, MATCH_VWADD_VV, MASK_VWADD_VV) -DECLARE_INSN(vwsubu_vv, MATCH_VWSUBU_VV, MASK_VWSUBU_VV) -DECLARE_INSN(vwsub_vv, MATCH_VWSUB_VV, MASK_VWSUB_VV) -DECLARE_INSN(vwaddu_wv, MATCH_VWADDU_WV, MASK_VWADDU_WV) -DECLARE_INSN(vwadd_wv, MATCH_VWADD_WV, MASK_VWADD_WV) -DECLARE_INSN(vwsubu_wv, MATCH_VWSUBU_WV, MASK_VWSUBU_WV) -DECLARE_INSN(vwsub_wv, MATCH_VWSUB_WV, MASK_VWSUB_WV) -DECLARE_INSN(vwmulu_vv, MATCH_VWMULU_VV, MASK_VWMULU_VV) -DECLARE_INSN(vwmulsu_vv, MATCH_VWMULSU_VV, MASK_VWMULSU_VV) -DECLARE_INSN(vwmul_vv, MATCH_VWMUL_VV, MASK_VWMUL_VV) -DECLARE_INSN(vwmaccu_vv, MATCH_VWMACCU_VV, MASK_VWMACCU_VV) -DECLARE_INSN(vwmacc_vv, MATCH_VWMACC_VV, 
MASK_VWMACC_VV) -DECLARE_INSN(vwmaccsu_vv, MATCH_VWMACCSU_VV, MASK_VWMACCSU_VV) -DECLARE_INSN(vaaddu_vx, MATCH_VAADDU_VX, MASK_VAADDU_VX) -DECLARE_INSN(vaadd_vx, MATCH_VAADD_VX, MASK_VAADD_VX) -DECLARE_INSN(vasubu_vx, MATCH_VASUBU_VX, MASK_VASUBU_VX) -DECLARE_INSN(vasub_vx, MATCH_VASUB_VX, MASK_VASUB_VX) -DECLARE_INSN(vmv_s_x, MATCH_VMV_S_X, MASK_VMV_S_X) -DECLARE_INSN(vslide1up_vx, MATCH_VSLIDE1UP_VX, MASK_VSLIDE1UP_VX) -DECLARE_INSN(vslide1down_vx, MATCH_VSLIDE1DOWN_VX, MASK_VSLIDE1DOWN_VX) -DECLARE_INSN(vdivu_vx, MATCH_VDIVU_VX, MASK_VDIVU_VX) -DECLARE_INSN(vdiv_vx, MATCH_VDIV_VX, MASK_VDIV_VX) -DECLARE_INSN(vremu_vx, MATCH_VREMU_VX, MASK_VREMU_VX) -DECLARE_INSN(vrem_vx, MATCH_VREM_VX, MASK_VREM_VX) -DECLARE_INSN(vmulhu_vx, MATCH_VMULHU_VX, MASK_VMULHU_VX) -DECLARE_INSN(vmul_vx, MATCH_VMUL_VX, MASK_VMUL_VX) -DECLARE_INSN(vmulhsu_vx, MATCH_VMULHSU_VX, MASK_VMULHSU_VX) -DECLARE_INSN(vmulh_vx, MATCH_VMULH_VX, MASK_VMULH_VX) -DECLARE_INSN(vmadd_vx, MATCH_VMADD_VX, MASK_VMADD_VX) -DECLARE_INSN(vnmsub_vx, MATCH_VNMSUB_VX, MASK_VNMSUB_VX) -DECLARE_INSN(vmacc_vx, MATCH_VMACC_VX, MASK_VMACC_VX) -DECLARE_INSN(vnmsac_vx, MATCH_VNMSAC_VX, MASK_VNMSAC_VX) -DECLARE_INSN(vwaddu_vx, MATCH_VWADDU_VX, MASK_VWADDU_VX) -DECLARE_INSN(vwadd_vx, MATCH_VWADD_VX, MASK_VWADD_VX) -DECLARE_INSN(vwsubu_vx, MATCH_VWSUBU_VX, MASK_VWSUBU_VX) -DECLARE_INSN(vwsub_vx, MATCH_VWSUB_VX, MASK_VWSUB_VX) -DECLARE_INSN(vwaddu_wx, MATCH_VWADDU_WX, MASK_VWADDU_WX) -DECLARE_INSN(vwadd_wx, MATCH_VWADD_WX, MASK_VWADD_WX) -DECLARE_INSN(vwsubu_wx, MATCH_VWSUBU_WX, MASK_VWSUBU_WX) -DECLARE_INSN(vwsub_wx, MATCH_VWSUB_WX, MASK_VWSUB_WX) -DECLARE_INSN(vwmulu_vx, MATCH_VWMULU_VX, MASK_VWMULU_VX) -DECLARE_INSN(vwmulsu_vx, MATCH_VWMULSU_VX, MASK_VWMULSU_VX) -DECLARE_INSN(vwmul_vx, MATCH_VWMUL_VX, MASK_VWMUL_VX) -DECLARE_INSN(vwmaccu_vx, MATCH_VWMACCU_VX, MASK_VWMACCU_VX) -DECLARE_INSN(vwmacc_vx, MATCH_VWMACC_VX, MASK_VWMACC_VX) -DECLARE_INSN(vwmaccus_vx, MATCH_VWMACCUS_VX, MASK_VWMACCUS_VX) -DECLARE_INSN(vwmaccsu_vx, 
MATCH_VWMACCSU_VX, MASK_VWMACCSU_VX) -DECLARE_INSN(vamoswapei8_v, MATCH_VAMOSWAPEI8_V, MASK_VAMOSWAPEI8_V) -DECLARE_INSN(vamoaddei8_v, MATCH_VAMOADDEI8_V, MASK_VAMOADDEI8_V) -DECLARE_INSN(vamoxorei8_v, MATCH_VAMOXOREI8_V, MASK_VAMOXOREI8_V) -DECLARE_INSN(vamoandei8_v, MATCH_VAMOANDEI8_V, MASK_VAMOANDEI8_V) -DECLARE_INSN(vamoorei8_v, MATCH_VAMOOREI8_V, MASK_VAMOOREI8_V) -DECLARE_INSN(vamominei8_v, MATCH_VAMOMINEI8_V, MASK_VAMOMINEI8_V) -DECLARE_INSN(vamomaxei8_v, MATCH_VAMOMAXEI8_V, MASK_VAMOMAXEI8_V) -DECLARE_INSN(vamominuei8_v, MATCH_VAMOMINUEI8_V, MASK_VAMOMINUEI8_V) -DECLARE_INSN(vamomaxuei8_v, MATCH_VAMOMAXUEI8_V, MASK_VAMOMAXUEI8_V) -DECLARE_INSN(vamoswapei16_v, MATCH_VAMOSWAPEI16_V, MASK_VAMOSWAPEI16_V) -DECLARE_INSN(vamoaddei16_v, MATCH_VAMOADDEI16_V, MASK_VAMOADDEI16_V) -DECLARE_INSN(vamoxorei16_v, MATCH_VAMOXOREI16_V, MASK_VAMOXOREI16_V) -DECLARE_INSN(vamoandei16_v, MATCH_VAMOANDEI16_V, MASK_VAMOANDEI16_V) -DECLARE_INSN(vamoorei16_v, MATCH_VAMOOREI16_V, MASK_VAMOOREI16_V) -DECLARE_INSN(vamominei16_v, MATCH_VAMOMINEI16_V, MASK_VAMOMINEI16_V) -DECLARE_INSN(vamomaxei16_v, MATCH_VAMOMAXEI16_V, MASK_VAMOMAXEI16_V) -DECLARE_INSN(vamominuei16_v, MATCH_VAMOMINUEI16_V, MASK_VAMOMINUEI16_V) -DECLARE_INSN(vamomaxuei16_v, MATCH_VAMOMAXUEI16_V, MASK_VAMOMAXUEI16_V) -DECLARE_INSN(vamoswapei32_v, MATCH_VAMOSWAPEI32_V, MASK_VAMOSWAPEI32_V) -DECLARE_INSN(vamoaddei32_v, MATCH_VAMOADDEI32_V, MASK_VAMOADDEI32_V) -DECLARE_INSN(vamoxorei32_v, MATCH_VAMOXOREI32_V, MASK_VAMOXOREI32_V) -DECLARE_INSN(vamoandei32_v, MATCH_VAMOANDEI32_V, MASK_VAMOANDEI32_V) -DECLARE_INSN(vamoorei32_v, MATCH_VAMOOREI32_V, MASK_VAMOOREI32_V) -DECLARE_INSN(vamominei32_v, MATCH_VAMOMINEI32_V, MASK_VAMOMINEI32_V) -DECLARE_INSN(vamomaxei32_v, MATCH_VAMOMAXEI32_V, MASK_VAMOMAXEI32_V) -DECLARE_INSN(vamominuei32_v, MATCH_VAMOMINUEI32_V, MASK_VAMOMINUEI32_V) -DECLARE_INSN(vamomaxuei32_v, MATCH_VAMOMAXUEI32_V, MASK_VAMOMAXUEI32_V) -DECLARE_INSN(vamoswapei64_v, MATCH_VAMOSWAPEI64_V, MASK_VAMOSWAPEI64_V) 
-DECLARE_INSN(vamoaddei64_v, MATCH_VAMOADDEI64_V, MASK_VAMOADDEI64_V) -DECLARE_INSN(vamoxorei64_v, MATCH_VAMOXOREI64_V, MASK_VAMOXOREI64_V) -DECLARE_INSN(vamoandei64_v, MATCH_VAMOANDEI64_V, MASK_VAMOANDEI64_V) -DECLARE_INSN(vamoorei64_v, MATCH_VAMOOREI64_V, MASK_VAMOOREI64_V) -DECLARE_INSN(vamominei64_v, MATCH_VAMOMINEI64_V, MASK_VAMOMINEI64_V) -DECLARE_INSN(vamomaxei64_v, MATCH_VAMOMAXEI64_V, MASK_VAMOMAXEI64_V) -DECLARE_INSN(vamominuei64_v, MATCH_VAMOMINUEI64_V, MASK_VAMOMINUEI64_V) -DECLARE_INSN(vamomaxuei64_v, MATCH_VAMOMAXUEI64_V, MASK_VAMOMAXUEI64_V) DECLARE_INSN(vmvnfr_v, MATCH_VMVNFR_V, MASK_VMVNFR_V) DECLARE_INSN(vl1r_v, MATCH_VL1R_V, MASK_VL1R_V) DECLARE_INSN(vl2r_v, MATCH_VL2R_V, MASK_VL2R_V) @@ -3700,6 +2561,98 @@ DECLARE_INSN(p_beqimm, MATCH_P_BEQIMM, MASK_P_BEQIMM) DECLARE_INSN(p_bneimm, MATCH_P_BNEIMM, MASK_P_BNEIMM) DECLARE_INSN(p_mac, MATCH_P_MAC, MASK_P_MAC) DECLARE_INSN(p_msu, MATCH_P_MSU, MASK_P_MSU) +DECLARE_INSN(pv_add_h, MATCH_PV_ADD_H, MASK_PV_ADD_H) +DECLARE_INSN(pv_add_sc_h, MATCH_PV_ADD_SC_H, MASK_PV_ADD_SC_H) +DECLARE_INSN(pv_add_sci_h, MATCH_PV_ADD_SCI_H, MASK_PV_ADD_SCI_H) +DECLARE_INSN(pv_add_b, MATCH_PV_ADD_B, MASK_PV_ADD_B) +DECLARE_INSN(pv_add_sc_b, MATCH_PV_ADD_SC_B, MASK_PV_ADD_SC_B) +DECLARE_INSN(pv_add_sci_b, MATCH_PV_ADD_SCI_B, MASK_PV_ADD_SCI_B) +DECLARE_INSN(pv_sub_h, MATCH_PV_SUB_H, MASK_PV_SUB_H) +DECLARE_INSN(pv_sub_sc_h, MATCH_PV_SUB_SC_H, MASK_PV_SUB_SC_H) +DECLARE_INSN(pv_sub_sci_h, MATCH_PV_SUB_SCI_H, MASK_PV_SUB_SCI_H) +DECLARE_INSN(pv_sub_b, MATCH_PV_SUB_B, MASK_PV_SUB_B) +DECLARE_INSN(pv_sub_sc_b, MATCH_PV_SUB_SC_B, MASK_PV_SUB_SC_B) +DECLARE_INSN(pv_sub_sci_b, MATCH_PV_SUB_SCI_B, MASK_PV_SUB_SCI_B) +DECLARE_INSN(pv_avg_h, MATCH_PV_AVG_H, MASK_PV_AVG_H) +DECLARE_INSN(pv_avg_sc_h, MATCH_PV_AVG_SC_H, MASK_PV_AVG_SC_H) +DECLARE_INSN(pv_avg_sci_h, MATCH_PV_AVG_SCI_H, MASK_PV_AVG_SCI_H) +DECLARE_INSN(pv_avg_b, MATCH_PV_AVG_B, MASK_PV_AVG_B) +DECLARE_INSN(pv_avg_sc_b, MATCH_PV_AVG_SC_B, MASK_PV_AVG_SC_B) 
+DECLARE_INSN(pv_avg_sci_b, MATCH_PV_AVG_SCI_B, MASK_PV_AVG_SCI_B) +DECLARE_INSN(pv_avgu_h, MATCH_PV_AVGU_H, MASK_PV_AVGU_H) +DECLARE_INSN(pv_avgu_sc_h, MATCH_PV_AVGU_SC_H, MASK_PV_AVGU_SC_H) +DECLARE_INSN(pv_avgu_sci_h, MATCH_PV_AVGU_SCI_H, MASK_PV_AVGU_SCI_H) +DECLARE_INSN(pv_avgu_b, MATCH_PV_AVGU_B, MASK_PV_AVGU_B) +DECLARE_INSN(pv_avgu_sc_b, MATCH_PV_AVGU_SC_B, MASK_PV_AVGU_SC_B) +DECLARE_INSN(pv_avgu_sci_b, MATCH_PV_AVGU_SCI_B, MASK_PV_AVGU_SCI_B) +DECLARE_INSN(pv_min_h, MATCH_PV_MIN_H, MASK_PV_MIN_H) +DECLARE_INSN(pv_min_sc_h, MATCH_PV_MIN_SC_H, MASK_PV_MIN_SC_H) +DECLARE_INSN(pv_min_sci_h, MATCH_PV_MIN_SCI_H, MASK_PV_MIN_SCI_H) +DECLARE_INSN(pv_min_b, MATCH_PV_MIN_B, MASK_PV_MIN_B) +DECLARE_INSN(pv_min_sc_b, MATCH_PV_MIN_SC_B, MASK_PV_MIN_SC_B) +DECLARE_INSN(pv_min_sci_b, MATCH_PV_MIN_SCI_B, MASK_PV_MIN_SCI_B) +DECLARE_INSN(pv_minu_h, MATCH_PV_MINU_H, MASK_PV_MINU_H) +DECLARE_INSN(pv_minu_sc_h, MATCH_PV_MINU_SC_H, MASK_PV_MINU_SC_H) +DECLARE_INSN(pv_minu_sci_h, MATCH_PV_MINU_SCI_H, MASK_PV_MINU_SCI_H) +DECLARE_INSN(pv_minu_b, MATCH_PV_MINU_B, MASK_PV_MINU_B) +DECLARE_INSN(pv_minu_sc_b, MATCH_PV_MINU_SC_B, MASK_PV_MINU_SC_B) +DECLARE_INSN(pv_minu_sci_b, MATCH_PV_MINU_SCI_B, MASK_PV_MINU_SCI_B) +DECLARE_INSN(pv_max_h, MATCH_PV_MAX_H, MASK_PV_MAX_H) +DECLARE_INSN(pv_max_sc_h, MATCH_PV_MAX_SC_H, MASK_PV_MAX_SC_H) +DECLARE_INSN(pv_max_sci_h, MATCH_PV_MAX_SCI_H, MASK_PV_MAX_SCI_H) +DECLARE_INSN(pv_max_b, MATCH_PV_MAX_B, MASK_PV_MAX_B) +DECLARE_INSN(pv_max_sc_b, MATCH_PV_MAX_SC_B, MASK_PV_MAX_SC_B) +DECLARE_INSN(pv_max_sci_b, MATCH_PV_MAX_SCI_B, MASK_PV_MAX_SCI_B) +DECLARE_INSN(pv_maxu_h, MATCH_PV_MAXU_H, MASK_PV_MAXU_H) +DECLARE_INSN(pv_maxu_sc_h, MATCH_PV_MAXU_SC_H, MASK_PV_MAXU_SC_H) +DECLARE_INSN(pv_maxu_sci_h, MATCH_PV_MAXU_SCI_H, MASK_PV_MAXU_SCI_H) +DECLARE_INSN(pv_maxu_b, MATCH_PV_MAXU_B, MASK_PV_MAXU_B) +DECLARE_INSN(pv_maxu_sc_b, MATCH_PV_MAXU_SC_B, MASK_PV_MAXU_SC_B) +DECLARE_INSN(pv_maxu_sci_b, MATCH_PV_MAXU_SCI_B, MASK_PV_MAXU_SCI_B) 
+DECLARE_INSN(pv_srl_h, MATCH_PV_SRL_H, MASK_PV_SRL_H) +DECLARE_INSN(pv_srl_sc_h, MATCH_PV_SRL_SC_H, MASK_PV_SRL_SC_H) +DECLARE_INSN(pv_srl_sci_h, MATCH_PV_SRL_SCI_H, MASK_PV_SRL_SCI_H) +DECLARE_INSN(pv_srl_b, MATCH_PV_SRL_B, MASK_PV_SRL_B) +DECLARE_INSN(pv_srl_sc_b, MATCH_PV_SRL_SC_B, MASK_PV_SRL_SC_B) +DECLARE_INSN(pv_srl_sci_b, MATCH_PV_SRL_SCI_B, MASK_PV_SRL_SCI_B) +DECLARE_INSN(pv_sra_h, MATCH_PV_SRA_H, MASK_PV_SRA_H) +DECLARE_INSN(pv_sra_sc_h, MATCH_PV_SRA_SC_H, MASK_PV_SRA_SC_H) +DECLARE_INSN(pv_sra_sci_h, MATCH_PV_SRA_SCI_H, MASK_PV_SRA_SCI_H) +DECLARE_INSN(pv_sra_b, MATCH_PV_SRA_B, MASK_PV_SRA_B) +DECLARE_INSN(pv_sra_sc_b, MATCH_PV_SRA_SC_B, MASK_PV_SRA_SC_B) +DECLARE_INSN(pv_sra_sci_b, MATCH_PV_SRA_SCI_B, MASK_PV_SRA_SCI_B) +DECLARE_INSN(pv_sll_h, MATCH_PV_SLL_H, MASK_PV_SLL_H) +DECLARE_INSN(pv_sll_sc_h, MATCH_PV_SLL_SC_H, MASK_PV_SLL_SC_H) +DECLARE_INSN(pv_sll_sci_h, MATCH_PV_SLL_SCI_H, MASK_PV_SLL_SCI_H) +DECLARE_INSN(pv_sll_b, MATCH_PV_SLL_B, MASK_PV_SLL_B) +DECLARE_INSN(pv_sll_sc_b, MATCH_PV_SLL_SC_B, MASK_PV_SLL_SC_B) +DECLARE_INSN(pv_sll_sci_b, MATCH_PV_SLL_SCI_B, MASK_PV_SLL_SCI_B) +DECLARE_INSN(pv_or_h, MATCH_PV_OR_H, MASK_PV_OR_H) +DECLARE_INSN(pv_or_sc_h, MATCH_PV_OR_SC_H, MASK_PV_OR_SC_H) +DECLARE_INSN(pv_or_sci_h, MATCH_PV_OR_SCI_H, MASK_PV_OR_SCI_H) +DECLARE_INSN(pv_or_b, MATCH_PV_OR_B, MASK_PV_OR_B) +DECLARE_INSN(pv_or_sc_b, MATCH_PV_OR_SC_B, MASK_PV_OR_SC_B) +DECLARE_INSN(pv_or_sci_b, MATCH_PV_OR_SCI_B, MASK_PV_OR_SCI_B) +DECLARE_INSN(pv_xor_h, MATCH_PV_XOR_H, MASK_PV_XOR_H) +DECLARE_INSN(pv_xor_sc_h, MATCH_PV_XOR_SC_H, MASK_PV_XOR_SC_H) +DECLARE_INSN(pv_xor_sci_h, MATCH_PV_XOR_SCI_H, MASK_PV_XOR_SCI_H) +DECLARE_INSN(pv_xor_b, MATCH_PV_XOR_B, MASK_PV_XOR_B) +DECLARE_INSN(pv_xor_sc_b, MATCH_PV_XOR_SC_B, MASK_PV_XOR_SC_B) +DECLARE_INSN(pv_xor_sci_b, MATCH_PV_XOR_SCI_B, MASK_PV_XOR_SCI_B) +DECLARE_INSN(pv_and_h, MATCH_PV_AND_H, MASK_PV_AND_H) +DECLARE_INSN(pv_and_sc_h, MATCH_PV_AND_SC_H, MASK_PV_AND_SC_H) +DECLARE_INSN(pv_and_sci_h, 
MATCH_PV_AND_SCI_H, MASK_PV_AND_SCI_H) +DECLARE_INSN(pv_and_b, MATCH_PV_AND_B, MASK_PV_AND_B) +DECLARE_INSN(pv_and_sc_b, MATCH_PV_AND_SC_B, MASK_PV_AND_SC_B) +DECLARE_INSN(pv_and_sci_b, MATCH_PV_AND_SCI_B, MASK_PV_AND_SCI_B) +DECLARE_INSN(pv_abs_h, MATCH_PV_ABS_H, MASK_PV_ABS_H) +DECLARE_INSN(pv_abs_b, MATCH_PV_ABS_B, MASK_PV_ABS_B) +DECLARE_INSN(pv_extract_h, MATCH_PV_EXTRACT_H, MASK_PV_EXTRACT_H) +DECLARE_INSN(pv_extract_b, MATCH_PV_EXTRACT_B, MASK_PV_EXTRACT_B) +DECLARE_INSN(pv_extractu_h, MATCH_PV_EXTRACTU_H, MASK_PV_EXTRACTU_H) +DECLARE_INSN(pv_extractu_b, MATCH_PV_EXTRACTU_B, MASK_PV_EXTRACTU_B) +DECLARE_INSN(pv_insert_h, MATCH_PV_INSERT_H, MASK_PV_INSERT_H) +DECLARE_INSN(pv_insert_b, MATCH_PV_INSERT_B, MASK_PV_INSERT_B) DECLARE_INSN(flah, MATCH_FLAH, MASK_FLAH) DECLARE_INSN(fsah, MATCH_FSAH, MASK_FSAH) DECLARE_INSN(fmadd_ah, MATCH_FMADD_AH, MASK_FMADD_AH) diff --git a/toolchain/riscv-opcodes/inst.sverilog b/toolchain/riscv-opcodes/inst.sverilog index f8879a42d..e0bf49d0f 100644 --- a/toolchain/riscv-opcodes/inst.sverilog +++ b/toolchain/riscv-opcodes/inst.sverilog @@ -323,447 +323,6 @@ package riscv_instr; localparam [31:0] C_FSDSP = 32'b????????????????101???????????10; localparam [31:0] C_SWSP = 32'b????????????????110???????????10; localparam [31:0] C_FSWSP = 32'b????????????????111???????????10; - localparam [31:0] VSETVLI = 32'b0????????????????111?????1010111; - localparam [31:0] VSETVL = 32'b1000000??????????111?????1010111; - localparam [31:0] VLE8_V = 32'b???000?00000?????000?????0000111; - localparam [31:0] VLE16_V = 32'b???000?00000?????101?????0000111; - localparam [31:0] VLE32_V = 32'b???000?00000?????110?????0000111; - localparam [31:0] VLE64_V = 32'b???000?00000?????111?????0000111; - localparam [31:0] VLE128_V = 32'b???100?00000?????000?????0000111; - localparam [31:0] VLE256_V = 32'b???100?00000?????101?????0000111; - localparam [31:0] VLE512_V = 32'b???100?00000?????110?????0000111; - localparam [31:0] VLE1024_V = 
32'b???100?00000?????111?????0000111; - localparam [31:0] VSE8_V = 32'b???000?00000?????000?????0100111; - localparam [31:0] VSE16_V = 32'b???000?00000?????101?????0100111; - localparam [31:0] VSE32_V = 32'b???000?00000?????110?????0100111; - localparam [31:0] VSE64_V = 32'b???000?00000?????111?????0100111; - localparam [31:0] VSE128_V = 32'b???100?00000?????000?????0100111; - localparam [31:0] VSE256_V = 32'b???100?00000?????101?????0100111; - localparam [31:0] VSE512_V = 32'b???100?00000?????110?????0100111; - localparam [31:0] VSE1024_V = 32'b???100?00000?????111?????0100111; - localparam [31:0] VLSE8_V = 32'b???010???????????000?????0000111; - localparam [31:0] VLSE16_V = 32'b???010???????????101?????0000111; - localparam [31:0] VLSE32_V = 32'b???010???????????110?????0000111; - localparam [31:0] VLSE64_V = 32'b???010???????????111?????0000111; - localparam [31:0] VLSE128_V = 32'b???110???????????000?????0000111; - localparam [31:0] VLSE256_V = 32'b???110???????????101?????0000111; - localparam [31:0] VLSE512_V = 32'b???110???????????110?????0000111; - localparam [31:0] VLSE1024_V = 32'b???110???????????111?????0000111; - localparam [31:0] VSSE8_V = 32'b???010???????????000?????0100111; - localparam [31:0] VSSE16_V = 32'b???010???????????101?????0100111; - localparam [31:0] VSSE32_V = 32'b???010???????????110?????0100111; - localparam [31:0] VSSE64_V = 32'b???010???????????111?????0100111; - localparam [31:0] VSSE128_V = 32'b???110???????????000?????0100111; - localparam [31:0] VSSE256_V = 32'b???110???????????101?????0100111; - localparam [31:0] VSSE512_V = 32'b???110???????????110?????0100111; - localparam [31:0] VSSE1024_V = 32'b???110???????????111?????0100111; - localparam [31:0] VLXEI8_V = 32'b???011???????????000?????0000111; - localparam [31:0] VLXEI16_V = 32'b???011???????????101?????0000111; - localparam [31:0] VLXEI32_V = 32'b???011???????????110?????0000111; - localparam [31:0] VLXEI64_V = 32'b???011???????????111?????0000111; - localparam [31:0] 
VLXEI128_V = 32'b???111???????????000?????0000111; - localparam [31:0] VLXEI256_V = 32'b???111???????????101?????0000111; - localparam [31:0] VLXEI512_V = 32'b???111???????????110?????0000111; - localparam [31:0] VLXEI1024_V = 32'b???111???????????111?????0000111; - localparam [31:0] VSXEI8_V = 32'b???011???????????000?????0100111; - localparam [31:0] VSXEI16_V = 32'b???011???????????101?????0100111; - localparam [31:0] VSXEI32_V = 32'b???011???????????110?????0100111; - localparam [31:0] VSXEI64_V = 32'b???011???????????111?????0100111; - localparam [31:0] VSXEI128_V = 32'b???111???????????000?????0100111; - localparam [31:0] VSXEI256_V = 32'b???111???????????101?????0100111; - localparam [31:0] VSXEI512_V = 32'b???111???????????110?????0100111; - localparam [31:0] VSXEI1024_V = 32'b???111???????????111?????0100111; - localparam [31:0] VSUXEI8_V = 32'b???001???????????000?????0100111; - localparam [31:0] VSUXEI16_V = 32'b???001???????????101?????0100111; - localparam [31:0] VSUXEI32_V = 32'b???001???????????110?????0100111; - localparam [31:0] VSUXEI64_V = 32'b???001???????????111?????0100111; - localparam [31:0] VSUXEI128_V = 32'b???101???????????000?????0100111; - localparam [31:0] VSUXEI256_V = 32'b???101???????????101?????0100111; - localparam [31:0] VSUXEI512_V = 32'b???101???????????110?????0100111; - localparam [31:0] VSUXEI1024_V = 32'b???101???????????111?????0100111; - localparam [31:0] VLE8FF_V = 32'b???000?10000?????000?????0000111; - localparam [31:0] VLE16FF_V = 32'b???000?10000?????101?????0000111; - localparam [31:0] VLE32FF_V = 32'b???000?10000?????110?????0000111; - localparam [31:0] VLE64FF_V = 32'b???000?10000?????111?????0000111; - localparam [31:0] VLE128FF_V = 32'b???100?10000?????000?????0000111; - localparam [31:0] VLE256FF_V = 32'b???100?10000?????101?????0000111; - localparam [31:0] VLE512FF_V = 32'b???100?10000?????110?????0000111; - localparam [31:0] VLE1024FF_V = 32'b???100?10000?????111?????0000111; - localparam [31:0] VL1RE8_V = 
32'b000000101000?????000?????0000111; - localparam [31:0] VL1RE16_V = 32'b000000101000?????101?????0000111; - localparam [31:0] VL1RE32_V = 32'b000000101000?????110?????0000111; - localparam [31:0] VL1RE64_V = 32'b000000101000?????111?????0000111; - localparam [31:0] VL2RE8_V = 32'b001000101000?????000?????0000111; - localparam [31:0] VL2RE16_V = 32'b001000101000?????101?????0000111; - localparam [31:0] VL2RE32_V = 32'b001000101000?????110?????0000111; - localparam [31:0] VL2RE64_V = 32'b001000101000?????111?????0000111; - localparam [31:0] VL4RE8_V = 32'b011000101000?????000?????0000111; - localparam [31:0] VL4RE16_V = 32'b011000101000?????101?????0000111; - localparam [31:0] VL4RE32_V = 32'b011000101000?????110?????0000111; - localparam [31:0] VL4RE64_V = 32'b011000101000?????111?????0000111; - localparam [31:0] VL8RE8_V = 32'b111000101000?????000?????0000111; - localparam [31:0] VL8RE16_V = 32'b111000101000?????101?????0000111; - localparam [31:0] VL8RE32_V = 32'b111000101000?????110?????0000111; - localparam [31:0] VL8RE64_V = 32'b111000101000?????111?????0000111; - localparam [31:0] VS1R_V = 32'b000000101000?????000?????0100111; - localparam [31:0] VS2R_V = 32'b001000101000?????000?????0100111; - localparam [31:0] VS4R_V = 32'b011000101000?????000?????0100111; - localparam [31:0] VS8R_V = 32'b111000101000?????000?????0100111; - localparam [31:0] VFADD_VF = 32'b000000???????????101?????1010111; - localparam [31:0] VFSUB_VF = 32'b000010???????????101?????1010111; - localparam [31:0] VFMIN_VF = 32'b000100???????????101?????1010111; - localparam [31:0] VFMAX_VF = 32'b000110???????????101?????1010111; - localparam [31:0] VFSGNJ_VF = 32'b001000???????????101?????1010111; - localparam [31:0] VFSGNJN_VF = 32'b001001???????????101?????1010111; - localparam [31:0] VFSGNJX_VF = 32'b001010???????????101?????1010111; - localparam [31:0] VFSLIDE1UP_VF = 32'b001110???????????101?????1010111; - localparam [31:0] VFSLIDE1DOWN_VF = 32'b001111???????????101?????1010111; - 
localparam [31:0] VFMV_S_F = 32'b010000100000?????101?????1010111; - localparam [31:0] VFMERGE_VFM = 32'b0101110??????????101?????1010111; - localparam [31:0] VFMV_V_F = 32'b010111100000?????101?????1010111; - localparam [31:0] VMFEQ_VF = 32'b011000???????????101?????1010111; - localparam [31:0] VMFLE_VF = 32'b011001???????????101?????1010111; - localparam [31:0] VMFLT_VF = 32'b011011???????????101?????1010111; - localparam [31:0] VMFNE_VF = 32'b011100???????????101?????1010111; - localparam [31:0] VMFGT_VF = 32'b011101???????????101?????1010111; - localparam [31:0] VMFGE_VF = 32'b011111???????????101?????1010111; - localparam [31:0] VFDIV_VF = 32'b100000???????????101?????1010111; - localparam [31:0] VFRDIV_VF = 32'b100001???????????101?????1010111; - localparam [31:0] VFMUL_VF = 32'b100100???????????101?????1010111; - localparam [31:0] VFRSUB_VF = 32'b100111???????????101?????1010111; - localparam [31:0] VFMADD_VF = 32'b101000???????????101?????1010111; - localparam [31:0] VFNMADD_VF = 32'b101001???????????101?????1010111; - localparam [31:0] VFMSUB_VF = 32'b101010???????????101?????1010111; - localparam [31:0] VFNMSUB_VF = 32'b101011???????????101?????1010111; - localparam [31:0] VFMACC_VF = 32'b101100???????????101?????1010111; - localparam [31:0] VFNMACC_VF = 32'b101101???????????101?????1010111; - localparam [31:0] VFMSAC_VF = 32'b101110???????????101?????1010111; - localparam [31:0] VFNMSAC_VF = 32'b101111???????????101?????1010111; - localparam [31:0] VFWADD_VF = 32'b110000???????????101?????1010111; - localparam [31:0] VFWSUB_VF = 32'b110010???????????101?????1010111; - localparam [31:0] VFWADD_WF = 32'b110100???????????101?????1010111; - localparam [31:0] VFWSUB_WF = 32'b110110???????????101?????1010111; - localparam [31:0] VFWMUL_VF = 32'b111000???????????101?????1010111; - localparam [31:0] VFWMACC_VF = 32'b111100???????????101?????1010111; - localparam [31:0] VFWNMACC_VF = 32'b111101???????????101?????1010111; - localparam [31:0] VFWMSAC_VF = 
32'b111110???????????101?????1010111; - localparam [31:0] VFWNMSAC_VF = 32'b111111???????????101?????1010111; - localparam [31:0] VFADD_VV = 32'b000000???????????001?????1010111; - localparam [31:0] VFREDSUM_VS = 32'b000001???????????001?????1010111; - localparam [31:0] VFSUB_VV = 32'b000010???????????001?????1010111; - localparam [31:0] VFREDOSUM_VS = 32'b000011???????????001?????1010111; - localparam [31:0] VFMIN_VV = 32'b000100???????????001?????1010111; - localparam [31:0] VFREDMIN_VS = 32'b000101???????????001?????1010111; - localparam [31:0] VFMAX_VV = 32'b000110???????????001?????1010111; - localparam [31:0] VFREDMAX_VS = 32'b000111???????????001?????1010111; - localparam [31:0] VFSGNJ_VV = 32'b001000???????????001?????1010111; - localparam [31:0] VFSGNJN_VV = 32'b001001???????????001?????1010111; - localparam [31:0] VFSGNJX_VV = 32'b001010???????????001?????1010111; - localparam [31:0] VFMV_F_S = 32'b0100001?????00000001?????1010111; - localparam [31:0] VMFEQ_VV = 32'b011000???????????001?????1010111; - localparam [31:0] VMFLE_VV = 32'b011001???????????001?????1010111; - localparam [31:0] VMFLT_VV = 32'b011011???????????001?????1010111; - localparam [31:0] VMFNE_VV = 32'b011100???????????001?????1010111; - localparam [31:0] VFDIV_VV = 32'b100000???????????001?????1010111; - localparam [31:0] VFMUL_VV = 32'b100100???????????001?????1010111; - localparam [31:0] VFMADD_VV = 32'b101000???????????001?????1010111; - localparam [31:0] VFNMADD_VV = 32'b101001???????????001?????1010111; - localparam [31:0] VFMSUB_VV = 32'b101010???????????001?????1010111; - localparam [31:0] VFNMSUB_VV = 32'b101011???????????001?????1010111; - localparam [31:0] VFMACC_VV = 32'b101100???????????001?????1010111; - localparam [31:0] VFNMACC_VV = 32'b101101???????????001?????1010111; - localparam [31:0] VFMSAC_VV = 32'b101110???????????001?????1010111; - localparam [31:0] VFNMSAC_VV = 32'b101111???????????001?????1010111; - localparam [31:0] VFCVT_XU_F_V = 
32'b010010??????00000001?????1010111; - localparam [31:0] VFCVT_X_F_V = 32'b010010??????00001001?????1010111; - localparam [31:0] VFCVT_F_XU_V = 32'b010010??????00010001?????1010111; - localparam [31:0] VFCVT_F_X_V = 32'b010010??????00011001?????1010111; - localparam [31:0] VFCVT_RTZ_XU_F_V = 32'b010010??????00110001?????1010111; - localparam [31:0] VFCVT_RTZ_X_F_V = 32'b010010??????00111001?????1010111; - localparam [31:0] VFWCVT_XU_F_V = 32'b010010??????01000001?????1010111; - localparam [31:0] VFWCVT_X_F_V = 32'b010010??????01001001?????1010111; - localparam [31:0] VFWCVT_F_XU_V = 32'b010010??????01010001?????1010111; - localparam [31:0] VFWCVT_F_X_V = 32'b010010??????01011001?????1010111; - localparam [31:0] VFWCVT_F_F_V = 32'b010010??????01100001?????1010111; - localparam [31:0] VFWCVT_RTZ_XU_F_V = 32'b010010??????01110001?????1010111; - localparam [31:0] VFWCVT_RTZ_X_F_V = 32'b010010??????01111001?????1010111; - localparam [31:0] VFNCVT_XU_F_W = 32'b010010??????10000001?????1010111; - localparam [31:0] VFNCVT_X_F_W = 32'b010010??????10001001?????1010111; - localparam [31:0] VFNCVT_F_XU_W = 32'b010010??????10010001?????1010111; - localparam [31:0] VFNCVT_F_X_W = 32'b010010??????10011001?????1010111; - localparam [31:0] VFNCVT_F_F_W = 32'b010010??????10100001?????1010111; - localparam [31:0] VFNCVT_ROD_F_F_W = 32'b010010??????10101001?????1010111; - localparam [31:0] VFNCVT_RTZ_XU_F_W = 32'b010010??????10110001?????1010111; - localparam [31:0] VFNCVT_RTZ_X_F_W = 32'b010010??????10111001?????1010111; - localparam [31:0] VFSQRT_V = 32'b010011??????00000001?????1010111; - localparam [31:0] VFRSQRTE7_V = 32'b010011??????00100001?????1010111; - localparam [31:0] VFRECE7_V = 32'b010011??????00101001?????1010111; - localparam [31:0] VFCLASS_V = 32'b010011??????10000001?????1010111; - localparam [31:0] VFWADD_VV = 32'b110000???????????001?????1010111; - localparam [31:0] VFWREDSUM_VS = 32'b110001???????????001?????1010111; - localparam [31:0] VFWSUB_VV = 
32'b110010???????????001?????1010111; - localparam [31:0] VFWREDOSUM_VS = 32'b110011???????????001?????1010111; - localparam [31:0] VFWADD_WV = 32'b110100???????????001?????1010111; - localparam [31:0] VFWSUB_WV = 32'b110110???????????001?????1010111; - localparam [31:0] VFWMUL_VV = 32'b111000???????????001?????1010111; - localparam [31:0] VFDOT_VV = 32'b111001???????????001?????1010111; - localparam [31:0] VFWMACC_VV = 32'b111100???????????001?????1010111; - localparam [31:0] VFWNMACC_VV = 32'b111101???????????001?????1010111; - localparam [31:0] VFWMSAC_VV = 32'b111110???????????001?????1010111; - localparam [31:0] VFWNMSAC_VV = 32'b111111???????????001?????1010111; - localparam [31:0] VADD_VX = 32'b000000???????????100?????1010111; - localparam [31:0] VSUB_VX = 32'b000010???????????100?????1010111; - localparam [31:0] VRSUB_VX = 32'b000011???????????100?????1010111; - localparam [31:0] VMINU_VX = 32'b000100???????????100?????1010111; - localparam [31:0] VMIN_VX = 32'b000101???????????100?????1010111; - localparam [31:0] VMAXU_VX = 32'b000110???????????100?????1010111; - localparam [31:0] VMAX_VX = 32'b000111???????????100?????1010111; - localparam [31:0] VAND_VX = 32'b001001???????????100?????1010111; - localparam [31:0] VOR_VX = 32'b001010???????????100?????1010111; - localparam [31:0] VXOR_VX = 32'b001011???????????100?????1010111; - localparam [31:0] VRGATHER_VX = 32'b001100???????????100?????1010111; - localparam [31:0] VSLIDEUP_VX = 32'b001110???????????100?????1010111; - localparam [31:0] VSLIDEDOWN_VX = 32'b001111???????????100?????1010111; - localparam [31:0] VADC_VXM = 32'b0100000??????????100?????1010111; - localparam [31:0] VMADC_VXM = 32'b010001???????????100?????1010111; - localparam [31:0] VSBC_VXM = 32'b0100100??????????100?????1010111; - localparam [31:0] VMSBC_VXM = 32'b010011???????????100?????1010111; - localparam [31:0] VMERGE_VXM = 32'b0101110??????????100?????1010111; - localparam [31:0] VMV_V_X = 32'b010111100000?????100?????1010111; - 
localparam [31:0] VMSEQ_VX = 32'b011000???????????100?????1010111; - localparam [31:0] VMSNE_VX = 32'b011001???????????100?????1010111; - localparam [31:0] VMSLTU_VX = 32'b011010???????????100?????1010111; - localparam [31:0] VMSLT_VX = 32'b011011???????????100?????1010111; - localparam [31:0] VMSLEU_VX = 32'b011100???????????100?????1010111; - localparam [31:0] VMSLE_VX = 32'b011101???????????100?????1010111; - localparam [31:0] VMSGTU_VX = 32'b011110???????????100?????1010111; - localparam [31:0] VMSGT_VX = 32'b011111???????????100?????1010111; - localparam [31:0] VSADDU_VX = 32'b100000???????????100?????1010111; - localparam [31:0] VSADD_VX = 32'b100001???????????100?????1010111; - localparam [31:0] VSSUBU_VX = 32'b100010???????????100?????1010111; - localparam [31:0] VSSUB_VX = 32'b100011???????????100?????1010111; - localparam [31:0] VSLL_VX = 32'b100101???????????100?????1010111; - localparam [31:0] VSMUL_VX = 32'b100111???????????100?????1010111; - localparam [31:0] VSRL_VX = 32'b101000???????????100?????1010111; - localparam [31:0] VSRA_VX = 32'b101001???????????100?????1010111; - localparam [31:0] VSSRL_VX = 32'b101010???????????100?????1010111; - localparam [31:0] VSSRA_VX = 32'b101011???????????100?????1010111; - localparam [31:0] VNSRL_WX = 32'b101100???????????100?????1010111; - localparam [31:0] VNSRA_WX = 32'b101101???????????100?????1010111; - localparam [31:0] VNCLIPU_WX = 32'b101110???????????100?????1010111; - localparam [31:0] VNCLIP_WX = 32'b101111???????????100?????1010111; - localparam [31:0] VQMACCU_VX = 32'b111100???????????100?????1010111; - localparam [31:0] VQMACC_VX = 32'b111101???????????100?????1010111; - localparam [31:0] VQMACCUS_VX = 32'b111110???????????100?????1010111; - localparam [31:0] VQMACCSU_VX = 32'b111111???????????100?????1010111; - localparam [31:0] VADD_VV = 32'b000000???????????000?????1010111; - localparam [31:0] VSUB_VV = 32'b000010???????????000?????1010111; - localparam [31:0] VMINU_VV = 
32'b000100???????????000?????1010111; - localparam [31:0] VMIN_VV = 32'b000101???????????000?????1010111; - localparam [31:0] VMAXU_VV = 32'b000110???????????000?????1010111; - localparam [31:0] VMAX_VV = 32'b000111???????????000?????1010111; - localparam [31:0] VAND_VV = 32'b001001???????????000?????1010111; - localparam [31:0] VOR_VV = 32'b001010???????????000?????1010111; - localparam [31:0] VXOR_VV = 32'b001011???????????000?????1010111; - localparam [31:0] VRGATHER_VV = 32'b001100???????????000?????1010111; - localparam [31:0] VRGATHEREI16_VV = 32'b001110???????????000?????1010111; - localparam [31:0] VADC_VVM = 32'b0100000??????????000?????1010111; - localparam [31:0] VMADC_VVM = 32'b010001???????????000?????1010111; - localparam [31:0] VSBC_VVM = 32'b0100100??????????000?????1010111; - localparam [31:0] VMSBC_VVM = 32'b010011???????????000?????1010111; - localparam [31:0] VMERGE_VVM = 32'b0101110??????????000?????1010111; - localparam [31:0] VMV_V_V = 32'b010111100000?????000?????1010111; - localparam [31:0] VMSEQ_VV = 32'b011000???????????000?????1010111; - localparam [31:0] VMSNE_VV = 32'b011001???????????000?????1010111; - localparam [31:0] VMSLTU_VV = 32'b011010???????????000?????1010111; - localparam [31:0] VMSLT_VV = 32'b011011???????????000?????1010111; - localparam [31:0] VMSLEU_VV = 32'b011100???????????000?????1010111; - localparam [31:0] VMSLE_VV = 32'b011101???????????000?????1010111; - localparam [31:0] VSADDU_VV = 32'b100000???????????000?????1010111; - localparam [31:0] VSADD_VV = 32'b100001???????????000?????1010111; - localparam [31:0] VSSUBU_VV = 32'b100010???????????000?????1010111; - localparam [31:0] VSSUB_VV = 32'b100011???????????000?????1010111; - localparam [31:0] VSLL_VV = 32'b100101???????????000?????1010111; - localparam [31:0] VSMUL_VV = 32'b100111???????????000?????1010111; - localparam [31:0] VSRL_VV = 32'b101000???????????000?????1010111; - localparam [31:0] VSRA_VV = 32'b101001???????????000?????1010111; - localparam [31:0] 
VSSRL_VV = 32'b101010???????????000?????1010111; - localparam [31:0] VSSRA_VV = 32'b101011???????????000?????1010111; - localparam [31:0] VNSRL_WV = 32'b101100???????????000?????1010111; - localparam [31:0] VNSRA_WV = 32'b101101???????????000?????1010111; - localparam [31:0] VNCLIPU_WV = 32'b101110???????????000?????1010111; - localparam [31:0] VNCLIP_WV = 32'b101111???????????000?????1010111; - localparam [31:0] VWREDSUMU_VS = 32'b110000???????????000?????1010111; - localparam [31:0] VWREDSUM_VS = 32'b110001???????????000?????1010111; - localparam [31:0] VDOTU_VV = 32'b111000???????????000?????1010111; - localparam [31:0] VDOT_VV = 32'b111001???????????000?????1010111; - localparam [31:0] VQMACCU_VV = 32'b111100???????????000?????1010111; - localparam [31:0] VQMACC_VV = 32'b111101???????????000?????1010111; - localparam [31:0] VQMACCSU_VV = 32'b111111???????????000?????1010111; - localparam [31:0] VADD_VI = 32'b000000???????????011?????1010111; - localparam [31:0] VRSUB_VI = 32'b000011???????????011?????1010111; - localparam [31:0] VAND_VI = 32'b001001???????????011?????1010111; - localparam [31:0] VOR_VI = 32'b001010???????????011?????1010111; - localparam [31:0] VXOR_VI = 32'b001011???????????011?????1010111; - localparam [31:0] VRGATHER_VI = 32'b001100???????????011?????1010111; - localparam [31:0] VSLIDEUP_VI = 32'b001110???????????011?????1010111; - localparam [31:0] VSLIDEDOWN_VI = 32'b001111???????????011?????1010111; - localparam [31:0] VADC_VIM = 32'b0100000??????????011?????1010111; - localparam [31:0] VMADC_VIM = 32'b010001???????????011?????1010111; - localparam [31:0] VMERGE_VIM = 32'b0101110??????????011?????1010111; - localparam [31:0] VMV_V_I = 32'b010111100000?????011?????1010111; - localparam [31:0] VMSEQ_VI = 32'b011000???????????011?????1010111; - localparam [31:0] VMSNE_VI = 32'b011001???????????011?????1010111; - localparam [31:0] VMSLEU_VI = 32'b011100???????????011?????1010111; - localparam [31:0] VMSLE_VI = 
32'b011101???????????011?????1010111; - localparam [31:0] VMSGTU_VI = 32'b011110???????????011?????1010111; - localparam [31:0] VMSGT_VI = 32'b011111???????????011?????1010111; - localparam [31:0] VSADDU_VI = 32'b100000???????????011?????1010111; - localparam [31:0] VSADD_VI = 32'b100001???????????011?????1010111; - localparam [31:0] VSLL_VI = 32'b100101???????????011?????1010111; - localparam [31:0] VMV1R_V = 32'b1001111?????00000011?????1010111; - localparam [31:0] VMV2R_V = 32'b1001111?????00001011?????1010111; - localparam [31:0] VMV4R_V = 32'b1001111?????00011011?????1010111; - localparam [31:0] VMV8R_V = 32'b1001111?????00111011?????1010111; - localparam [31:0] VSRL_VI = 32'b101000???????????011?????1010111; - localparam [31:0] VSRA_VI = 32'b101001???????????011?????1010111; - localparam [31:0] VSSRL_VI = 32'b101010???????????011?????1010111; - localparam [31:0] VSSRA_VI = 32'b101011???????????011?????1010111; - localparam [31:0] VNSRL_WI = 32'b101100???????????011?????1010111; - localparam [31:0] VNSRA_WI = 32'b101101???????????011?????1010111; - localparam [31:0] VNCLIPU_WI = 32'b101110???????????011?????1010111; - localparam [31:0] VNCLIP_WI = 32'b101111???????????011?????1010111; - localparam [31:0] VREDSUM_VS = 32'b000000???????????010?????1010111; - localparam [31:0] VREDAND_VS = 32'b000001???????????010?????1010111; - localparam [31:0] VREDOR_VS = 32'b000010???????????010?????1010111; - localparam [31:0] VREDXOR_VS = 32'b000011???????????010?????1010111; - localparam [31:0] VREDMINU_VS = 32'b000100???????????010?????1010111; - localparam [31:0] VREDMIN_VS = 32'b000101???????????010?????1010111; - localparam [31:0] VREDMAXU_VS = 32'b000110???????????010?????1010111; - localparam [31:0] VREDMAX_VS = 32'b000111???????????010?????1010111; - localparam [31:0] VAADDU_VV = 32'b001000???????????010?????1010111; - localparam [31:0] VAADD_VV = 32'b001001???????????010?????1010111; - localparam [31:0] VASUBU_VV = 32'b001010???????????010?????1010111; - localparam 
[31:0] VASUB_VV = 32'b001011???????????010?????1010111; - localparam [31:0] VMV_X_S = 32'b0100001?????00000010?????1010111; - localparam [31:0] VZEXT_VF8 = 32'b010010??????00010010?????1010111; - localparam [31:0] VSEXT_VF8 = 32'b010010??????00011010?????1010111; - localparam [31:0] VZEXT_VF4 = 32'b010010??????00100010?????1010111; - localparam [31:0] VSEXT_VF4 = 32'b010010??????00101010?????1010111; - localparam [31:0] VZEXT_VF2 = 32'b010010??????00110010?????1010111; - localparam [31:0] VSEXT_VF2 = 32'b010010??????00111010?????1010111; - localparam [31:0] VCOMPRESS_VM = 32'b0101111??????????010?????1010111; - localparam [31:0] VMANDNOT_MM = 32'b011000???????????010?????1010111; - localparam [31:0] VMAND_MM = 32'b011001???????????010?????1010111; - localparam [31:0] VMOR_MM = 32'b011010???????????010?????1010111; - localparam [31:0] VMXOR_MM = 32'b011011???????????010?????1010111; - localparam [31:0] VMORNOT_MM = 32'b011100???????????010?????1010111; - localparam [31:0] VMNAND_MM = 32'b011101???????????010?????1010111; - localparam [31:0] VMNOR_MM = 32'b011110???????????010?????1010111; - localparam [31:0] VMXNOR_MM = 32'b011111???????????010?????1010111; - localparam [31:0] VMSBF_M = 32'b010100??????00001010?????1010111; - localparam [31:0] VMSOF_M = 32'b010100??????00010010?????1010111; - localparam [31:0] VMSIF_M = 32'b010100??????00011010?????1010111; - localparam [31:0] VIOTA_M = 32'b010100??????10000010?????1010111; - localparam [31:0] VID_V = 32'b010100?0000010001010?????1010111; - localparam [31:0] VPOPC_M = 32'b010000??????10000010?????1010111; - localparam [31:0] VFIRST_M = 32'b010000??????10001010?????1010111; - localparam [31:0] VDIVU_VV = 32'b100000???????????010?????1010111; - localparam [31:0] VDIV_VV = 32'b100001???????????010?????1010111; - localparam [31:0] VREMU_VV = 32'b100010???????????010?????1010111; - localparam [31:0] VREM_VV = 32'b100011???????????010?????1010111; - localparam [31:0] VMULHU_VV = 32'b100100???????????010?????1010111; - 
localparam [31:0] VMUL_VV = 32'b100101???????????010?????1010111; - localparam [31:0] VMULHSU_VV = 32'b100110???????????010?????1010111; - localparam [31:0] VMULH_VV = 32'b100111???????????010?????1010111; - localparam [31:0] VMADD_VV = 32'b101001???????????010?????1010111; - localparam [31:0] VNMSUB_VV = 32'b101011???????????010?????1010111; - localparam [31:0] VMACC_VV = 32'b101101???????????010?????1010111; - localparam [31:0] VNMSAC_VV = 32'b101111???????????010?????1010111; - localparam [31:0] VWADDU_VV = 32'b110000???????????010?????1010111; - localparam [31:0] VWADD_VV = 32'b110001???????????010?????1010111; - localparam [31:0] VWSUBU_VV = 32'b110010???????????010?????1010111; - localparam [31:0] VWSUB_VV = 32'b110011???????????010?????1010111; - localparam [31:0] VWADDU_WV = 32'b110100???????????010?????1010111; - localparam [31:0] VWADD_WV = 32'b110101???????????010?????1010111; - localparam [31:0] VWSUBU_WV = 32'b110110???????????010?????1010111; - localparam [31:0] VWSUB_WV = 32'b110111???????????010?????1010111; - localparam [31:0] VWMULU_VV = 32'b111000???????????010?????1010111; - localparam [31:0] VWMULSU_VV = 32'b111010???????????010?????1010111; - localparam [31:0] VWMUL_VV = 32'b111011???????????010?????1010111; - localparam [31:0] VWMACCU_VV = 32'b111100???????????010?????1010111; - localparam [31:0] VWMACC_VV = 32'b111101???????????010?????1010111; - localparam [31:0] VWMACCSU_VV = 32'b111111???????????010?????1010111; - localparam [31:0] VAADDU_VX = 32'b001000???????????110?????1010111; - localparam [31:0] VAADD_VX = 32'b001001???????????110?????1010111; - localparam [31:0] VASUBU_VX = 32'b001010???????????110?????1010111; - localparam [31:0] VASUB_VX = 32'b001011???????????110?????1010111; - localparam [31:0] VMV_S_X = 32'b010000100000?????110?????1010111; - localparam [31:0] VSLIDE1UP_VX = 32'b001110???????????110?????1010111; - localparam [31:0] VSLIDE1DOWN_VX = 32'b001111???????????110?????1010111; - localparam [31:0] VDIVU_VX = 
32'b100000???????????110?????1010111; - localparam [31:0] VDIV_VX = 32'b100001???????????110?????1010111; - localparam [31:0] VREMU_VX = 32'b100010???????????110?????1010111; - localparam [31:0] VREM_VX = 32'b100011???????????110?????1010111; - localparam [31:0] VMULHU_VX = 32'b100100???????????110?????1010111; - localparam [31:0] VMUL_VX = 32'b100101???????????110?????1010111; - localparam [31:0] VMULHSU_VX = 32'b100110???????????110?????1010111; - localparam [31:0] VMULH_VX = 32'b100111???????????110?????1010111; - localparam [31:0] VMADD_VX = 32'b101001???????????110?????1010111; - localparam [31:0] VNMSUB_VX = 32'b101011???????????110?????1010111; - localparam [31:0] VMACC_VX = 32'b101101???????????110?????1010111; - localparam [31:0] VNMSAC_VX = 32'b101111???????????110?????1010111; - localparam [31:0] VWADDU_VX = 32'b110000???????????110?????1010111; - localparam [31:0] VWADD_VX = 32'b110001???????????110?????1010111; - localparam [31:0] VWSUBU_VX = 32'b110010???????????110?????1010111; - localparam [31:0] VWSUB_VX = 32'b110011???????????110?????1010111; - localparam [31:0] VWADDU_WX = 32'b110100???????????110?????1010111; - localparam [31:0] VWADD_WX = 32'b110101???????????110?????1010111; - localparam [31:0] VWSUBU_WX = 32'b110110???????????110?????1010111; - localparam [31:0] VWSUB_WX = 32'b110111???????????110?????1010111; - localparam [31:0] VWMULU_VX = 32'b111000???????????110?????1010111; - localparam [31:0] VWMULSU_VX = 32'b111010???????????110?????1010111; - localparam [31:0] VWMUL_VX = 32'b111011???????????110?????1010111; - localparam [31:0] VWMACCU_VX = 32'b111100???????????110?????1010111; - localparam [31:0] VWMACC_VX = 32'b111101???????????110?????1010111; - localparam [31:0] VWMACCUS_VX = 32'b111110???????????110?????1010111; - localparam [31:0] VWMACCSU_VX = 32'b111111???????????110?????1010111; - localparam [31:0] VAMOSWAPEI8_V = 32'b00001????????????000?????0101111; - localparam [31:0] VAMOADDEI8_V = 32'b00000????????????000?????0101111; - 
localparam [31:0] VAMOXOREI8_V = 32'b00100????????????000?????0101111; - localparam [31:0] VAMOANDEI8_V = 32'b01100????????????000?????0101111; - localparam [31:0] VAMOOREI8_V = 32'b01000????????????000?????0101111; - localparam [31:0] VAMOMINEI8_V = 32'b10000????????????000?????0101111; - localparam [31:0] VAMOMAXEI8_V = 32'b10100????????????000?????0101111; - localparam [31:0] VAMOMINUEI8_V = 32'b11000????????????000?????0101111; - localparam [31:0] VAMOMAXUEI8_V = 32'b11100????????????000?????0101111; - localparam [31:0] VAMOSWAPEI16_V = 32'b00001????????????101?????0101111; - localparam [31:0] VAMOADDEI16_V = 32'b00000????????????101?????0101111; - localparam [31:0] VAMOXOREI16_V = 32'b00100????????????101?????0101111; - localparam [31:0] VAMOANDEI16_V = 32'b01100????????????101?????0101111; - localparam [31:0] VAMOOREI16_V = 32'b01000????????????101?????0101111; - localparam [31:0] VAMOMINEI16_V = 32'b10000????????????101?????0101111; - localparam [31:0] VAMOMAXEI16_V = 32'b10100????????????101?????0101111; - localparam [31:0] VAMOMINUEI16_V = 32'b11000????????????101?????0101111; - localparam [31:0] VAMOMAXUEI16_V = 32'b11100????????????101?????0101111; - localparam [31:0] VAMOSWAPEI32_V = 32'b00001????????????110?????0101111; - localparam [31:0] VAMOADDEI32_V = 32'b00000????????????110?????0101111; - localparam [31:0] VAMOXOREI32_V = 32'b00100????????????110?????0101111; - localparam [31:0] VAMOANDEI32_V = 32'b01100????????????110?????0101111; - localparam [31:0] VAMOOREI32_V = 32'b01000????????????110?????0101111; - localparam [31:0] VAMOMINEI32_V = 32'b10000????????????110?????0101111; - localparam [31:0] VAMOMAXEI32_V = 32'b10100????????????110?????0101111; - localparam [31:0] VAMOMINUEI32_V = 32'b11000????????????110?????0101111; - localparam [31:0] VAMOMAXUEI32_V = 32'b11100????????????110?????0101111; - localparam [31:0] VAMOSWAPEI64_V = 32'b00001????????????111?????0101111; - localparam [31:0] VAMOADDEI64_V = 32'b00000????????????111?????0101111; - 
localparam [31:0] VAMOXOREI64_V = 32'b00100????????????111?????0101111; - localparam [31:0] VAMOANDEI64_V = 32'b01100????????????111?????0101111; - localparam [31:0] VAMOOREI64_V = 32'b01000????????????111?????0101111; - localparam [31:0] VAMOMINEI64_V = 32'b10000????????????111?????0101111; - localparam [31:0] VAMOMAXEI64_V = 32'b10100????????????111?????0101111; - localparam [31:0] VAMOMINUEI64_V = 32'b11000????????????111?????0101111; - localparam [31:0] VAMOMAXUEI64_V = 32'b11100????????????111?????0101111; localparam [31:0] VMVNFR_V = 32'b1001111??????????011?????1010111; localparam [31:0] VL1R_V = 32'b000000101000?????000?????0000111; localparam [31:0] VL2R_V = 32'b000001101000?????101?????0000111; @@ -826,6 +385,98 @@ package riscv_instr; localparam [31:0] P_BNEIMM = 32'b?????????????????011?????1100011; localparam [31:0] P_MAC = 32'b0100001??????????000?????0110011; localparam [31:0] P_MSU = 32'b0100001??????????001?????0110011; + localparam [31:0] PV_ADD_H = 32'b0000000??????????000?????1010111; + localparam [31:0] PV_ADD_SC_H = 32'b0000000??????????100?????1010111; + localparam [31:0] PV_ADD_SCI_H = 32'b000000???????????110?????1010111; + localparam [31:0] PV_ADD_B = 32'b0000000??????????001?????1010111; + localparam [31:0] PV_ADD_SC_B = 32'b0000000??????????101?????1010111; + localparam [31:0] PV_ADD_SCI_B = 32'b000000???????????111?????1010111; + localparam [31:0] PV_SUB_H = 32'b0000100??????????000?????1010111; + localparam [31:0] PV_SUB_SC_H = 32'b0000100??????????100?????1010111; + localparam [31:0] PV_SUB_SCI_H = 32'b000010???????????110?????1010111; + localparam [31:0] PV_SUB_B = 32'b0000100??????????001?????1010111; + localparam [31:0] PV_SUB_SC_B = 32'b0000100??????????101?????1010111; + localparam [31:0] PV_SUB_SCI_B = 32'b000010???????????111?????1010111; + localparam [31:0] PV_AVG_H = 32'b0001000??????????000?????1010111; + localparam [31:0] PV_AVG_SC_H = 32'b0001000??????????100?????1010111; + localparam [31:0] PV_AVG_SCI_H = 
32'b000100???????????110?????1010111; + localparam [31:0] PV_AVG_B = 32'b0001000??????????001?????1010111; + localparam [31:0] PV_AVG_SC_B = 32'b0001000??????????101?????1010111; + localparam [31:0] PV_AVG_SCI_B = 32'b000100???????????111?????1010111; + localparam [31:0] PV_AVGU_H = 32'b0001100??????????000?????1010111; + localparam [31:0] PV_AVGU_SC_H = 32'b0001100??????????100?????1010111; + localparam [31:0] PV_AVGU_SCI_H = 32'b000110???????????110?????1010111; + localparam [31:0] PV_AVGU_B = 32'b0001100??????????001?????1010111; + localparam [31:0] PV_AVGU_SC_B = 32'b0001100??????????101?????1010111; + localparam [31:0] PV_AVGU_SCI_B = 32'b000110???????????111?????1010111; + localparam [31:0] PV_MIN_H = 32'b0010000??????????000?????1010111; + localparam [31:0] PV_MIN_SC_H = 32'b0010000??????????100?????1010111; + localparam [31:0] PV_MIN_SCI_H = 32'b001000???????????110?????1010111; + localparam [31:0] PV_MIN_B = 32'b0010000??????????001?????1010111; + localparam [31:0] PV_MIN_SC_B = 32'b0010000??????????101?????1010111; + localparam [31:0] PV_MIN_SCI_B = 32'b001000???????????111?????1010111; + localparam [31:0] PV_MINU_H = 32'b0010100??????????000?????1010111; + localparam [31:0] PV_MINU_SC_H = 32'b0010100??????????100?????1010111; + localparam [31:0] PV_MINU_SCI_H = 32'b001010???????????110?????1010111; + localparam [31:0] PV_MINU_B = 32'b0010100??????????001?????1010111; + localparam [31:0] PV_MINU_SC_B = 32'b0010100??????????101?????1010111; + localparam [31:0] PV_MINU_SCI_B = 32'b001010???????????111?????1010111; + localparam [31:0] PV_MAX_H = 32'b0011000??????????000?????1010111; + localparam [31:0] PV_MAX_SC_H = 32'b0011000??????????100?????1010111; + localparam [31:0] PV_MAX_SCI_H = 32'b001100???????????110?????1010111; + localparam [31:0] PV_MAX_B = 32'b0011000??????????001?????1010111; + localparam [31:0] PV_MAX_SC_B = 32'b0011000??????????101?????1010111; + localparam [31:0] PV_MAX_SCI_B = 32'b001100???????????111?????1010111; + localparam [31:0] 
PV_MAXU_H = 32'b0011100??????????000?????1010111; + localparam [31:0] PV_MAXU_SC_H = 32'b0011100??????????100?????1010111; + localparam [31:0] PV_MAXU_SCI_H = 32'b001110???????????110?????1010111; + localparam [31:0] PV_MAXU_B = 32'b0011100??????????001?????1010111; + localparam [31:0] PV_MAXU_SC_B = 32'b0011100??????????101?????1010111; + localparam [31:0] PV_MAXU_SCI_B = 32'b001110???????????111?????1010111; + localparam [31:0] PV_SRL_H = 32'b0100000??????????000?????1010111; + localparam [31:0] PV_SRL_SC_H = 32'b0100000??????????100?????1010111; + localparam [31:0] PV_SRL_SCI_H = 32'b010000???????????110?????1010111; + localparam [31:0] PV_SRL_B = 32'b0100000??????????001?????1010111; + localparam [31:0] PV_SRL_SC_B = 32'b0100000??????????101?????1010111; + localparam [31:0] PV_SRL_SCI_B = 32'b010000???????????111?????1010111; + localparam [31:0] PV_SRA_H = 32'b0100100??????????000?????1010111; + localparam [31:0] PV_SRA_SC_H = 32'b0100100??????????100?????1010111; + localparam [31:0] PV_SRA_SCI_H = 32'b010010???????????110?????1010111; + localparam [31:0] PV_SRA_B = 32'b0100100??????????001?????1010111; + localparam [31:0] PV_SRA_SC_B = 32'b0100100??????????101?????1010111; + localparam [31:0] PV_SRA_SCI_B = 32'b010010???????????111?????1010111; + localparam [31:0] PV_SLL_H = 32'b0101000??????????000?????1010111; + localparam [31:0] PV_SLL_SC_H = 32'b0101000??????????100?????1010111; + localparam [31:0] PV_SLL_SCI_H = 32'b010100???????????110?????1010111; + localparam [31:0] PV_SLL_B = 32'b0101000??????????001?????1010111; + localparam [31:0] PV_SLL_SC_B = 32'b0101000??????????101?????1010111; + localparam [31:0] PV_SLL_SCI_B = 32'b010100???????????111?????1010111; + localparam [31:0] PV_OR_H = 32'b0101100??????????000?????1010111; + localparam [31:0] PV_OR_SC_H = 32'b0101100??????????100?????1010111; + localparam [31:0] PV_OR_SCI_H = 32'b010110???????????110?????1010111; + localparam [31:0] PV_OR_B = 32'b0101100??????????001?????1010111; + localparam [31:0] 
PV_OR_SC_B = 32'b0101100??????????101?????1010111; + localparam [31:0] PV_OR_SCI_B = 32'b010110???????????111?????1010111; + localparam [31:0] PV_XOR_H = 32'b0110000??????????000?????1010111; + localparam [31:0] PV_XOR_SC_H = 32'b0110000??????????100?????1010111; + localparam [31:0] PV_XOR_SCI_H = 32'b011000???????????110?????1010111; + localparam [31:0] PV_XOR_B = 32'b0110000??????????001?????1010111; + localparam [31:0] PV_XOR_SC_B = 32'b0110000??????????101?????1010111; + localparam [31:0] PV_XOR_SCI_B = 32'b011000???????????111?????1010111; + localparam [31:0] PV_AND_H = 32'b0110100??????????000?????1010111; + localparam [31:0] PV_AND_SC_H = 32'b0110100??????????100?????1010111; + localparam [31:0] PV_AND_SCI_H = 32'b011010???????????110?????1010111; + localparam [31:0] PV_AND_B = 32'b0110100??????????001?????1010111; + localparam [31:0] PV_AND_SC_B = 32'b0110100??????????101?????1010111; + localparam [31:0] PV_AND_SCI_B = 32'b011010???????????111?????1010111; + localparam [31:0] PV_ABS_H = 32'b011100000000?????000?????1010111; + localparam [31:0] PV_ABS_B = 32'b011100000000?????001?????1010111; + localparam [31:0] PV_EXTRACT_H = 32'b011110???????????110?????1010111; + localparam [31:0] PV_EXTRACT_B = 32'b011110???????????111?????1010111; + localparam [31:0] PV_EXTRACTU_H = 32'b100100???????????110?????1010111; + localparam [31:0] PV_EXTRACTU_B = 32'b100100???????????111?????1010111; + localparam [31:0] PV_INSERT_H = 32'b101100???????????110?????1010111; + localparam [31:0] PV_INSERT_B = 32'b101100???????????111?????1010111; localparam [31:0] FLAH = 32'b?????????????????001?????0000111; localparam [31:0] FSAH = 32'b?????????????????001?????0100111; localparam [31:0] FMADD_AH = 32'b?????10??????????101?????1000011; diff --git a/toolchain/riscv-opcodes/opcodes-xpulpimg_CUSTOM b/toolchain/riscv-opcodes/opcodes-xpulpimg_CUSTOM index ab4f6134e..94e364bde 100644 --- a/toolchain/riscv-opcodes/opcodes-xpulpimg_CUSTOM +++ 
b/toolchain/riscv-opcodes/opcodes-xpulpimg_CUSTOM @@ -61,3 +61,104 @@ p.bneimm rs1 imm5 bimm12hi bimm12lo 14..12=3 6..2=0x18 1..0=3 # MAC operations p.mac rd rs1 rs2 31..25=33 14..12=0 6..2=0x0C 1..0=3 p.msu rd rs1 rs2 31..25=33 14..12=1 6..2=0x0C 1..0=3 + +# SIMD arithmetical operations +pv.add.h rd rs1 rs2 31..27=0 26=0 25=0 14..12=0 6..2=0x15 1..0=3 +pv.add.sc.h rd rs1 rs2 31..27=0 26=0 25=0 14..12=4 6..2=0x15 1..0=3 +pv.add.sci.h rd rs1 imm6 31..27=0 26=0 14..12=6 6..2=0x15 1..0=3 +pv.add.b rd rs1 rs2 31..27=0 26=0 25=0 14..12=1 6..2=0x15 1..0=3 +pv.add.sc.b rd rs1 rs2 31..27=0 26=0 25=0 14..12=5 6..2=0x15 1..0=3 +pv.add.sci.b rd rs1 imm6 31..27=0 26=0 14..12=7 6..2=0x15 1..0=3 +pv.sub.h rd rs1 rs2 31..27=1 26=0 25=0 14..12=0 6..2=0x15 1..0=3 +pv.sub.sc.h rd rs1 rs2 31..27=1 26=0 25=0 14..12=4 6..2=0x15 1..0=3 +pv.sub.sci.h rd rs1 imm6 31..27=1 26=0 14..12=6 6..2=0x15 1..0=3 +pv.sub.b rd rs1 rs2 31..27=1 26=0 25=0 14..12=1 6..2=0x15 1..0=3 +pv.sub.sc.b rd rs1 rs2 31..27=1 26=0 25=0 14..12=5 6..2=0x15 1..0=3 +pv.sub.sci.b rd rs1 imm6 31..27=1 26=0 14..12=7 6..2=0x15 1..0=3 + +pv.avg.h rd rs1 rs2 31..27=2 26=0 25=0 14..12=0 6..2=0x15 1..0=3 +pv.avg.sc.h rd rs1 rs2 31..27=2 26=0 25=0 14..12=4 6..2=0x15 1..0=3 +pv.avg.sci.h rd rs1 imm6 31..27=2 26=0 14..12=6 6..2=0x15 1..0=3 +pv.avg.b rd rs1 rs2 31..27=2 26=0 25=0 14..12=1 6..2=0x15 1..0=3 +pv.avg.sc.b rd rs1 rs2 31..27=2 26=0 25=0 14..12=5 6..2=0x15 1..0=3 +pv.avg.sci.b rd rs1 imm6 31..27=2 26=0 14..12=7 6..2=0x15 1..0=3 +pv.avgu.h rd rs1 rs2 31..27=3 26=0 25=0 14..12=0 6..2=0x15 1..0=3 +pv.avgu.sc.h rd rs1 rs2 31..27=3 26=0 25=0 14..12=4 6..2=0x15 1..0=3 +pv.avgu.sci.h rd rs1 imm6 31..27=3 26=0 14..12=6 6..2=0x15 1..0=3 +pv.avgu.b rd rs1 rs2 31..27=3 26=0 25=0 14..12=1 6..2=0x15 1..0=3 +pv.avgu.sc.b rd rs1 rs2 31..27=3 26=0 25=0 14..12=5 6..2=0x15 1..0=3 +pv.avgu.sci.b rd rs1 imm6 31..27=3 26=0 14..12=7 6..2=0x15 1..0=3 + +pv.min.h rd rs1 rs2 31..27=4 26=0 25=0 14..12=0 6..2=0x15 1..0=3 +pv.min.sc.h rd rs1 rs2 
31..27=4 26=0 25=0 14..12=4 6..2=0x15 1..0=3 +pv.min.sci.h rd rs1 imm6 31..27=4 26=0 14..12=6 6..2=0x15 1..0=3 +pv.min.b rd rs1 rs2 31..27=4 26=0 25=0 14..12=1 6..2=0x15 1..0=3 +pv.min.sc.b rd rs1 rs2 31..27=4 26=0 25=0 14..12=5 6..2=0x15 1..0=3 +pv.min.sci.b rd rs1 imm6 31..27=4 26=0 14..12=7 6..2=0x15 1..0=3 +pv.minu.h rd rs1 rs2 31..27=5 26=0 25=0 14..12=0 6..2=0x15 1..0=3 +pv.minu.sc.h rd rs1 rs2 31..27=5 26=0 25=0 14..12=4 6..2=0x15 1..0=3 +pv.minu.sci.h rd rs1 imm6 31..27=5 26=0 14..12=6 6..2=0x15 1..0=3 +pv.minu.b rd rs1 rs2 31..27=5 26=0 25=0 14..12=1 6..2=0x15 1..0=3 +pv.minu.sc.b rd rs1 rs2 31..27=5 26=0 25=0 14..12=5 6..2=0x15 1..0=3 +pv.minu.sci.b rd rs1 imm6 31..27=5 26=0 14..12=7 6..2=0x15 1..0=3 + +pv.max.h rd rs1 rs2 31..27=6 26=0 25=0 14..12=0 6..2=0x15 1..0=3 +pv.max.sc.h rd rs1 rs2 31..27=6 26=0 25=0 14..12=4 6..2=0x15 1..0=3 +pv.max.sci.h rd rs1 imm6 31..27=6 26=0 14..12=6 6..2=0x15 1..0=3 +pv.max.b rd rs1 rs2 31..27=6 26=0 25=0 14..12=1 6..2=0x15 1..0=3 +pv.max.sc.b rd rs1 rs2 31..27=6 26=0 25=0 14..12=5 6..2=0x15 1..0=3 +pv.max.sci.b rd rs1 imm6 31..27=6 26=0 14..12=7 6..2=0x15 1..0=3 +pv.maxu.h rd rs1 rs2 31..27=7 26=0 25=0 14..12=0 6..2=0x15 1..0=3 +pv.maxu.sc.h rd rs1 rs2 31..27=7 26=0 25=0 14..12=4 6..2=0x15 1..0=3 +pv.maxu.sci.h rd rs1 imm6 31..27=7 26=0 14..12=6 6..2=0x15 1..0=3 +pv.maxu.b rd rs1 rs2 31..27=7 26=0 25=0 14..12=1 6..2=0x15 1..0=3 +pv.maxu.sc.b rd rs1 rs2 31..27=7 26=0 25=0 14..12=5 6..2=0x15 1..0=3 +pv.maxu.sci.b rd rs1 imm6 31..27=7 26=0 14..12=7 6..2=0x15 1..0=3 + +pv.srl.h rd rs1 rs2 31..27=8 26=0 25=0 14..12=0 6..2=0x15 1..0=3 +pv.srl.sc.h rd rs1 rs2 31..27=8 26=0 25=0 14..12=4 6..2=0x15 1..0=3 +pv.srl.sci.h rd rs1 imm6 31..27=8 26=0 14..12=6 6..2=0x15 1..0=3 +pv.srl.b rd rs1 rs2 31..27=8 26=0 25=0 14..12=1 6..2=0x15 1..0=3 +pv.srl.sc.b rd rs1 rs2 31..27=8 26=0 25=0 14..12=5 6..2=0x15 1..0=3 +pv.srl.sci.b rd rs1 imm6 31..27=8 26=0 14..12=7 6..2=0x15 1..0=3 +pv.sra.h rd rs1 rs2 31..27=9 26=0 25=0 14..12=0 6..2=0x15 
1..0=3 +pv.sra.sc.h rd rs1 rs2 31..27=9 26=0 25=0 14..12=4 6..2=0x15 1..0=3 +pv.sra.sci.h rd rs1 imm6 31..27=9 26=0 14..12=6 6..2=0x15 1..0=3 +pv.sra.b rd rs1 rs2 31..27=9 26=0 25=0 14..12=1 6..2=0x15 1..0=3 +pv.sra.sc.b rd rs1 rs2 31..27=9 26=0 25=0 14..12=5 6..2=0x15 1..0=3 +pv.sra.sci.b rd rs1 imm6 31..27=9 26=0 14..12=7 6..2=0x15 1..0=3 +pv.sll.h rd rs1 rs2 31..27=10 26=0 25=0 14..12=0 6..2=0x15 1..0=3 +pv.sll.sc.h rd rs1 rs2 31..27=10 26=0 25=0 14..12=4 6..2=0x15 1..0=3 +pv.sll.sci.h rd rs1 imm6 31..27=10 26=0 14..12=6 6..2=0x15 1..0=3 +pv.sll.b rd rs1 rs2 31..27=10 26=0 25=0 14..12=1 6..2=0x15 1..0=3 +pv.sll.sc.b rd rs1 rs2 31..27=10 26=0 25=0 14..12=5 6..2=0x15 1..0=3 +pv.sll.sci.b rd rs1 imm6 31..27=10 26=0 14..12=7 6..2=0x15 1..0=3 + +pv.or.h rd rs1 rs2 31..27=11 26=0 25=0 14..12=0 6..2=0x15 1..0=3 +pv.or.sc.h rd rs1 rs2 31..27=11 26=0 25=0 14..12=4 6..2=0x15 1..0=3 +pv.or.sci.h rd rs1 imm6 31..27=11 26=0 14..12=6 6..2=0x15 1..0=3 +pv.or.b rd rs1 rs2 31..27=11 26=0 25=0 14..12=1 6..2=0x15 1..0=3 +pv.or.sc.b rd rs1 rs2 31..27=11 26=0 25=0 14..12=5 6..2=0x15 1..0=3 +pv.or.sci.b rd rs1 imm6 31..27=11 26=0 14..12=7 6..2=0x15 1..0=3 +pv.xor.h rd rs1 rs2 31..27=12 26=0 25=0 14..12=0 6..2=0x15 1..0=3 +pv.xor.sc.h rd rs1 rs2 31..27=12 26=0 25=0 14..12=4 6..2=0x15 1..0=3 +pv.xor.sci.h rd rs1 imm6 31..27=12 26=0 14..12=6 6..2=0x15 1..0=3 +pv.xor.b rd rs1 rs2 31..27=12 26=0 25=0 14..12=1 6..2=0x15 1..0=3 +pv.xor.sc.b rd rs1 rs2 31..27=12 26=0 25=0 14..12=5 6..2=0x15 1..0=3 +pv.xor.sci.b rd rs1 imm6 31..27=12 26=0 14..12=7 6..2=0x15 1..0=3 +pv.and.h rd rs1 rs2 31..27=13 26=0 25=0 14..12=0 6..2=0x15 1..0=3 +pv.and.sc.h rd rs1 rs2 31..27=13 26=0 25=0 14..12=4 6..2=0x15 1..0=3 +pv.and.sci.h rd rs1 imm6 31..27=13 26=0 14..12=6 6..2=0x15 1..0=3 +pv.and.b rd rs1 rs2 31..27=13 26=0 25=0 14..12=1 6..2=0x15 1..0=3 +pv.and.sc.b rd rs1 rs2 31..27=13 26=0 25=0 14..12=5 6..2=0x15 1..0=3 +pv.and.sci.b rd rs1 imm6 31..27=13 26=0 14..12=7 6..2=0x15 1..0=3 + +pv.abs.h rd rs1 31..27=14 
26=0 25=0 24..20=0 14..12=0 6..2=0x15 1..0=3 +pv.abs.b rd rs1 31..27=14 26=0 25=0 24..20=0 14..12=1 6..2=0x15 1..0=3 + +pv.extract.h rd rs1 imm6 31..27=15 26=0 14..12=6 6..2=0x15 1..0=3 +pv.extract.b rd rs1 imm6 31..27=15 26=0 14..12=7 6..2=0x15 1..0=3 +pv.extractu.h rd rs1 imm6 31..27=18 26=0 14..12=6 6..2=0x15 1..0=3 +pv.extractu.b rd rs1 imm6 31..27=18 26=0 14..12=7 6..2=0x15 1..0=3 +pv.insert.h rd rs1 imm6 31..27=22 26=0 14..12=6 6..2=0x15 1..0=3 +pv.insert.b rd rs1 imm6 31..27=22 26=0 14..12=7 6..2=0x15 1..0=3 diff --git a/toolchain/riscv-opcodes/parse_opcodes b/toolchain/riscv-opcodes/parse_opcodes index ad36c229f..f7b0a837e 100755 --- a/toolchain/riscv-opcodes/parse_opcodes +++ b/toolchain/riscv-opcodes/parse_opcodes @@ -39,6 +39,7 @@ arglut['shamtw'] = (24,20) # for xpulpimg arglut['imm5'] = (24,20) arglut['prs3'] = (11,7) +arglut['imm6'] = (25,20) # for vectors arglut['vd'] = (11,7) From a44e01219935a15b8bbdce63a53a74b4c41b9bf4 Mon Sep 17 00:00:00 2001 From: Sergio Mazzola Date: Wed, 6 Jan 2021 14:14:40 +0100 Subject: [PATCH 17/65] [riscv-isa-sim] Change DEFINE_PITYPE define name in disasm --- toolchain/riscv-isa-sim/disasm/disasm.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/toolchain/riscv-isa-sim/disasm/disasm.cc b/toolchain/riscv-isa-sim/disasm/disasm.cc index 61b44e3c6..8386139b6 100644 --- a/toolchain/riscv-isa-sim/disasm/disasm.cc +++ b/toolchain/riscv-isa-sim/disasm/disasm.cc @@ -477,7 +477,7 @@ disassembler_t::disassembler_t(int xlen) #define DEFINE_PSTORE_IRPOST(code) DISASM_INSN(#code, code, 0, {&xrs2, &store_address_irpost}) #define DEFINE_PSTORE_RRPOST(code) DISASM_INSN(#code, code, 0, {&xrs2, &store_address_rrpost}) #define DEFINE_PSTORE_RR(code) DISASM_INSN(#code, code, 0, {&xrs2, &store_address_rr}) - #define DEFINE_PITYPE(code) DISASM_INSN(#code, code, 0, {&xrd, &xrs1, &p_zimm5}) + #define DEFINE_PI0TYPE(code) DISASM_INSN(#code, code, 0, {&xrd, &xrs1, &p_zimm5}) #define DEFINE_PBTYPE(code) DISASM_INSN(#code, 
code, 0, {&xrd, &xrs1, &p_simm5, &branch_target}) DEFINE_XLOAD(lb) @@ -1355,8 +1355,8 @@ disassembler_t::disassembler_t(int xlen) DEFINE_R1TYPE(p_exthz); DEFINE_R1TYPE(p_extbs); DEFINE_R1TYPE(p_extbz); - DEFINE_PITYPE(p_clip); - DEFINE_PITYPE(p_clipu); + DEFINE_PI0TYPE(p_clip); + DEFINE_PI0TYPE(p_clipu); DEFINE_RTYPE(p_clipr); DEFINE_RTYPE(p_clipur); DEFINE_PBTYPE(p_beqimm); From fa82cdc9eda8aa4f0cded3d884a350c7a4535ec9 Mon Sep 17 00:00:00 2001 From: Sergio Mazzola Date: Wed, 6 Jan 2021 14:17:50 +0100 Subject: [PATCH 18/65] [riscv-isa-sim] Add Xpulpv2 SIMD arithmetical instructions Added instructions: SIMD add, sub, avg, min, max, shifts, logicals, abs, extract, insert --- toolchain/riscv-isa-sim/disasm/disasm.cc | 107 ++++++++++++++++++ toolchain/riscv-isa-sim/riscv/decode.h | 10 ++ .../riscv-isa-sim/riscv/insns/pv_abs_b.h | 9 ++ .../riscv-isa-sim/riscv/insns/pv_abs_h.h | 9 ++ .../riscv-isa-sim/riscv/insns/pv_add_b.h | 9 ++ .../riscv-isa-sim/riscv/insns/pv_add_h.h | 9 ++ .../riscv-isa-sim/riscv/insns/pv_add_sc_b.h | 9 ++ .../riscv-isa-sim/riscv/insns/pv_add_sc_h.h | 9 ++ .../riscv-isa-sim/riscv/insns/pv_add_sci_b.h | 9 ++ .../riscv-isa-sim/riscv/insns/pv_add_sci_h.h | 9 ++ .../riscv-isa-sim/riscv/insns/pv_and_b.h | 9 ++ .../riscv-isa-sim/riscv/insns/pv_and_h.h | 9 ++ .../riscv-isa-sim/riscv/insns/pv_and_sc_b.h | 9 ++ .../riscv-isa-sim/riscv/insns/pv_and_sc_h.h | 9 ++ .../riscv-isa-sim/riscv/insns/pv_and_sci_b.h | 9 ++ .../riscv-isa-sim/riscv/insns/pv_and_sci_h.h | 9 ++ .../riscv-isa-sim/riscv/insns/pv_avg_b.h | 9 ++ .../riscv-isa-sim/riscv/insns/pv_avg_h.h | 9 ++ .../riscv-isa-sim/riscv/insns/pv_avg_sc_b.h | 9 ++ .../riscv-isa-sim/riscv/insns/pv_avg_sc_h.h | 9 ++ .../riscv-isa-sim/riscv/insns/pv_avg_sci_b.h | 9 ++ .../riscv-isa-sim/riscv/insns/pv_avg_sci_h.h | 9 ++ .../riscv-isa-sim/riscv/insns/pv_avgu_b.h | 9 ++ .../riscv-isa-sim/riscv/insns/pv_avgu_h.h | 9 ++ .../riscv-isa-sim/riscv/insns/pv_avgu_sc_b.h | 9 ++ .../riscv-isa-sim/riscv/insns/pv_avgu_sc_h.h | 9 ++ 
.../riscv-isa-sim/riscv/insns/pv_avgu_sci_b.h | 9 ++ .../riscv-isa-sim/riscv/insns/pv_avgu_sci_h.h | 9 ++ .../riscv-isa-sim/riscv/insns/pv_extract_b.h | 1 + .../riscv-isa-sim/riscv/insns/pv_extract_h.h | 1 + .../riscv-isa-sim/riscv/insns/pv_extractu_b.h | 1 + .../riscv-isa-sim/riscv/insns/pv_extractu_h.h | 1 + .../riscv-isa-sim/riscv/insns/pv_insert_b.h | 1 + .../riscv-isa-sim/riscv/insns/pv_insert_h.h | 1 + .../riscv-isa-sim/riscv/insns/pv_max_b.h | 9 ++ .../riscv-isa-sim/riscv/insns/pv_max_h.h | 9 ++ .../riscv-isa-sim/riscv/insns/pv_max_sc_b.h | 9 ++ .../riscv-isa-sim/riscv/insns/pv_max_sc_h.h | 9 ++ .../riscv-isa-sim/riscv/insns/pv_max_sci_b.h | 9 ++ .../riscv-isa-sim/riscv/insns/pv_max_sci_h.h | 9 ++ .../riscv-isa-sim/riscv/insns/pv_maxu_b.h | 9 ++ .../riscv-isa-sim/riscv/insns/pv_maxu_h.h | 9 ++ .../riscv-isa-sim/riscv/insns/pv_maxu_sc_b.h | 9 ++ .../riscv-isa-sim/riscv/insns/pv_maxu_sc_h.h | 9 ++ .../riscv-isa-sim/riscv/insns/pv_maxu_sci_b.h | 10 ++ .../riscv-isa-sim/riscv/insns/pv_maxu_sci_h.h | 9 ++ .../riscv-isa-sim/riscv/insns/pv_min_b.h | 9 ++ .../riscv-isa-sim/riscv/insns/pv_min_h.h | 9 ++ .../riscv-isa-sim/riscv/insns/pv_min_sc_b.h | 9 ++ .../riscv-isa-sim/riscv/insns/pv_min_sc_h.h | 9 ++ .../riscv-isa-sim/riscv/insns/pv_min_sci_b.h | 9 ++ .../riscv-isa-sim/riscv/insns/pv_min_sci_h.h | 9 ++ .../riscv-isa-sim/riscv/insns/pv_minu_b.h | 9 ++ .../riscv-isa-sim/riscv/insns/pv_minu_h.h | 9 ++ .../riscv-isa-sim/riscv/insns/pv_minu_sc_b.h | 9 ++ .../riscv-isa-sim/riscv/insns/pv_minu_sc_h.h | 9 ++ .../riscv-isa-sim/riscv/insns/pv_minu_sci_b.h | 9 ++ .../riscv-isa-sim/riscv/insns/pv_minu_sci_h.h | 9 ++ toolchain/riscv-isa-sim/riscv/insns/pv_or_b.h | 9 ++ toolchain/riscv-isa-sim/riscv/insns/pv_or_h.h | 9 ++ .../riscv-isa-sim/riscv/insns/pv_or_sc_b.h | 9 ++ .../riscv-isa-sim/riscv/insns/pv_or_sc_h.h | 9 ++ .../riscv-isa-sim/riscv/insns/pv_or_sci_b.h | 9 ++ .../riscv-isa-sim/riscv/insns/pv_or_sci_h.h | 9 ++ .../riscv-isa-sim/riscv/insns/pv_sll_b.h | 9 ++ 
.../riscv-isa-sim/riscv/insns/pv_sll_h.h | 9 ++ .../riscv-isa-sim/riscv/insns/pv_sll_sc_b.h | 9 ++ .../riscv-isa-sim/riscv/insns/pv_sll_sc_h.h | 9 ++ .../riscv-isa-sim/riscv/insns/pv_sll_sci_b.h | 9 ++ .../riscv-isa-sim/riscv/insns/pv_sll_sci_h.h | 9 ++ .../riscv-isa-sim/riscv/insns/pv_sra_b.h | 9 ++ .../riscv-isa-sim/riscv/insns/pv_sra_h.h | 9 ++ .../riscv-isa-sim/riscv/insns/pv_sra_sc_b.h | 9 ++ .../riscv-isa-sim/riscv/insns/pv_sra_sc_h.h | 9 ++ .../riscv-isa-sim/riscv/insns/pv_sra_sci_b.h | 9 ++ .../riscv-isa-sim/riscv/insns/pv_sra_sci_h.h | 9 ++ .../riscv-isa-sim/riscv/insns/pv_srl_b.h | 9 ++ .../riscv-isa-sim/riscv/insns/pv_srl_h.h | 9 ++ .../riscv-isa-sim/riscv/insns/pv_srl_sc_b.h | 9 ++ .../riscv-isa-sim/riscv/insns/pv_srl_sc_h.h | 9 ++ .../riscv-isa-sim/riscv/insns/pv_srl_sci_b.h | 9 ++ .../riscv-isa-sim/riscv/insns/pv_srl_sci_h.h | 9 ++ .../riscv-isa-sim/riscv/insns/pv_sub_b.h | 9 ++ .../riscv-isa-sim/riscv/insns/pv_sub_h.h | 9 ++ .../riscv-isa-sim/riscv/insns/pv_sub_sc_b.h | 9 ++ .../riscv-isa-sim/riscv/insns/pv_sub_sc_h.h | 9 ++ .../riscv-isa-sim/riscv/insns/pv_sub_sci_b.h | 9 ++ .../riscv-isa-sim/riscv/insns/pv_sub_sci_h.h | 9 ++ .../riscv-isa-sim/riscv/insns/pv_xor_b.h | 9 ++ .../riscv-isa-sim/riscv/insns/pv_xor_h.h | 9 ++ .../riscv-isa-sim/riscv/insns/pv_xor_sc_b.h | 9 ++ .../riscv-isa-sim/riscv/insns/pv_xor_sc_h.h | 9 ++ .../riscv-isa-sim/riscv/insns/pv_xor_sci_b.h | 9 ++ .../riscv-isa-sim/riscv/insns/pv_xor_sci_h.h | 9 ++ toolchain/riscv-isa-sim/riscv/riscv.mk.in | 92 +++++++++++++++ 95 files changed, 990 insertions(+) create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_abs_b.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_abs_h.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_add_b.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_add_h.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_add_sc_b.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_add_sc_h.h create mode 100644 
toolchain/riscv-isa-sim/riscv/insns/pv_add_sci_b.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_add_sci_h.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_and_b.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_and_h.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_and_sc_b.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_and_sc_h.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_and_sci_b.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_and_sci_h.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_avg_b.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_avg_h.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_avg_sc_b.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_avg_sc_h.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_avg_sci_b.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_avg_sci_h.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_avgu_b.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_avgu_h.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_avgu_sc_b.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_avgu_sc_h.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_avgu_sci_b.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_avgu_sci_h.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_extract_b.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_extract_h.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_extractu_b.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_extractu_h.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_insert_b.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_insert_h.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_max_b.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_max_h.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_max_sc_b.h create mode 100644 
toolchain/riscv-isa-sim/riscv/insns/pv_max_sc_h.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_max_sci_b.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_max_sci_h.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_maxu_b.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_maxu_h.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_maxu_sc_b.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_maxu_sc_h.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_maxu_sci_b.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_maxu_sci_h.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_min_b.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_min_h.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_min_sc_b.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_min_sc_h.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_min_sci_b.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_min_sci_h.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_minu_b.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_minu_h.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_minu_sc_b.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_minu_sc_h.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_minu_sci_b.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_minu_sci_h.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_or_b.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_or_h.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_or_sc_b.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_or_sc_h.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_or_sci_b.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_or_sci_h.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_sll_b.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_sll_h.h create mode 100644 
toolchain/riscv-isa-sim/riscv/insns/pv_sll_sc_b.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_sll_sc_h.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_sll_sci_b.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_sll_sci_h.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_sra_b.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_sra_h.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_sra_sc_b.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_sra_sc_h.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_sra_sci_b.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_sra_sci_h.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_srl_b.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_srl_h.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_srl_sc_b.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_srl_sc_h.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_srl_sci_b.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_srl_sci_h.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_sub_b.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_sub_h.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_sub_sc_b.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_sub_sc_h.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_sub_sci_b.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_sub_sci_h.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_xor_b.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_xor_h.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_xor_sc_b.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_xor_sc_h.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_xor_sci_b.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_xor_sci_h.h diff --git a/toolchain/riscv-isa-sim/disasm/disasm.cc b/toolchain/riscv-isa-sim/disasm/disasm.cc 
index 8386139b6..8277ab077 100644 --- a/toolchain/riscv-isa-sim/disasm/disasm.cc +++ b/toolchain/riscv-isa-sim/disasm/disasm.cc @@ -367,6 +367,18 @@ struct : public arg_t { } } p_simm5; +struct : public arg_t { + std::string to_string(insn_t insn) const { + return std::to_string((uint32_t)insn.p_zimm6()); + } +} p_zimm6; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return std::to_string((int)insn.p_simm6()); + } +} p_simm6; + struct : public arg_t { std::string to_string(insn_t insn) const { return std::to_string((int)insn.i_imm()) + '(' + xpr_name[insn.rs1()] + "!)"; @@ -478,6 +490,8 @@ disassembler_t::disassembler_t(int xlen) #define DEFINE_PSTORE_RRPOST(code) DISASM_INSN(#code, code, 0, {&xrs2, &store_address_rrpost}) #define DEFINE_PSTORE_RR(code) DISASM_INSN(#code, code, 0, {&xrs2, &store_address_rr}) #define DEFINE_PI0TYPE(code) DISASM_INSN(#code, code, 0, {&xrd, &xrs1, &p_zimm5}) + #define DEFINE_PI1ZTYPE(code) DISASM_INSN(#code, code, 0, {&xrd, &xrs1, &p_zimm6}) + #define DEFINE_PI1STYPE(code) DISASM_INSN(#code, code, 0, {&xrd, &xrs1, &p_simm6}) #define DEFINE_PBTYPE(code) DISASM_INSN(#code, code, 0, {&xrd, &xrs1, &p_simm5, &branch_target}) DEFINE_XLOAD(lb) @@ -1364,6 +1378,99 @@ disassembler_t::disassembler_t(int xlen) DEFINE_RTYPE(p_mac); DEFINE_RTYPE(p_msu); + DEFINE_RTYPE(pv_add_h); + DEFINE_RTYPE(pv_add_sc_h); + DEFINE_PI1STYPE(pv_add_sci_h); + DEFINE_RTYPE(pv_add_b); + DEFINE_RTYPE(pv_add_sc_b); + DEFINE_PI1STYPE(pv_add_sci_b); + DEFINE_RTYPE(pv_sub_h); + DEFINE_RTYPE(pv_sub_sc_h); + DEFINE_PI1STYPE(pv_sub_sci_h); + DEFINE_RTYPE(pv_sub_b); + DEFINE_RTYPE(pv_sub_sc_b); + DEFINE_PI1STYPE(pv_sub_sci_b); + DEFINE_RTYPE(pv_avg_h); + DEFINE_RTYPE(pv_avg_sc_h); + DEFINE_PI1STYPE(pv_avg_sci_h); + DEFINE_RTYPE(pv_avg_b); + DEFINE_RTYPE(pv_avg_sc_b); + DEFINE_PI1STYPE(pv_avg_sci_b); + DEFINE_RTYPE(pv_avgu_h); + DEFINE_RTYPE(pv_avgu_sc_h); + DEFINE_PI1ZTYPE(pv_avgu_sci_h); + DEFINE_RTYPE(pv_avgu_b); + DEFINE_RTYPE(pv_avgu_sc_b); + 
DEFINE_PI1ZTYPE(pv_avgu_sci_b); + DEFINE_RTYPE(pv_min_h); + DEFINE_RTYPE(pv_min_sc_h); + DEFINE_PI1STYPE(pv_min_sci_h); + DEFINE_RTYPE(pv_min_b); + DEFINE_RTYPE(pv_min_sc_b); + DEFINE_PI1STYPE(pv_min_sci_b); + DEFINE_RTYPE(pv_minu_h); + DEFINE_RTYPE(pv_minu_sc_h); + DEFINE_PI1ZTYPE(pv_minu_sci_h); + DEFINE_RTYPE(pv_minu_b); + DEFINE_RTYPE(pv_minu_sc_b); + DEFINE_PI1ZTYPE(pv_minu_sci_b); + DEFINE_RTYPE(pv_max_h); + DEFINE_RTYPE(pv_max_sc_h); + DEFINE_PI1STYPE(pv_max_sci_h); + DEFINE_RTYPE(pv_max_b); + DEFINE_RTYPE(pv_max_sc_b); + DEFINE_PI1STYPE(pv_max_sci_b); + DEFINE_RTYPE(pv_maxu_h); + DEFINE_RTYPE(pv_maxu_sc_h); + DEFINE_PI1ZTYPE(pv_maxu_sci_h); + DEFINE_RTYPE(pv_maxu_b); + DEFINE_RTYPE(pv_maxu_sc_b); + DEFINE_PI1ZTYPE(pv_maxu_sci_b); + DEFINE_RTYPE(pv_srl_h); + DEFINE_RTYPE(pv_srl_sc_h); + DEFINE_PI1ZTYPE(pv_srl_sci_h); + DEFINE_RTYPE(pv_srl_b); + DEFINE_RTYPE(pv_srl_sc_b); + DEFINE_PI1ZTYPE(pv_srl_sci_b); + DEFINE_RTYPE(pv_sra_h); + DEFINE_RTYPE(pv_sra_sc_h); + DEFINE_PI1ZTYPE(pv_sra_sci_h); + DEFINE_RTYPE(pv_sra_b); + DEFINE_RTYPE(pv_sra_sc_b); + DEFINE_PI1ZTYPE(pv_sra_sci_b); + DEFINE_RTYPE(pv_sll_h); + DEFINE_RTYPE(pv_sll_sc_h); + DEFINE_PI1ZTYPE(pv_sll_sci_h); + DEFINE_RTYPE(pv_sll_b); + DEFINE_RTYPE(pv_sll_sc_b); + DEFINE_PI1ZTYPE(pv_sll_sci_b); + DEFINE_RTYPE(pv_or_h); + DEFINE_RTYPE(pv_or_sc_h); + DEFINE_PI1ZTYPE(pv_or_sci_h); + DEFINE_RTYPE(pv_or_b); + DEFINE_RTYPE(pv_or_sc_b); + DEFINE_PI1ZTYPE(pv_or_sci_b); + DEFINE_RTYPE(pv_xor_h); + DEFINE_RTYPE(pv_xor_sc_h); + DEFINE_PI1ZTYPE(pv_xor_sci_h); + DEFINE_RTYPE(pv_xor_b); + DEFINE_RTYPE(pv_xor_sc_b); + DEFINE_PI1ZTYPE(pv_xor_sci_b); + DEFINE_RTYPE(pv_and_h); + DEFINE_RTYPE(pv_and_sc_h); + DEFINE_PI1ZTYPE(pv_and_sci_h); + DEFINE_RTYPE(pv_and_b); + DEFINE_RTYPE(pv_and_sc_b); + DEFINE_PI1ZTYPE(pv_and_sci_b); + DEFINE_R1TYPE(pv_abs_h); + DEFINE_R1TYPE(pv_abs_b); + DEFINE_PI1ZTYPE(pv_extract_h); + DEFINE_PI1ZTYPE(pv_extract_b); + DEFINE_PI1ZTYPE(pv_extractu_h); + DEFINE_PI1ZTYPE(pv_extractu_b); + 
DEFINE_PI1ZTYPE(pv_insert_h); + DEFINE_PI1ZTYPE(pv_insert_b); + // provide a default disassembly for all instructions as a fallback #define DECLARE_INSN(code, match, mask) \ add_insn(new disasm_insn_t(#code " (args unknown)", match, mask, {})); diff --git a/toolchain/riscv-isa-sim/riscv/decode.h b/toolchain/riscv-isa-sim/riscv/decode.h index f0dcfa10e..11e711963 100644 --- a/toolchain/riscv-isa-sim/riscv/decode.h +++ b/toolchain/riscv-isa-sim/riscv/decode.h @@ -132,6 +132,9 @@ class insn_t uint64_t p_zimm5() { return x(20, 5); } int64_t p_simm5() { return xs(20, 5); } uint64_t p_rs3() { return x(7, 5); } + uint64_t p_zimm6() { return x(25,1) + (x(20, 5) << 1); } + int64_t p_simm6() { return x(25,1) + (xs(20, 5) << 1); } + private: insn_bits_t b; @@ -288,6 +291,13 @@ class regfile_t #define P_RS3 READ_REG(insn.p_rs3()) /* same as RD, just different semantical value */ #define WRITE_RS1(value) WRITE_REG(insn.rs1(), value) +#define RS1_H(i) ((RS1 >> ((xlen >> 1) * (i & 0x1))) & 0xFFFF) /* select rs1 half: i should only be 0 or 1 */ +#define RS1_B(i) ((RS1 >> ((xlen >> 2) * (i & 0x3))) & 0xFF) /* select rs1 byte: i should only be from 0 to 3 */ +#define RS2_H(i) ((RS2 >> ((xlen >> 1) * (i & 0x1))) & 0xFFFF) /* select rs2 half: i should only be 0 or 1 */ +#define RS2_B(i) ((RS2 >> ((xlen >> 2) * (i & 0x3))) & 0xFF) /* select rs2 byte: i should only be from 0 to 3 */ +#define WRITE_RD_H(i, value) WRITE_RD((RD & ~(0xFFFF << ((xlen >> 1) * (i & 0x1)))) | ((value & 0xFFFF) << ((xlen >> 1) * (i & 0x1)))) /* select to which rd half to write the 16-bit value */ +#define WRITE_RD_B(i, value) WRITE_RD((RD & ~(0xFF << ((xlen >> 2) * (i & 0x3)))) | ((value & 0xFF) << ((xlen >> 2) * (i & 0x3)))) /* select to which rd byte to write the 8-bit value */ + #define sext32(x) ((sreg_t)(int32_t)(x)) #define zext32(x) ((reg_t)(uint32_t)(x)) diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_abs_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_abs_b.h new file mode 100644 index 
000000000..c0bc089cc --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_abs_b.h @@ -0,0 +1,9 @@ +int8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = sext8(RS1_B(i)) > 0 ? RS1_B(i) : -sext8(RS1_B(i)); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_abs_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_abs_h.h new file mode 100644 index 000000000..42ca4ff3c --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_abs_h.h @@ -0,0 +1,9 @@ +int16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = sext16(RS1_H(i)) > 0 ? RS1_H(i) : -sext16(RS1_H(i)); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_add_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_add_b.h new file mode 100644 index 000000000..ecae63a04 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_add_b.h @@ -0,0 +1,9 @@ +int8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = sext8(RS1_B(i)) + sext8(RS2_B(i)); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_add_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_add_h.h new file mode 100644 index 000000000..0a78665af --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_add_h.h @@ -0,0 +1,9 @@ +int16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = sext16(RS1_H(i)) + sext16(RS2_H(i)); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_add_sc_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_add_sc_b.h new file mode 100644 index 000000000..572b61c07 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_add_sc_b.h @@ -0,0 +1,9 @@ +int8_t temp; +uint32_t 
simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = sext8(RS1_B(i)) + sext8(RS2_B(0)); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_add_sc_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_add_sc_h.h new file mode 100644 index 000000000..734a911b8 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_add_sc_h.h @@ -0,0 +1,9 @@ +int16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = sext16(RS1_H(i)) + sext16(RS2_H(0)); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_add_sci_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_add_sci_b.h new file mode 100644 index 000000000..df47f1cb5 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_add_sci_b.h @@ -0,0 +1,9 @@ +int8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = sext8(RS1_B(i)) + insn.p_simm6(); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_add_sci_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_add_sci_h.h new file mode 100644 index 000000000..907621c09 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_add_sci_h.h @@ -0,0 +1,9 @@ +int16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = sext16(RS1_H(i)) + insn.p_simm6(); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_and_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_and_b.h new file mode 100644 index 000000000..e5c584083 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_and_b.h @@ -0,0 +1,9 @@ +uint8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = RS1_B(i) & RS2_B(i); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 
0x000000FF; +} +WRITE_RD(zext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_and_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_and_h.h new file mode 100644 index 000000000..8447455f5 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_and_h.h @@ -0,0 +1,9 @@ +uint16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = RS1_H(i) & RS2_H(i); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(zext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_and_sc_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_and_sc_b.h new file mode 100644 index 000000000..5dfeca4d5 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_and_sc_b.h @@ -0,0 +1,9 @@ +uint8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = RS1_B(i) & RS2_B(0); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(zext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_and_sc_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_and_sc_h.h new file mode 100644 index 000000000..07d035cd0 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_and_sc_h.h @@ -0,0 +1,9 @@ +uint16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = RS1_H(i) & RS2_H(0); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(zext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_and_sci_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_and_sci_b.h new file mode 100644 index 000000000..8501c70cd --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_and_sci_b.h @@ -0,0 +1,9 @@ +uint8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = RS1_B(i) & insn.p_zimm6(); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(zext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_and_sci_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_and_sci_h.h new file mode 100644 index 
000000000..80c617e65 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_and_sci_h.h @@ -0,0 +1,10 @@ +/* AND every 16-bit lane RS1_H(i) with insn.p_zimm6(); lanes are accumulated from the highest index down into simd_rd. */ +uint16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = RS1_H(i) & insn.p_zimm6(); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(zext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_avg_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_avg_b.h new file mode 100644 index 000000000..e35d95cfd --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_avg_b.h @@ -0,0 +1,9 @@ +int8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = (sext8(RS1_B(i)) + sext8(RS2_B(i))) >> 1; + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_avg_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_avg_h.h new file mode 100644 index 000000000..084b978c5 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_avg_h.h @@ -0,0 +1,9 @@ +int16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = (sext16(RS1_H(i)) + sext16(RS2_H(i))) >> 1; + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_avg_sc_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_avg_sc_b.h new file mode 100644 index 000000000..044f8450b --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_avg_sc_b.h @@ -0,0 +1,9 @@ +int8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = (sext8(RS1_B(i)) + sext8(RS2_B(0))) >> 1; + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_avg_sc_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_avg_sc_h.h new file mode 100644 index 000000000..f4877484a --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_avg_sc_h.h @@ -0,0 +1,9 @@ +int16_t temp; +uint32_t simd_rd =
0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = (sext16(RS1_H(i)) + sext16(RS2_H(0))) >> 1; + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_avg_sci_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_avg_sci_b.h new file mode 100644 index 000000000..1c313fb2e --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_avg_sci_b.h @@ -0,0 +1,9 @@ +int8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = (sext8(RS1_B(i)) + insn.p_simm6()) >> 1; + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_avg_sci_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_avg_sci_h.h new file mode 100644 index 000000000..7b9a834e6 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_avg_sci_h.h @@ -0,0 +1,9 @@ +int16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = (sext16(RS1_H(i)) + insn.p_simm6()) >> 1; + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_avgu_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_avgu_b.h new file mode 100644 index 000000000..ca173f6c9 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_avgu_b.h @@ -0,0 +1,9 @@ +uint8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = (zext8(RS1_B(i)) + zext8(RS2_B(i))) >> 1; + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(zext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_avgu_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_avgu_h.h new file mode 100644 index 000000000..e1f517e5b --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_avgu_h.h @@ -0,0 +1,9 @@ +uint16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = (zext16(RS1_H(i)) + zext16(RS2_H(i))) >> 1; + simd_rd <<= 
16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(zext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_avgu_sc_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_avgu_sc_b.h new file mode 100644 index 000000000..cc8b5e1a0 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_avgu_sc_b.h @@ -0,0 +1,9 @@ +uint8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = (zext8(RS1_B(i)) + zext8(RS2_B(0))) >> 1; + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(zext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_avgu_sc_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_avgu_sc_h.h new file mode 100644 index 000000000..78173d664 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_avgu_sc_h.h @@ -0,0 +1,9 @@ +uint16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = (zext16(RS1_H(i)) + zext16(RS2_H(0))) >> 1; + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(zext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_avgu_sci_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_avgu_sci_b.h new file mode 100644 index 000000000..86db408ee --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_avgu_sci_b.h @@ -0,0 +1,9 @@ +uint8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = (zext8(RS1_B(i)) + insn.p_zimm6()) >> 1; + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(zext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_avgu_sci_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_avgu_sci_h.h new file mode 100644 index 000000000..13072157e --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_avgu_sci_h.h @@ -0,0 +1,9 @@ +uint16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = (zext16(RS1_H(i)) + insn.p_zimm6()) >> 1; + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(zext_xlen(simd_rd)); diff 
--git a/toolchain/riscv-isa-sim/riscv/insns/pv_extract_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_extract_b.h new file mode 100644 index 000000000..c7711e189 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_extract_b.h @@ -0,0 +1,2 @@ +/* Write the sign-extended byte lane of rs1 selected by the immediate; xlen/8 = 4 byte lanes need a 2-bit index. */ +WRITE_RD(sext8(RS1_B(insn.p_zimm6() & 0x3))); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_extract_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_extract_h.h new file mode 100644 index 000000000..dc4d6b9a5 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_extract_h.h @@ -0,0 +1,2 @@ +/* Write the sign-extended halfword lane of rs1 selected by the immediate; xlen/16 = 2 halfword lanes need a 1-bit index. */ +WRITE_RD(sext16(RS1_H(insn.p_zimm6() & 0x1))); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_extractu_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_extractu_b.h new file mode 100644 index 000000000..4c72191ea --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_extractu_b.h @@ -0,0 +1,2 @@ +/* Write the zero-extended byte lane of rs1 selected by the immediate; xlen/8 = 4 byte lanes need a 2-bit index. */ +WRITE_RD(zext8(RS1_B(insn.p_zimm6() & 0x3))); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_extractu_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_extractu_h.h new file mode 100644 index 000000000..f1192d1ef --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_extractu_h.h @@ -0,0 +1,2 @@ +/* Write the zero-extended halfword lane of rs1 selected by the immediate; xlen/16 = 2 halfword lanes need a 1-bit index. */ +WRITE_RD(zext16(RS1_H(insn.p_zimm6() & 0x1))); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_insert_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_insert_b.h new file mode 100644 index 000000000..5c47662fb --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_insert_b.h @@ -0,0 +1,2 @@ +/* Pass the immediate-selected byte lane index (2 bits, 4 lanes) and RS1_B(0) to WRITE_RD_B; presumably (index, value) - confirm against the macro. */ +WRITE_RD_B(insn.p_zimm6() & 0x3, RS1_B(0)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_insert_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_insert_h.h new file mode 100644 index 000000000..094d1fe1d --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_insert_h.h @@ -0,0 +1,2 @@ +/* Pass the immediate-selected halfword lane index (1 bit, 2 lanes) and RS1_H(0) to WRITE_RD_H; presumably (index, value) - confirm against the macro. */ +WRITE_RD_H(insn.p_zimm6() & 0x1, RS1_H(0)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_max_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_max_b.h new file mode 100644 index 000000000..4dc3e6be8 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_max_b.h
@@ -0,0 +1,9 @@ +int8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = sext8(RS1_B(i)) > sext8(RS2_B(i)) ? RS1_B(i) : RS2_B(i); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_max_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_max_h.h new file mode 100644 index 000000000..c65a32da6 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_max_h.h @@ -0,0 +1,9 @@ +int16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = sext16(RS1_H(i)) > sext16(RS2_H(i)) ? RS1_H(i) : RS2_H(i); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_max_sc_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_max_sc_b.h new file mode 100644 index 000000000..896087f62 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_max_sc_b.h @@ -0,0 +1,9 @@ +int8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = sext8(RS1_B(i)) > sext8(RS2_B(0)) ? RS1_B(i) : RS2_B(0); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_max_sc_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_max_sc_h.h new file mode 100644 index 000000000..fd55fb49b --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_max_sc_h.h @@ -0,0 +1,9 @@ +int16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = sext16(RS1_H(i)) > sext16(RS2_H(0)) ? 
RS1_H(i) : RS2_H(0); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_max_sci_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_max_sci_b.h new file mode 100644 index 000000000..5e06669fa --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_max_sci_b.h @@ -0,0 +1,9 @@ +int8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = sext8(RS1_B(i)) > insn.p_simm6() ? RS1_B(i) : insn.p_simm6(); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_max_sci_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_max_sci_h.h new file mode 100644 index 000000000..ce1df2ee1 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_max_sci_h.h @@ -0,0 +1,9 @@ +int16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = sext16(RS1_H(i)) > insn.p_simm6() ? RS1_H(i) : insn.p_simm6(); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_maxu_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_maxu_b.h new file mode 100644 index 000000000..bd22a57e2 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_maxu_b.h @@ -0,0 +1,9 @@ +uint8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = zext8(RS1_B(i)) > zext8(RS2_B(i)) ? RS1_B(i) : RS2_B(i); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(zext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_maxu_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_maxu_h.h new file mode 100644 index 000000000..898052c24 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_maxu_h.h @@ -0,0 +1,9 @@ +uint16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = zext16(RS1_H(i)) > zext16(RS2_H(i)) ? 
RS1_H(i) : RS2_H(i); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(zext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_maxu_sc_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_maxu_sc_b.h new file mode 100644 index 000000000..3c8b341b1 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_maxu_sc_b.h @@ -0,0 +1,9 @@ +uint8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = zext8(RS1_B(i)) > zext8(RS2_B(0)) ? RS1_B(i) : RS2_B(0); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(zext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_maxu_sc_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_maxu_sc_h.h new file mode 100644 index 000000000..67cf56374 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_maxu_sc_h.h @@ -0,0 +1,9 @@ +uint16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = zext16(RS1_H(i)) > zext16(RS2_H(0)) ? RS1_H(i) : RS2_H(0); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(zext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_maxu_sci_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_maxu_sci_b.h new file mode 100644 index 000000000..108883d46 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_maxu_sci_b.h @@ -0,0 +1,10 @@ +uint8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = zext8(RS1_B(i)) > insn.p_zimm6() ? RS1_B(i) : insn.p_zimm6(); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(zext_xlen(simd_rd)); + diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_maxu_sci_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_maxu_sci_h.h new file mode 100644 index 000000000..3ff763dd0 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_maxu_sci_h.h @@ -0,0 +1,9 @@ +uint16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = zext16(RS1_H(i)) > insn.p_zimm6() ? 
RS1_H(i) : insn.p_zimm6(); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(zext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_min_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_min_b.h new file mode 100644 index 000000000..1b9104b55 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_min_b.h @@ -0,0 +1,9 @@ +int8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = sext8(RS1_B(i)) <= sext8(RS2_B(i)) ? RS1_B(i) : RS2_B(i); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_min_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_min_h.h new file mode 100644 index 000000000..bbc83caea --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_min_h.h @@ -0,0 +1,9 @@ +int16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = sext16(RS1_H(i)) <= sext16(RS2_H(i)) ? RS1_H(i) : RS2_H(i); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_min_sc_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_min_sc_b.h new file mode 100644 index 000000000..1d2aac507 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_min_sc_b.h @@ -0,0 +1,9 @@ +int8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = sext8(RS1_B(i)) <= sext8(RS2_B(0)) ? RS1_B(i) : RS2_B(0); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_min_sc_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_min_sc_h.h new file mode 100644 index 000000000..b2b8ab110 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_min_sc_h.h @@ -0,0 +1,9 @@ +int16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = sext16(RS1_H(i)) <= sext16(RS2_H(0)) ? 
RS1_H(i) : RS2_H(0); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_min_sci_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_min_sci_b.h new file mode 100644 index 000000000..031b51f36 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_min_sci_b.h @@ -0,0 +1,9 @@ +int8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = sext8(RS1_B(i)) <= insn.p_simm6() ? RS1_B(i) : insn.p_simm6(); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_min_sci_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_min_sci_h.h new file mode 100644 index 000000000..d007e0662 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_min_sci_h.h @@ -0,0 +1,9 @@ +int16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = sext16(RS1_H(i)) <= insn.p_simm6() ? RS1_H(i) : insn.p_simm6(); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_minu_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_minu_b.h new file mode 100644 index 000000000..606dcdefd --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_minu_b.h @@ -0,0 +1,9 @@ +uint8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = zext8(RS1_B(i)) <= zext8(RS2_B(i)) ? RS1_B(i) : RS2_B(i); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(zext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_minu_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_minu_h.h new file mode 100644 index 000000000..287cf9322 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_minu_h.h @@ -0,0 +1,9 @@ +uint16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = zext16(RS1_H(i)) <= zext16(RS2_H(i)) ? 
RS1_H(i) : RS2_H(i); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(zext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_minu_sc_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_minu_sc_b.h new file mode 100644 index 000000000..a8210a751 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_minu_sc_b.h @@ -0,0 +1,9 @@ +uint8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = zext8(RS1_B(i)) <= zext8(RS2_B(0)) ? RS1_B(i) : RS2_B(0); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(zext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_minu_sc_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_minu_sc_h.h new file mode 100644 index 000000000..0c5d317f4 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_minu_sc_h.h @@ -0,0 +1,9 @@ +uint16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = zext16(RS1_H(i)) <= zext16(RS2_H(0)) ? RS1_H(i) : RS2_H(0); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(zext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_minu_sci_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_minu_sci_b.h new file mode 100644 index 000000000..c2cacdf9a --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_minu_sci_b.h @@ -0,0 +1,9 @@ +uint8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = zext8(RS1_B(i)) <= insn.p_zimm6() ? RS1_B(i) : insn.p_zimm6(); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(zext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_minu_sci_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_minu_sci_h.h new file mode 100644 index 000000000..90a1c2333 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_minu_sci_h.h @@ -0,0 +1,9 @@ +uint16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = zext16(RS1_H(i)) <= insn.p_zimm6() ? 
RS1_H(i) : insn.p_zimm6(); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(zext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_or_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_or_b.h new file mode 100644 index 000000000..a47117ee1 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_or_b.h @@ -0,0 +1,9 @@ +uint8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = RS1_B(i) | RS2_B(i); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(zext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_or_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_or_h.h new file mode 100644 index 000000000..df156074d --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_or_h.h @@ -0,0 +1,9 @@ +uint16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = RS1_H(i) | RS2_H(i); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(zext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_or_sc_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_or_sc_b.h new file mode 100644 index 000000000..be527e067 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_or_sc_b.h @@ -0,0 +1,9 @@ +uint8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = RS1_B(i) | RS2_B(0); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(zext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_or_sc_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_or_sc_h.h new file mode 100644 index 000000000..4ef4e475a --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_or_sc_h.h @@ -0,0 +1,9 @@ +uint16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = RS1_H(i) | RS2_H(0); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(zext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_or_sci_b.h 
b/toolchain/riscv-isa-sim/riscv/insns/pv_or_sci_b.h new file mode 100644 index 000000000..c5abaed13 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_or_sci_b.h @@ -0,0 +1,9 @@ +uint8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = RS1_B(i) | insn.p_zimm6(); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(zext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_or_sci_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_or_sci_h.h new file mode 100644 index 000000000..d9e383675 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_or_sci_h.h @@ -0,0 +1,9 @@ +uint16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = RS1_H(i) | insn.p_zimm6(); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(zext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_sll_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_sll_b.h new file mode 100644 index 000000000..3d16dfc38 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_sll_b.h @@ -0,0 +1,9 @@ +uint8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = zext8(RS1_B(i)) << zext8(RS2_B(i)); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(zext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_sll_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_sll_h.h new file mode 100644 index 000000000..129ccc90c --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_sll_h.h @@ -0,0 +1,9 @@ +uint16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = zext16(RS1_H(i)) << zext16(RS2_H(i)); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(zext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_sll_sc_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_sll_sc_b.h new file mode 100644 index 000000000..13b2205b5 --- /dev/null +++ 
b/toolchain/riscv-isa-sim/riscv/insns/pv_sll_sc_b.h @@ -0,0 +1,9 @@ +uint8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = zext8(RS1_B(i)) << zext8(RS2_B(0)); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(zext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_sll_sc_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_sll_sc_h.h new file mode 100644 index 000000000..a5e40c80e --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_sll_sc_h.h @@ -0,0 +1,9 @@ +uint16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = zext16(RS1_H(i)) << zext16(RS2_H(0)); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(zext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_sll_sci_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_sll_sci_b.h new file mode 100644 index 000000000..03379e44a --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_sll_sci_b.h @@ -0,0 +1,9 @@ +uint8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = zext8(RS1_B(i)) << insn.p_zimm6(); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(zext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_sll_sci_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_sll_sci_h.h new file mode 100644 index 000000000..6a0945df9 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_sll_sci_h.h @@ -0,0 +1,9 @@ +uint16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = zext16(RS1_H(i)) << insn.p_zimm6(); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(zext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_sra_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_sra_b.h new file mode 100644 index 000000000..e69ef07e7 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_sra_b.h @@ -0,0 +1,9 @@ +int8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; 
i >= 0; i--){ + temp = sext8(RS1_B(i)) >> zext8(RS2_B(i)); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_sra_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_sra_h.h new file mode 100644 index 000000000..bf3c7cb75 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_sra_h.h @@ -0,0 +1,10 @@ +/* Arithmetic right shift of each 16-bit lane RS1_H(i) by RS2_H(i); the lane is sign-extended from 16 bits before shifting. NOTE(review): shift count is not masked to the lane width - confirm against the Xpulp spec. */ +int16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = sext16(RS1_H(i)) >> zext16(RS2_H(i)); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_sra_sc_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_sra_sc_b.h new file mode 100644 index 000000000..cd0e4409e --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_sra_sc_b.h @@ -0,0 +1,9 @@ +int8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = sext8(RS1_B(i)) >> zext8(RS2_B(0)); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_sra_sc_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_sra_sc_h.h new file mode 100644 index 000000000..1a0e38d07 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_sra_sc_h.h @@ -0,0 +1,10 @@ +/* Arithmetic right shift of each 16-bit lane RS1_H(i) by the scalar RS2_H(0); the lane is sign-extended from 16 bits before shifting. NOTE(review): shift count is not masked to the lane width - confirm against the Xpulp spec. */ +int16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = sext16(RS1_H(i)) >> zext16(RS2_H(0)); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_sra_sci_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_sra_sci_b.h new file mode 100644 index 000000000..2cf433784 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_sra_sci_b.h @@ -0,0 +1,9 @@ +int8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = sext8(RS1_B(i)) >> insn.p_zimm6(); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +}
+WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_sra_sci_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_sra_sci_h.h new file mode 100644 index 000000000..687b54608 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_sra_sci_h.h @@ -0,0 +1,10 @@ +/* Arithmetic right shift of each 16-bit lane RS1_H(i) by insn.p_zimm6(); the lane is sign-extended from 16 bits before shifting. NOTE(review): shift count is not masked to the lane width - confirm against the Xpulp spec. */ +int16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = sext16(RS1_H(i)) >> insn.p_zimm6(); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_srl_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_srl_b.h new file mode 100644 index 000000000..fc14f81ba --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_srl_b.h @@ -0,0 +1,9 @@ +uint8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = zext8(RS1_B(i)) >> zext8(RS2_B(i)); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(zext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_srl_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_srl_h.h new file mode 100644 index 000000000..5942550bb --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_srl_h.h @@ -0,0 +1,9 @@ +uint16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = zext16(RS1_H(i)) >> zext16(RS2_H(i)); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(zext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_srl_sc_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_srl_sc_b.h new file mode 100644 index 000000000..f12f828de --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_srl_sc_b.h @@ -0,0 +1,9 @@ +uint8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = zext8(RS1_B(i)) >> zext8(RS2_B(0)); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(zext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_srl_sc_h.h
b/toolchain/riscv-isa-sim/riscv/insns/pv_srl_sc_h.h new file mode 100644 index 000000000..477b1df98 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_srl_sc_h.h @@ -0,0 +1,9 @@ +uint16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = zext16(RS1_H(i)) >> zext16(RS2_H(0)); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(zext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_srl_sci_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_srl_sci_b.h new file mode 100644 index 000000000..9edaac0b8 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_srl_sci_b.h @@ -0,0 +1,9 @@ +uint8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = zext8(RS1_B(i)) >> insn.p_zimm6(); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(zext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_srl_sci_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_srl_sci_h.h new file mode 100644 index 000000000..12755f62a --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_srl_sci_h.h @@ -0,0 +1,9 @@ +uint16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = zext16(RS1_H(i)) >> insn.p_zimm6(); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(zext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_sub_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_sub_b.h new file mode 100644 index 000000000..2ce1fe224 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_sub_b.h @@ -0,0 +1,9 @@ +int8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = sext8(RS1_B(i)) - sext8(RS2_B(i)); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_sub_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_sub_h.h new file mode 100644 index 000000000..4ec513726 --- /dev/null +++ 
b/toolchain/riscv-isa-sim/riscv/insns/pv_sub_h.h @@ -0,0 +1,9 @@ +int16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = sext16(RS1_H(i)) - sext16(RS2_H(i)); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_sub_sc_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_sub_sc_b.h new file mode 100644 index 000000000..3375e64c2 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_sub_sc_b.h @@ -0,0 +1,9 @@ +int8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = sext8(RS1_B(i)) - sext8(RS2_B(0)); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_sub_sc_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_sub_sc_h.h new file mode 100644 index 000000000..4bb12839c --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_sub_sc_h.h @@ -0,0 +1,9 @@ +int16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = sext16(RS1_H(i)) - sext16(RS2_H(0)); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_sub_sci_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_sub_sci_b.h new file mode 100644 index 000000000..20cc94123 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_sub_sci_b.h @@ -0,0 +1,9 @@ +int8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = sext8(RS1_B(i)) - insn.p_simm6(); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_sub_sci_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_sub_sci_h.h new file mode 100644 index 000000000..50b11a665 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_sub_sci_h.h @@ -0,0 +1,9 @@ +int16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 
1; i >= 0; i--){ + temp = sext16(RS1_H(i)) - insn.p_simm6(); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_xor_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_xor_b.h new file mode 100644 index 000000000..307beb5b3 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_xor_b.h @@ -0,0 +1,9 @@ +uint8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = RS1_B(i) ^ RS2_B(i); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(zext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_xor_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_xor_h.h new file mode 100644 index 000000000..30b0be448 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_xor_h.h @@ -0,0 +1,9 @@ +uint16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = RS1_H(i) ^ RS2_H(i); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(zext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_xor_sc_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_xor_sc_b.h new file mode 100644 index 000000000..f2e53bf82 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_xor_sc_b.h @@ -0,0 +1,9 @@ +uint8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = RS1_B(i) ^ RS2_B(0); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(zext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_xor_sc_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_xor_sc_h.h new file mode 100644 index 000000000..69bbeae9b --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_xor_sc_h.h @@ -0,0 +1,9 @@ +uint16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = RS1_H(i) ^ RS2_H(0); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(zext_xlen(simd_rd)); diff --git 
a/toolchain/riscv-isa-sim/riscv/insns/pv_xor_sci_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_xor_sci_b.h new file mode 100644 index 000000000..5f765e609 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_xor_sci_b.h @@ -0,0 +1,9 @@ +uint8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = RS1_B(i) ^ insn.p_zimm6(); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(zext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_xor_sci_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_xor_sci_h.h new file mode 100644 index 000000000..ed0c46e06 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_xor_sci_h.h @@ -0,0 +1,9 @@ +uint16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = RS1_H(i) ^ insn.p_zimm6(); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(zext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/riscv.mk.in b/toolchain/riscv-isa-sim/riscv/riscv.mk.in index 4acb19170..a6ece4fb4 100644 --- a/toolchain/riscv-isa-sim/riscv/riscv.mk.in +++ b/toolchain/riscv-isa-sim/riscv/riscv.mk.in @@ -796,6 +796,98 @@ riscv_insn_ext_xpulpimg = \ p_bneimm \ p_mac \ p_msu \ + pv_add_h \ + pv_add_sc_h \ + pv_add_sci_h \ + pv_add_b \ + pv_add_sc_b \ + pv_add_sci_b \ + pv_sub_h \ + pv_sub_sc_h \ + pv_sub_sci_h \ + pv_sub_b \ + pv_sub_sc_b \ + pv_sub_sci_b \ + pv_avg_h \ + pv_avg_sc_h \ + pv_avg_sci_h \ + pv_avg_b \ + pv_avg_sc_b \ + pv_avg_sci_b \ + pv_avgu_h \ + pv_avgu_sc_h \ + pv_avgu_sci_h \ + pv_avgu_b \ + pv_avgu_sc_b \ + pv_avgu_sci_b \ + pv_min_h \ + pv_min_sc_h \ + pv_min_sci_h \ + pv_min_b \ + pv_min_sc_b \ + pv_min_sci_b \ + pv_minu_h \ + pv_minu_sc_h \ + pv_minu_sci_h \ + pv_minu_b \ + pv_minu_sc_b \ + pv_minu_sci_b \ + pv_max_h \ + pv_max_sc_h \ + pv_max_sci_h \ + pv_max_b \ + pv_max_sc_b \ + pv_max_sci_b \ + pv_maxu_h \ + pv_maxu_sc_h \ + pv_maxu_sci_h \ + pv_maxu_b \ + pv_maxu_sc_b \ + pv_maxu_sci_b \ + pv_srl_h \ + pv_srl_sc_h \ 
+ pv_srl_sci_h \ + pv_srl_b \ + pv_srl_sc_b \ + pv_srl_sci_b \ + pv_sra_h \ + pv_sra_sc_h \ + pv_sra_sci_h \ + pv_sra_b \ + pv_sra_sc_b \ + pv_sra_sci_b \ + pv_sll_h \ + pv_sll_sc_h \ + pv_sll_sci_h \ + pv_sll_b \ + pv_sll_sc_b \ + pv_sll_sci_b \ + pv_or_h \ + pv_or_sc_h \ + pv_or_sci_h \ + pv_or_b \ + pv_or_sc_b \ + pv_or_sci_b \ + pv_xor_h \ + pv_xor_sc_h \ + pv_xor_sci_h \ + pv_xor_b \ + pv_xor_sc_b \ + pv_xor_sci_b \ + pv_and_h \ + pv_and_sc_h \ + pv_and_sci_h \ + pv_and_b \ + pv_and_sc_b \ + pv_and_sci_b \ + pv_abs_h \ + pv_abs_b \ + pv_extract_h \ + pv_extract_b \ + pv_extractu_h \ + pv_extractu_b \ + pv_insert_h \ + pv_insert_b \ riscv_insn_ext_h = \ hfence_gvma \ From 8c9e0004b8e2ad8dc87f710848ba34a5dfccf135 Mon Sep 17 00:00:00 2001 From: Sergio Mazzola Date: Wed, 6 Jan 2021 18:30:13 +0100 Subject: [PATCH 19/65] [riscv-opcodes] Make some vectorial instructions pseudo due to Xpulpv2 SIMD overlap --- toolchain/riscv-opcodes/Makefile | 2 +- toolchain/riscv-opcodes/README.md | 6 +- toolchain/riscv-opcodes/encoding_out.h | 1323 ++++++++++++++++++++++++ toolchain/riscv-opcodes/inst.sverilog | 441 ++++++++ toolchain/riscv-opcodes/opcodes-rvv | 148 +-- 5 files changed, 1844 insertions(+), 76 deletions(-) diff --git a/toolchain/riscv-opcodes/Makefile b/toolchain/riscv-opcodes/Makefile index 9afe3b7eb..12d02b4aa 100644 --- a/toolchain/riscv-opcodes/Makefile +++ b/toolchain/riscv-opcodes/Makefile @@ -7,7 +7,7 @@ MY_OPCODES := opcodes-frep_CUSTOM opcodes-xpulpimg_CUSTOM opcodes-rv32d-zfh_DRAF ALL_OPCODES := opcodes-pseudo $(ALL_REAL_OPCODES) $(MY_OPCODES) opcodes-rvv-pseudo # Opcodes to be discarded -DISCARDED_OPCODES := opcodes-frep_CUSTOM opcodes-rvv +DISCARDED_OPCODES := opcodes-frep_CUSTOM OPCODES = $(filter-out $(sort $(DISCARDED_OPCODES)), $(sort $(ALL_OPCODES))) diff --git a/toolchain/riscv-opcodes/README.md b/toolchain/riscv-opcodes/README.md index f3ac222ac..4b8110225 100644 --- a/toolchain/riscv-opcodes/README.md +++ b/toolchain/riscv-opcodes/README.md @@ 
-32,4 +32,8 @@ starting from their high-level, human-readable description. for the parsing script execution, basing on the target architecture, by listing them in the variable `DISCARDED_OPCODES`; - opcodes files from the official 128-bit extension have not been introduced - due to the other changes which they imply to other opcodes specifications. + due to the other changes which they imply to other opcodes specifications; +- some of the instructions originally declared in the vector extension + (`opcodes-rvv` file) have been marked as pseudo-instructions because their + opcode space overlaps with that of the SIMD instructions from Xpulpv2, + defined in `opcodes-xpulpimg_CUSTOM`. diff --git a/toolchain/riscv-opcodes/encoding_out.h b/toolchain/riscv-opcodes/encoding_out.h index bb0afda67..85eca9d1e 100644 --- a/toolchain/riscv-opcodes/encoding_out.h +++ b/toolchain/riscv-opcodes/encoding_out.h @@ -924,6 +924,888 @@ #define MASK_C_SWSP 0xe003 #define MATCH_C_FSWSP 0xe002 #define MASK_C_FSWSP 0xe003 +#define MATCH_VSETVLI 0x7057 +#define MASK_VSETVLI 0x8000707f +#define MATCH_VSETVL 0x80007057 +#define MASK_VSETVL 0xfe00707f +#define MATCH_VLE8_V 0x7 +#define MASK_VLE8_V 0x1df0707f +#define MATCH_VLE16_V 0x5007 +#define MASK_VLE16_V 0x1df0707f +#define MATCH_VLE32_V 0x6007 +#define MASK_VLE32_V 0x1df0707f +#define MATCH_VLE64_V 0x7007 +#define MASK_VLE64_V 0x1df0707f +#define MATCH_VLE128_V 0x10000007 +#define MASK_VLE128_V 0x1df0707f +#define MATCH_VLE256_V 0x10005007 +#define MASK_VLE256_V 0x1df0707f +#define MATCH_VLE512_V 0x10006007 +#define MASK_VLE512_V 0x1df0707f +#define MATCH_VLE1024_V 0x10007007 +#define MASK_VLE1024_V 0x1df0707f +#define MATCH_VSE8_V 0x27 +#define MASK_VSE8_V 0x1df0707f +#define MATCH_VSE16_V 0x5027 +#define MASK_VSE16_V 0x1df0707f +#define MATCH_VSE32_V 0x6027 +#define MASK_VSE32_V 0x1df0707f +#define MATCH_VSE64_V 0x7027 +#define MASK_VSE64_V 0x1df0707f +#define MATCH_VSE128_V 0x10000027 +#define MASK_VSE128_V
0x1df0707f +#define MATCH_VSE256_V 0x10005027 +#define MASK_VSE256_V 0x1df0707f +#define MATCH_VSE512_V 0x10006027 +#define MASK_VSE512_V 0x1df0707f +#define MATCH_VSE1024_V 0x10007027 +#define MASK_VSE1024_V 0x1df0707f +#define MATCH_VLSE8_V 0x8000007 +#define MASK_VLSE8_V 0x1c00707f +#define MATCH_VLSE16_V 0x8005007 +#define MASK_VLSE16_V 0x1c00707f +#define MATCH_VLSE32_V 0x8006007 +#define MASK_VLSE32_V 0x1c00707f +#define MATCH_VLSE64_V 0x8007007 +#define MASK_VLSE64_V 0x1c00707f +#define MATCH_VLSE128_V 0x18000007 +#define MASK_VLSE128_V 0x1c00707f +#define MATCH_VLSE256_V 0x18005007 +#define MASK_VLSE256_V 0x1c00707f +#define MATCH_VLSE512_V 0x18006007 +#define MASK_VLSE512_V 0x1c00707f +#define MATCH_VLSE1024_V 0x18007007 +#define MASK_VLSE1024_V 0x1c00707f +#define MATCH_VSSE8_V 0x8000027 +#define MASK_VSSE8_V 0x1c00707f +#define MATCH_VSSE16_V 0x8005027 +#define MASK_VSSE16_V 0x1c00707f +#define MATCH_VSSE32_V 0x8006027 +#define MASK_VSSE32_V 0x1c00707f +#define MATCH_VSSE64_V 0x8007027 +#define MASK_VSSE64_V 0x1c00707f +#define MATCH_VSSE128_V 0x18000027 +#define MASK_VSSE128_V 0x1c00707f +#define MATCH_VSSE256_V 0x18005027 +#define MASK_VSSE256_V 0x1c00707f +#define MATCH_VSSE512_V 0x18006027 +#define MASK_VSSE512_V 0x1c00707f +#define MATCH_VSSE1024_V 0x18007027 +#define MASK_VSSE1024_V 0x1c00707f +#define MATCH_VLXEI8_V 0xc000007 +#define MASK_VLXEI8_V 0x1c00707f +#define MATCH_VLXEI16_V 0xc005007 +#define MASK_VLXEI16_V 0x1c00707f +#define MATCH_VLXEI32_V 0xc006007 +#define MASK_VLXEI32_V 0x1c00707f +#define MATCH_VLXEI64_V 0xc007007 +#define MASK_VLXEI64_V 0x1c00707f +#define MATCH_VLXEI128_V 0x1c000007 +#define MASK_VLXEI128_V 0x1c00707f +#define MATCH_VLXEI256_V 0x1c005007 +#define MASK_VLXEI256_V 0x1c00707f +#define MATCH_VLXEI512_V 0x1c006007 +#define MASK_VLXEI512_V 0x1c00707f +#define MATCH_VLXEI1024_V 0x1c007007 +#define MASK_VLXEI1024_V 0x1c00707f +#define MATCH_VSXEI8_V 0xc000027 +#define MASK_VSXEI8_V 0x1c00707f +#define MATCH_VSXEI16_V 
0xc005027 +#define MASK_VSXEI16_V 0x1c00707f +#define MATCH_VSXEI32_V 0xc006027 +#define MASK_VSXEI32_V 0x1c00707f +#define MATCH_VSXEI64_V 0xc007027 +#define MASK_VSXEI64_V 0x1c00707f +#define MATCH_VSXEI128_V 0x1c000027 +#define MASK_VSXEI128_V 0x1c00707f +#define MATCH_VSXEI256_V 0x1c005027 +#define MASK_VSXEI256_V 0x1c00707f +#define MATCH_VSXEI512_V 0x1c006027 +#define MASK_VSXEI512_V 0x1c00707f +#define MATCH_VSXEI1024_V 0x1c007027 +#define MASK_VSXEI1024_V 0x1c00707f +#define MATCH_VSUXEI8_V 0x4000027 +#define MASK_VSUXEI8_V 0x1c00707f +#define MATCH_VSUXEI16_V 0x4005027 +#define MASK_VSUXEI16_V 0x1c00707f +#define MATCH_VSUXEI32_V 0x4006027 +#define MASK_VSUXEI32_V 0x1c00707f +#define MATCH_VSUXEI64_V 0x4007027 +#define MASK_VSUXEI64_V 0x1c00707f +#define MATCH_VSUXEI128_V 0x14000027 +#define MASK_VSUXEI128_V 0x1c00707f +#define MATCH_VSUXEI256_V 0x14005027 +#define MASK_VSUXEI256_V 0x1c00707f +#define MATCH_VSUXEI512_V 0x14006027 +#define MASK_VSUXEI512_V 0x1c00707f +#define MATCH_VSUXEI1024_V 0x14007027 +#define MASK_VSUXEI1024_V 0x1c00707f +#define MATCH_VLE8FF_V 0x1000007 +#define MASK_VLE8FF_V 0x1df0707f +#define MATCH_VLE16FF_V 0x1005007 +#define MASK_VLE16FF_V 0x1df0707f +#define MATCH_VLE32FF_V 0x1006007 +#define MASK_VLE32FF_V 0x1df0707f +#define MATCH_VLE64FF_V 0x1007007 +#define MASK_VLE64FF_V 0x1df0707f +#define MATCH_VLE128FF_V 0x11000007 +#define MASK_VLE128FF_V 0x1df0707f +#define MATCH_VLE256FF_V 0x11005007 +#define MASK_VLE256FF_V 0x1df0707f +#define MATCH_VLE512FF_V 0x11006007 +#define MASK_VLE512FF_V 0x1df0707f +#define MATCH_VLE1024FF_V 0x11007007 +#define MASK_VLE1024FF_V 0x1df0707f +#define MATCH_VL1RE8_V 0x2800007 +#define MASK_VL1RE8_V 0xfff0707f +#define MATCH_VL1RE16_V 0x2805007 +#define MASK_VL1RE16_V 0xfff0707f +#define MATCH_VL1RE32_V 0x2806007 +#define MASK_VL1RE32_V 0xfff0707f +#define MATCH_VL1RE64_V 0x2807007 +#define MASK_VL1RE64_V 0xfff0707f +#define MATCH_VL2RE8_V 0x22800007 +#define MASK_VL2RE8_V 0xfff0707f +#define 
MATCH_VL2RE16_V 0x22805007 +#define MASK_VL2RE16_V 0xfff0707f +#define MATCH_VL2RE32_V 0x22806007 +#define MASK_VL2RE32_V 0xfff0707f +#define MATCH_VL2RE64_V 0x22807007 +#define MASK_VL2RE64_V 0xfff0707f +#define MATCH_VL4RE8_V 0x62800007 +#define MASK_VL4RE8_V 0xfff0707f +#define MATCH_VL4RE16_V 0x62805007 +#define MASK_VL4RE16_V 0xfff0707f +#define MATCH_VL4RE32_V 0x62806007 +#define MASK_VL4RE32_V 0xfff0707f +#define MATCH_VL4RE64_V 0x62807007 +#define MASK_VL4RE64_V 0xfff0707f +#define MATCH_VL8RE8_V 0xe2800007 +#define MASK_VL8RE8_V 0xfff0707f +#define MATCH_VL8RE16_V 0xe2805007 +#define MASK_VL8RE16_V 0xfff0707f +#define MATCH_VL8RE32_V 0xe2806007 +#define MASK_VL8RE32_V 0xfff0707f +#define MATCH_VL8RE64_V 0xe2807007 +#define MASK_VL8RE64_V 0xfff0707f +#define MATCH_VS1R_V 0x2800027 +#define MASK_VS1R_V 0xfff0707f +#define MATCH_VS2R_V 0x22800027 +#define MASK_VS2R_V 0xfff0707f +#define MATCH_VS4R_V 0x62800027 +#define MASK_VS4R_V 0xfff0707f +#define MATCH_VS8R_V 0xe2800027 +#define MASK_VS8R_V 0xfff0707f +#define MATCH_VFADD_VF 0x5057 +#define MASK_VFADD_VF 0xfc00707f +#define MATCH_VFSUB_VF 0x8005057 +#define MASK_VFSUB_VF 0xfc00707f +#define MATCH_VFMIN_VF 0x10005057 +#define MASK_VFMIN_VF 0xfc00707f +#define MATCH_VFMAX_VF 0x18005057 +#define MASK_VFMAX_VF 0xfc00707f +#define MATCH_VFSGNJ_VF 0x20005057 +#define MASK_VFSGNJ_VF 0xfc00707f +#define MATCH_VFSGNJN_VF 0x24005057 +#define MASK_VFSGNJN_VF 0xfc00707f +#define MATCH_VFSGNJX_VF 0x28005057 +#define MASK_VFSGNJX_VF 0xfc00707f +#define MATCH_VFSLIDE1UP_VF 0x38005057 +#define MASK_VFSLIDE1UP_VF 0xfc00707f +#define MATCH_VFSLIDE1DOWN_VF 0x3c005057 +#define MASK_VFSLIDE1DOWN_VF 0xfc00707f +#define MATCH_VFMV_S_F 0x42005057 +#define MASK_VFMV_S_F 0xfff0707f +#define MATCH_VFMERGE_VFM 0x5c005057 +#define MASK_VFMERGE_VFM 0xfe00707f +#define MATCH_VFMV_V_F 0x5e005057 +#define MASK_VFMV_V_F 0xfff0707f +#define MATCH_VMFEQ_VF 0x60005057 +#define MASK_VMFEQ_VF 0xfc00707f +#define MATCH_VMFLE_VF 0x64005057 
+#define MASK_VMFLE_VF 0xfc00707f +#define MATCH_VMFLT_VF 0x6c005057 +#define MASK_VMFLT_VF 0xfc00707f +#define MATCH_VMFNE_VF 0x70005057 +#define MASK_VMFNE_VF 0xfc00707f +#define MATCH_VMFGT_VF 0x74005057 +#define MASK_VMFGT_VF 0xfc00707f +#define MATCH_VMFGE_VF 0x7c005057 +#define MASK_VMFGE_VF 0xfc00707f +#define MATCH_VFDIV_VF 0x80005057 +#define MASK_VFDIV_VF 0xfc00707f +#define MATCH_VFRDIV_VF 0x84005057 +#define MASK_VFRDIV_VF 0xfc00707f +#define MATCH_VFMUL_VF 0x90005057 +#define MASK_VFMUL_VF 0xfc00707f +#define MATCH_VFRSUB_VF 0x9c005057 +#define MASK_VFRSUB_VF 0xfc00707f +#define MATCH_VFMADD_VF 0xa0005057 +#define MASK_VFMADD_VF 0xfc00707f +#define MATCH_VFNMADD_VF 0xa4005057 +#define MASK_VFNMADD_VF 0xfc00707f +#define MATCH_VFMSUB_VF 0xa8005057 +#define MASK_VFMSUB_VF 0xfc00707f +#define MATCH_VFNMSUB_VF 0xac005057 +#define MASK_VFNMSUB_VF 0xfc00707f +#define MATCH_VFMACC_VF 0xb0005057 +#define MASK_VFMACC_VF 0xfc00707f +#define MATCH_VFNMACC_VF 0xb4005057 +#define MASK_VFNMACC_VF 0xfc00707f +#define MATCH_VFMSAC_VF 0xb8005057 +#define MASK_VFMSAC_VF 0xfc00707f +#define MATCH_VFNMSAC_VF 0xbc005057 +#define MASK_VFNMSAC_VF 0xfc00707f +#define MATCH_VFWADD_VF 0xc0005057 +#define MASK_VFWADD_VF 0xfc00707f +#define MATCH_VFWSUB_VF 0xc8005057 +#define MASK_VFWSUB_VF 0xfc00707f +#define MATCH_VFWADD_WF 0xd0005057 +#define MASK_VFWADD_WF 0xfc00707f +#define MATCH_VFWSUB_WF 0xd8005057 +#define MASK_VFWSUB_WF 0xfc00707f +#define MATCH_VFWMUL_VF 0xe0005057 +#define MASK_VFWMUL_VF 0xfc00707f +#define MATCH_VFWMACC_VF 0xf0005057 +#define MASK_VFWMACC_VF 0xfc00707f +#define MATCH_VFWNMACC_VF 0xf4005057 +#define MASK_VFWNMACC_VF 0xfc00707f +#define MATCH_VFWMSAC_VF 0xf8005057 +#define MASK_VFWMSAC_VF 0xfc00707f +#define MATCH_VFWNMSAC_VF 0xfc005057 +#define MASK_VFWNMSAC_VF 0xfc00707f +#define MATCH_VFADD_VV 0x1057 +#define MASK_VFADD_VV 0xfc00707f +#define MATCH_VFREDSUM_VS 0x4001057 +#define MASK_VFREDSUM_VS 0xfc00707f +#define MATCH_VFSUB_VV 0x8001057 +#define 
MASK_VFSUB_VV 0xfc00707f +#define MATCH_VFREDOSUM_VS 0xc001057 +#define MASK_VFREDOSUM_VS 0xfc00707f +#define MATCH_VFMIN_VV 0x10001057 +#define MASK_VFMIN_VV 0xfc00707f +#define MATCH_VFREDMIN_VS 0x14001057 +#define MASK_VFREDMIN_VS 0xfc00707f +#define MATCH_VFMAX_VV 0x18001057 +#define MASK_VFMAX_VV 0xfc00707f +#define MATCH_VFREDMAX_VS 0x1c001057 +#define MASK_VFREDMAX_VS 0xfc00707f +#define MATCH_VFSGNJ_VV 0x20001057 +#define MASK_VFSGNJ_VV 0xfc00707f +#define MATCH_VFSGNJN_VV 0x24001057 +#define MASK_VFSGNJN_VV 0xfc00707f +#define MATCH_VFSGNJX_VV 0x28001057 +#define MASK_VFSGNJX_VV 0xfc00707f +#define MATCH_VFMV_F_S 0x42001057 +#define MASK_VFMV_F_S 0xfe0ff07f +#define MATCH_VMFEQ_VV 0x60001057 +#define MASK_VMFEQ_VV 0xfc00707f +#define MATCH_VMFLE_VV 0x64001057 +#define MASK_VMFLE_VV 0xfc00707f +#define MATCH_VMFLT_VV 0x6c001057 +#define MASK_VMFLT_VV 0xfc00707f +#define MATCH_VMFNE_VV 0x70001057 +#define MASK_VMFNE_VV 0xfc00707f +#define MATCH_VFDIV_VV 0x80001057 +#define MASK_VFDIV_VV 0xfc00707f +#define MATCH_VFMUL_VV 0x90001057 +#define MASK_VFMUL_VV 0xfc00707f +#define MATCH_VFMADD_VV 0xa0001057 +#define MASK_VFMADD_VV 0xfc00707f +#define MATCH_VFNMADD_VV 0xa4001057 +#define MASK_VFNMADD_VV 0xfc00707f +#define MATCH_VFMSUB_VV 0xa8001057 +#define MASK_VFMSUB_VV 0xfc00707f +#define MATCH_VFNMSUB_VV 0xac001057 +#define MASK_VFNMSUB_VV 0xfc00707f +#define MATCH_VFMACC_VV 0xb0001057 +#define MASK_VFMACC_VV 0xfc00707f +#define MATCH_VFNMACC_VV 0xb4001057 +#define MASK_VFNMACC_VV 0xfc00707f +#define MATCH_VFMSAC_VV 0xb8001057 +#define MASK_VFMSAC_VV 0xfc00707f +#define MATCH_VFNMSAC_VV 0xbc001057 +#define MASK_VFNMSAC_VV 0xfc00707f +#define MATCH_VFCVT_XU_F_V 0x48001057 +#define MASK_VFCVT_XU_F_V 0xfc0ff07f +#define MATCH_VFCVT_X_F_V 0x48009057 +#define MASK_VFCVT_X_F_V 0xfc0ff07f +#define MATCH_VFCVT_F_XU_V 0x48011057 +#define MASK_VFCVT_F_XU_V 0xfc0ff07f +#define MATCH_VFCVT_F_X_V 0x48019057 +#define MASK_VFCVT_F_X_V 0xfc0ff07f +#define 
MATCH_VFCVT_RTZ_XU_F_V 0x48031057 +#define MASK_VFCVT_RTZ_XU_F_V 0xfc0ff07f +#define MATCH_VFCVT_RTZ_X_F_V 0x48039057 +#define MASK_VFCVT_RTZ_X_F_V 0xfc0ff07f +#define MATCH_VFWCVT_XU_F_V 0x48041057 +#define MASK_VFWCVT_XU_F_V 0xfc0ff07f +#define MATCH_VFWCVT_X_F_V 0x48049057 +#define MASK_VFWCVT_X_F_V 0xfc0ff07f +#define MATCH_VFWCVT_F_XU_V 0x48051057 +#define MASK_VFWCVT_F_XU_V 0xfc0ff07f +#define MATCH_VFWCVT_F_X_V 0x48059057 +#define MASK_VFWCVT_F_X_V 0xfc0ff07f +#define MATCH_VFWCVT_F_F_V 0x48061057 +#define MASK_VFWCVT_F_F_V 0xfc0ff07f +#define MATCH_VFWCVT_RTZ_XU_F_V 0x48071057 +#define MASK_VFWCVT_RTZ_XU_F_V 0xfc0ff07f +#define MATCH_VFWCVT_RTZ_X_F_V 0x48079057 +#define MASK_VFWCVT_RTZ_X_F_V 0xfc0ff07f +#define MATCH_VFNCVT_XU_F_W 0x48081057 +#define MASK_VFNCVT_XU_F_W 0xfc0ff07f +#define MATCH_VFNCVT_X_F_W 0x48089057 +#define MASK_VFNCVT_X_F_W 0xfc0ff07f +#define MATCH_VFNCVT_F_XU_W 0x48091057 +#define MASK_VFNCVT_F_XU_W 0xfc0ff07f +#define MATCH_VFNCVT_F_X_W 0x48099057 +#define MASK_VFNCVT_F_X_W 0xfc0ff07f +#define MATCH_VFNCVT_F_F_W 0x480a1057 +#define MASK_VFNCVT_F_F_W 0xfc0ff07f +#define MATCH_VFNCVT_ROD_F_F_W 0x480a9057 +#define MASK_VFNCVT_ROD_F_F_W 0xfc0ff07f +#define MATCH_VFNCVT_RTZ_XU_F_W 0x480b1057 +#define MASK_VFNCVT_RTZ_XU_F_W 0xfc0ff07f +#define MATCH_VFNCVT_RTZ_X_F_W 0x480b9057 +#define MASK_VFNCVT_RTZ_X_F_W 0xfc0ff07f +#define MATCH_VFSQRT_V 0x4c001057 +#define MASK_VFSQRT_V 0xfc0ff07f +#define MATCH_VFRSQRTE7_V 0x4c021057 +#define MASK_VFRSQRTE7_V 0xfc0ff07f +#define MATCH_VFRECE7_V 0x4c029057 +#define MASK_VFRECE7_V 0xfc0ff07f +#define MATCH_VFCLASS_V 0x4c081057 +#define MASK_VFCLASS_V 0xfc0ff07f +#define MATCH_VFWADD_VV 0xc0001057 +#define MASK_VFWADD_VV 0xfc00707f +#define MATCH_VFWREDSUM_VS 0xc4001057 +#define MASK_VFWREDSUM_VS 0xfc00707f +#define MATCH_VFWSUB_VV 0xc8001057 +#define MASK_VFWSUB_VV 0xfc00707f +#define MATCH_VFWREDOSUM_VS 0xcc001057 +#define MASK_VFWREDOSUM_VS 0xfc00707f +#define MATCH_VFWADD_WV 0xd0001057 +#define 
MASK_VFWADD_WV 0xfc00707f +#define MATCH_VFWSUB_WV 0xd8001057 +#define MASK_VFWSUB_WV 0xfc00707f +#define MATCH_VFWMUL_VV 0xe0001057 +#define MASK_VFWMUL_VV 0xfc00707f +#define MATCH_VFDOT_VV 0xe4001057 +#define MASK_VFDOT_VV 0xfc00707f +#define MATCH_VFWMACC_VV 0xf0001057 +#define MASK_VFWMACC_VV 0xfc00707f +#define MATCH_VFWNMACC_VV 0xf4001057 +#define MASK_VFWNMACC_VV 0xfc00707f +#define MATCH_VFWMSAC_VV 0xf8001057 +#define MASK_VFWMSAC_VV 0xfc00707f +#define MATCH_VFWNMSAC_VV 0xfc001057 +#define MASK_VFWNMSAC_VV 0xfc00707f +#define MATCH_VADD_VX 0x4057 +#define MASK_VADD_VX 0xfc00707f +#define MATCH_VSUB_VX 0x8004057 +#define MASK_VSUB_VX 0xfc00707f +#define MATCH_VRSUB_VX 0xc004057 +#define MASK_VRSUB_VX 0xfc00707f +#define MATCH_VMINU_VX 0x10004057 +#define MASK_VMINU_VX 0xfc00707f +#define MATCH_VMIN_VX 0x14004057 +#define MASK_VMIN_VX 0xfc00707f +#define MATCH_VMAXU_VX 0x18004057 +#define MASK_VMAXU_VX 0xfc00707f +#define MATCH_VMAX_VX 0x1c004057 +#define MASK_VMAX_VX 0xfc00707f +#define MATCH_VAND_VX 0x24004057 +#define MASK_VAND_VX 0xfc00707f +#define MATCH_VOR_VX 0x28004057 +#define MASK_VOR_VX 0xfc00707f +#define MATCH_VXOR_VX 0x2c004057 +#define MASK_VXOR_VX 0xfc00707f +#define MATCH_VRGATHER_VX 0x30004057 +#define MASK_VRGATHER_VX 0xfc00707f +#define MATCH_VSLIDEUP_VX 0x38004057 +#define MASK_VSLIDEUP_VX 0xfc00707f +#define MATCH_VSLIDEDOWN_VX 0x3c004057 +#define MASK_VSLIDEDOWN_VX 0xfc00707f +#define MATCH_VADC_VXM 0x40004057 +#define MASK_VADC_VXM 0xfe00707f +#define MATCH_VMADC_VXM 0x44004057 +#define MASK_VMADC_VXM 0xfc00707f +#define MATCH_VSBC_VXM 0x48004057 +#define MASK_VSBC_VXM 0xfe00707f +#define MATCH_VMSBC_VXM 0x4c004057 +#define MASK_VMSBC_VXM 0xfc00707f +#define MATCH_VMERGE_VXM 0x5c004057 +#define MASK_VMERGE_VXM 0xfe00707f +#define MATCH_VMV_V_X 0x5e004057 +#define MASK_VMV_V_X 0xfff0707f +#define MATCH_VMSEQ_VX 0x60004057 +#define MASK_VMSEQ_VX 0xfc00707f +#define MATCH_VMSNE_VX 0x64004057 +#define MASK_VMSNE_VX 0xfc00707f +#define 
MATCH_VMSLTU_VX 0x68004057 +#define MASK_VMSLTU_VX 0xfc00707f +#define MATCH_VMSLT_VX 0x6c004057 +#define MASK_VMSLT_VX 0xfc00707f +#define MATCH_VMSLEU_VX 0x70004057 +#define MASK_VMSLEU_VX 0xfc00707f +#define MATCH_VMSLE_VX 0x74004057 +#define MASK_VMSLE_VX 0xfc00707f +#define MATCH_VMSGTU_VX 0x78004057 +#define MASK_VMSGTU_VX 0xfc00707f +#define MATCH_VMSGT_VX 0x7c004057 +#define MASK_VMSGT_VX 0xfc00707f +#define MATCH_VSADDU_VX 0x80004057 +#define MASK_VSADDU_VX 0xfc00707f +#define MATCH_VSADD_VX 0x84004057 +#define MASK_VSADD_VX 0xfc00707f +#define MATCH_VSSUBU_VX 0x88004057 +#define MASK_VSSUBU_VX 0xfc00707f +#define MATCH_VSSUB_VX 0x8c004057 +#define MASK_VSSUB_VX 0xfc00707f +#define MATCH_VSLL_VX 0x94004057 +#define MASK_VSLL_VX 0xfc00707f +#define MATCH_VSMUL_VX 0x9c004057 +#define MASK_VSMUL_VX 0xfc00707f +#define MATCH_VSRL_VX 0xa0004057 +#define MASK_VSRL_VX 0xfc00707f +#define MATCH_VSRA_VX 0xa4004057 +#define MASK_VSRA_VX 0xfc00707f +#define MATCH_VSSRL_VX 0xa8004057 +#define MASK_VSSRL_VX 0xfc00707f +#define MATCH_VSSRA_VX 0xac004057 +#define MASK_VSSRA_VX 0xfc00707f +#define MATCH_VNSRL_WX 0xb0004057 +#define MASK_VNSRL_WX 0xfc00707f +#define MATCH_VNSRA_WX 0xb4004057 +#define MASK_VNSRA_WX 0xfc00707f +#define MATCH_VNCLIPU_WX 0xb8004057 +#define MASK_VNCLIPU_WX 0xfc00707f +#define MATCH_VNCLIP_WX 0xbc004057 +#define MASK_VNCLIP_WX 0xfc00707f +#define MATCH_VQMACCU_VX 0xf0004057 +#define MASK_VQMACCU_VX 0xfc00707f +#define MATCH_VQMACC_VX 0xf4004057 +#define MASK_VQMACC_VX 0xfc00707f +#define MATCH_VQMACCUS_VX 0xf8004057 +#define MASK_VQMACCUS_VX 0xfc00707f +#define MATCH_VQMACCSU_VX 0xfc004057 +#define MASK_VQMACCSU_VX 0xfc00707f +#define MATCH_VADD_VV 0x57 +#define MASK_VADD_VV 0xfc00707f +#define MATCH_VSUB_VV 0x8000057 +#define MASK_VSUB_VV 0xfc00707f +#define MATCH_VMINU_VV 0x10000057 +#define MASK_VMINU_VV 0xfc00707f +#define MATCH_VMIN_VV 0x14000057 +#define MASK_VMIN_VV 0xfc00707f +#define MATCH_VMAXU_VV 0x18000057 +#define MASK_VMAXU_VV 
0xfc00707f +#define MATCH_VMAX_VV 0x1c000057 +#define MASK_VMAX_VV 0xfc00707f +#define MATCH_VAND_VV 0x24000057 +#define MASK_VAND_VV 0xfc00707f +#define MATCH_VOR_VV 0x28000057 +#define MASK_VOR_VV 0xfc00707f +#define MATCH_VXOR_VV 0x2c000057 +#define MASK_VXOR_VV 0xfc00707f +#define MATCH_VRGATHER_VV 0x30000057 +#define MASK_VRGATHER_VV 0xfc00707f +#define MATCH_VRGATHEREI16_VV 0x38000057 +#define MASK_VRGATHEREI16_VV 0xfc00707f +#define MATCH_VADC_VVM 0x40000057 +#define MASK_VADC_VVM 0xfe00707f +#define MATCH_VMADC_VVM 0x44000057 +#define MASK_VMADC_VVM 0xfc00707f +#define MATCH_VSBC_VVM 0x48000057 +#define MASK_VSBC_VVM 0xfe00707f +#define MATCH_VMSBC_VVM 0x4c000057 +#define MASK_VMSBC_VVM 0xfc00707f +#define MATCH_VMERGE_VVM 0x5c000057 +#define MASK_VMERGE_VVM 0xfe00707f +#define MATCH_VMV_V_V 0x5e000057 +#define MASK_VMV_V_V 0xfff0707f +#define MATCH_VMSEQ_VV 0x60000057 +#define MASK_VMSEQ_VV 0xfc00707f +#define MATCH_VMSNE_VV 0x64000057 +#define MASK_VMSNE_VV 0xfc00707f +#define MATCH_VMSLTU_VV 0x68000057 +#define MASK_VMSLTU_VV 0xfc00707f +#define MATCH_VMSLT_VV 0x6c000057 +#define MASK_VMSLT_VV 0xfc00707f +#define MATCH_VMSLEU_VV 0x70000057 +#define MASK_VMSLEU_VV 0xfc00707f +#define MATCH_VMSLE_VV 0x74000057 +#define MASK_VMSLE_VV 0xfc00707f +#define MATCH_VSADDU_VV 0x80000057 +#define MASK_VSADDU_VV 0xfc00707f +#define MATCH_VSADD_VV 0x84000057 +#define MASK_VSADD_VV 0xfc00707f +#define MATCH_VSSUBU_VV 0x88000057 +#define MASK_VSSUBU_VV 0xfc00707f +#define MATCH_VSSUB_VV 0x8c000057 +#define MASK_VSSUB_VV 0xfc00707f +#define MATCH_VSLL_VV 0x94000057 +#define MASK_VSLL_VV 0xfc00707f +#define MATCH_VSMUL_VV 0x9c000057 +#define MASK_VSMUL_VV 0xfc00707f +#define MATCH_VSRL_VV 0xa0000057 +#define MASK_VSRL_VV 0xfc00707f +#define MATCH_VSRA_VV 0xa4000057 +#define MASK_VSRA_VV 0xfc00707f +#define MATCH_VSSRL_VV 0xa8000057 +#define MASK_VSSRL_VV 0xfc00707f +#define MATCH_VSSRA_VV 0xac000057 +#define MASK_VSSRA_VV 0xfc00707f +#define MATCH_VNSRL_WV 0xb0000057 
+#define MASK_VNSRL_WV 0xfc00707f +#define MATCH_VNSRA_WV 0xb4000057 +#define MASK_VNSRA_WV 0xfc00707f +#define MATCH_VNCLIPU_WV 0xb8000057 +#define MASK_VNCLIPU_WV 0xfc00707f +#define MATCH_VNCLIP_WV 0xbc000057 +#define MASK_VNCLIP_WV 0xfc00707f +#define MATCH_VWREDSUMU_VS 0xc0000057 +#define MASK_VWREDSUMU_VS 0xfc00707f +#define MATCH_VWREDSUM_VS 0xc4000057 +#define MASK_VWREDSUM_VS 0xfc00707f +#define MATCH_VDOTU_VV 0xe0000057 +#define MASK_VDOTU_VV 0xfc00707f +#define MATCH_VDOT_VV 0xe4000057 +#define MASK_VDOT_VV 0xfc00707f +#define MATCH_VQMACCU_VV 0xf0000057 +#define MASK_VQMACCU_VV 0xfc00707f +#define MATCH_VQMACC_VV 0xf4000057 +#define MASK_VQMACC_VV 0xfc00707f +#define MATCH_VQMACCSU_VV 0xfc000057 +#define MASK_VQMACCSU_VV 0xfc00707f +#define MATCH_VADD_VI 0x3057 +#define MASK_VADD_VI 0xfc00707f +#define MATCH_VRSUB_VI 0xc003057 +#define MASK_VRSUB_VI 0xfc00707f +#define MATCH_VAND_VI 0x24003057 +#define MASK_VAND_VI 0xfc00707f +#define MATCH_VOR_VI 0x28003057 +#define MASK_VOR_VI 0xfc00707f +#define MATCH_VXOR_VI 0x2c003057 +#define MASK_VXOR_VI 0xfc00707f +#define MATCH_VRGATHER_VI 0x30003057 +#define MASK_VRGATHER_VI 0xfc00707f +#define MATCH_VSLIDEUP_VI 0x38003057 +#define MASK_VSLIDEUP_VI 0xfc00707f +#define MATCH_VSLIDEDOWN_VI 0x3c003057 +#define MASK_VSLIDEDOWN_VI 0xfc00707f +#define MATCH_VADC_VIM 0x40003057 +#define MASK_VADC_VIM 0xfe00707f +#define MATCH_VMADC_VIM 0x44003057 +#define MASK_VMADC_VIM 0xfc00707f +#define MATCH_VMERGE_VIM 0x5c003057 +#define MASK_VMERGE_VIM 0xfe00707f +#define MATCH_VMV_V_I 0x5e003057 +#define MASK_VMV_V_I 0xfff0707f +#define MATCH_VMSEQ_VI 0x60003057 +#define MASK_VMSEQ_VI 0xfc00707f +#define MATCH_VMSNE_VI 0x64003057 +#define MASK_VMSNE_VI 0xfc00707f +#define MATCH_VMSLEU_VI 0x70003057 +#define MASK_VMSLEU_VI 0xfc00707f +#define MATCH_VMSLE_VI 0x74003057 +#define MASK_VMSLE_VI 0xfc00707f +#define MATCH_VMSGTU_VI 0x78003057 +#define MASK_VMSGTU_VI 0xfc00707f +#define MATCH_VMSGT_VI 0x7c003057 +#define MASK_VMSGT_VI 
0xfc00707f +#define MATCH_VSADDU_VI 0x80003057 +#define MASK_VSADDU_VI 0xfc00707f +#define MATCH_VSADD_VI 0x84003057 +#define MASK_VSADD_VI 0xfc00707f +#define MATCH_VSLL_VI 0x94003057 +#define MASK_VSLL_VI 0xfc00707f +#define MATCH_VMV1R_V 0x9e003057 +#define MASK_VMV1R_V 0xfe0ff07f +#define MATCH_VMV2R_V 0x9e00b057 +#define MASK_VMV2R_V 0xfe0ff07f +#define MATCH_VMV4R_V 0x9e01b057 +#define MASK_VMV4R_V 0xfe0ff07f +#define MATCH_VMV8R_V 0x9e03b057 +#define MASK_VMV8R_V 0xfe0ff07f +#define MATCH_VSRL_VI 0xa0003057 +#define MASK_VSRL_VI 0xfc00707f +#define MATCH_VSRA_VI 0xa4003057 +#define MASK_VSRA_VI 0xfc00707f +#define MATCH_VSSRL_VI 0xa8003057 +#define MASK_VSSRL_VI 0xfc00707f +#define MATCH_VSSRA_VI 0xac003057 +#define MASK_VSSRA_VI 0xfc00707f +#define MATCH_VNSRL_WI 0xb0003057 +#define MASK_VNSRL_WI 0xfc00707f +#define MATCH_VNSRA_WI 0xb4003057 +#define MASK_VNSRA_WI 0xfc00707f +#define MATCH_VNCLIPU_WI 0xb8003057 +#define MASK_VNCLIPU_WI 0xfc00707f +#define MATCH_VNCLIP_WI 0xbc003057 +#define MASK_VNCLIP_WI 0xfc00707f +#define MATCH_VREDSUM_VS 0x2057 +#define MASK_VREDSUM_VS 0xfc00707f +#define MATCH_VREDAND_VS 0x4002057 +#define MASK_VREDAND_VS 0xfc00707f +#define MATCH_VREDOR_VS 0x8002057 +#define MASK_VREDOR_VS 0xfc00707f +#define MATCH_VREDXOR_VS 0xc002057 +#define MASK_VREDXOR_VS 0xfc00707f +#define MATCH_VREDMINU_VS 0x10002057 +#define MASK_VREDMINU_VS 0xfc00707f +#define MATCH_VREDMIN_VS 0x14002057 +#define MASK_VREDMIN_VS 0xfc00707f +#define MATCH_VREDMAXU_VS 0x18002057 +#define MASK_VREDMAXU_VS 0xfc00707f +#define MATCH_VREDMAX_VS 0x1c002057 +#define MASK_VREDMAX_VS 0xfc00707f +#define MATCH_VAADDU_VV 0x20002057 +#define MASK_VAADDU_VV 0xfc00707f +#define MATCH_VAADD_VV 0x24002057 +#define MASK_VAADD_VV 0xfc00707f +#define MATCH_VASUBU_VV 0x28002057 +#define MASK_VASUBU_VV 0xfc00707f +#define MATCH_VASUB_VV 0x2c002057 +#define MASK_VASUB_VV 0xfc00707f +#define MATCH_VMV_X_S 0x42002057 +#define MASK_VMV_X_S 0xfe0ff07f +#define MATCH_VZEXT_VF8 
0x48012057 +#define MASK_VZEXT_VF8 0xfc0ff07f +#define MATCH_VSEXT_VF8 0x4801a057 +#define MASK_VSEXT_VF8 0xfc0ff07f +#define MATCH_VZEXT_VF4 0x48022057 +#define MASK_VZEXT_VF4 0xfc0ff07f +#define MATCH_VSEXT_VF4 0x4802a057 +#define MASK_VSEXT_VF4 0xfc0ff07f +#define MATCH_VZEXT_VF2 0x48032057 +#define MASK_VZEXT_VF2 0xfc0ff07f +#define MATCH_VSEXT_VF2 0x4803a057 +#define MASK_VSEXT_VF2 0xfc0ff07f +#define MATCH_VCOMPRESS_VM 0x5e002057 +#define MASK_VCOMPRESS_VM 0xfe00707f +#define MATCH_VMANDNOT_MM 0x60002057 +#define MASK_VMANDNOT_MM 0xfc00707f +#define MATCH_VMAND_MM 0x64002057 +#define MASK_VMAND_MM 0xfc00707f +#define MATCH_VMOR_MM 0x68002057 +#define MASK_VMOR_MM 0xfc00707f +#define MATCH_VMXOR_MM 0x6c002057 +#define MASK_VMXOR_MM 0xfc00707f +#define MATCH_VMORNOT_MM 0x70002057 +#define MASK_VMORNOT_MM 0xfc00707f +#define MATCH_VMNAND_MM 0x74002057 +#define MASK_VMNAND_MM 0xfc00707f +#define MATCH_VMNOR_MM 0x78002057 +#define MASK_VMNOR_MM 0xfc00707f +#define MATCH_VMXNOR_MM 0x7c002057 +#define MASK_VMXNOR_MM 0xfc00707f +#define MATCH_VMSBF_M 0x5000a057 +#define MASK_VMSBF_M 0xfc0ff07f +#define MATCH_VMSOF_M 0x50012057 +#define MASK_VMSOF_M 0xfc0ff07f +#define MATCH_VMSIF_M 0x5001a057 +#define MASK_VMSIF_M 0xfc0ff07f +#define MATCH_VIOTA_M 0x50082057 +#define MASK_VIOTA_M 0xfc0ff07f +#define MATCH_VID_V 0x5008a057 +#define MASK_VID_V 0xfdfff07f +#define MATCH_VPOPC_M 0x40082057 +#define MASK_VPOPC_M 0xfc0ff07f +#define MATCH_VFIRST_M 0x4008a057 +#define MASK_VFIRST_M 0xfc0ff07f +#define MATCH_VDIVU_VV 0x80002057 +#define MASK_VDIVU_VV 0xfc00707f +#define MATCH_VDIV_VV 0x84002057 +#define MASK_VDIV_VV 0xfc00707f +#define MATCH_VREMU_VV 0x88002057 +#define MASK_VREMU_VV 0xfc00707f +#define MATCH_VREM_VV 0x8c002057 +#define MASK_VREM_VV 0xfc00707f +#define MATCH_VMULHU_VV 0x90002057 +#define MASK_VMULHU_VV 0xfc00707f +#define MATCH_VMUL_VV 0x94002057 +#define MASK_VMUL_VV 0xfc00707f +#define MATCH_VMULHSU_VV 0x98002057 +#define MASK_VMULHSU_VV 0xfc00707f 
+#define MATCH_VMULH_VV 0x9c002057 +#define MASK_VMULH_VV 0xfc00707f +#define MATCH_VMADD_VV 0xa4002057 +#define MASK_VMADD_VV 0xfc00707f +#define MATCH_VNMSUB_VV 0xac002057 +#define MASK_VNMSUB_VV 0xfc00707f +#define MATCH_VMACC_VV 0xb4002057 +#define MASK_VMACC_VV 0xfc00707f +#define MATCH_VNMSAC_VV 0xbc002057 +#define MASK_VNMSAC_VV 0xfc00707f +#define MATCH_VWADDU_VV 0xc0002057 +#define MASK_VWADDU_VV 0xfc00707f +#define MATCH_VWADD_VV 0xc4002057 +#define MASK_VWADD_VV 0xfc00707f +#define MATCH_VWSUBU_VV 0xc8002057 +#define MASK_VWSUBU_VV 0xfc00707f +#define MATCH_VWSUB_VV 0xcc002057 +#define MASK_VWSUB_VV 0xfc00707f +#define MATCH_VWADDU_WV 0xd0002057 +#define MASK_VWADDU_WV 0xfc00707f +#define MATCH_VWADD_WV 0xd4002057 +#define MASK_VWADD_WV 0xfc00707f +#define MATCH_VWSUBU_WV 0xd8002057 +#define MASK_VWSUBU_WV 0xfc00707f +#define MATCH_VWSUB_WV 0xdc002057 +#define MASK_VWSUB_WV 0xfc00707f +#define MATCH_VWMULU_VV 0xe0002057 +#define MASK_VWMULU_VV 0xfc00707f +#define MATCH_VWMULSU_VV 0xe8002057 +#define MASK_VWMULSU_VV 0xfc00707f +#define MATCH_VWMUL_VV 0xec002057 +#define MASK_VWMUL_VV 0xfc00707f +#define MATCH_VWMACCU_VV 0xf0002057 +#define MASK_VWMACCU_VV 0xfc00707f +#define MATCH_VWMACC_VV 0xf4002057 +#define MASK_VWMACC_VV 0xfc00707f +#define MATCH_VWMACCSU_VV 0xfc002057 +#define MASK_VWMACCSU_VV 0xfc00707f +#define MATCH_VAADDU_VX 0x20006057 +#define MASK_VAADDU_VX 0xfc00707f +#define MATCH_VAADD_VX 0x24006057 +#define MASK_VAADD_VX 0xfc00707f +#define MATCH_VASUBU_VX 0x28006057 +#define MASK_VASUBU_VX 0xfc00707f +#define MATCH_VASUB_VX 0x2c006057 +#define MASK_VASUB_VX 0xfc00707f +#define MATCH_VMV_S_X 0x42006057 +#define MASK_VMV_S_X 0xfff0707f +#define MATCH_VSLIDE1UP_VX 0x38006057 +#define MASK_VSLIDE1UP_VX 0xfc00707f +#define MATCH_VSLIDE1DOWN_VX 0x3c006057 +#define MASK_VSLIDE1DOWN_VX 0xfc00707f +#define MATCH_VDIVU_VX 0x80006057 +#define MASK_VDIVU_VX 0xfc00707f +#define MATCH_VDIV_VX 0x84006057 +#define MASK_VDIV_VX 0xfc00707f +#define 
MATCH_VREMU_VX 0x88006057 +#define MASK_VREMU_VX 0xfc00707f +#define MATCH_VREM_VX 0x8c006057 +#define MASK_VREM_VX 0xfc00707f +#define MATCH_VMULHU_VX 0x90006057 +#define MASK_VMULHU_VX 0xfc00707f +#define MATCH_VMUL_VX 0x94006057 +#define MASK_VMUL_VX 0xfc00707f +#define MATCH_VMULHSU_VX 0x98006057 +#define MASK_VMULHSU_VX 0xfc00707f +#define MATCH_VMULH_VX 0x9c006057 +#define MASK_VMULH_VX 0xfc00707f +#define MATCH_VMADD_VX 0xa4006057 +#define MASK_VMADD_VX 0xfc00707f +#define MATCH_VNMSUB_VX 0xac006057 +#define MASK_VNMSUB_VX 0xfc00707f +#define MATCH_VMACC_VX 0xb4006057 +#define MASK_VMACC_VX 0xfc00707f +#define MATCH_VNMSAC_VX 0xbc006057 +#define MASK_VNMSAC_VX 0xfc00707f +#define MATCH_VWADDU_VX 0xc0006057 +#define MASK_VWADDU_VX 0xfc00707f +#define MATCH_VWADD_VX 0xc4006057 +#define MASK_VWADD_VX 0xfc00707f +#define MATCH_VWSUBU_VX 0xc8006057 +#define MASK_VWSUBU_VX 0xfc00707f +#define MATCH_VWSUB_VX 0xcc006057 +#define MASK_VWSUB_VX 0xfc00707f +#define MATCH_VWADDU_WX 0xd0006057 +#define MASK_VWADDU_WX 0xfc00707f +#define MATCH_VWADD_WX 0xd4006057 +#define MASK_VWADD_WX 0xfc00707f +#define MATCH_VWSUBU_WX 0xd8006057 +#define MASK_VWSUBU_WX 0xfc00707f +#define MATCH_VWSUB_WX 0xdc006057 +#define MASK_VWSUB_WX 0xfc00707f +#define MATCH_VWMULU_VX 0xe0006057 +#define MASK_VWMULU_VX 0xfc00707f +#define MATCH_VWMULSU_VX 0xe8006057 +#define MASK_VWMULSU_VX 0xfc00707f +#define MATCH_VWMUL_VX 0xec006057 +#define MASK_VWMUL_VX 0xfc00707f +#define MATCH_VWMACCU_VX 0xf0006057 +#define MASK_VWMACCU_VX 0xfc00707f +#define MATCH_VWMACC_VX 0xf4006057 +#define MASK_VWMACC_VX 0xfc00707f +#define MATCH_VWMACCUS_VX 0xf8006057 +#define MASK_VWMACCUS_VX 0xfc00707f +#define MATCH_VWMACCSU_VX 0xfc006057 +#define MASK_VWMACCSU_VX 0xfc00707f +#define MATCH_VAMOSWAPEI8_V 0x800002f +#define MASK_VAMOSWAPEI8_V 0xf800707f +#define MATCH_VAMOADDEI8_V 0x2f +#define MASK_VAMOADDEI8_V 0xf800707f +#define MATCH_VAMOXOREI8_V 0x2000002f +#define MASK_VAMOXOREI8_V 0xf800707f +#define 
MATCH_VAMOANDEI8_V 0x6000002f +#define MASK_VAMOANDEI8_V 0xf800707f +#define MATCH_VAMOOREI8_V 0x4000002f +#define MASK_VAMOOREI8_V 0xf800707f +#define MATCH_VAMOMINEI8_V 0x8000002f +#define MASK_VAMOMINEI8_V 0xf800707f +#define MATCH_VAMOMAXEI8_V 0xa000002f +#define MASK_VAMOMAXEI8_V 0xf800707f +#define MATCH_VAMOMINUEI8_V 0xc000002f +#define MASK_VAMOMINUEI8_V 0xf800707f +#define MATCH_VAMOMAXUEI8_V 0xe000002f +#define MASK_VAMOMAXUEI8_V 0xf800707f +#define MATCH_VAMOSWAPEI16_V 0x800502f +#define MASK_VAMOSWAPEI16_V 0xf800707f +#define MATCH_VAMOADDEI16_V 0x502f +#define MASK_VAMOADDEI16_V 0xf800707f +#define MATCH_VAMOXOREI16_V 0x2000502f +#define MASK_VAMOXOREI16_V 0xf800707f +#define MATCH_VAMOANDEI16_V 0x6000502f +#define MASK_VAMOANDEI16_V 0xf800707f +#define MATCH_VAMOOREI16_V 0x4000502f +#define MASK_VAMOOREI16_V 0xf800707f +#define MATCH_VAMOMINEI16_V 0x8000502f +#define MASK_VAMOMINEI16_V 0xf800707f +#define MATCH_VAMOMAXEI16_V 0xa000502f +#define MASK_VAMOMAXEI16_V 0xf800707f +#define MATCH_VAMOMINUEI16_V 0xc000502f +#define MASK_VAMOMINUEI16_V 0xf800707f +#define MATCH_VAMOMAXUEI16_V 0xe000502f +#define MASK_VAMOMAXUEI16_V 0xf800707f +#define MATCH_VAMOSWAPEI32_V 0x800602f +#define MASK_VAMOSWAPEI32_V 0xf800707f +#define MATCH_VAMOADDEI32_V 0x602f +#define MASK_VAMOADDEI32_V 0xf800707f +#define MATCH_VAMOXOREI32_V 0x2000602f +#define MASK_VAMOXOREI32_V 0xf800707f +#define MATCH_VAMOANDEI32_V 0x6000602f +#define MASK_VAMOANDEI32_V 0xf800707f +#define MATCH_VAMOOREI32_V 0x4000602f +#define MASK_VAMOOREI32_V 0xf800707f +#define MATCH_VAMOMINEI32_V 0x8000602f +#define MASK_VAMOMINEI32_V 0xf800707f +#define MATCH_VAMOMAXEI32_V 0xa000602f +#define MASK_VAMOMAXEI32_V 0xf800707f +#define MATCH_VAMOMINUEI32_V 0xc000602f +#define MASK_VAMOMINUEI32_V 0xf800707f +#define MATCH_VAMOMAXUEI32_V 0xe000602f +#define MASK_VAMOMAXUEI32_V 0xf800707f +#define MATCH_VAMOSWAPEI64_V 0x800702f +#define MASK_VAMOSWAPEI64_V 0xf800707f +#define MATCH_VAMOADDEI64_V 0x702f +#define 
MASK_VAMOADDEI64_V 0xf800707f +#define MATCH_VAMOXOREI64_V 0x2000702f +#define MASK_VAMOXOREI64_V 0xf800707f +#define MATCH_VAMOANDEI64_V 0x6000702f +#define MASK_VAMOANDEI64_V 0xf800707f +#define MATCH_VAMOOREI64_V 0x4000702f +#define MASK_VAMOOREI64_V 0xf800707f +#define MATCH_VAMOMINEI64_V 0x8000702f +#define MASK_VAMOMINEI64_V 0xf800707f +#define MATCH_VAMOMAXEI64_V 0xa000702f +#define MASK_VAMOMAXEI64_V 0xf800707f +#define MATCH_VAMOMINUEI64_V 0xc000702f +#define MASK_VAMOMINUEI64_V 0xf800707f +#define MATCH_VAMOMAXUEI64_V 0xe000702f +#define MASK_VAMOMAXUEI64_V 0xf800707f #define MATCH_VMVNFR_V 0x9e003057 #define MASK_VMVNFR_V 0xfe00707f #define MATCH_VL1R_V 0x2800007 @@ -2499,6 +3381,447 @@ DECLARE_INSN(c_add, MATCH_C_ADD, MASK_C_ADD) DECLARE_INSN(c_fsdsp, MATCH_C_FSDSP, MASK_C_FSDSP) DECLARE_INSN(c_swsp, MATCH_C_SWSP, MASK_C_SWSP) DECLARE_INSN(c_fswsp, MATCH_C_FSWSP, MASK_C_FSWSP) +DECLARE_INSN(vsetvli, MATCH_VSETVLI, MASK_VSETVLI) +DECLARE_INSN(vsetvl, MATCH_VSETVL, MASK_VSETVL) +DECLARE_INSN(vle8_v, MATCH_VLE8_V, MASK_VLE8_V) +DECLARE_INSN(vle16_v, MATCH_VLE16_V, MASK_VLE16_V) +DECLARE_INSN(vle32_v, MATCH_VLE32_V, MASK_VLE32_V) +DECLARE_INSN(vle64_v, MATCH_VLE64_V, MASK_VLE64_V) +DECLARE_INSN(vle128_v, MATCH_VLE128_V, MASK_VLE128_V) +DECLARE_INSN(vle256_v, MATCH_VLE256_V, MASK_VLE256_V) +DECLARE_INSN(vle512_v, MATCH_VLE512_V, MASK_VLE512_V) +DECLARE_INSN(vle1024_v, MATCH_VLE1024_V, MASK_VLE1024_V) +DECLARE_INSN(vse8_v, MATCH_VSE8_V, MASK_VSE8_V) +DECLARE_INSN(vse16_v, MATCH_VSE16_V, MASK_VSE16_V) +DECLARE_INSN(vse32_v, MATCH_VSE32_V, MASK_VSE32_V) +DECLARE_INSN(vse64_v, MATCH_VSE64_V, MASK_VSE64_V) +DECLARE_INSN(vse128_v, MATCH_VSE128_V, MASK_VSE128_V) +DECLARE_INSN(vse256_v, MATCH_VSE256_V, MASK_VSE256_V) +DECLARE_INSN(vse512_v, MATCH_VSE512_V, MASK_VSE512_V) +DECLARE_INSN(vse1024_v, MATCH_VSE1024_V, MASK_VSE1024_V) +DECLARE_INSN(vlse8_v, MATCH_VLSE8_V, MASK_VLSE8_V) +DECLARE_INSN(vlse16_v, MATCH_VLSE16_V, MASK_VLSE16_V) +DECLARE_INSN(vlse32_v, 
MATCH_VLSE32_V, MASK_VLSE32_V) +DECLARE_INSN(vlse64_v, MATCH_VLSE64_V, MASK_VLSE64_V) +DECLARE_INSN(vlse128_v, MATCH_VLSE128_V, MASK_VLSE128_V) +DECLARE_INSN(vlse256_v, MATCH_VLSE256_V, MASK_VLSE256_V) +DECLARE_INSN(vlse512_v, MATCH_VLSE512_V, MASK_VLSE512_V) +DECLARE_INSN(vlse1024_v, MATCH_VLSE1024_V, MASK_VLSE1024_V) +DECLARE_INSN(vsse8_v, MATCH_VSSE8_V, MASK_VSSE8_V) +DECLARE_INSN(vsse16_v, MATCH_VSSE16_V, MASK_VSSE16_V) +DECLARE_INSN(vsse32_v, MATCH_VSSE32_V, MASK_VSSE32_V) +DECLARE_INSN(vsse64_v, MATCH_VSSE64_V, MASK_VSSE64_V) +DECLARE_INSN(vsse128_v, MATCH_VSSE128_V, MASK_VSSE128_V) +DECLARE_INSN(vsse256_v, MATCH_VSSE256_V, MASK_VSSE256_V) +DECLARE_INSN(vsse512_v, MATCH_VSSE512_V, MASK_VSSE512_V) +DECLARE_INSN(vsse1024_v, MATCH_VSSE1024_V, MASK_VSSE1024_V) +DECLARE_INSN(vlxei8_v, MATCH_VLXEI8_V, MASK_VLXEI8_V) +DECLARE_INSN(vlxei16_v, MATCH_VLXEI16_V, MASK_VLXEI16_V) +DECLARE_INSN(vlxei32_v, MATCH_VLXEI32_V, MASK_VLXEI32_V) +DECLARE_INSN(vlxei64_v, MATCH_VLXEI64_V, MASK_VLXEI64_V) +DECLARE_INSN(vlxei128_v, MATCH_VLXEI128_V, MASK_VLXEI128_V) +DECLARE_INSN(vlxei256_v, MATCH_VLXEI256_V, MASK_VLXEI256_V) +DECLARE_INSN(vlxei512_v, MATCH_VLXEI512_V, MASK_VLXEI512_V) +DECLARE_INSN(vlxei1024_v, MATCH_VLXEI1024_V, MASK_VLXEI1024_V) +DECLARE_INSN(vsxei8_v, MATCH_VSXEI8_V, MASK_VSXEI8_V) +DECLARE_INSN(vsxei16_v, MATCH_VSXEI16_V, MASK_VSXEI16_V) +DECLARE_INSN(vsxei32_v, MATCH_VSXEI32_V, MASK_VSXEI32_V) +DECLARE_INSN(vsxei64_v, MATCH_VSXEI64_V, MASK_VSXEI64_V) +DECLARE_INSN(vsxei128_v, MATCH_VSXEI128_V, MASK_VSXEI128_V) +DECLARE_INSN(vsxei256_v, MATCH_VSXEI256_V, MASK_VSXEI256_V) +DECLARE_INSN(vsxei512_v, MATCH_VSXEI512_V, MASK_VSXEI512_V) +DECLARE_INSN(vsxei1024_v, MATCH_VSXEI1024_V, MASK_VSXEI1024_V) +DECLARE_INSN(vsuxei8_v, MATCH_VSUXEI8_V, MASK_VSUXEI8_V) +DECLARE_INSN(vsuxei16_v, MATCH_VSUXEI16_V, MASK_VSUXEI16_V) +DECLARE_INSN(vsuxei32_v, MATCH_VSUXEI32_V, MASK_VSUXEI32_V) +DECLARE_INSN(vsuxei64_v, MATCH_VSUXEI64_V, MASK_VSUXEI64_V) +DECLARE_INSN(vsuxei128_v, 
MATCH_VSUXEI128_V, MASK_VSUXEI128_V) +DECLARE_INSN(vsuxei256_v, MATCH_VSUXEI256_V, MASK_VSUXEI256_V) +DECLARE_INSN(vsuxei512_v, MATCH_VSUXEI512_V, MASK_VSUXEI512_V) +DECLARE_INSN(vsuxei1024_v, MATCH_VSUXEI1024_V, MASK_VSUXEI1024_V) +DECLARE_INSN(vle8ff_v, MATCH_VLE8FF_V, MASK_VLE8FF_V) +DECLARE_INSN(vle16ff_v, MATCH_VLE16FF_V, MASK_VLE16FF_V) +DECLARE_INSN(vle32ff_v, MATCH_VLE32FF_V, MASK_VLE32FF_V) +DECLARE_INSN(vle64ff_v, MATCH_VLE64FF_V, MASK_VLE64FF_V) +DECLARE_INSN(vle128ff_v, MATCH_VLE128FF_V, MASK_VLE128FF_V) +DECLARE_INSN(vle256ff_v, MATCH_VLE256FF_V, MASK_VLE256FF_V) +DECLARE_INSN(vle512ff_v, MATCH_VLE512FF_V, MASK_VLE512FF_V) +DECLARE_INSN(vle1024ff_v, MATCH_VLE1024FF_V, MASK_VLE1024FF_V) +DECLARE_INSN(vl1re8_v, MATCH_VL1RE8_V, MASK_VL1RE8_V) +DECLARE_INSN(vl1re16_v, MATCH_VL1RE16_V, MASK_VL1RE16_V) +DECLARE_INSN(vl1re32_v, MATCH_VL1RE32_V, MASK_VL1RE32_V) +DECLARE_INSN(vl1re64_v, MATCH_VL1RE64_V, MASK_VL1RE64_V) +DECLARE_INSN(vl2re8_v, MATCH_VL2RE8_V, MASK_VL2RE8_V) +DECLARE_INSN(vl2re16_v, MATCH_VL2RE16_V, MASK_VL2RE16_V) +DECLARE_INSN(vl2re32_v, MATCH_VL2RE32_V, MASK_VL2RE32_V) +DECLARE_INSN(vl2re64_v, MATCH_VL2RE64_V, MASK_VL2RE64_V) +DECLARE_INSN(vl4re8_v, MATCH_VL4RE8_V, MASK_VL4RE8_V) +DECLARE_INSN(vl4re16_v, MATCH_VL4RE16_V, MASK_VL4RE16_V) +DECLARE_INSN(vl4re32_v, MATCH_VL4RE32_V, MASK_VL4RE32_V) +DECLARE_INSN(vl4re64_v, MATCH_VL4RE64_V, MASK_VL4RE64_V) +DECLARE_INSN(vl8re8_v, MATCH_VL8RE8_V, MASK_VL8RE8_V) +DECLARE_INSN(vl8re16_v, MATCH_VL8RE16_V, MASK_VL8RE16_V) +DECLARE_INSN(vl8re32_v, MATCH_VL8RE32_V, MASK_VL8RE32_V) +DECLARE_INSN(vl8re64_v, MATCH_VL8RE64_V, MASK_VL8RE64_V) +DECLARE_INSN(vs1r_v, MATCH_VS1R_V, MASK_VS1R_V) +DECLARE_INSN(vs2r_v, MATCH_VS2R_V, MASK_VS2R_V) +DECLARE_INSN(vs4r_v, MATCH_VS4R_V, MASK_VS4R_V) +DECLARE_INSN(vs8r_v, MATCH_VS8R_V, MASK_VS8R_V) +DECLARE_INSN(vfadd_vf, MATCH_VFADD_VF, MASK_VFADD_VF) +DECLARE_INSN(vfsub_vf, MATCH_VFSUB_VF, MASK_VFSUB_VF) +DECLARE_INSN(vfmin_vf, MATCH_VFMIN_VF, MASK_VFMIN_VF) 
+DECLARE_INSN(vfmax_vf, MATCH_VFMAX_VF, MASK_VFMAX_VF) +DECLARE_INSN(vfsgnj_vf, MATCH_VFSGNJ_VF, MASK_VFSGNJ_VF) +DECLARE_INSN(vfsgnjn_vf, MATCH_VFSGNJN_VF, MASK_VFSGNJN_VF) +DECLARE_INSN(vfsgnjx_vf, MATCH_VFSGNJX_VF, MASK_VFSGNJX_VF) +DECLARE_INSN(vfslide1up_vf, MATCH_VFSLIDE1UP_VF, MASK_VFSLIDE1UP_VF) +DECLARE_INSN(vfslide1down_vf, MATCH_VFSLIDE1DOWN_VF, MASK_VFSLIDE1DOWN_VF) +DECLARE_INSN(vfmv_s_f, MATCH_VFMV_S_F, MASK_VFMV_S_F) +DECLARE_INSN(vfmerge_vfm, MATCH_VFMERGE_VFM, MASK_VFMERGE_VFM) +DECLARE_INSN(vfmv_v_f, MATCH_VFMV_V_F, MASK_VFMV_V_F) +DECLARE_INSN(vmfeq_vf, MATCH_VMFEQ_VF, MASK_VMFEQ_VF) +DECLARE_INSN(vmfle_vf, MATCH_VMFLE_VF, MASK_VMFLE_VF) +DECLARE_INSN(vmflt_vf, MATCH_VMFLT_VF, MASK_VMFLT_VF) +DECLARE_INSN(vmfne_vf, MATCH_VMFNE_VF, MASK_VMFNE_VF) +DECLARE_INSN(vmfgt_vf, MATCH_VMFGT_VF, MASK_VMFGT_VF) +DECLARE_INSN(vmfge_vf, MATCH_VMFGE_VF, MASK_VMFGE_VF) +DECLARE_INSN(vfdiv_vf, MATCH_VFDIV_VF, MASK_VFDIV_VF) +DECLARE_INSN(vfrdiv_vf, MATCH_VFRDIV_VF, MASK_VFRDIV_VF) +DECLARE_INSN(vfmul_vf, MATCH_VFMUL_VF, MASK_VFMUL_VF) +DECLARE_INSN(vfrsub_vf, MATCH_VFRSUB_VF, MASK_VFRSUB_VF) +DECLARE_INSN(vfmadd_vf, MATCH_VFMADD_VF, MASK_VFMADD_VF) +DECLARE_INSN(vfnmadd_vf, MATCH_VFNMADD_VF, MASK_VFNMADD_VF) +DECLARE_INSN(vfmsub_vf, MATCH_VFMSUB_VF, MASK_VFMSUB_VF) +DECLARE_INSN(vfnmsub_vf, MATCH_VFNMSUB_VF, MASK_VFNMSUB_VF) +DECLARE_INSN(vfmacc_vf, MATCH_VFMACC_VF, MASK_VFMACC_VF) +DECLARE_INSN(vfnmacc_vf, MATCH_VFNMACC_VF, MASK_VFNMACC_VF) +DECLARE_INSN(vfmsac_vf, MATCH_VFMSAC_VF, MASK_VFMSAC_VF) +DECLARE_INSN(vfnmsac_vf, MATCH_VFNMSAC_VF, MASK_VFNMSAC_VF) +DECLARE_INSN(vfwadd_vf, MATCH_VFWADD_VF, MASK_VFWADD_VF) +DECLARE_INSN(vfwsub_vf, MATCH_VFWSUB_VF, MASK_VFWSUB_VF) +DECLARE_INSN(vfwadd_wf, MATCH_VFWADD_WF, MASK_VFWADD_WF) +DECLARE_INSN(vfwsub_wf, MATCH_VFWSUB_WF, MASK_VFWSUB_WF) +DECLARE_INSN(vfwmul_vf, MATCH_VFWMUL_VF, MASK_VFWMUL_VF) +DECLARE_INSN(vfwmacc_vf, MATCH_VFWMACC_VF, MASK_VFWMACC_VF) +DECLARE_INSN(vfwnmacc_vf, MATCH_VFWNMACC_VF, 
MASK_VFWNMACC_VF) +DECLARE_INSN(vfwmsac_vf, MATCH_VFWMSAC_VF, MASK_VFWMSAC_VF) +DECLARE_INSN(vfwnmsac_vf, MATCH_VFWNMSAC_VF, MASK_VFWNMSAC_VF) +DECLARE_INSN(vfadd_vv, MATCH_VFADD_VV, MASK_VFADD_VV) +DECLARE_INSN(vfredsum_vs, MATCH_VFREDSUM_VS, MASK_VFREDSUM_VS) +DECLARE_INSN(vfsub_vv, MATCH_VFSUB_VV, MASK_VFSUB_VV) +DECLARE_INSN(vfredosum_vs, MATCH_VFREDOSUM_VS, MASK_VFREDOSUM_VS) +DECLARE_INSN(vfmin_vv, MATCH_VFMIN_VV, MASK_VFMIN_VV) +DECLARE_INSN(vfredmin_vs, MATCH_VFREDMIN_VS, MASK_VFREDMIN_VS) +DECLARE_INSN(vfmax_vv, MATCH_VFMAX_VV, MASK_VFMAX_VV) +DECLARE_INSN(vfredmax_vs, MATCH_VFREDMAX_VS, MASK_VFREDMAX_VS) +DECLARE_INSN(vfsgnj_vv, MATCH_VFSGNJ_VV, MASK_VFSGNJ_VV) +DECLARE_INSN(vfsgnjn_vv, MATCH_VFSGNJN_VV, MASK_VFSGNJN_VV) +DECLARE_INSN(vfsgnjx_vv, MATCH_VFSGNJX_VV, MASK_VFSGNJX_VV) +DECLARE_INSN(vfmv_f_s, MATCH_VFMV_F_S, MASK_VFMV_F_S) +DECLARE_INSN(vmfeq_vv, MATCH_VMFEQ_VV, MASK_VMFEQ_VV) +DECLARE_INSN(vmfle_vv, MATCH_VMFLE_VV, MASK_VMFLE_VV) +DECLARE_INSN(vmflt_vv, MATCH_VMFLT_VV, MASK_VMFLT_VV) +DECLARE_INSN(vmfne_vv, MATCH_VMFNE_VV, MASK_VMFNE_VV) +DECLARE_INSN(vfdiv_vv, MATCH_VFDIV_VV, MASK_VFDIV_VV) +DECLARE_INSN(vfmul_vv, MATCH_VFMUL_VV, MASK_VFMUL_VV) +DECLARE_INSN(vfmadd_vv, MATCH_VFMADD_VV, MASK_VFMADD_VV) +DECLARE_INSN(vfnmadd_vv, MATCH_VFNMADD_VV, MASK_VFNMADD_VV) +DECLARE_INSN(vfmsub_vv, MATCH_VFMSUB_VV, MASK_VFMSUB_VV) +DECLARE_INSN(vfnmsub_vv, MATCH_VFNMSUB_VV, MASK_VFNMSUB_VV) +DECLARE_INSN(vfmacc_vv, MATCH_VFMACC_VV, MASK_VFMACC_VV) +DECLARE_INSN(vfnmacc_vv, MATCH_VFNMACC_VV, MASK_VFNMACC_VV) +DECLARE_INSN(vfmsac_vv, MATCH_VFMSAC_VV, MASK_VFMSAC_VV) +DECLARE_INSN(vfnmsac_vv, MATCH_VFNMSAC_VV, MASK_VFNMSAC_VV) +DECLARE_INSN(vfcvt_xu_f_v, MATCH_VFCVT_XU_F_V, MASK_VFCVT_XU_F_V) +DECLARE_INSN(vfcvt_x_f_v, MATCH_VFCVT_X_F_V, MASK_VFCVT_X_F_V) +DECLARE_INSN(vfcvt_f_xu_v, MATCH_VFCVT_F_XU_V, MASK_VFCVT_F_XU_V) +DECLARE_INSN(vfcvt_f_x_v, MATCH_VFCVT_F_X_V, MASK_VFCVT_F_X_V) +DECLARE_INSN(vfcvt_rtz_xu_f_v, MATCH_VFCVT_RTZ_XU_F_V, 
MASK_VFCVT_RTZ_XU_F_V) +DECLARE_INSN(vfcvt_rtz_x_f_v, MATCH_VFCVT_RTZ_X_F_V, MASK_VFCVT_RTZ_X_F_V) +DECLARE_INSN(vfwcvt_xu_f_v, MATCH_VFWCVT_XU_F_V, MASK_VFWCVT_XU_F_V) +DECLARE_INSN(vfwcvt_x_f_v, MATCH_VFWCVT_X_F_V, MASK_VFWCVT_X_F_V) +DECLARE_INSN(vfwcvt_f_xu_v, MATCH_VFWCVT_F_XU_V, MASK_VFWCVT_F_XU_V) +DECLARE_INSN(vfwcvt_f_x_v, MATCH_VFWCVT_F_X_V, MASK_VFWCVT_F_X_V) +DECLARE_INSN(vfwcvt_f_f_v, MATCH_VFWCVT_F_F_V, MASK_VFWCVT_F_F_V) +DECLARE_INSN(vfwcvt_rtz_xu_f_v, MATCH_VFWCVT_RTZ_XU_F_V, MASK_VFWCVT_RTZ_XU_F_V) +DECLARE_INSN(vfwcvt_rtz_x_f_v, MATCH_VFWCVT_RTZ_X_F_V, MASK_VFWCVT_RTZ_X_F_V) +DECLARE_INSN(vfncvt_xu_f_w, MATCH_VFNCVT_XU_F_W, MASK_VFNCVT_XU_F_W) +DECLARE_INSN(vfncvt_x_f_w, MATCH_VFNCVT_X_F_W, MASK_VFNCVT_X_F_W) +DECLARE_INSN(vfncvt_f_xu_w, MATCH_VFNCVT_F_XU_W, MASK_VFNCVT_F_XU_W) +DECLARE_INSN(vfncvt_f_x_w, MATCH_VFNCVT_F_X_W, MASK_VFNCVT_F_X_W) +DECLARE_INSN(vfncvt_f_f_w, MATCH_VFNCVT_F_F_W, MASK_VFNCVT_F_F_W) +DECLARE_INSN(vfncvt_rod_f_f_w, MATCH_VFNCVT_ROD_F_F_W, MASK_VFNCVT_ROD_F_F_W) +DECLARE_INSN(vfncvt_rtz_xu_f_w, MATCH_VFNCVT_RTZ_XU_F_W, MASK_VFNCVT_RTZ_XU_F_W) +DECLARE_INSN(vfncvt_rtz_x_f_w, MATCH_VFNCVT_RTZ_X_F_W, MASK_VFNCVT_RTZ_X_F_W) +DECLARE_INSN(vfsqrt_v, MATCH_VFSQRT_V, MASK_VFSQRT_V) +DECLARE_INSN(vfrsqrte7_v, MATCH_VFRSQRTE7_V, MASK_VFRSQRTE7_V) +DECLARE_INSN(vfrece7_v, MATCH_VFRECE7_V, MASK_VFRECE7_V) +DECLARE_INSN(vfclass_v, MATCH_VFCLASS_V, MASK_VFCLASS_V) +DECLARE_INSN(vfwadd_vv, MATCH_VFWADD_VV, MASK_VFWADD_VV) +DECLARE_INSN(vfwredsum_vs, MATCH_VFWREDSUM_VS, MASK_VFWREDSUM_VS) +DECLARE_INSN(vfwsub_vv, MATCH_VFWSUB_VV, MASK_VFWSUB_VV) +DECLARE_INSN(vfwredosum_vs, MATCH_VFWREDOSUM_VS, MASK_VFWREDOSUM_VS) +DECLARE_INSN(vfwadd_wv, MATCH_VFWADD_WV, MASK_VFWADD_WV) +DECLARE_INSN(vfwsub_wv, MATCH_VFWSUB_WV, MASK_VFWSUB_WV) +DECLARE_INSN(vfwmul_vv, MATCH_VFWMUL_VV, MASK_VFWMUL_VV) +DECLARE_INSN(vfdot_vv, MATCH_VFDOT_VV, MASK_VFDOT_VV) +DECLARE_INSN(vfwmacc_vv, MATCH_VFWMACC_VV, MASK_VFWMACC_VV) +DECLARE_INSN(vfwnmacc_vv, 
MATCH_VFWNMACC_VV, MASK_VFWNMACC_VV) +DECLARE_INSN(vfwmsac_vv, MATCH_VFWMSAC_VV, MASK_VFWMSAC_VV) +DECLARE_INSN(vfwnmsac_vv, MATCH_VFWNMSAC_VV, MASK_VFWNMSAC_VV) +DECLARE_INSN(vadd_vx, MATCH_VADD_VX, MASK_VADD_VX) +DECLARE_INSN(vsub_vx, MATCH_VSUB_VX, MASK_VSUB_VX) +DECLARE_INSN(vrsub_vx, MATCH_VRSUB_VX, MASK_VRSUB_VX) +DECLARE_INSN(vminu_vx, MATCH_VMINU_VX, MASK_VMINU_VX) +DECLARE_INSN(vmin_vx, MATCH_VMIN_VX, MASK_VMIN_VX) +DECLARE_INSN(vmaxu_vx, MATCH_VMAXU_VX, MASK_VMAXU_VX) +DECLARE_INSN(vmax_vx, MATCH_VMAX_VX, MASK_VMAX_VX) +DECLARE_INSN(vand_vx, MATCH_VAND_VX, MASK_VAND_VX) +DECLARE_INSN(vor_vx, MATCH_VOR_VX, MASK_VOR_VX) +DECLARE_INSN(vxor_vx, MATCH_VXOR_VX, MASK_VXOR_VX) +DECLARE_INSN(vrgather_vx, MATCH_VRGATHER_VX, MASK_VRGATHER_VX) +DECLARE_INSN(vslideup_vx, MATCH_VSLIDEUP_VX, MASK_VSLIDEUP_VX) +DECLARE_INSN(vslidedown_vx, MATCH_VSLIDEDOWN_VX, MASK_VSLIDEDOWN_VX) +DECLARE_INSN(vadc_vxm, MATCH_VADC_VXM, MASK_VADC_VXM) +DECLARE_INSN(vmadc_vxm, MATCH_VMADC_VXM, MASK_VMADC_VXM) +DECLARE_INSN(vsbc_vxm, MATCH_VSBC_VXM, MASK_VSBC_VXM) +DECLARE_INSN(vmsbc_vxm, MATCH_VMSBC_VXM, MASK_VMSBC_VXM) +DECLARE_INSN(vmerge_vxm, MATCH_VMERGE_VXM, MASK_VMERGE_VXM) +DECLARE_INSN(vmv_v_x, MATCH_VMV_V_X, MASK_VMV_V_X) +DECLARE_INSN(vmseq_vx, MATCH_VMSEQ_VX, MASK_VMSEQ_VX) +DECLARE_INSN(vmsne_vx, MATCH_VMSNE_VX, MASK_VMSNE_VX) +DECLARE_INSN(vmsltu_vx, MATCH_VMSLTU_VX, MASK_VMSLTU_VX) +DECLARE_INSN(vmslt_vx, MATCH_VMSLT_VX, MASK_VMSLT_VX) +DECLARE_INSN(vmsleu_vx, MATCH_VMSLEU_VX, MASK_VMSLEU_VX) +DECLARE_INSN(vmsle_vx, MATCH_VMSLE_VX, MASK_VMSLE_VX) +DECLARE_INSN(vmsgtu_vx, MATCH_VMSGTU_VX, MASK_VMSGTU_VX) +DECLARE_INSN(vmsgt_vx, MATCH_VMSGT_VX, MASK_VMSGT_VX) +DECLARE_INSN(vsaddu_vx, MATCH_VSADDU_VX, MASK_VSADDU_VX) +DECLARE_INSN(vsadd_vx, MATCH_VSADD_VX, MASK_VSADD_VX) +DECLARE_INSN(vssubu_vx, MATCH_VSSUBU_VX, MASK_VSSUBU_VX) +DECLARE_INSN(vssub_vx, MATCH_VSSUB_VX, MASK_VSSUB_VX) +DECLARE_INSN(vsll_vx, MATCH_VSLL_VX, MASK_VSLL_VX) +DECLARE_INSN(vsmul_vx, MATCH_VSMUL_VX, 
MASK_VSMUL_VX) +DECLARE_INSN(vsrl_vx, MATCH_VSRL_VX, MASK_VSRL_VX) +DECLARE_INSN(vsra_vx, MATCH_VSRA_VX, MASK_VSRA_VX) +DECLARE_INSN(vssrl_vx, MATCH_VSSRL_VX, MASK_VSSRL_VX) +DECLARE_INSN(vssra_vx, MATCH_VSSRA_VX, MASK_VSSRA_VX) +DECLARE_INSN(vnsrl_wx, MATCH_VNSRL_WX, MASK_VNSRL_WX) +DECLARE_INSN(vnsra_wx, MATCH_VNSRA_WX, MASK_VNSRA_WX) +DECLARE_INSN(vnclipu_wx, MATCH_VNCLIPU_WX, MASK_VNCLIPU_WX) +DECLARE_INSN(vnclip_wx, MATCH_VNCLIP_WX, MASK_VNCLIP_WX) +DECLARE_INSN(vqmaccu_vx, MATCH_VQMACCU_VX, MASK_VQMACCU_VX) +DECLARE_INSN(vqmacc_vx, MATCH_VQMACC_VX, MASK_VQMACC_VX) +DECLARE_INSN(vqmaccus_vx, MATCH_VQMACCUS_VX, MASK_VQMACCUS_VX) +DECLARE_INSN(vqmaccsu_vx, MATCH_VQMACCSU_VX, MASK_VQMACCSU_VX) +DECLARE_INSN(vadd_vv, MATCH_VADD_VV, MASK_VADD_VV) +DECLARE_INSN(vsub_vv, MATCH_VSUB_VV, MASK_VSUB_VV) +DECLARE_INSN(vminu_vv, MATCH_VMINU_VV, MASK_VMINU_VV) +DECLARE_INSN(vmin_vv, MATCH_VMIN_VV, MASK_VMIN_VV) +DECLARE_INSN(vmaxu_vv, MATCH_VMAXU_VV, MASK_VMAXU_VV) +DECLARE_INSN(vmax_vv, MATCH_VMAX_VV, MASK_VMAX_VV) +DECLARE_INSN(vand_vv, MATCH_VAND_VV, MASK_VAND_VV) +DECLARE_INSN(vor_vv, MATCH_VOR_VV, MASK_VOR_VV) +DECLARE_INSN(vxor_vv, MATCH_VXOR_VV, MASK_VXOR_VV) +DECLARE_INSN(vrgather_vv, MATCH_VRGATHER_VV, MASK_VRGATHER_VV) +DECLARE_INSN(vrgatherei16_vv, MATCH_VRGATHEREI16_VV, MASK_VRGATHEREI16_VV) +DECLARE_INSN(vadc_vvm, MATCH_VADC_VVM, MASK_VADC_VVM) +DECLARE_INSN(vmadc_vvm, MATCH_VMADC_VVM, MASK_VMADC_VVM) +DECLARE_INSN(vsbc_vvm, MATCH_VSBC_VVM, MASK_VSBC_VVM) +DECLARE_INSN(vmsbc_vvm, MATCH_VMSBC_VVM, MASK_VMSBC_VVM) +DECLARE_INSN(vmerge_vvm, MATCH_VMERGE_VVM, MASK_VMERGE_VVM) +DECLARE_INSN(vmv_v_v, MATCH_VMV_V_V, MASK_VMV_V_V) +DECLARE_INSN(vmseq_vv, MATCH_VMSEQ_VV, MASK_VMSEQ_VV) +DECLARE_INSN(vmsne_vv, MATCH_VMSNE_VV, MASK_VMSNE_VV) +DECLARE_INSN(vmsltu_vv, MATCH_VMSLTU_VV, MASK_VMSLTU_VV) +DECLARE_INSN(vmslt_vv, MATCH_VMSLT_VV, MASK_VMSLT_VV) +DECLARE_INSN(vmsleu_vv, MATCH_VMSLEU_VV, MASK_VMSLEU_VV) +DECLARE_INSN(vmsle_vv, MATCH_VMSLE_VV, MASK_VMSLE_VV) 
+DECLARE_INSN(vsaddu_vv, MATCH_VSADDU_VV, MASK_VSADDU_VV) +DECLARE_INSN(vsadd_vv, MATCH_VSADD_VV, MASK_VSADD_VV) +DECLARE_INSN(vssubu_vv, MATCH_VSSUBU_VV, MASK_VSSUBU_VV) +DECLARE_INSN(vssub_vv, MATCH_VSSUB_VV, MASK_VSSUB_VV) +DECLARE_INSN(vsll_vv, MATCH_VSLL_VV, MASK_VSLL_VV) +DECLARE_INSN(vsmul_vv, MATCH_VSMUL_VV, MASK_VSMUL_VV) +DECLARE_INSN(vsrl_vv, MATCH_VSRL_VV, MASK_VSRL_VV) +DECLARE_INSN(vsra_vv, MATCH_VSRA_VV, MASK_VSRA_VV) +DECLARE_INSN(vssrl_vv, MATCH_VSSRL_VV, MASK_VSSRL_VV) +DECLARE_INSN(vssra_vv, MATCH_VSSRA_VV, MASK_VSSRA_VV) +DECLARE_INSN(vnsrl_wv, MATCH_VNSRL_WV, MASK_VNSRL_WV) +DECLARE_INSN(vnsra_wv, MATCH_VNSRA_WV, MASK_VNSRA_WV) +DECLARE_INSN(vnclipu_wv, MATCH_VNCLIPU_WV, MASK_VNCLIPU_WV) +DECLARE_INSN(vnclip_wv, MATCH_VNCLIP_WV, MASK_VNCLIP_WV) +DECLARE_INSN(vwredsumu_vs, MATCH_VWREDSUMU_VS, MASK_VWREDSUMU_VS) +DECLARE_INSN(vwredsum_vs, MATCH_VWREDSUM_VS, MASK_VWREDSUM_VS) +DECLARE_INSN(vdotu_vv, MATCH_VDOTU_VV, MASK_VDOTU_VV) +DECLARE_INSN(vdot_vv, MATCH_VDOT_VV, MASK_VDOT_VV) +DECLARE_INSN(vqmaccu_vv, MATCH_VQMACCU_VV, MASK_VQMACCU_VV) +DECLARE_INSN(vqmacc_vv, MATCH_VQMACC_VV, MASK_VQMACC_VV) +DECLARE_INSN(vqmaccsu_vv, MATCH_VQMACCSU_VV, MASK_VQMACCSU_VV) +DECLARE_INSN(vadd_vi, MATCH_VADD_VI, MASK_VADD_VI) +DECLARE_INSN(vrsub_vi, MATCH_VRSUB_VI, MASK_VRSUB_VI) +DECLARE_INSN(vand_vi, MATCH_VAND_VI, MASK_VAND_VI) +DECLARE_INSN(vor_vi, MATCH_VOR_VI, MASK_VOR_VI) +DECLARE_INSN(vxor_vi, MATCH_VXOR_VI, MASK_VXOR_VI) +DECLARE_INSN(vrgather_vi, MATCH_VRGATHER_VI, MASK_VRGATHER_VI) +DECLARE_INSN(vslideup_vi, MATCH_VSLIDEUP_VI, MASK_VSLIDEUP_VI) +DECLARE_INSN(vslidedown_vi, MATCH_VSLIDEDOWN_VI, MASK_VSLIDEDOWN_VI) +DECLARE_INSN(vadc_vim, MATCH_VADC_VIM, MASK_VADC_VIM) +DECLARE_INSN(vmadc_vim, MATCH_VMADC_VIM, MASK_VMADC_VIM) +DECLARE_INSN(vmerge_vim, MATCH_VMERGE_VIM, MASK_VMERGE_VIM) +DECLARE_INSN(vmv_v_i, MATCH_VMV_V_I, MASK_VMV_V_I) +DECLARE_INSN(vmseq_vi, MATCH_VMSEQ_VI, MASK_VMSEQ_VI) +DECLARE_INSN(vmsne_vi, MATCH_VMSNE_VI, MASK_VMSNE_VI) 
+DECLARE_INSN(vmsleu_vi, MATCH_VMSLEU_VI, MASK_VMSLEU_VI) +DECLARE_INSN(vmsle_vi, MATCH_VMSLE_VI, MASK_VMSLE_VI) +DECLARE_INSN(vmsgtu_vi, MATCH_VMSGTU_VI, MASK_VMSGTU_VI) +DECLARE_INSN(vmsgt_vi, MATCH_VMSGT_VI, MASK_VMSGT_VI) +DECLARE_INSN(vsaddu_vi, MATCH_VSADDU_VI, MASK_VSADDU_VI) +DECLARE_INSN(vsadd_vi, MATCH_VSADD_VI, MASK_VSADD_VI) +DECLARE_INSN(vsll_vi, MATCH_VSLL_VI, MASK_VSLL_VI) +DECLARE_INSN(vmv1r_v, MATCH_VMV1R_V, MASK_VMV1R_V) +DECLARE_INSN(vmv2r_v, MATCH_VMV2R_V, MASK_VMV2R_V) +DECLARE_INSN(vmv4r_v, MATCH_VMV4R_V, MASK_VMV4R_V) +DECLARE_INSN(vmv8r_v, MATCH_VMV8R_V, MASK_VMV8R_V) +DECLARE_INSN(vsrl_vi, MATCH_VSRL_VI, MASK_VSRL_VI) +DECLARE_INSN(vsra_vi, MATCH_VSRA_VI, MASK_VSRA_VI) +DECLARE_INSN(vssrl_vi, MATCH_VSSRL_VI, MASK_VSSRL_VI) +DECLARE_INSN(vssra_vi, MATCH_VSSRA_VI, MASK_VSSRA_VI) +DECLARE_INSN(vnsrl_wi, MATCH_VNSRL_WI, MASK_VNSRL_WI) +DECLARE_INSN(vnsra_wi, MATCH_VNSRA_WI, MASK_VNSRA_WI) +DECLARE_INSN(vnclipu_wi, MATCH_VNCLIPU_WI, MASK_VNCLIPU_WI) +DECLARE_INSN(vnclip_wi, MATCH_VNCLIP_WI, MASK_VNCLIP_WI) +DECLARE_INSN(vredsum_vs, MATCH_VREDSUM_VS, MASK_VREDSUM_VS) +DECLARE_INSN(vredand_vs, MATCH_VREDAND_VS, MASK_VREDAND_VS) +DECLARE_INSN(vredor_vs, MATCH_VREDOR_VS, MASK_VREDOR_VS) +DECLARE_INSN(vredxor_vs, MATCH_VREDXOR_VS, MASK_VREDXOR_VS) +DECLARE_INSN(vredminu_vs, MATCH_VREDMINU_VS, MASK_VREDMINU_VS) +DECLARE_INSN(vredmin_vs, MATCH_VREDMIN_VS, MASK_VREDMIN_VS) +DECLARE_INSN(vredmaxu_vs, MATCH_VREDMAXU_VS, MASK_VREDMAXU_VS) +DECLARE_INSN(vredmax_vs, MATCH_VREDMAX_VS, MASK_VREDMAX_VS) +DECLARE_INSN(vaaddu_vv, MATCH_VAADDU_VV, MASK_VAADDU_VV) +DECLARE_INSN(vaadd_vv, MATCH_VAADD_VV, MASK_VAADD_VV) +DECLARE_INSN(vasubu_vv, MATCH_VASUBU_VV, MASK_VASUBU_VV) +DECLARE_INSN(vasub_vv, MATCH_VASUB_VV, MASK_VASUB_VV) +DECLARE_INSN(vmv_x_s, MATCH_VMV_X_S, MASK_VMV_X_S) +DECLARE_INSN(vzext_vf8, MATCH_VZEXT_VF8, MASK_VZEXT_VF8) +DECLARE_INSN(vsext_vf8, MATCH_VSEXT_VF8, MASK_VSEXT_VF8) +DECLARE_INSN(vzext_vf4, MATCH_VZEXT_VF4, MASK_VZEXT_VF4) 
+DECLARE_INSN(vsext_vf4, MATCH_VSEXT_VF4, MASK_VSEXT_VF4) +DECLARE_INSN(vzext_vf2, MATCH_VZEXT_VF2, MASK_VZEXT_VF2) +DECLARE_INSN(vsext_vf2, MATCH_VSEXT_VF2, MASK_VSEXT_VF2) +DECLARE_INSN(vcompress_vm, MATCH_VCOMPRESS_VM, MASK_VCOMPRESS_VM) +DECLARE_INSN(vmandnot_mm, MATCH_VMANDNOT_MM, MASK_VMANDNOT_MM) +DECLARE_INSN(vmand_mm, MATCH_VMAND_MM, MASK_VMAND_MM) +DECLARE_INSN(vmor_mm, MATCH_VMOR_MM, MASK_VMOR_MM) +DECLARE_INSN(vmxor_mm, MATCH_VMXOR_MM, MASK_VMXOR_MM) +DECLARE_INSN(vmornot_mm, MATCH_VMORNOT_MM, MASK_VMORNOT_MM) +DECLARE_INSN(vmnand_mm, MATCH_VMNAND_MM, MASK_VMNAND_MM) +DECLARE_INSN(vmnor_mm, MATCH_VMNOR_MM, MASK_VMNOR_MM) +DECLARE_INSN(vmxnor_mm, MATCH_VMXNOR_MM, MASK_VMXNOR_MM) +DECLARE_INSN(vmsbf_m, MATCH_VMSBF_M, MASK_VMSBF_M) +DECLARE_INSN(vmsof_m, MATCH_VMSOF_M, MASK_VMSOF_M) +DECLARE_INSN(vmsif_m, MATCH_VMSIF_M, MASK_VMSIF_M) +DECLARE_INSN(viota_m, MATCH_VIOTA_M, MASK_VIOTA_M) +DECLARE_INSN(vid_v, MATCH_VID_V, MASK_VID_V) +DECLARE_INSN(vpopc_m, MATCH_VPOPC_M, MASK_VPOPC_M) +DECLARE_INSN(vfirst_m, MATCH_VFIRST_M, MASK_VFIRST_M) +DECLARE_INSN(vdivu_vv, MATCH_VDIVU_VV, MASK_VDIVU_VV) +DECLARE_INSN(vdiv_vv, MATCH_VDIV_VV, MASK_VDIV_VV) +DECLARE_INSN(vremu_vv, MATCH_VREMU_VV, MASK_VREMU_VV) +DECLARE_INSN(vrem_vv, MATCH_VREM_VV, MASK_VREM_VV) +DECLARE_INSN(vmulhu_vv, MATCH_VMULHU_VV, MASK_VMULHU_VV) +DECLARE_INSN(vmul_vv, MATCH_VMUL_VV, MASK_VMUL_VV) +DECLARE_INSN(vmulhsu_vv, MATCH_VMULHSU_VV, MASK_VMULHSU_VV) +DECLARE_INSN(vmulh_vv, MATCH_VMULH_VV, MASK_VMULH_VV) +DECLARE_INSN(vmadd_vv, MATCH_VMADD_VV, MASK_VMADD_VV) +DECLARE_INSN(vnmsub_vv, MATCH_VNMSUB_VV, MASK_VNMSUB_VV) +DECLARE_INSN(vmacc_vv, MATCH_VMACC_VV, MASK_VMACC_VV) +DECLARE_INSN(vnmsac_vv, MATCH_VNMSAC_VV, MASK_VNMSAC_VV) +DECLARE_INSN(vwaddu_vv, MATCH_VWADDU_VV, MASK_VWADDU_VV) +DECLARE_INSN(vwadd_vv, MATCH_VWADD_VV, MASK_VWADD_VV) +DECLARE_INSN(vwsubu_vv, MATCH_VWSUBU_VV, MASK_VWSUBU_VV) +DECLARE_INSN(vwsub_vv, MATCH_VWSUB_VV, MASK_VWSUB_VV) +DECLARE_INSN(vwaddu_wv, MATCH_VWADDU_WV, 
MASK_VWADDU_WV) +DECLARE_INSN(vwadd_wv, MATCH_VWADD_WV, MASK_VWADD_WV) +DECLARE_INSN(vwsubu_wv, MATCH_VWSUBU_WV, MASK_VWSUBU_WV) +DECLARE_INSN(vwsub_wv, MATCH_VWSUB_WV, MASK_VWSUB_WV) +DECLARE_INSN(vwmulu_vv, MATCH_VWMULU_VV, MASK_VWMULU_VV) +DECLARE_INSN(vwmulsu_vv, MATCH_VWMULSU_VV, MASK_VWMULSU_VV) +DECLARE_INSN(vwmul_vv, MATCH_VWMUL_VV, MASK_VWMUL_VV) +DECLARE_INSN(vwmaccu_vv, MATCH_VWMACCU_VV, MASK_VWMACCU_VV) +DECLARE_INSN(vwmacc_vv, MATCH_VWMACC_VV, MASK_VWMACC_VV) +DECLARE_INSN(vwmaccsu_vv, MATCH_VWMACCSU_VV, MASK_VWMACCSU_VV) +DECLARE_INSN(vaaddu_vx, MATCH_VAADDU_VX, MASK_VAADDU_VX) +DECLARE_INSN(vaadd_vx, MATCH_VAADD_VX, MASK_VAADD_VX) +DECLARE_INSN(vasubu_vx, MATCH_VASUBU_VX, MASK_VASUBU_VX) +DECLARE_INSN(vasub_vx, MATCH_VASUB_VX, MASK_VASUB_VX) +DECLARE_INSN(vmv_s_x, MATCH_VMV_S_X, MASK_VMV_S_X) +DECLARE_INSN(vslide1up_vx, MATCH_VSLIDE1UP_VX, MASK_VSLIDE1UP_VX) +DECLARE_INSN(vslide1down_vx, MATCH_VSLIDE1DOWN_VX, MASK_VSLIDE1DOWN_VX) +DECLARE_INSN(vdivu_vx, MATCH_VDIVU_VX, MASK_VDIVU_VX) +DECLARE_INSN(vdiv_vx, MATCH_VDIV_VX, MASK_VDIV_VX) +DECLARE_INSN(vremu_vx, MATCH_VREMU_VX, MASK_VREMU_VX) +DECLARE_INSN(vrem_vx, MATCH_VREM_VX, MASK_VREM_VX) +DECLARE_INSN(vmulhu_vx, MATCH_VMULHU_VX, MASK_VMULHU_VX) +DECLARE_INSN(vmul_vx, MATCH_VMUL_VX, MASK_VMUL_VX) +DECLARE_INSN(vmulhsu_vx, MATCH_VMULHSU_VX, MASK_VMULHSU_VX) +DECLARE_INSN(vmulh_vx, MATCH_VMULH_VX, MASK_VMULH_VX) +DECLARE_INSN(vmadd_vx, MATCH_VMADD_VX, MASK_VMADD_VX) +DECLARE_INSN(vnmsub_vx, MATCH_VNMSUB_VX, MASK_VNMSUB_VX) +DECLARE_INSN(vmacc_vx, MATCH_VMACC_VX, MASK_VMACC_VX) +DECLARE_INSN(vnmsac_vx, MATCH_VNMSAC_VX, MASK_VNMSAC_VX) +DECLARE_INSN(vwaddu_vx, MATCH_VWADDU_VX, MASK_VWADDU_VX) +DECLARE_INSN(vwadd_vx, MATCH_VWADD_VX, MASK_VWADD_VX) +DECLARE_INSN(vwsubu_vx, MATCH_VWSUBU_VX, MASK_VWSUBU_VX) +DECLARE_INSN(vwsub_vx, MATCH_VWSUB_VX, MASK_VWSUB_VX) +DECLARE_INSN(vwaddu_wx, MATCH_VWADDU_WX, MASK_VWADDU_WX) +DECLARE_INSN(vwadd_wx, MATCH_VWADD_WX, MASK_VWADD_WX) +DECLARE_INSN(vwsubu_wx, 
MATCH_VWSUBU_WX, MASK_VWSUBU_WX) +DECLARE_INSN(vwsub_wx, MATCH_VWSUB_WX, MASK_VWSUB_WX) +DECLARE_INSN(vwmulu_vx, MATCH_VWMULU_VX, MASK_VWMULU_VX) +DECLARE_INSN(vwmulsu_vx, MATCH_VWMULSU_VX, MASK_VWMULSU_VX) +DECLARE_INSN(vwmul_vx, MATCH_VWMUL_VX, MASK_VWMUL_VX) +DECLARE_INSN(vwmaccu_vx, MATCH_VWMACCU_VX, MASK_VWMACCU_VX) +DECLARE_INSN(vwmacc_vx, MATCH_VWMACC_VX, MASK_VWMACC_VX) +DECLARE_INSN(vwmaccus_vx, MATCH_VWMACCUS_VX, MASK_VWMACCUS_VX) +DECLARE_INSN(vwmaccsu_vx, MATCH_VWMACCSU_VX, MASK_VWMACCSU_VX) +DECLARE_INSN(vamoswapei8_v, MATCH_VAMOSWAPEI8_V, MASK_VAMOSWAPEI8_V) +DECLARE_INSN(vamoaddei8_v, MATCH_VAMOADDEI8_V, MASK_VAMOADDEI8_V) +DECLARE_INSN(vamoxorei8_v, MATCH_VAMOXOREI8_V, MASK_VAMOXOREI8_V) +DECLARE_INSN(vamoandei8_v, MATCH_VAMOANDEI8_V, MASK_VAMOANDEI8_V) +DECLARE_INSN(vamoorei8_v, MATCH_VAMOOREI8_V, MASK_VAMOOREI8_V) +DECLARE_INSN(vamominei8_v, MATCH_VAMOMINEI8_V, MASK_VAMOMINEI8_V) +DECLARE_INSN(vamomaxei8_v, MATCH_VAMOMAXEI8_V, MASK_VAMOMAXEI8_V) +DECLARE_INSN(vamominuei8_v, MATCH_VAMOMINUEI8_V, MASK_VAMOMINUEI8_V) +DECLARE_INSN(vamomaxuei8_v, MATCH_VAMOMAXUEI8_V, MASK_VAMOMAXUEI8_V) +DECLARE_INSN(vamoswapei16_v, MATCH_VAMOSWAPEI16_V, MASK_VAMOSWAPEI16_V) +DECLARE_INSN(vamoaddei16_v, MATCH_VAMOADDEI16_V, MASK_VAMOADDEI16_V) +DECLARE_INSN(vamoxorei16_v, MATCH_VAMOXOREI16_V, MASK_VAMOXOREI16_V) +DECLARE_INSN(vamoandei16_v, MATCH_VAMOANDEI16_V, MASK_VAMOANDEI16_V) +DECLARE_INSN(vamoorei16_v, MATCH_VAMOOREI16_V, MASK_VAMOOREI16_V) +DECLARE_INSN(vamominei16_v, MATCH_VAMOMINEI16_V, MASK_VAMOMINEI16_V) +DECLARE_INSN(vamomaxei16_v, MATCH_VAMOMAXEI16_V, MASK_VAMOMAXEI16_V) +DECLARE_INSN(vamominuei16_v, MATCH_VAMOMINUEI16_V, MASK_VAMOMINUEI16_V) +DECLARE_INSN(vamomaxuei16_v, MATCH_VAMOMAXUEI16_V, MASK_VAMOMAXUEI16_V) +DECLARE_INSN(vamoswapei32_v, MATCH_VAMOSWAPEI32_V, MASK_VAMOSWAPEI32_V) +DECLARE_INSN(vamoaddei32_v, MATCH_VAMOADDEI32_V, MASK_VAMOADDEI32_V) +DECLARE_INSN(vamoxorei32_v, MATCH_VAMOXOREI32_V, MASK_VAMOXOREI32_V) +DECLARE_INSN(vamoandei32_v, 
MATCH_VAMOANDEI32_V, MASK_VAMOANDEI32_V) +DECLARE_INSN(vamoorei32_v, MATCH_VAMOOREI32_V, MASK_VAMOOREI32_V) +DECLARE_INSN(vamominei32_v, MATCH_VAMOMINEI32_V, MASK_VAMOMINEI32_V) +DECLARE_INSN(vamomaxei32_v, MATCH_VAMOMAXEI32_V, MASK_VAMOMAXEI32_V) +DECLARE_INSN(vamominuei32_v, MATCH_VAMOMINUEI32_V, MASK_VAMOMINUEI32_V) +DECLARE_INSN(vamomaxuei32_v, MATCH_VAMOMAXUEI32_V, MASK_VAMOMAXUEI32_V) +DECLARE_INSN(vamoswapei64_v, MATCH_VAMOSWAPEI64_V, MASK_VAMOSWAPEI64_V) +DECLARE_INSN(vamoaddei64_v, MATCH_VAMOADDEI64_V, MASK_VAMOADDEI64_V) +DECLARE_INSN(vamoxorei64_v, MATCH_VAMOXOREI64_V, MASK_VAMOXOREI64_V) +DECLARE_INSN(vamoandei64_v, MATCH_VAMOANDEI64_V, MASK_VAMOANDEI64_V) +DECLARE_INSN(vamoorei64_v, MATCH_VAMOOREI64_V, MASK_VAMOOREI64_V) +DECLARE_INSN(vamominei64_v, MATCH_VAMOMINEI64_V, MASK_VAMOMINEI64_V) +DECLARE_INSN(vamomaxei64_v, MATCH_VAMOMAXEI64_V, MASK_VAMOMAXEI64_V) +DECLARE_INSN(vamominuei64_v, MATCH_VAMOMINUEI64_V, MASK_VAMOMINUEI64_V) +DECLARE_INSN(vamomaxuei64_v, MATCH_VAMOMAXUEI64_V, MASK_VAMOMAXUEI64_V) DECLARE_INSN(vmvnfr_v, MATCH_VMVNFR_V, MASK_VMVNFR_V) DECLARE_INSN(vl1r_v, MATCH_VL1R_V, MASK_VL1R_V) DECLARE_INSN(vl2r_v, MATCH_VL2R_V, MASK_VL2R_V) diff --git a/toolchain/riscv-opcodes/inst.sverilog b/toolchain/riscv-opcodes/inst.sverilog index e0bf49d0f..8918e2c6b 100644 --- a/toolchain/riscv-opcodes/inst.sverilog +++ b/toolchain/riscv-opcodes/inst.sverilog @@ -323,6 +323,447 @@ package riscv_instr; localparam [31:0] C_FSDSP = 32'b????????????????101???????????10; localparam [31:0] C_SWSP = 32'b????????????????110???????????10; localparam [31:0] C_FSWSP = 32'b????????????????111???????????10; + localparam [31:0] VSETVLI = 32'b0????????????????111?????1010111; + localparam [31:0] VSETVL = 32'b1000000??????????111?????1010111; + localparam [31:0] VLE8_V = 32'b???000?00000?????000?????0000111; + localparam [31:0] VLE16_V = 32'b???000?00000?????101?????0000111; + localparam [31:0] VLE32_V = 32'b???000?00000?????110?????0000111; + localparam [31:0] VLE64_V 
= 32'b???000?00000?????111?????0000111; + localparam [31:0] VLE128_V = 32'b???100?00000?????000?????0000111; + localparam [31:0] VLE256_V = 32'b???100?00000?????101?????0000111; + localparam [31:0] VLE512_V = 32'b???100?00000?????110?????0000111; + localparam [31:0] VLE1024_V = 32'b???100?00000?????111?????0000111; + localparam [31:0] VSE8_V = 32'b???000?00000?????000?????0100111; + localparam [31:0] VSE16_V = 32'b???000?00000?????101?????0100111; + localparam [31:0] VSE32_V = 32'b???000?00000?????110?????0100111; + localparam [31:0] VSE64_V = 32'b???000?00000?????111?????0100111; + localparam [31:0] VSE128_V = 32'b???100?00000?????000?????0100111; + localparam [31:0] VSE256_V = 32'b???100?00000?????101?????0100111; + localparam [31:0] VSE512_V = 32'b???100?00000?????110?????0100111; + localparam [31:0] VSE1024_V = 32'b???100?00000?????111?????0100111; + localparam [31:0] VLSE8_V = 32'b???010???????????000?????0000111; + localparam [31:0] VLSE16_V = 32'b???010???????????101?????0000111; + localparam [31:0] VLSE32_V = 32'b???010???????????110?????0000111; + localparam [31:0] VLSE64_V = 32'b???010???????????111?????0000111; + localparam [31:0] VLSE128_V = 32'b???110???????????000?????0000111; + localparam [31:0] VLSE256_V = 32'b???110???????????101?????0000111; + localparam [31:0] VLSE512_V = 32'b???110???????????110?????0000111; + localparam [31:0] VLSE1024_V = 32'b???110???????????111?????0000111; + localparam [31:0] VSSE8_V = 32'b???010???????????000?????0100111; + localparam [31:0] VSSE16_V = 32'b???010???????????101?????0100111; + localparam [31:0] VSSE32_V = 32'b???010???????????110?????0100111; + localparam [31:0] VSSE64_V = 32'b???010???????????111?????0100111; + localparam [31:0] VSSE128_V = 32'b???110???????????000?????0100111; + localparam [31:0] VSSE256_V = 32'b???110???????????101?????0100111; + localparam [31:0] VSSE512_V = 32'b???110???????????110?????0100111; + localparam [31:0] VSSE1024_V = 32'b???110???????????111?????0100111; + localparam [31:0] 
VLXEI8_V = 32'b???011???????????000?????0000111; + localparam [31:0] VLXEI16_V = 32'b???011???????????101?????0000111; + localparam [31:0] VLXEI32_V = 32'b???011???????????110?????0000111; + localparam [31:0] VLXEI64_V = 32'b???011???????????111?????0000111; + localparam [31:0] VLXEI128_V = 32'b???111???????????000?????0000111; + localparam [31:0] VLXEI256_V = 32'b???111???????????101?????0000111; + localparam [31:0] VLXEI512_V = 32'b???111???????????110?????0000111; + localparam [31:0] VLXEI1024_V = 32'b???111???????????111?????0000111; + localparam [31:0] VSXEI8_V = 32'b???011???????????000?????0100111; + localparam [31:0] VSXEI16_V = 32'b???011???????????101?????0100111; + localparam [31:0] VSXEI32_V = 32'b???011???????????110?????0100111; + localparam [31:0] VSXEI64_V = 32'b???011???????????111?????0100111; + localparam [31:0] VSXEI128_V = 32'b???111???????????000?????0100111; + localparam [31:0] VSXEI256_V = 32'b???111???????????101?????0100111; + localparam [31:0] VSXEI512_V = 32'b???111???????????110?????0100111; + localparam [31:0] VSXEI1024_V = 32'b???111???????????111?????0100111; + localparam [31:0] VSUXEI8_V = 32'b???001???????????000?????0100111; + localparam [31:0] VSUXEI16_V = 32'b???001???????????101?????0100111; + localparam [31:0] VSUXEI32_V = 32'b???001???????????110?????0100111; + localparam [31:0] VSUXEI64_V = 32'b???001???????????111?????0100111; + localparam [31:0] VSUXEI128_V = 32'b???101???????????000?????0100111; + localparam [31:0] VSUXEI256_V = 32'b???101???????????101?????0100111; + localparam [31:0] VSUXEI512_V = 32'b???101???????????110?????0100111; + localparam [31:0] VSUXEI1024_V = 32'b???101???????????111?????0100111; + localparam [31:0] VLE8FF_V = 32'b???000?10000?????000?????0000111; + localparam [31:0] VLE16FF_V = 32'b???000?10000?????101?????0000111; + localparam [31:0] VLE32FF_V = 32'b???000?10000?????110?????0000111; + localparam [31:0] VLE64FF_V = 32'b???000?10000?????111?????0000111; + localparam [31:0] VLE128FF_V = 
32'b???100?10000?????000?????0000111; + localparam [31:0] VLE256FF_V = 32'b???100?10000?????101?????0000111; + localparam [31:0] VLE512FF_V = 32'b???100?10000?????110?????0000111; + localparam [31:0] VLE1024FF_V = 32'b???100?10000?????111?????0000111; + localparam [31:0] VL1RE8_V = 32'b000000101000?????000?????0000111; + localparam [31:0] VL1RE16_V = 32'b000000101000?????101?????0000111; + localparam [31:0] VL1RE32_V = 32'b000000101000?????110?????0000111; + localparam [31:0] VL1RE64_V = 32'b000000101000?????111?????0000111; + localparam [31:0] VL2RE8_V = 32'b001000101000?????000?????0000111; + localparam [31:0] VL2RE16_V = 32'b001000101000?????101?????0000111; + localparam [31:0] VL2RE32_V = 32'b001000101000?????110?????0000111; + localparam [31:0] VL2RE64_V = 32'b001000101000?????111?????0000111; + localparam [31:0] VL4RE8_V = 32'b011000101000?????000?????0000111; + localparam [31:0] VL4RE16_V = 32'b011000101000?????101?????0000111; + localparam [31:0] VL4RE32_V = 32'b011000101000?????110?????0000111; + localparam [31:0] VL4RE64_V = 32'b011000101000?????111?????0000111; + localparam [31:0] VL8RE8_V = 32'b111000101000?????000?????0000111; + localparam [31:0] VL8RE16_V = 32'b111000101000?????101?????0000111; + localparam [31:0] VL8RE32_V = 32'b111000101000?????110?????0000111; + localparam [31:0] VL8RE64_V = 32'b111000101000?????111?????0000111; + localparam [31:0] VS1R_V = 32'b000000101000?????000?????0100111; + localparam [31:0] VS2R_V = 32'b001000101000?????000?????0100111; + localparam [31:0] VS4R_V = 32'b011000101000?????000?????0100111; + localparam [31:0] VS8R_V = 32'b111000101000?????000?????0100111; + localparam [31:0] VFADD_VF = 32'b000000???????????101?????1010111; + localparam [31:0] VFSUB_VF = 32'b000010???????????101?????1010111; + localparam [31:0] VFMIN_VF = 32'b000100???????????101?????1010111; + localparam [31:0] VFMAX_VF = 32'b000110???????????101?????1010111; + localparam [31:0] VFSGNJ_VF = 32'b001000???????????101?????1010111; + localparam 
[31:0] VFSGNJN_VF = 32'b001001???????????101?????1010111; + localparam [31:0] VFSGNJX_VF = 32'b001010???????????101?????1010111; + localparam [31:0] VFSLIDE1UP_VF = 32'b001110???????????101?????1010111; + localparam [31:0] VFSLIDE1DOWN_VF = 32'b001111???????????101?????1010111; + localparam [31:0] VFMV_S_F = 32'b010000100000?????101?????1010111; + localparam [31:0] VFMERGE_VFM = 32'b0101110??????????101?????1010111; + localparam [31:0] VFMV_V_F = 32'b010111100000?????101?????1010111; + localparam [31:0] VMFEQ_VF = 32'b011000???????????101?????1010111; + localparam [31:0] VMFLE_VF = 32'b011001???????????101?????1010111; + localparam [31:0] VMFLT_VF = 32'b011011???????????101?????1010111; + localparam [31:0] VMFNE_VF = 32'b011100???????????101?????1010111; + localparam [31:0] VMFGT_VF = 32'b011101???????????101?????1010111; + localparam [31:0] VMFGE_VF = 32'b011111???????????101?????1010111; + localparam [31:0] VFDIV_VF = 32'b100000???????????101?????1010111; + localparam [31:0] VFRDIV_VF = 32'b100001???????????101?????1010111; + localparam [31:0] VFMUL_VF = 32'b100100???????????101?????1010111; + localparam [31:0] VFRSUB_VF = 32'b100111???????????101?????1010111; + localparam [31:0] VFMADD_VF = 32'b101000???????????101?????1010111; + localparam [31:0] VFNMADD_VF = 32'b101001???????????101?????1010111; + localparam [31:0] VFMSUB_VF = 32'b101010???????????101?????1010111; + localparam [31:0] VFNMSUB_VF = 32'b101011???????????101?????1010111; + localparam [31:0] VFMACC_VF = 32'b101100???????????101?????1010111; + localparam [31:0] VFNMACC_VF = 32'b101101???????????101?????1010111; + localparam [31:0] VFMSAC_VF = 32'b101110???????????101?????1010111; + localparam [31:0] VFNMSAC_VF = 32'b101111???????????101?????1010111; + localparam [31:0] VFWADD_VF = 32'b110000???????????101?????1010111; + localparam [31:0] VFWSUB_VF = 32'b110010???????????101?????1010111; + localparam [31:0] VFWADD_WF = 32'b110100???????????101?????1010111; + localparam [31:0] VFWSUB_WF = 
32'b110110???????????101?????1010111; + localparam [31:0] VFWMUL_VF = 32'b111000???????????101?????1010111; + localparam [31:0] VFWMACC_VF = 32'b111100???????????101?????1010111; + localparam [31:0] VFWNMACC_VF = 32'b111101???????????101?????1010111; + localparam [31:0] VFWMSAC_VF = 32'b111110???????????101?????1010111; + localparam [31:0] VFWNMSAC_VF = 32'b111111???????????101?????1010111; + localparam [31:0] VFADD_VV = 32'b000000???????????001?????1010111; + localparam [31:0] VFREDSUM_VS = 32'b000001???????????001?????1010111; + localparam [31:0] VFSUB_VV = 32'b000010???????????001?????1010111; + localparam [31:0] VFREDOSUM_VS = 32'b000011???????????001?????1010111; + localparam [31:0] VFMIN_VV = 32'b000100???????????001?????1010111; + localparam [31:0] VFREDMIN_VS = 32'b000101???????????001?????1010111; + localparam [31:0] VFMAX_VV = 32'b000110???????????001?????1010111; + localparam [31:0] VFREDMAX_VS = 32'b000111???????????001?????1010111; + localparam [31:0] VFSGNJ_VV = 32'b001000???????????001?????1010111; + localparam [31:0] VFSGNJN_VV = 32'b001001???????????001?????1010111; + localparam [31:0] VFSGNJX_VV = 32'b001010???????????001?????1010111; + localparam [31:0] VFMV_F_S = 32'b0100001?????00000001?????1010111; + localparam [31:0] VMFEQ_VV = 32'b011000???????????001?????1010111; + localparam [31:0] VMFLE_VV = 32'b011001???????????001?????1010111; + localparam [31:0] VMFLT_VV = 32'b011011???????????001?????1010111; + localparam [31:0] VMFNE_VV = 32'b011100???????????001?????1010111; + localparam [31:0] VFDIV_VV = 32'b100000???????????001?????1010111; + localparam [31:0] VFMUL_VV = 32'b100100???????????001?????1010111; + localparam [31:0] VFMADD_VV = 32'b101000???????????001?????1010111; + localparam [31:0] VFNMADD_VV = 32'b101001???????????001?????1010111; + localparam [31:0] VFMSUB_VV = 32'b101010???????????001?????1010111; + localparam [31:0] VFNMSUB_VV = 32'b101011???????????001?????1010111; + localparam [31:0] VFMACC_VV = 
32'b101100???????????001?????1010111; + localparam [31:0] VFNMACC_VV = 32'b101101???????????001?????1010111; + localparam [31:0] VFMSAC_VV = 32'b101110???????????001?????1010111; + localparam [31:0] VFNMSAC_VV = 32'b101111???????????001?????1010111; + localparam [31:0] VFCVT_XU_F_V = 32'b010010??????00000001?????1010111; + localparam [31:0] VFCVT_X_F_V = 32'b010010??????00001001?????1010111; + localparam [31:0] VFCVT_F_XU_V = 32'b010010??????00010001?????1010111; + localparam [31:0] VFCVT_F_X_V = 32'b010010??????00011001?????1010111; + localparam [31:0] VFCVT_RTZ_XU_F_V = 32'b010010??????00110001?????1010111; + localparam [31:0] VFCVT_RTZ_X_F_V = 32'b010010??????00111001?????1010111; + localparam [31:0] VFWCVT_XU_F_V = 32'b010010??????01000001?????1010111; + localparam [31:0] VFWCVT_X_F_V = 32'b010010??????01001001?????1010111; + localparam [31:0] VFWCVT_F_XU_V = 32'b010010??????01010001?????1010111; + localparam [31:0] VFWCVT_F_X_V = 32'b010010??????01011001?????1010111; + localparam [31:0] VFWCVT_F_F_V = 32'b010010??????01100001?????1010111; + localparam [31:0] VFWCVT_RTZ_XU_F_V = 32'b010010??????01110001?????1010111; + localparam [31:0] VFWCVT_RTZ_X_F_V = 32'b010010??????01111001?????1010111; + localparam [31:0] VFNCVT_XU_F_W = 32'b010010??????10000001?????1010111; + localparam [31:0] VFNCVT_X_F_W = 32'b010010??????10001001?????1010111; + localparam [31:0] VFNCVT_F_XU_W = 32'b010010??????10010001?????1010111; + localparam [31:0] VFNCVT_F_X_W = 32'b010010??????10011001?????1010111; + localparam [31:0] VFNCVT_F_F_W = 32'b010010??????10100001?????1010111; + localparam [31:0] VFNCVT_ROD_F_F_W = 32'b010010??????10101001?????1010111; + localparam [31:0] VFNCVT_RTZ_XU_F_W = 32'b010010??????10110001?????1010111; + localparam [31:0] VFNCVT_RTZ_X_F_W = 32'b010010??????10111001?????1010111; + localparam [31:0] VFSQRT_V = 32'b010011??????00000001?????1010111; + localparam [31:0] VFRSQRTE7_V = 32'b010011??????00100001?????1010111; + localparam [31:0] VFRECE7_V = 
32'b010011??????00101001?????1010111; + localparam [31:0] VFCLASS_V = 32'b010011??????10000001?????1010111; + localparam [31:0] VFWADD_VV = 32'b110000???????????001?????1010111; + localparam [31:0] VFWREDSUM_VS = 32'b110001???????????001?????1010111; + localparam [31:0] VFWSUB_VV = 32'b110010???????????001?????1010111; + localparam [31:0] VFWREDOSUM_VS = 32'b110011???????????001?????1010111; + localparam [31:0] VFWADD_WV = 32'b110100???????????001?????1010111; + localparam [31:0] VFWSUB_WV = 32'b110110???????????001?????1010111; + localparam [31:0] VFWMUL_VV = 32'b111000???????????001?????1010111; + localparam [31:0] VFDOT_VV = 32'b111001???????????001?????1010111; + localparam [31:0] VFWMACC_VV = 32'b111100???????????001?????1010111; + localparam [31:0] VFWNMACC_VV = 32'b111101???????????001?????1010111; + localparam [31:0] VFWMSAC_VV = 32'b111110???????????001?????1010111; + localparam [31:0] VFWNMSAC_VV = 32'b111111???????????001?????1010111; + localparam [31:0] VADD_VX = 32'b000000???????????100?????1010111; + localparam [31:0] VSUB_VX = 32'b000010???????????100?????1010111; + localparam [31:0] VRSUB_VX = 32'b000011???????????100?????1010111; + localparam [31:0] VMINU_VX = 32'b000100???????????100?????1010111; + localparam [31:0] VMIN_VX = 32'b000101???????????100?????1010111; + localparam [31:0] VMAXU_VX = 32'b000110???????????100?????1010111; + localparam [31:0] VMAX_VX = 32'b000111???????????100?????1010111; + localparam [31:0] VAND_VX = 32'b001001???????????100?????1010111; + localparam [31:0] VOR_VX = 32'b001010???????????100?????1010111; + localparam [31:0] VXOR_VX = 32'b001011???????????100?????1010111; + localparam [31:0] VRGATHER_VX = 32'b001100???????????100?????1010111; + localparam [31:0] VSLIDEUP_VX = 32'b001110???????????100?????1010111; + localparam [31:0] VSLIDEDOWN_VX = 32'b001111???????????100?????1010111; + localparam [31:0] VADC_VXM = 32'b0100000??????????100?????1010111; + localparam [31:0] VMADC_VXM = 32'b010001???????????100?????1010111; 
+ localparam [31:0] VSBC_VXM = 32'b0100100??????????100?????1010111; + localparam [31:0] VMSBC_VXM = 32'b010011???????????100?????1010111; + localparam [31:0] VMERGE_VXM = 32'b0101110??????????100?????1010111; + localparam [31:0] VMV_V_X = 32'b010111100000?????100?????1010111; + localparam [31:0] VMSEQ_VX = 32'b011000???????????100?????1010111; + localparam [31:0] VMSNE_VX = 32'b011001???????????100?????1010111; + localparam [31:0] VMSLTU_VX = 32'b011010???????????100?????1010111; + localparam [31:0] VMSLT_VX = 32'b011011???????????100?????1010111; + localparam [31:0] VMSLEU_VX = 32'b011100???????????100?????1010111; + localparam [31:0] VMSLE_VX = 32'b011101???????????100?????1010111; + localparam [31:0] VMSGTU_VX = 32'b011110???????????100?????1010111; + localparam [31:0] VMSGT_VX = 32'b011111???????????100?????1010111; + localparam [31:0] VSADDU_VX = 32'b100000???????????100?????1010111; + localparam [31:0] VSADD_VX = 32'b100001???????????100?????1010111; + localparam [31:0] VSSUBU_VX = 32'b100010???????????100?????1010111; + localparam [31:0] VSSUB_VX = 32'b100011???????????100?????1010111; + localparam [31:0] VSLL_VX = 32'b100101???????????100?????1010111; + localparam [31:0] VSMUL_VX = 32'b100111???????????100?????1010111; + localparam [31:0] VSRL_VX = 32'b101000???????????100?????1010111; + localparam [31:0] VSRA_VX = 32'b101001???????????100?????1010111; + localparam [31:0] VSSRL_VX = 32'b101010???????????100?????1010111; + localparam [31:0] VSSRA_VX = 32'b101011???????????100?????1010111; + localparam [31:0] VNSRL_WX = 32'b101100???????????100?????1010111; + localparam [31:0] VNSRA_WX = 32'b101101???????????100?????1010111; + localparam [31:0] VNCLIPU_WX = 32'b101110???????????100?????1010111; + localparam [31:0] VNCLIP_WX = 32'b101111???????????100?????1010111; + localparam [31:0] VQMACCU_VX = 32'b111100???????????100?????1010111; + localparam [31:0] VQMACC_VX = 32'b111101???????????100?????1010111; + localparam [31:0] VQMACCUS_VX = 
32'b111110???????????100?????1010111; + localparam [31:0] VQMACCSU_VX = 32'b111111???????????100?????1010111; + localparam [31:0] VADD_VV = 32'b000000???????????000?????1010111; + localparam [31:0] VSUB_VV = 32'b000010???????????000?????1010111; + localparam [31:0] VMINU_VV = 32'b000100???????????000?????1010111; + localparam [31:0] VMIN_VV = 32'b000101???????????000?????1010111; + localparam [31:0] VMAXU_VV = 32'b000110???????????000?????1010111; + localparam [31:0] VMAX_VV = 32'b000111???????????000?????1010111; + localparam [31:0] VAND_VV = 32'b001001???????????000?????1010111; + localparam [31:0] VOR_VV = 32'b001010???????????000?????1010111; + localparam [31:0] VXOR_VV = 32'b001011???????????000?????1010111; + localparam [31:0] VRGATHER_VV = 32'b001100???????????000?????1010111; + localparam [31:0] VRGATHEREI16_VV = 32'b001110???????????000?????1010111; + localparam [31:0] VADC_VVM = 32'b0100000??????????000?????1010111; + localparam [31:0] VMADC_VVM = 32'b010001???????????000?????1010111; + localparam [31:0] VSBC_VVM = 32'b0100100??????????000?????1010111; + localparam [31:0] VMSBC_VVM = 32'b010011???????????000?????1010111; + localparam [31:0] VMERGE_VVM = 32'b0101110??????????000?????1010111; + localparam [31:0] VMV_V_V = 32'b010111100000?????000?????1010111; + localparam [31:0] VMSEQ_VV = 32'b011000???????????000?????1010111; + localparam [31:0] VMSNE_VV = 32'b011001???????????000?????1010111; + localparam [31:0] VMSLTU_VV = 32'b011010???????????000?????1010111; + localparam [31:0] VMSLT_VV = 32'b011011???????????000?????1010111; + localparam [31:0] VMSLEU_VV = 32'b011100???????????000?????1010111; + localparam [31:0] VMSLE_VV = 32'b011101???????????000?????1010111; + localparam [31:0] VSADDU_VV = 32'b100000???????????000?????1010111; + localparam [31:0] VSADD_VV = 32'b100001???????????000?????1010111; + localparam [31:0] VSSUBU_VV = 32'b100010???????????000?????1010111; + localparam [31:0] VSSUB_VV = 32'b100011???????????000?????1010111; + localparam 
[31:0] VSLL_VV = 32'b100101???????????000?????1010111; + localparam [31:0] VSMUL_VV = 32'b100111???????????000?????1010111; + localparam [31:0] VSRL_VV = 32'b101000???????????000?????1010111; + localparam [31:0] VSRA_VV = 32'b101001???????????000?????1010111; + localparam [31:0] VSSRL_VV = 32'b101010???????????000?????1010111; + localparam [31:0] VSSRA_VV = 32'b101011???????????000?????1010111; + localparam [31:0] VNSRL_WV = 32'b101100???????????000?????1010111; + localparam [31:0] VNSRA_WV = 32'b101101???????????000?????1010111; + localparam [31:0] VNCLIPU_WV = 32'b101110???????????000?????1010111; + localparam [31:0] VNCLIP_WV = 32'b101111???????????000?????1010111; + localparam [31:0] VWREDSUMU_VS = 32'b110000???????????000?????1010111; + localparam [31:0] VWREDSUM_VS = 32'b110001???????????000?????1010111; + localparam [31:0] VDOTU_VV = 32'b111000???????????000?????1010111; + localparam [31:0] VDOT_VV = 32'b111001???????????000?????1010111; + localparam [31:0] VQMACCU_VV = 32'b111100???????????000?????1010111; + localparam [31:0] VQMACC_VV = 32'b111101???????????000?????1010111; + localparam [31:0] VQMACCSU_VV = 32'b111111???????????000?????1010111; + localparam [31:0] VADD_VI = 32'b000000???????????011?????1010111; + localparam [31:0] VRSUB_VI = 32'b000011???????????011?????1010111; + localparam [31:0] VAND_VI = 32'b001001???????????011?????1010111; + localparam [31:0] VOR_VI = 32'b001010???????????011?????1010111; + localparam [31:0] VXOR_VI = 32'b001011???????????011?????1010111; + localparam [31:0] VRGATHER_VI = 32'b001100???????????011?????1010111; + localparam [31:0] VSLIDEUP_VI = 32'b001110???????????011?????1010111; + localparam [31:0] VSLIDEDOWN_VI = 32'b001111???????????011?????1010111; + localparam [31:0] VADC_VIM = 32'b0100000??????????011?????1010111; + localparam [31:0] VMADC_VIM = 32'b010001???????????011?????1010111; + localparam [31:0] VMERGE_VIM = 32'b0101110??????????011?????1010111; + localparam [31:0] VMV_V_I = 
32'b010111100000?????011?????1010111; + localparam [31:0] VMSEQ_VI = 32'b011000???????????011?????1010111; + localparam [31:0] VMSNE_VI = 32'b011001???????????011?????1010111; + localparam [31:0] VMSLEU_VI = 32'b011100???????????011?????1010111; + localparam [31:0] VMSLE_VI = 32'b011101???????????011?????1010111; + localparam [31:0] VMSGTU_VI = 32'b011110???????????011?????1010111; + localparam [31:0] VMSGT_VI = 32'b011111???????????011?????1010111; + localparam [31:0] VSADDU_VI = 32'b100000???????????011?????1010111; + localparam [31:0] VSADD_VI = 32'b100001???????????011?????1010111; + localparam [31:0] VSLL_VI = 32'b100101???????????011?????1010111; + localparam [31:0] VMV1R_V = 32'b1001111?????00000011?????1010111; + localparam [31:0] VMV2R_V = 32'b1001111?????00001011?????1010111; + localparam [31:0] VMV4R_V = 32'b1001111?????00011011?????1010111; + localparam [31:0] VMV8R_V = 32'b1001111?????00111011?????1010111; + localparam [31:0] VSRL_VI = 32'b101000???????????011?????1010111; + localparam [31:0] VSRA_VI = 32'b101001???????????011?????1010111; + localparam [31:0] VSSRL_VI = 32'b101010???????????011?????1010111; + localparam [31:0] VSSRA_VI = 32'b101011???????????011?????1010111; + localparam [31:0] VNSRL_WI = 32'b101100???????????011?????1010111; + localparam [31:0] VNSRA_WI = 32'b101101???????????011?????1010111; + localparam [31:0] VNCLIPU_WI = 32'b101110???????????011?????1010111; + localparam [31:0] VNCLIP_WI = 32'b101111???????????011?????1010111; + localparam [31:0] VREDSUM_VS = 32'b000000???????????010?????1010111; + localparam [31:0] VREDAND_VS = 32'b000001???????????010?????1010111; + localparam [31:0] VREDOR_VS = 32'b000010???????????010?????1010111; + localparam [31:0] VREDXOR_VS = 32'b000011???????????010?????1010111; + localparam [31:0] VREDMINU_VS = 32'b000100???????????010?????1010111; + localparam [31:0] VREDMIN_VS = 32'b000101???????????010?????1010111; + localparam [31:0] VREDMAXU_VS = 32'b000110???????????010?????1010111; + localparam 
[31:0] VREDMAX_VS = 32'b000111???????????010?????1010111; + localparam [31:0] VAADDU_VV = 32'b001000???????????010?????1010111; + localparam [31:0] VAADD_VV = 32'b001001???????????010?????1010111; + localparam [31:0] VASUBU_VV = 32'b001010???????????010?????1010111; + localparam [31:0] VASUB_VV = 32'b001011???????????010?????1010111; + localparam [31:0] VMV_X_S = 32'b0100001?????00000010?????1010111; + localparam [31:0] VZEXT_VF8 = 32'b010010??????00010010?????1010111; + localparam [31:0] VSEXT_VF8 = 32'b010010??????00011010?????1010111; + localparam [31:0] VZEXT_VF4 = 32'b010010??????00100010?????1010111; + localparam [31:0] VSEXT_VF4 = 32'b010010??????00101010?????1010111; + localparam [31:0] VZEXT_VF2 = 32'b010010??????00110010?????1010111; + localparam [31:0] VSEXT_VF2 = 32'b010010??????00111010?????1010111; + localparam [31:0] VCOMPRESS_VM = 32'b0101111??????????010?????1010111; + localparam [31:0] VMANDNOT_MM = 32'b011000???????????010?????1010111; + localparam [31:0] VMAND_MM = 32'b011001???????????010?????1010111; + localparam [31:0] VMOR_MM = 32'b011010???????????010?????1010111; + localparam [31:0] VMXOR_MM = 32'b011011???????????010?????1010111; + localparam [31:0] VMORNOT_MM = 32'b011100???????????010?????1010111; + localparam [31:0] VMNAND_MM = 32'b011101???????????010?????1010111; + localparam [31:0] VMNOR_MM = 32'b011110???????????010?????1010111; + localparam [31:0] VMXNOR_MM = 32'b011111???????????010?????1010111; + localparam [31:0] VMSBF_M = 32'b010100??????00001010?????1010111; + localparam [31:0] VMSOF_M = 32'b010100??????00010010?????1010111; + localparam [31:0] VMSIF_M = 32'b010100??????00011010?????1010111; + localparam [31:0] VIOTA_M = 32'b010100??????10000010?????1010111; + localparam [31:0] VID_V = 32'b010100?0000010001010?????1010111; + localparam [31:0] VPOPC_M = 32'b010000??????10000010?????1010111; + localparam [31:0] VFIRST_M = 32'b010000??????10001010?????1010111; + localparam [31:0] VDIVU_VV = 32'b100000???????????010?????1010111; 
+ localparam [31:0] VDIV_VV = 32'b100001???????????010?????1010111; + localparam [31:0] VREMU_VV = 32'b100010???????????010?????1010111; + localparam [31:0] VREM_VV = 32'b100011???????????010?????1010111; + localparam [31:0] VMULHU_VV = 32'b100100???????????010?????1010111; + localparam [31:0] VMUL_VV = 32'b100101???????????010?????1010111; + localparam [31:0] VMULHSU_VV = 32'b100110???????????010?????1010111; + localparam [31:0] VMULH_VV = 32'b100111???????????010?????1010111; + localparam [31:0] VMADD_VV = 32'b101001???????????010?????1010111; + localparam [31:0] VNMSUB_VV = 32'b101011???????????010?????1010111; + localparam [31:0] VMACC_VV = 32'b101101???????????010?????1010111; + localparam [31:0] VNMSAC_VV = 32'b101111???????????010?????1010111; + localparam [31:0] VWADDU_VV = 32'b110000???????????010?????1010111; + localparam [31:0] VWADD_VV = 32'b110001???????????010?????1010111; + localparam [31:0] VWSUBU_VV = 32'b110010???????????010?????1010111; + localparam [31:0] VWSUB_VV = 32'b110011???????????010?????1010111; + localparam [31:0] VWADDU_WV = 32'b110100???????????010?????1010111; + localparam [31:0] VWADD_WV = 32'b110101???????????010?????1010111; + localparam [31:0] VWSUBU_WV = 32'b110110???????????010?????1010111; + localparam [31:0] VWSUB_WV = 32'b110111???????????010?????1010111; + localparam [31:0] VWMULU_VV = 32'b111000???????????010?????1010111; + localparam [31:0] VWMULSU_VV = 32'b111010???????????010?????1010111; + localparam [31:0] VWMUL_VV = 32'b111011???????????010?????1010111; + localparam [31:0] VWMACCU_VV = 32'b111100???????????010?????1010111; + localparam [31:0] VWMACC_VV = 32'b111101???????????010?????1010111; + localparam [31:0] VWMACCSU_VV = 32'b111111???????????010?????1010111; + localparam [31:0] VAADDU_VX = 32'b001000???????????110?????1010111; + localparam [31:0] VAADD_VX = 32'b001001???????????110?????1010111; + localparam [31:0] VASUBU_VX = 32'b001010???????????110?????1010111; + localparam [31:0] VASUB_VX = 
32'b001011???????????110?????1010111; + localparam [31:0] VMV_S_X = 32'b010000100000?????110?????1010111; + localparam [31:0] VSLIDE1UP_VX = 32'b001110???????????110?????1010111; + localparam [31:0] VSLIDE1DOWN_VX = 32'b001111???????????110?????1010111; + localparam [31:0] VDIVU_VX = 32'b100000???????????110?????1010111; + localparam [31:0] VDIV_VX = 32'b100001???????????110?????1010111; + localparam [31:0] VREMU_VX = 32'b100010???????????110?????1010111; + localparam [31:0] VREM_VX = 32'b100011???????????110?????1010111; + localparam [31:0] VMULHU_VX = 32'b100100???????????110?????1010111; + localparam [31:0] VMUL_VX = 32'b100101???????????110?????1010111; + localparam [31:0] VMULHSU_VX = 32'b100110???????????110?????1010111; + localparam [31:0] VMULH_VX = 32'b100111???????????110?????1010111; + localparam [31:0] VMADD_VX = 32'b101001???????????110?????1010111; + localparam [31:0] VNMSUB_VX = 32'b101011???????????110?????1010111; + localparam [31:0] VMACC_VX = 32'b101101???????????110?????1010111; + localparam [31:0] VNMSAC_VX = 32'b101111???????????110?????1010111; + localparam [31:0] VWADDU_VX = 32'b110000???????????110?????1010111; + localparam [31:0] VWADD_VX = 32'b110001???????????110?????1010111; + localparam [31:0] VWSUBU_VX = 32'b110010???????????110?????1010111; + localparam [31:0] VWSUB_VX = 32'b110011???????????110?????1010111; + localparam [31:0] VWADDU_WX = 32'b110100???????????110?????1010111; + localparam [31:0] VWADD_WX = 32'b110101???????????110?????1010111; + localparam [31:0] VWSUBU_WX = 32'b110110???????????110?????1010111; + localparam [31:0] VWSUB_WX = 32'b110111???????????110?????1010111; + localparam [31:0] VWMULU_VX = 32'b111000???????????110?????1010111; + localparam [31:0] VWMULSU_VX = 32'b111010???????????110?????1010111; + localparam [31:0] VWMUL_VX = 32'b111011???????????110?????1010111; + localparam [31:0] VWMACCU_VX = 32'b111100???????????110?????1010111; + localparam [31:0] VWMACC_VX = 32'b111101???????????110?????1010111; + 
localparam [31:0] VWMACCUS_VX = 32'b111110???????????110?????1010111; + localparam [31:0] VWMACCSU_VX = 32'b111111???????????110?????1010111; + localparam [31:0] VAMOSWAPEI8_V = 32'b00001????????????000?????0101111; + localparam [31:0] VAMOADDEI8_V = 32'b00000????????????000?????0101111; + localparam [31:0] VAMOXOREI8_V = 32'b00100????????????000?????0101111; + localparam [31:0] VAMOANDEI8_V = 32'b01100????????????000?????0101111; + localparam [31:0] VAMOOREI8_V = 32'b01000????????????000?????0101111; + localparam [31:0] VAMOMINEI8_V = 32'b10000????????????000?????0101111; + localparam [31:0] VAMOMAXEI8_V = 32'b10100????????????000?????0101111; + localparam [31:0] VAMOMINUEI8_V = 32'b11000????????????000?????0101111; + localparam [31:0] VAMOMAXUEI8_V = 32'b11100????????????000?????0101111; + localparam [31:0] VAMOSWAPEI16_V = 32'b00001????????????101?????0101111; + localparam [31:0] VAMOADDEI16_V = 32'b00000????????????101?????0101111; + localparam [31:0] VAMOXOREI16_V = 32'b00100????????????101?????0101111; + localparam [31:0] VAMOANDEI16_V = 32'b01100????????????101?????0101111; + localparam [31:0] VAMOOREI16_V = 32'b01000????????????101?????0101111; + localparam [31:0] VAMOMINEI16_V = 32'b10000????????????101?????0101111; + localparam [31:0] VAMOMAXEI16_V = 32'b10100????????????101?????0101111; + localparam [31:0] VAMOMINUEI16_V = 32'b11000????????????101?????0101111; + localparam [31:0] VAMOMAXUEI16_V = 32'b11100????????????101?????0101111; + localparam [31:0] VAMOSWAPEI32_V = 32'b00001????????????110?????0101111; + localparam [31:0] VAMOADDEI32_V = 32'b00000????????????110?????0101111; + localparam [31:0] VAMOXOREI32_V = 32'b00100????????????110?????0101111; + localparam [31:0] VAMOANDEI32_V = 32'b01100????????????110?????0101111; + localparam [31:0] VAMOOREI32_V = 32'b01000????????????110?????0101111; + localparam [31:0] VAMOMINEI32_V = 32'b10000????????????110?????0101111; + localparam [31:0] VAMOMAXEI32_V = 32'b10100????????????110?????0101111; + localparam 
[31:0] VAMOMINUEI32_V = 32'b11000????????????110?????0101111; + localparam [31:0] VAMOMAXUEI32_V = 32'b11100????????????110?????0101111; + localparam [31:0] VAMOSWAPEI64_V = 32'b00001????????????111?????0101111; + localparam [31:0] VAMOADDEI64_V = 32'b00000????????????111?????0101111; + localparam [31:0] VAMOXOREI64_V = 32'b00100????????????111?????0101111; + localparam [31:0] VAMOANDEI64_V = 32'b01100????????????111?????0101111; + localparam [31:0] VAMOOREI64_V = 32'b01000????????????111?????0101111; + localparam [31:0] VAMOMINEI64_V = 32'b10000????????????111?????0101111; + localparam [31:0] VAMOMAXEI64_V = 32'b10100????????????111?????0101111; + localparam [31:0] VAMOMINUEI64_V = 32'b11000????????????111?????0101111; + localparam [31:0] VAMOMAXUEI64_V = 32'b11100????????????111?????0101111; localparam [31:0] VMVNFR_V = 32'b1001111??????????011?????1010111; localparam [31:0] VL1R_V = 32'b000000101000?????000?????0000111; localparam [31:0] VL2R_V = 32'b000001101000?????101?????0000111; diff --git a/toolchain/riscv-opcodes/opcodes-rvv b/toolchain/riscv-opcodes/opcodes-rvv index d34cdd81c..613bd5be7 100644 --- a/toolchain/riscv-opcodes/opcodes-rvv +++ b/toolchain/riscv-opcodes/opcodes-rvv @@ -8,7 +8,7 @@ # configuration setting # https://github.com/riscv/riscv-v-spec/blob/master/vcfg-format.adoc -vsetvli 31=0 zimm11 rs1 14..12=0x7 rd 6..0=0x57 +@vsetvli 31=0 zimm11 rs1 14..12=0x7 rd 6..0=0x57 vsetvl 31=1 30..25=0x0 rs2 rs1 14..12=0x7 rd 6..0=0x57 # @@ -118,20 +118,20 @@ vs8r.v 31..29=7 28=0 27..26=0 25=1 24..20=0x08 rs1 14..12=0x0 vs3 6..0=0 # Vector Floating-Point Instructions # https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#14-vector-floating-point-instructions # OPFVF -vfadd.vf 31..26=0x00 vm vs2 rs1 14..12=0x5 vd 6..0=0x57 -vfsub.vf 31..26=0x02 vm vs2 rs1 14..12=0x5 vd 6..0=0x57 -vfmin.vf 31..26=0x04 vm vs2 rs1 14..12=0x5 vd 6..0=0x57 -vfmax.vf 31..26=0x06 vm vs2 rs1 14..12=0x5 vd 6..0=0x57 -vfsgnj.vf 31..26=0x08 vm vs2 rs1 14..12=0x5 vd 6..0=0x57 
-vfsgnjn.vf 31..26=0x09 vm vs2 rs1 14..12=0x5 vd 6..0=0x57 -vfsgnjx.vf 31..26=0x0a vm vs2 rs1 14..12=0x5 vd 6..0=0x57 -vfslide1up.vf 31..26=0x0e vm vs2 rs1 14..12=0x5 vd 6..0=0x57 +@vfadd.vf 31..26=0x00 vm vs2 rs1 14..12=0x5 vd 6..0=0x57 +@vfsub.vf 31..26=0x02 vm vs2 rs1 14..12=0x5 vd 6..0=0x57 +@vfmin.vf 31..26=0x04 vm vs2 rs1 14..12=0x5 vd 6..0=0x57 +@vfmax.vf 31..26=0x06 vm vs2 rs1 14..12=0x5 vd 6..0=0x57 +@vfsgnj.vf 31..26=0x08 vm vs2 rs1 14..12=0x5 vd 6..0=0x57 +@vfsgnjn.vf 31..26=0x09 vm vs2 rs1 14..12=0x5 vd 6..0=0x57 +@vfsgnjx.vf 31..26=0x0a vm vs2 rs1 14..12=0x5 vd 6..0=0x57 +@vfslide1up.vf 31..26=0x0e vm vs2 rs1 14..12=0x5 vd 6..0=0x57 vfslide1down.vf 31..26=0x0f vm vs2 rs1 14..12=0x5 vd 6..0=0x57 vfmv.s.f 31..26=0x10 25=1 24..20=0 rs1 14..12=0x5 vd 6..0=0x57 vfmerge.vfm 31..26=0x17 25=0 vs2 rs1 14..12=0x5 vd 6..0=0x57 vfmv.v.f 31..26=0x17 25=1 24..20=0 rs1 14..12=0x5 vd 6..0=0x57 -vmfeq.vf 31..26=0x18 vm vs2 rs1 14..12=0x5 vd 6..0=0x57 +@vmfeq.vf 31..26=0x18 vm vs2 rs1 14..12=0x5 vd 6..0=0x57 vmfle.vf 31..26=0x19 vm vs2 rs1 14..12=0x5 vd 6..0=0x57 vmflt.vf 31..26=0x1b vm vs2 rs1 14..12=0x5 vd 6..0=0x57 vmfne.vf 31..26=0x1c vm vs2 rs1 14..12=0x5 vd 6..0=0x57 @@ -162,23 +162,23 @@ vfwmsac.vf 31..26=0x3e vm vs2 rs1 14..12=0x5 vd 6..0=0x57 vfwnmsac.vf 31..26=0x3f vm vs2 rs1 14..12=0x5 vd 6..0=0x57 # OPFVV -vfadd.vv 31..26=0x00 vm vs2 vs1 14..12=0x1 vd 6..0=0x57 +@vfadd.vv 31..26=0x00 vm vs2 vs1 14..12=0x1 vd 6..0=0x57 vfredsum.vs 31..26=0x01 vm vs2 vs1 14..12=0x1 vd 6..0=0x57 -vfsub.vv 31..26=0x02 vm vs2 vs1 14..12=0x1 vd 6..0=0x57 +@vfsub.vv 31..26=0x02 vm vs2 vs1 14..12=0x1 vd 6..0=0x57 vfredosum.vs 31..26=0x03 vm vs2 vs1 14..12=0x1 vd 6..0=0x57 -vfmin.vv 31..26=0x04 vm vs2 vs1 14..12=0x1 vd 6..0=0x57 +@vfmin.vv 31..26=0x04 vm vs2 vs1 14..12=0x1 vd 6..0=0x57 vfredmin.vs 31..26=0x05 vm vs2 vs1 14..12=0x1 vd 6..0=0x57 -vfmax.vv 31..26=0x06 vm vs2 vs1 14..12=0x1 vd 6..0=0x57 +@vfmax.vv 31..26=0x06 vm vs2 vs1 14..12=0x1 vd 6..0=0x57 vfredmax.vs 31..26=0x07 vm 
vs2 vs1 14..12=0x1 vd 6..0=0x57 -vfsgnj.vv 31..26=0x08 vm vs2 vs1 14..12=0x1 vd 6..0=0x57 +@vfsgnj.vv 31..26=0x08 vm vs2 vs1 14..12=0x1 vd 6..0=0x57 vfsgnjn.vv 31..26=0x09 vm vs2 vs1 14..12=0x1 vd 6..0=0x57 -vfsgnjx.vv 31..26=0x0a vm vs2 vs1 14..12=0x1 vd 6..0=0x57 +@vfsgnjx.vv 31..26=0x0a vm vs2 vs1 14..12=0x1 vd 6..0=0x57 vfmv.f.s 31..26=0x10 25=1 vs2 19..15=0 14..12=0x1 rd 6..0=0x57 -vmfeq.vv 31..26=0x18 vm vs2 vs1 14..12=0x1 vd 6..0=0x57 +@vmfeq.vv 31..26=0x18 vm vs2 vs1 14..12=0x1 vd 6..0=0x57 vmfle.vv 31..26=0x19 vm vs2 vs1 14..12=0x1 vd 6..0=0x57 vmflt.vv 31..26=0x1b vm vs2 vs1 14..12=0x1 vd 6..0=0x57 -vmfne.vv 31..26=0x1c vm vs2 vs1 14..12=0x1 vd 6..0=0x57 +@vmfne.vv 31..26=0x1c vm vs2 vs1 14..12=0x1 vd 6..0=0x57 vfdiv.vv 31..26=0x20 vm vs2 vs1 14..12=0x1 vd 6..0=0x57 vfmul.vv 31..26=0x24 vm vs2 vs1 14..12=0x1 vd 6..0=0x57 @@ -191,29 +191,29 @@ vfnmacc.vv 31..26=0x2d vm vs2 vs1 14..12=0x1 vd 6..0=0x57 vfmsac.vv 31..26=0x2e vm vs2 vs1 14..12=0x1 vd 6..0=0x57 vfnmsac.vv 31..26=0x2f vm vs2 vs1 14..12=0x1 vd 6..0=0x57 -vfcvt.xu.f.v 31..26=0x12 vm vs2 19..15=0x00 14..12=0x1 vd 6..0=0x57 -vfcvt.x.f.v 31..26=0x12 vm vs2 19..15=0x01 14..12=0x1 vd 6..0=0x57 -vfcvt.f.xu.v 31..26=0x12 vm vs2 19..15=0x02 14..12=0x1 vd 6..0=0x57 -vfcvt.f.x.v 31..26=0x12 vm vs2 19..15=0x03 14..12=0x1 vd 6..0=0x57 -vfcvt.rtz.xu.f.v 31..26=0x12 vm vs2 19..15=0x06 14..12=0x1 vd 6..0=0x57 -vfcvt.rtz.x.f.v 31..26=0x12 vm vs2 19..15=0x07 14..12=0x1 vd 6..0=0x57 - -vfwcvt.xu.f.v 31..26=0x12 vm vs2 19..15=0x08 14..12=0x1 vd 6..0=0x57 -vfwcvt.x.f.v 31..26=0x12 vm vs2 19..15=0x09 14..12=0x1 vd 6..0=0x57 -vfwcvt.f.xu.v 31..26=0x12 vm vs2 19..15=0x0A 14..12=0x1 vd 6..0=0x57 -vfwcvt.f.x.v 31..26=0x12 vm vs2 19..15=0x0B 14..12=0x1 vd 6..0=0x57 -vfwcvt.f.f.v 31..26=0x12 vm vs2 19..15=0x0C 14..12=0x1 vd 6..0=0x57 -vfwcvt.rtz.xu.f.v 31..26=0x12 vm vs2 19..15=0x0E 14..12=0x1 vd 6..0=0x57 -vfwcvt.rtz.x.f.v 31..26=0x12 vm vs2 19..15=0x0F 14..12=0x1 vd 6..0=0x57 - -vfncvt.xu.f.w 31..26=0x12 vm vs2 
19..15=0x10 14..12=0x1 vd 6..0=0x57 -vfncvt.x.f.w 31..26=0x12 vm vs2 19..15=0x11 14..12=0x1 vd 6..0=0x57 -vfncvt.f.xu.w 31..26=0x12 vm vs2 19..15=0x12 14..12=0x1 vd 6..0=0x57 -vfncvt.f.x.w 31..26=0x12 vm vs2 19..15=0x13 14..12=0x1 vd 6..0=0x57 -vfncvt.f.f.w 31..26=0x12 vm vs2 19..15=0x14 14..12=0x1 vd 6..0=0x57 -vfncvt.rod.f.f.w 31..26=0x12 vm vs2 19..15=0x15 14..12=0x1 vd 6..0=0x57 -vfncvt.rtz.xu.f.w 31..26=0x12 vm vs2 19..15=0x16 14..12=0x1 vd 6..0=0x57 -vfncvt.rtz.x.f.w 31..26=0x12 vm vs2 19..15=0x17 14..12=0x1 vd 6..0=0x57 +@vfcvt.xu.f.v 31..26=0x12 vm vs2 19..15=0x00 14..12=0x1 vd 6..0=0x57 +@vfcvt.x.f.v 31..26=0x12 vm vs2 19..15=0x01 14..12=0x1 vd 6..0=0x57 +@vfcvt.f.xu.v 31..26=0x12 vm vs2 19..15=0x02 14..12=0x1 vd 6..0=0x57 +@vfcvt.f.x.v 31..26=0x12 vm vs2 19..15=0x03 14..12=0x1 vd 6..0=0x57 +@vfcvt.rtz.xu.f.v 31..26=0x12 vm vs2 19..15=0x06 14..12=0x1 vd 6..0=0x57 +@vfcvt.rtz.x.f.v 31..26=0x12 vm vs2 19..15=0x07 14..12=0x1 vd 6..0=0x57 + +@vfwcvt.xu.f.v 31..26=0x12 vm vs2 19..15=0x08 14..12=0x1 vd 6..0=0x57 +@vfwcvt.x.f.v 31..26=0x12 vm vs2 19..15=0x09 14..12=0x1 vd 6..0=0x57 +@vfwcvt.f.xu.v 31..26=0x12 vm vs2 19..15=0x0A 14..12=0x1 vd 6..0=0x57 +@vfwcvt.f.x.v 31..26=0x12 vm vs2 19..15=0x0B 14..12=0x1 vd 6..0=0x57 +@vfwcvt.f.f.v 31..26=0x12 vm vs2 19..15=0x0C 14..12=0x1 vd 6..0=0x57 +@vfwcvt.rtz.xu.f.v 31..26=0x12 vm vs2 19..15=0x0E 14..12=0x1 vd 6..0=0x57 +@vfwcvt.rtz.x.f.v 31..26=0x12 vm vs2 19..15=0x0F 14..12=0x1 vd 6..0=0x57 + +@vfncvt.xu.f.w 31..26=0x12 vm vs2 19..15=0x10 14..12=0x1 vd 6..0=0x57 +@vfncvt.x.f.w 31..26=0x12 vm vs2 19..15=0x11 14..12=0x1 vd 6..0=0x57 +@vfncvt.f.xu.w 31..26=0x12 vm vs2 19..15=0x12 14..12=0x1 vd 6..0=0x57 +@vfncvt.f.x.w 31..26=0x12 vm vs2 19..15=0x13 14..12=0x1 vd 6..0=0x57 +@vfncvt.f.f.w 31..26=0x12 vm vs2 19..15=0x14 14..12=0x1 vd 6..0=0x57 +@vfncvt.rod.f.f.w 31..26=0x12 vm vs2 19..15=0x15 14..12=0x1 vd 6..0=0x57 +@vfncvt.rtz.xu.f.w 31..26=0x12 vm vs2 19..15=0x16 14..12=0x1 vd 6..0=0x57 +@vfncvt.rtz.x.f.w 31..26=0x12 vm 
vs2 19..15=0x17 14..12=0x1 vd 6..0=0x57 vfsqrt.v 31..26=0x13 vm vs2 19..15=0x00 14..12=0x1 vd 6..0=0x57 vfrsqrte7.v 31..26=0x13 vm vs2 19..15=0x04 14..12=0x1 vd 6..0=0x57 @@ -234,29 +234,29 @@ vfwmsac.vv 31..26=0x3e vm vs2 vs1 14..12=0x1 vd 6..0=0x57 vfwnmsac.vv 31..26=0x3f vm vs2 vs1 14..12=0x1 vd 6..0=0x57 # OPIVX -vadd.vx 31..26=0x00 vm vs2 rs1 14..12=0x4 vd 6..0=0x57 -vsub.vx 31..26=0x02 vm vs2 rs1 14..12=0x4 vd 6..0=0x57 +@vadd.vx 31..26=0x00 vm vs2 rs1 14..12=0x4 vd 6..0=0x57 +@vsub.vx 31..26=0x02 vm vs2 rs1 14..12=0x4 vd 6..0=0x57 vrsub.vx 31..26=0x03 vm vs2 rs1 14..12=0x4 vd 6..0=0x57 -vminu.vx 31..26=0x04 vm vs2 rs1 14..12=0x4 vd 6..0=0x57 +@vminu.vx 31..26=0x04 vm vs2 rs1 14..12=0x4 vd 6..0=0x57 vmin.vx 31..26=0x05 vm vs2 rs1 14..12=0x4 vd 6..0=0x57 -vmaxu.vx 31..26=0x06 vm vs2 rs1 14..12=0x4 vd 6..0=0x57 +@vmaxu.vx 31..26=0x06 vm vs2 rs1 14..12=0x4 vd 6..0=0x57 vmax.vx 31..26=0x07 vm vs2 rs1 14..12=0x4 vd 6..0=0x57 vand.vx 31..26=0x09 vm vs2 rs1 14..12=0x4 vd 6..0=0x57 -vor.vx 31..26=0x0a vm vs2 rs1 14..12=0x4 vd 6..0=0x57 +@vor.vx 31..26=0x0a vm vs2 rs1 14..12=0x4 vd 6..0=0x57 vxor.vx 31..26=0x0b vm vs2 rs1 14..12=0x4 vd 6..0=0x57 -vrgather.vx 31..26=0x0c vm vs2 rs1 14..12=0x4 vd 6..0=0x57 -vslideup.vx 31..26=0x0e vm vs2 rs1 14..12=0x4 vd 6..0=0x57 +@vrgather.vx 31..26=0x0c vm vs2 rs1 14..12=0x4 vd 6..0=0x57 +@vslideup.vx 31..26=0x0e vm vs2 rs1 14..12=0x4 vd 6..0=0x57 vslidedown.vx 31..26=0x0f vm vs2 rs1 14..12=0x4 vd 6..0=0x57 -vadc.vxm 31..26=0x10 25=0 vs2 rs1 14..12=0x4 vd 6..0=0x57 +@vadc.vxm 31..26=0x10 25=0 vs2 rs1 14..12=0x4 vd 6..0=0x57 vmadc.vxm 31..26=0x11 vm vs2 rs1 14..12=0x4 vd 6..0=0x57 -vsbc.vxm 31..26=0x12 25=0 vs2 rs1 14..12=0x4 vd 6..0=0x57 +@vsbc.vxm 31..26=0x12 25=0 vs2 rs1 14..12=0x4 vd 6..0=0x57 vmsbc.vxm 31..26=0x13 vm vs2 rs1 14..12=0x4 vd 6..0=0x57 -vmerge.vxm 31..26=0x17 25=0 vs2 rs1 14..12=0x4 vd 6..0=0x57 -vmv.v.x 31..26=0x17 25=1 24..20=0 rs1 14..12=0x4 vd 6..0=0x57 -vmseq.vx 31..26=0x18 vm vs2 rs1 14..12=0x4 vd 6..0=0x57 
-vmsne.vx 31..26=0x19 vm vs2 rs1 14..12=0x4 vd 6..0=0x57 -vmsltu.vx 31..26=0x1a vm vs2 rs1 14..12=0x4 vd 6..0=0x57 +@vmerge.vxm 31..26=0x17 25=0 vs2 rs1 14..12=0x4 vd 6..0=0x57 +@vmv.v.x 31..26=0x17 25=1 24..20=0 rs1 14..12=0x4 vd 6..0=0x57 +@vmseq.vx 31..26=0x18 vm vs2 rs1 14..12=0x4 vd 6..0=0x57 +@vmsne.vx 31..26=0x19 vm vs2 rs1 14..12=0x4 vd 6..0=0x57 +@vmsltu.vx 31..26=0x1a vm vs2 rs1 14..12=0x4 vd 6..0=0x57 vmslt.vx 31..26=0x1b vm vs2 rs1 14..12=0x4 vd 6..0=0x57 vmsleu.vx 31..26=0x1c vm vs2 rs1 14..12=0x4 vd 6..0=0x57 vmsle.vx 31..26=0x1d vm vs2 rs1 14..12=0x4 vd 6..0=0x57 @@ -284,29 +284,29 @@ vqmaccus.vx 31..26=0x3e vm vs2 rs1 14..12=0x4 vd 6..0=0x57 vqmaccsu.vx 31..26=0x3f vm vs2 rs1 14..12=0x4 vd 6..0=0x57 # OPIVV -vadd.vv 31..26=0x00 vm vs2 vs1 14..12=0x0 vd 6..0=0x57 -vsub.vv 31..26=0x02 vm vs2 vs1 14..12=0x0 vd 6..0=0x57 -vminu.vv 31..26=0x04 vm vs2 vs1 14..12=0x0 vd 6..0=0x57 +@vadd.vv 31..26=0x00 vm vs2 vs1 14..12=0x0 vd 6..0=0x57 +@vsub.vv 31..26=0x02 vm vs2 vs1 14..12=0x0 vd 6..0=0x57 +@vminu.vv 31..26=0x04 vm vs2 vs1 14..12=0x0 vd 6..0=0x57 vmin.vv 31..26=0x05 vm vs2 vs1 14..12=0x0 vd 6..0=0x57 -vmaxu.vv 31..26=0x06 vm vs2 vs1 14..12=0x0 vd 6..0=0x57 +@vmaxu.vv 31..26=0x06 vm vs2 vs1 14..12=0x0 vd 6..0=0x57 vmax.vv 31..26=0x07 vm vs2 vs1 14..12=0x0 vd 6..0=0x57 vand.vv 31..26=0x09 vm vs2 vs1 14..12=0x0 vd 6..0=0x57 -vor.vv 31..26=0x0a vm vs2 vs1 14..12=0x0 vd 6..0=0x57 +@vor.vv 31..26=0x0a vm vs2 vs1 14..12=0x0 vd 6..0=0x57 vxor.vv 31..26=0x0b vm vs2 vs1 14..12=0x0 vd 6..0=0x57 -vrgather.vv 31..26=0x0c vm vs2 vs1 14..12=0x0 vd 6..0=0x57 -vrgatherei16.vv 31..26=0x0e vm vs2 vs1 14..12=0x0 vd 6..0=0x57 +@vrgather.vv 31..26=0x0c vm vs2 vs1 14..12=0x0 vd 6..0=0x57 +@vrgatherei16.vv 31..26=0x0e vm vs2 vs1 14..12=0x0 vd 6..0=0x57 -vadc.vvm 31..26=0x10 25=0 vs2 vs1 14..12=0x0 vd 6..0=0x57 +@vadc.vvm 31..26=0x10 25=0 vs2 vs1 14..12=0x0 vd 6..0=0x57 vmadc.vvm 31..26=0x11 vm vs2 vs1 14..12=0x0 vd 6..0=0x57 -vsbc.vvm 31..26=0x12 25=0 vs2 vs1 14..12=0x0 vd 
6..0=0x57 +@vsbc.vvm 31..26=0x12 25=0 vs2 vs1 14..12=0x0 vd 6..0=0x57 vmsbc.vvm 31..26=0x13 vm vs2 vs1 14..12=0x0 vd 6..0=0x57 vmerge.vvm 31..26=0x17 25=0 vs2 vs1 14..12=0x0 vd 6..0=0x57 vmv.v.v 31..26=0x17 25=1 24..20=0 vs1 14..12=0x0 vd 6..0=0x57 -vmseq.vv 31..26=0x18 vm vs2 vs1 14..12=0x0 vd 6..0=0x57 +@vmseq.vv 31..26=0x18 vm vs2 vs1 14..12=0x0 vd 6..0=0x57 vmsne.vv 31..26=0x19 vm vs2 vs1 14..12=0x0 vd 6..0=0x57 -vmsltu.vv 31..26=0x1a vm vs2 vs1 14..12=0x0 vd 6..0=0x57 +@vmsltu.vv 31..26=0x1a vm vs2 vs1 14..12=0x0 vd 6..0=0x57 vmslt.vv 31..26=0x1b vm vs2 vs1 14..12=0x0 vd 6..0=0x57 -vmsleu.vv 31..26=0x1c vm vs2 vs1 14..12=0x0 vd 6..0=0x57 +@vmsleu.vv 31..26=0x1c vm vs2 vs1 14..12=0x0 vd 6..0=0x57 vmsle.vv 31..26=0x1d vm vs2 vs1 14..12=0x0 vd 6..0=0x57 vsaddu.vv 31..26=0x20 vm vs2 vs1 14..12=0x0 vd 6..0=0x57 @@ -395,9 +395,9 @@ vsext.vf4 31..26=0x12 vm vs2 19..15=5 14..12=0x2 vd 6..0=0x57 vzext.vf2 31..26=0x12 vm vs2 19..15=6 14..12=0x2 vd 6..0=0x57 vsext.vf2 31..26=0x12 vm vs2 19..15=7 14..12=0x2 vd 6..0=0x57 -vcompress.vm 31..26=0x17 25=1 vs2 vs1 14..12=0x2 vd 6..0=0x57 +@vcompress.vm 31..26=0x17 25=1 vs2 vs1 14..12=0x2 vd 6..0=0x57 vmandnot.mm 31..26=0x18 vm vs2 vs1 14..12=0x2 vd 6..0=0x57 -vmand.mm 31..26=0x19 vm vs2 vs1 14..12=0x2 vd 6..0=0x57 +@vmand.mm 31..26=0x19 vm vs2 vs1 14..12=0x2 vd 6..0=0x57 vmor.mm 31..26=0x1a vm vs2 vs1 14..12=0x2 vd 6..0=0x57 vmxor.mm 31..26=0x1b vm vs2 vs1 14..12=0x2 vd 6..0=0x57 vmornot.mm 31..26=0x1c vm vs2 vs1 14..12=0x2 vd 6..0=0x57 @@ -442,20 +442,20 @@ vwmacc.vv 31..26=0x3d vm vs2 vs1 14..12=0x2 vd 6..0=0x57 vwmaccsu.vv 31..26=0x3f vm vs2 vs1 14..12=0x2 vd 6..0=0x57 # OPMVX -vaaddu.vx 31..26=0x08 vm vs2 rs1 14..12=0x6 vd 6..0=0x57 +@vaaddu.vx 31..26=0x08 vm vs2 rs1 14..12=0x6 vd 6..0=0x57 vaadd.vx 31..26=0x09 vm vs2 rs1 14..12=0x6 vd 6..0=0x57 -vasubu.vx 31..26=0x0a vm vs2 rs1 14..12=0x6 vd 6..0=0x57 +@vasubu.vx 31..26=0x0a vm vs2 rs1 14..12=0x6 vd 6..0=0x57 vasub.vx 31..26=0x0b vm vs2 rs1 14..12=0x6 vd 6..0=0x57 -vmv.s.x 
31..26=0x10 25=1 24..20=0 rs1 14..12=0x6 vd 6..0=0x57 -vslide1up.vx 31..26=0x0e vm vs2 rs1 14..12=0x6 vd 6..0=0x57 +@vmv.s.x 31..26=0x10 25=1 24..20=0 rs1 14..12=0x6 vd 6..0=0x57 +@vslide1up.vx 31..26=0x0e vm vs2 rs1 14..12=0x6 vd 6..0=0x57 vslide1down.vx 31..26=0x0f vm vs2 rs1 14..12=0x6 vd 6..0=0x57 vdivu.vx 31..26=0x20 vm vs2 rs1 14..12=0x6 vd 6..0=0x57 vdiv.vx 31..26=0x21 vm vs2 rs1 14..12=0x6 vd 6..0=0x57 vremu.vx 31..26=0x22 vm vs2 rs1 14..12=0x6 vd 6..0=0x57 vrem.vx 31..26=0x23 vm vs2 rs1 14..12=0x6 vd 6..0=0x57 -vmulhu.vx 31..26=0x24 vm vs2 rs1 14..12=0x6 vd 6..0=0x57 +@vmulhu.vx 31..26=0x24 vm vs2 rs1 14..12=0x6 vd 6..0=0x57 vmul.vx 31..26=0x25 vm vs2 rs1 14..12=0x6 vd 6..0=0x57 vmulhsu.vx 31..26=0x26 vm vs2 rs1 14..12=0x6 vd 6..0=0x57 vmulh.vx 31..26=0x27 vm vs2 rs1 14..12=0x6 vd 6..0=0x57 From 19dceab2ff7f1bb15e86b474f6de6860e9580136 Mon Sep 17 00:00:00 2001 From: Sergio Mazzola Date: Fri, 8 Jan 2021 12:00:42 +0100 Subject: [PATCH 20/65] [riscv-tests] Add basic tests for Xpulpv2 SIMD arithmetical ops Added instructions: SIMD add, sub, avg, min, max, shifts, logicals, abs, extract, insert --- .../isa/macros/scalar/test_macros.h | 103 ++++++++++++++++++ apps/riscv-tests/isa/rv32uxpulpimg/Makefrag | 14 +++ apps/riscv-tests/isa/rv32uxpulpimg/pv_abs.S | 52 +++++++++ apps/riscv-tests/isa/rv32uxpulpimg/pv_add.S | 74 +++++++++++++ apps/riscv-tests/isa/rv32uxpulpimg/pv_and.S | 74 +++++++++++++ apps/riscv-tests/isa/rv32uxpulpimg/pv_avg.S | 74 +++++++++++++ apps/riscv-tests/isa/rv32uxpulpimg/pv_avgu.S | 74 +++++++++++++ .../isa/rv32uxpulpimg/pv_extract.S | 65 +++++++++++ .../isa/rv32uxpulpimg/pv_extractu.S | 65 +++++++++++ .../riscv-tests/isa/rv32uxpulpimg/pv_insert.S | 87 +++++++++++++++ apps/riscv-tests/isa/rv32uxpulpimg/pv_max.S | 74 +++++++++++++ apps/riscv-tests/isa/rv32uxpulpimg/pv_maxu.S | 74 +++++++++++++ apps/riscv-tests/isa/rv32uxpulpimg/pv_min.S | 74 +++++++++++++ apps/riscv-tests/isa/rv32uxpulpimg/pv_minu.S | 74 +++++++++++++ 
apps/riscv-tests/isa/rv32uxpulpimg/pv_or.S | 74 +++++++++++++ apps/riscv-tests/isa/rv32uxpulpimg/pv_sll.S | 74 +++++++++++++ apps/riscv-tests/isa/rv32uxpulpimg/pv_sra.S | 74 +++++++++++++ apps/riscv-tests/isa/rv32uxpulpimg/pv_srl.S | 74 +++++++++++++ apps/riscv-tests/isa/rv32uxpulpimg/pv_sub.S | 74 +++++++++++++ apps/riscv-tests/isa/rv32uxpulpimg/pv_xor.S | 74 +++++++++++++ apps/riscv-tests/isa/snitch_isa.mk | 14 +++ 21 files changed, 1436 insertions(+) create mode 100644 apps/riscv-tests/isa/rv32uxpulpimg/pv_abs.S create mode 100644 apps/riscv-tests/isa/rv32uxpulpimg/pv_add.S create mode 100644 apps/riscv-tests/isa/rv32uxpulpimg/pv_and.S create mode 100644 apps/riscv-tests/isa/rv32uxpulpimg/pv_avg.S create mode 100644 apps/riscv-tests/isa/rv32uxpulpimg/pv_avgu.S create mode 100644 apps/riscv-tests/isa/rv32uxpulpimg/pv_extract.S create mode 100644 apps/riscv-tests/isa/rv32uxpulpimg/pv_extractu.S create mode 100644 apps/riscv-tests/isa/rv32uxpulpimg/pv_insert.S create mode 100644 apps/riscv-tests/isa/rv32uxpulpimg/pv_max.S create mode 100644 apps/riscv-tests/isa/rv32uxpulpimg/pv_maxu.S create mode 100644 apps/riscv-tests/isa/rv32uxpulpimg/pv_min.S create mode 100644 apps/riscv-tests/isa/rv32uxpulpimg/pv_minu.S create mode 100644 apps/riscv-tests/isa/rv32uxpulpimg/pv_or.S create mode 100644 apps/riscv-tests/isa/rv32uxpulpimg/pv_sll.S create mode 100644 apps/riscv-tests/isa/rv32uxpulpimg/pv_sra.S create mode 100644 apps/riscv-tests/isa/rv32uxpulpimg/pv_srl.S create mode 100644 apps/riscv-tests/isa/rv32uxpulpimg/pv_sub.S create mode 100644 apps/riscv-tests/isa/rv32uxpulpimg/pv_xor.S diff --git a/apps/riscv-tests/isa/macros/scalar/test_macros.h b/apps/riscv-tests/isa/macros/scalar/test_macros.h index 356ede1c3..f904ae67a 100644 --- a/apps/riscv-tests/isa/macros/scalar/test_macros.h +++ b/apps/riscv-tests/isa/macros/scalar/test_macros.h @@ -141,6 +141,109 @@ test_ ## testnum: \ inst x0, x1, ZEXT_UIMM5(imm); \ ) 
+#----------------------------------------------------------------------- +# Tests for Xpulpimg instructions with 6-bit unsigned immediate operand +#----------------------------------------------------------------------- + +#define ZEXT_UIMM6(x) ((x) & 0x3F) + +#define TEST_UIMM6_OP( testnum, inst, result, val1, imm ) \ + TEST_CASE( testnum, x14, result, \ + li x1, MASK_XLEN(val1); \ + inst x14, x1, ZEXT_UIMM6(imm); \ + ) + +#define TEST_UIMM6_SRC1_EQ_DEST( testnum, inst, result, val1, imm ) \ + TEST_CASE( testnum, x1, result, \ + li x1, MASK_XLEN(val1); \ + inst x1, x1, ZEXT_UIMM6(imm); \ + ) + +#define TEST_UIMM6_DEST_BYPASS( testnum, nop_cycles, inst, result, val1, imm ) \ + TEST_CASE( testnum, x6, result, \ + li x4, 0; \ +1: li x1, MASK_XLEN(val1); \ + inst x14, x1, ZEXT_UIMM6(imm); \ + TEST_INSERT_NOPS_ ## nop_cycles \ + addi x6, x14, 0; \ + addi x4, x4, 1; \ + li x5, 2; \ + bne x4, x5, 1b \ + ) + +#define TEST_UIMM6_SRC1_BYPASS( testnum, nop_cycles, inst, result, val1, imm ) \ + TEST_CASE( testnum, x14, result, \ + li x4, 0; \ +1: li x1, MASK_XLEN(val1); \ + TEST_INSERT_NOPS_ ## nop_cycles \ + inst x14, x1, ZEXT_UIMM6(imm); \ + addi x4, x4, 1; \ + li x5, 2; \ + bne x4, x5, 1b \ + ) + +#define TEST_UIMM6_ZEROSRC1( testnum, inst, result, imm ) \ + TEST_CASE( testnum, x1, result, \ + inst x1, x0, ZEXT_UIMM6(imm); \ + ) + +#define TEST_UIMM6_ZERODEST( testnum, inst, val1, imm ) \ + TEST_CASE( testnum, x0, 0, \ + li x1, MASK_XLEN(val1); \ + inst x0, x1, ZEXT_UIMM6(imm); \ + ) + +#----------------------------------------------------------------------- +# Tests for Xpulpimg instructions with 6-bit signed immediate operand +#----------------------------------------------------------------------- +#define SEXT_IMM6(x) ((x) | (-(((x) >> 5) & 1) << 5)) + +#define TEST_SIMM6_OP( testnum, inst, result, val1, imm ) \ + TEST_CASE( testnum, x14, result, \ + li x1, MASK_XLEN(val1); \ + inst x14, x1, SEXT_IMM6(imm); \ + ) + +#define TEST_SIMM6_SRC1_EQ_DEST( testnum, inst, 
result, val1, imm ) \ + TEST_CASE( testnum, x1, result, \ + li x1, MASK_XLEN(val1); \ + inst x1, x1, SEXT_IMM6(imm); \ + ) + +#define TEST_SIMM6_DEST_BYPASS( testnum, nop_cycles, inst, result, val1, imm ) \ + TEST_CASE( testnum, x6, result, \ + li x4, 0; \ +1: li x1, MASK_XLEN(val1); \ + inst x14, x1, SEXT_IMM6(imm); \ + TEST_INSERT_NOPS_ ## nop_cycles \ + addi x6, x14, 0; \ + addi x4, x4, 1; \ + li x5, 2; \ + bne x4, x5, 1b \ + ) + +#define TEST_SIMM6_SRC1_BYPASS( testnum, nop_cycles, inst, result, val1, imm ) \ + TEST_CASE( testnum, x14, result, \ + li x4, 0; \ +1: li x1, MASK_XLEN(val1); \ + TEST_INSERT_NOPS_ ## nop_cycles \ + inst x14, x1, SEXT_IMM6(imm); \ + addi x4, x4, 1; \ + li x5, 2; \ + bne x4, x5, 1b \ + ) + +#define TEST_SIMM6_ZEROSRC1( testnum, inst, result, imm ) \ + TEST_CASE( testnum, x1, result, \ + inst x1, x0, SEXT_IMM6(imm); \ + ) + +#define TEST_SIMM6_ZERODEST( testnum, inst, val1, imm ) \ + TEST_CASE( testnum, x0, 0, \ + li x1, MASK_XLEN(val1); \ + inst x0, x1, SEXT_IMM6(imm); \ + ) + #----------------------------------------------------------------------- # Tests for an instruction with register operands #----------------------------------------------------------------------- diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/Makefrag b/apps/riscv-tests/isa/rv32uxpulpimg/Makefrag index 471502857..f66419a9b 100644 --- a/apps/riscv-tests/isa/rv32uxpulpimg/Makefrag +++ b/apps/riscv-tests/isa/rv32uxpulpimg/Makefrag @@ -19,6 +19,20 @@ rv32uxpulpimg_sc_tests = \ p_clipr p_clipur \ p_beqimm p_bneimm \ p_mac p_msu \ + pv_add \ + pv_sub \ + pv_avg pv_avgu \ + pv_min pv_minu \ + pv_max pv_maxu \ + pv_srl \ + pv_sra \ + pv_sll \ + pv_or \ + pv_xor \ + pv_and \ + pv_abs \ + pv_extract pv_extractu \ + pv_insert \ rv32uxpulpimg_p_tests = $(addprefix rv32uxpulpimg-p-, $(rv32uxpulpimg_sc_tests)) rv32uxpulpimg_v_tests = $(addprefix rv32uxpulpimg-v-, $(rv32uxpulpimg_sc_tests)) diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/pv_abs.S 
b/apps/riscv-tests/isa/rv32uxpulpimg/pv_abs.S new file mode 100644 index 000000000..79b4eeecd --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/pv_abs.S @@ -0,0 +1,52 @@ +# See LICENSE for license details. + +#***************************************************************************** +# pv_abs.S +#----------------------------------------------------------------------------- +# +# Test pv.abs instructions. +# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Arithmetic tests + #------------------------------------------------------------- + + # pv.abs.h + TEST_R_OP( 2, pv.abs.h, 0x10081554, 0xEFF8EAAC ); + TEST_R_OP( 3, pv.abs.h, 0x369800DA, 0x3698FF26 ); + TEST_R_OP( 4, pv.abs.h, 0x7C127B74, 0x7C12848C ); + # pv.abs.b + TEST_R_OP( 5, pv.abs.b, 0x3A444335, 0x3ABC4335 ); + TEST_R_OP( 6, pv.abs.b, 0x2B743B7C, 0x2B8C3B7C ); + TEST_R_OP( 7, pv.abs.b, 0x70362066, 0x70362066 ); + + #------------------------------------------------------------- + # Source/Destination tests + #------------------------------------------------------------- + + # TODO(smazzola): + # TEST_R_SRC1_EQ_DEST + + #------------------------------------------------------------- + # Bypassing tests + #------------------------------------------------------------- + + # TODO(smazzola): + # TEST_R_DEST_BYPASS + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/pv_add.S b/apps/riscv-tests/isa/rv32uxpulpimg/pv_add.S new file mode 100644 index 000000000..0287cc57c --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/pv_add.S @@ -0,0 +1,74 @@ +# See LICENSE for license details. + +#***************************************************************************** +# pv_add.S +#----------------------------------------------------------------------------- +# +# Test pv.add instructions. 
+# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Arithmetic tests + #------------------------------------------------------------- + + # pv.add.h + TEST_RR_OP( 2, pv.add.h, 0xC1ACF68C, 0xF014169D, 0xD198DFEF ); + TEST_RR_OP( 3, pv.add.h, 0x795F026B, 0x7ABB8DD7, 0xFEA47494 ); + TEST_RR_OP( 4, pv.add.h, 0x8ABE2A6C, 0xAA4F3E71, 0xE06FEBFB ); + # pv.add.sc.h + TEST_RR_OP( 5, pv.add.sc.h, 0x603D0BE0, 0xFC7EA821, 0x94BF63BF ); + TEST_RR_OP( 6, pv.add.sc.h, 0x4A3F89DF, 0x8E28CDC8, 0x3230BC17 ); + TEST_RR_OP( 7, pv.add.sc.h, 0x2034B556, 0x506CE58E, 0x4436CFC8 ); + # pv.add.sci.h + TEST_SIMM6_OP( 8, pv.add.sci.h, 0x77371C0E, 0x772C1C03, 11 ); + TEST_SIMM6_OP( 9, pv.add.sci.h, 0xD1BA3380, 0xD1AF3375, 11 ); + TEST_SIMM6_OP( 10, pv.add.sci.h, 0x6E73CC2D, 0x6E68CC22, 11 ); + # pv.add.b + TEST_RR_OP( 11, pv.add.b, 0x8A1518C0, 0x3E50B3BE, 0x4CC56502 ); + TEST_RR_OP( 12, pv.add.b, 0xE8E21596, 0x7ECB21CB, 0x6A17F4CB ); + TEST_RR_OP( 13, pv.add.b, 0xD48653D2, 0x2E741840, 0xA6123B92 ); + # pv.add.sc.b + TEST_RR_OP( 14, pv.add.sc.b, 0xC96CF4FF, 0x52F57D88, 0x86A5D077 ); + TEST_RR_OP( 15, pv.add.sc.b, 0x877D91A1, 0x2F253949, 0x694FD558 ); + TEST_RR_OP( 16, pv.add.sc.b, 0xC6646B7D, 0x28C6CDDF, 0x1E09659E ); + # pv.add.sci.b + TEST_SIMM6_OP( 17, pv.add.sci.b, 0x3820508C, 0x2D154581, 11 ); + TEST_SIMM6_OP( 18, pv.add.sci.b, 0xBF98380C, 0xB48D2D01, 11 ); + TEST_SIMM6_OP( 19, pv.add.sci.b, 0x90AAEB98, 0x859FE08D, 11 ); + + #------------------------------------------------------------- + # Source/Destination tests + #------------------------------------------------------------- + + # TODO(smazzola): + # for register-register instructions TEST_RR_SRC1_EQ_DEST, + # TEST_RR_SRC2_EQ_DEST, TEST_RR_SRC12_EQ_DEST + # for register-simm6 instructions TEST_SIMM6_SRC1_EQ_DEST + + #------------------------------------------------------------- + # Bypassing tests + 
#------------------------------------------------------------- + + # TODO(smazzola): + # for register-register instructions TEST_RR_DEST_BYPASS, + # TEST_RR_SRC12_BYPASS, TEST_RR_SRC21_BYPASS, TEST_RR_ZEROSRC1, + # TEST_RR_ZEROSRC2, TEST_RR_ZEROSRC12, TEST_RR_ZERODEST + # for register-simm6 instructions TEST_SIMM6_DEST_BYPASS, + # TEST_SIMM6_SRC1_BYPASS, TEST_SIMM6_ZEROSRC1, TEST_SIMM6_ZERODEST + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/pv_and.S b/apps/riscv-tests/isa/rv32uxpulpimg/pv_and.S new file mode 100644 index 000000000..328104676 --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/pv_and.S @@ -0,0 +1,74 @@ +# See LICENSE for license details. + +#***************************************************************************** +# pv_and.S +#----------------------------------------------------------------------------- +# +# Test pv.and instructions. +# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Arithmetic tests + #------------------------------------------------------------- + + # pv.and.h + TEST_RR_OP( 2, pv.and.h, 0xE1C028D0, 0xE1D16DD8, 0xE7E4A8F0 ); + TEST_RR_OP( 3, pv.and.h, 0x30111070, 0xB0111070, 0x3715D975 ); + TEST_RR_OP( 4, pv.and.h, 0x04000084, 0x04040AA4, 0xBD7314C7 ); + # pv.and.sc.h + TEST_RR_OP( 5, pv.and.sc.h, 0x18008480, 0x5818A5AB, 0x7C269E80 ); + TEST_RR_OP( 6, pv.and.sc.h, 0x0E0B0683, 0xCF2B6697, 0x4E211ECB ); + TEST_RR_OP( 7, pv.and.sc.h, 0x08070806, 0x28376966, 0xD7848E0F ); + # pv.and.sci.h + TEST_UIMM6_OP( 8, pv.and.sci.h, 0x00010003, 0xBFE568E7, 11 ); + TEST_UIMM6_OP( 9, pv.and.sci.h, 0x000A0001, 0xC08A6275, 11 ); + TEST_UIMM6_OP( 10, pv.and.sci.h, 0x000A0008, 0xDFEE3E6C, 11 ); + # pv.and.b + TEST_RR_OP( 11, pv.and.b, 0xA106671C, 0xE317675C, 0xADC6E7BF ); + TEST_RR_OP( 12, pv.and.b, 0x036080A8, 0xB360A0A8, 0x077A84AC ); 
+ TEST_RR_OP( 13, pv.and.b, 0x0B430011, 0x1FDB225B, 0xCB431CB5 ); + # pv.and.sc.b + TEST_RR_OP( 14, pv.and.sc.b, 0xE4892568, 0xE48B3778, 0xCCC46AED ); + TEST_RR_OP( 15, pv.and.sc.b, 0x00000000, 0x0D96B284, 0x8B596F00 ); + TEST_RR_OP( 16, pv.and.sc.b, 0x49672C2F, 0xC9672CBF, 0x6AC7706F ); + # pv.and.sci.b + TEST_UIMM6_OP( 17, pv.and.sci.b, 0x0B090808, 0x8F29C848, 11 ); + TEST_UIMM6_OP( 18, pv.and.sci.b, 0x000A0908, 0x30EA9D78, 11 ); + TEST_UIMM6_OP( 19, pv.and.sci.b, 0x03000801, 0x83743C41, 11 ); + + #------------------------------------------------------------- + # Source/Destination tests + #------------------------------------------------------------- + + # TODO(smazzola): + # for register-register instructions TEST_RR_SRC1_EQ_DEST, + # TEST_RR_SRC2_EQ_DEST, TEST_RR_SRC12_EQ_DEST + # for register-uimm6 instructions TEST_UIMM6_SRC1_EQ_DEST + + #------------------------------------------------------------- + # Bypassing tests + #------------------------------------------------------------- + + # TODO(smazzola): + # for register-register instructions TEST_RR_DEST_BYPASS, + # TEST_RR_SRC12_BYPASS, TEST_RR_SRC21_BYPASS, TEST_RR_ZEROSRC1, + # TEST_RR_ZEROSRC2, TEST_RR_ZEROSRC12, TEST_RR_ZERODEST + # for register-uimm6 instructions TEST_UIMM6_DEST_BYPASS, + # TEST_UIMM6_SRC1_BYPASS, TEST_UIMM6_ZEROSRC1, TEST_UIMM6_ZERODEST + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/pv_avg.S b/apps/riscv-tests/isa/rv32uxpulpimg/pv_avg.S new file mode 100644 index 000000000..030fa69d5 --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/pv_avg.S @@ -0,0 +1,74 @@ +# See LICENSE for license details. + +#***************************************************************************** +# pv_avg.S +#----------------------------------------------------------------------------- +# +# Test pv.avg instructions.
+# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Arithmetic tests + #------------------------------------------------------------- + + # pv.avg.h + TEST_RR_OP( 2, pv.avg.h, 0xDFA53D57, 0x2C5F4D25, 0x92EC2D89 ); + TEST_RR_OP( 3, pv.avg.h, 0x18A2C49C, 0xD09FBFB6, 0x60A5C983 ); + TEST_RR_OP( 4, pv.avg.h, 0xD290A560, 0xE37F8F8F, 0xC1A2BB32 ); + # pv.avg.sc.h + TEST_RR_OP( 5, pv.avg.sc.h, 0xF8B0DF51, 0x6A263768, 0xD18D873A ); + TEST_RR_OP( 6, pv.avg.sc.h, 0x29B50628, 0xDA3A9320, 0xDB667930 ); + TEST_RR_OP( 7, pv.avg.sc.h, 0x1CBDF112, 0x4376EC20, 0x13B2F605 ); + # pv.avg.sci.h + TEST_SIMM6_OP( 8, pv.avg.sci.h, 0x2F8BD535, 0x5F0CAA60, 11 ); + TEST_SIMM6_OP( 9, pv.avg.sci.h, 0x1F1B0A1B, 0x3E2B142C, 11 ); + TEST_SIMM6_OP( 10, pv.avg.sci.h, 0x1E533C46, 0x3C9C7881, 11 ); + # pv.avg.b + TEST_RR_OP( 11, pv.avg.b, 0xEF09DD01, 0x242B76A4, 0xBBE7445F ); + TEST_RR_OP( 12, pv.avg.b, 0x2C31DBEE, 0x7B0B5CD3, 0xDE575B0A ); + TEST_RR_OP( 13, pv.avg.b, 0xF11E19E0, 0x0278F0DE, 0xE0C543E3 ); + # pv.avg.sc.b + TEST_RR_OP( 14, pv.avg.sc.b, 0x12E71EFC, 0x40E95813, 0xDE5394E5 ); + TEST_RR_OP( 15, pv.avg.sc.b, 0x102204DA, 0xE005C975, 0xE6677040 ); + TEST_RR_OP( 16, pv.avg.sc.b, 0x1E2ADA29, 0x2840A03D, 0xCF897515 ); + # pv.avg.sci.b + TEST_SIMM6_OP( 17, pv.avg.sci.b, 0x04D5DEFE, 0xFDA0B1F1, 11 ); + TEST_SIMM6_OP( 18, pv.avg.sci.b, 0xD4192A1E, 0x9E274932, 11 ); + TEST_SIMM6_OP( 19, pv.avg.sci.b, 0x11E3CFE6, 0x17BC93C1, 11 ); + + #------------------------------------------------------------- + # Source/Destination tests + #------------------------------------------------------------- + + # TODO(smazzola): + # for register-register instructions TEST_RR_SRC1_EQ_DEST, + # TEST_RR_SRC2_EQ_DEST, TEST_RR_SRC12_EQ_DEST + # for register-simm6 instructions TEST_SIMM6_SRC1_EQ_DEST + + #------------------------------------------------------------- + # Bypassing tests + 
#------------------------------------------------------------- + + # TODO(smazzola): + # for register-register instructions TEST_RR_DEST_BYPASS, + # TEST_RR_SRC12_BYPASS, TEST_RR_SRC21_BYPASS, TEST_RR_ZEROSRC1, + # TEST_RR_ZEROSRC2, TEST_RR_ZEROSRC12, TEST_RR_ZERODEST + # for register-simm6 instructions TEST_SIMM6_DEST_BYPASS, + # TEST_SIMM6_SRC1_BYPASS, TEST_SIMM6_ZEROSRC1, TEST_SIMM6_ZERODEST + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/pv_avgu.S b/apps/riscv-tests/isa/rv32uxpulpimg/pv_avgu.S new file mode 100644 index 000000000..1d4c7de8d --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/pv_avgu.S @@ -0,0 +1,74 @@ +# See LICENSE for license details. + +#***************************************************************************** +# pv_avgu.S +#----------------------------------------------------------------------------- +# +# Test pv.avgu instructions. +# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Arithmetic tests + #------------------------------------------------------------- + + # pv.avgu.h + TEST_RR_OP( 2, pv.avgu.h, 0x627F5574, 0xA12DA561, 0x23D10588 ); + TEST_RR_OP( 3, pv.avgu.h, 0x5F5E7CE3, 0x979062E4, 0x272C96E3 ); + TEST_RR_OP( 4, pv.avgu.h, 0x6D64331C, 0xF472E6FA, 0xE6567F3F ); + # pv.avgu.sc.h + TEST_RR_OP( 5, pv.avgu.sc.h, 0x0CED14D1, 0xD924E8ED, 0xFFB240B6 ); + TEST_RR_OP( 6, pv.avgu.sc.h, 0x127F3F7B, 0x7447CE40, 0x64E4B0B7 ); + TEST_RR_OP( 7, pv.avgu.sc.h, 0x737C50C4, 0x7D7C380C, 0xB749697C ); + # pv.avgu.sci.h + TEST_UIMM6_OP( 8, pv.avgu.sci.h, 0x76BB744A, 0xED6BE88A, 11 ); + TEST_UIMM6_OP( 9, pv.avgu.sci.h, 0x3BD96A9F, 0x77A8D534, 11 ); + TEST_UIMM6_OP( 10, pv.avgu.sci.h, 0x551A6EC8, 0xAA29DD86, 11 ); + # pv.avgu.b + TEST_RR_OP( 11, pv.avgu.b, 0x366D332C, 0x8F75F8E9, 0xDD666F70 ); + TEST_RR_OP( 12, pv.avgu.b, 0x166D3707, 
0x5F0C48DF, 0xCECE2730 ); + TEST_RR_OP( 13, pv.avgu.b, 0x13390E74, 0x2D0C048B, 0xFA67185E ); + # pv.avgu.sc.b + TEST_RR_OP( 14, pv.avgu.sc.b, 0x20102F22, 0xFDDD1B00, 0x65EACB44 ); + TEST_RR_OP( 15, pv.avgu.sc.b, 0x79130A10, 0x2156444F, 0xAF0796D1 ); + TEST_RR_OP( 16, pv.avgu.sc.b, 0x44260042, 0x591DD256, 0xFBAE832F ); + # pv.avgu.sci.b + TEST_UIMM6_OP( 17, pv.avgu.sci.b, 0x016B6549, 0xF7CBBF88, 11 ); + TEST_UIMM6_OP( 18, pv.avgu.sci.b, 0x742F1E50, 0xDE543195, 11 ); + TEST_UIMM6_OP( 19, pv.avgu.sci.b, 0x34686166, 0x5EC5B7C1, 11 ); + + #------------------------------------------------------------- + # Source/Destination tests + #------------------------------------------------------------- + + # TODO(smazzola): + # for register-register instructions TEST_RR_SRC1_EQ_DEST, + # TEST_RR_SRC2_EQ_DEST, TEST_RR_SRC12_EQ_DEST + # for register-uimm6 instructions TEST_UIMM6_SRC1_EQ_DEST + + #------------------------------------------------------------- + # Bypassing tests + #------------------------------------------------------------- + + # TODO(smazzola): + # for register-register instructions TEST_RR_DEST_BYPASS, + # TEST_RR_SRC12_BYPASS, TEST_RR_SRC21_BYPASS, TEST_RR_ZEROSRC1, + # TEST_RR_ZEROSRC2, TEST_RR_ZEROSRC12, TEST_RR_ZERODEST + # for register-uimm6 instructions TEST_UIMM6_DEST_BYPASS, + # TEST_UIMM6_SRC1_BYPASS, TEST_UIMM6_ZEROSRC1, TEST_UIMM6_ZERODEST + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/pv_extract.S b/apps/riscv-tests/isa/rv32uxpulpimg/pv_extract.S new file mode 100644 index 000000000..5d0a0b70a --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/pv_extract.S @@ -0,0 +1,65 @@ +# See LICENSE for license details. + +#***************************************************************************** +# pv_extract.S +#----------------------------------------------------------------------------- +# +# Test pv.extract instructions.
+# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Functional tests + #------------------------------------------------------------- + + # pv.extract.h + TEST_SIMM6_OP( 2, pv.extract.h, 0x00000DEA, 0x53F90DEA, 0 ); + TEST_SIMM6_OP( 3, pv.extract.h, 0x00000315, 0xC6990315, 0 ); + TEST_SIMM6_OP( 4, pv.extract.h, 0x00005B08, 0xE1415B08, 0 ); + TEST_SIMM6_OP( 5, pv.extract.h, 0x00003654, 0x3654249D, 1 ); + TEST_SIMM6_OP( 6, pv.extract.h, 0x00002EE3, 0x2EE3D9FE, 1 ); + TEST_SIMM6_OP( 7, pv.extract.h, 0xFFFF93B1, 0x93B1AA99, 1 ); + # pv.extract.b + TEST_SIMM6_OP( 8, pv.extract.b, 0xFFFFFFD9, 0x53C073D9, 0 ); + TEST_SIMM6_OP( 9, pv.extract.b, 0x0000001F, 0x269EFC1F, 0 ); + TEST_SIMM6_OP( 10, pv.extract.b, 0xFFFFFFAB, 0x0E8CD3AB, 0 ); + TEST_SIMM6_OP( 11, pv.extract.b, 0x0000004A, 0xF7964A55, 1 ); + TEST_SIMM6_OP( 12, pv.extract.b, 0x0000006C, 0x1F366C84, 1 ); + TEST_SIMM6_OP( 13, pv.extract.b, 0x0000005B, 0x11205B09, 1 ); + TEST_SIMM6_OP( 14, pv.extract.b, 0x00000036, 0x2C36C818, 2 ); + TEST_SIMM6_OP( 15, pv.extract.b, 0x00000003, 0x4C039923, 2 ); + TEST_SIMM6_OP( 16, pv.extract.b, 0x0000007E, 0x057ED2EE, 2 ); + TEST_SIMM6_OP( 17, pv.extract.b, 0x00000056, 0x56B005BB, 3 ); + TEST_SIMM6_OP( 18, pv.extract.b, 0xFFFFFFE7, 0xE7798BAA, 3 ); + TEST_SIMM6_OP( 19, pv.extract.b, 0xFFFFFFF3, 0xF3F956A2, 3 ); + + #------------------------------------------------------------- + # Source/Destination tests + #------------------------------------------------------------- + + # TODO(smazzola): + # TEST_SIMM6_SRC1_EQ_DEST + + #------------------------------------------------------------- + # Bypassing tests + #------------------------------------------------------------- + + # TODO(smazzola): + # TEST_SIMM6_DEST_BYPASS, TEST_SIMM6_SRC1_BYPASS, + # TEST_SIMM6_ZEROSRC1, TEST_SIMM6_ZERODEST + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + 
+RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/pv_extractu.S b/apps/riscv-tests/isa/rv32uxpulpimg/pv_extractu.S new file mode 100644 index 000000000..ccd6e37c0 --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/pv_extractu.S @@ -0,0 +1,65 @@ +# See LICENSE for license details. + +#***************************************************************************** +# pv_extractu.S +#----------------------------------------------------------------------------- +# +# Test pv.extractu instructions. +# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Functional tests + #------------------------------------------------------------- + + # pv.extractu.h + TEST_UIMM6_OP( 2, pv.extractu.h, 0x0000A18D, 0xED2CA18D, 0 ); + TEST_UIMM6_OP( 3, pv.extractu.h, 0x00006A18, 0x3C576A18, 0 ); + TEST_UIMM6_OP( 4, pv.extractu.h, 0x000040A2, 0x1DAB40A2, 0 ); + TEST_UIMM6_OP( 5, pv.extractu.h, 0x0000BC96, 0xBC969BEC, 1 ); + TEST_UIMM6_OP( 6, pv.extractu.h, 0x0000DF7E, 0xDF7E4D2B, 1 ); + TEST_UIMM6_OP( 7, pv.extractu.h, 0x000099AE, 0x99AEE13C, 1 ); + # pv.extractu.b + TEST_UIMM6_OP( 8, pv.extractu.b, 0x00000046, 0x8FA19B46, 0 ); + TEST_UIMM6_OP( 9, pv.extractu.b, 0x0000009A, 0xE19C009A, 0 ); + TEST_UIMM6_OP( 10, pv.extractu.b, 0x0000002A, 0x408D722A, 0 ); + TEST_UIMM6_OP( 11, pv.extractu.b, 0x0000006C, 0xA2AF6C67, 1 ); + TEST_UIMM6_OP( 12, pv.extractu.b, 0x0000001F, 0xDE671F25, 1 ); + TEST_UIMM6_OP( 13, pv.extractu.b, 0x00000046, 0x325D46CE, 1 ); + TEST_UIMM6_OP( 14, pv.extractu.b, 0x00000003, 0x4603F967, 2 ); + TEST_UIMM6_OP( 15, pv.extractu.b, 0x000000C1, 0xDBC1292F, 2 ); + TEST_UIMM6_OP( 16, pv.extractu.b, 0x000000D6, 0xE7D631CF, 2 ); + TEST_UIMM6_OP( 17, pv.extractu.b, 0x00000020, 0x20B64275, 3 ); + TEST_UIMM6_OP( 18, pv.extractu.b, 0x000000D6, 0xD64B2CC0, 3 ); + TEST_UIMM6_OP( 19, pv.extractu.b, 0x00000084, 0x845485BD, 3 ); + + 
#------------------------------------------------------------- + # Source/Destination tests + #------------------------------------------------------------- + + # TODO(smazzola): + # TEST_UIMM6_SRC1_EQ_DEST + + #------------------------------------------------------------- + # Bypassing tests + #------------------------------------------------------------- + + # TODO(smazzola): + # TEST_UIMM6_DEST_BYPASS, TEST_UIMM6_SRC1_BYPASS, + # TEST_UIMM6_ZEROSRC1, TEST_UIMM6_ZERODEST + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/pv_insert.S b/apps/riscv-tests/isa/rv32uxpulpimg/pv_insert.S new file mode 100644 index 000000000..aa427d6ac --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/pv_insert.S @@ -0,0 +1,87 @@ +# See LICENSE for license details. + +#***************************************************************************** +# pv_insert.S +#----------------------------------------------------------------------------- +# +# Test pv.insert instructions. 
+# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Functional tests + #------------------------------------------------------------- + + # load immediate in test register necessary before execution + # of each test case because the output is dependent on the + # previous state of rD + + # pv.insert.h + li x14, 0x6ACB7454; + TEST_UIMM6_OP( 2, pv.insert.h, 0x6ACBF419, 0x3A12F419, 0 ); + li x14, 0x2BCBE5BA; + TEST_UIMM6_OP( 3, pv.insert.h, 0x2BCB3FEE, 0x86013FEE, 0 ); + li x14, 0x8E18DBE7; + TEST_UIMM6_OP( 4, pv.insert.h, 0x8E18C59F, 0x7153C59F, 0 ); + li x14, 0x57DF0195; + TEST_UIMM6_OP( 5, pv.insert.h, 0x00F60195, 0x267700F6, 1 ); + li x14, 0x7825C668; + TEST_UIMM6_OP( 6, pv.insert.h, 0x17F7C668, 0x04A017F7, 1 ); + li x14, 0xDBC05DC7; + TEST_UIMM6_OP( 7, pv.insert.h, 0xF7455DC7, 0x3569F745, 1 ); + # pv.insert.b + li x14, 0x5C93979B; + TEST_UIMM6_OP( 8, pv.insert.b, 0x5C93979C, 0x955C289C, 0 ); + li x14, 0x4696DE77; + TEST_UIMM6_OP( 9, pv.insert.b, 0x4696DEB0, 0x00E6ADB0, 0 ); + li x14, 0x48024613; + TEST_UIMM6_OP( 10, pv.insert.b, 0x4802465E, 0xE580375E, 0 ); + li x14, 0x55963E26; + TEST_UIMM6_OP( 11, pv.insert.b, 0x5596FC26, 0x215C2AFC, 1 ); + li x14, 0xAA2930B8; + TEST_UIMM6_OP( 12, pv.insert.b, 0xAA29DCB8, 0xE0318DDC, 1 ); + li x14, 0x844521DE; + TEST_UIMM6_OP( 13, pv.insert.b, 0x84459DDE, 0x12ED4F9D, 1 ); + li x14, 0xFE27DE9A; + TEST_UIMM6_OP( 14, pv.insert.b, 0xFED1DE9A, 0xC72B60D1, 2 ); + li x14, 0x41075730; + TEST_UIMM6_OP( 15, pv.insert.b, 0x41065730, 0x63ED6A06, 2 ); + li x14, 0xFD9C6336; + TEST_UIMM6_OP( 16, pv.insert.b, 0xFD426336, 0xA924A142, 2 ); + li x14, 0x2A3A8341; + TEST_UIMM6_OP( 17, pv.insert.b, 0x513A8341, 0x6B50F251, 3 ); + li x14, 0x59FBF2A7; + TEST_UIMM6_OP( 18, pv.insert.b, 0x31FBF2A7, 0x41767331, 3 ); + li x14, 0xE056E2B2; + TEST_UIMM6_OP( 19, pv.insert.b, 0x8C56E2B2, 0x2B08038C, 3 ); + + 
#------------------------------------------------------------- + # Source/Destination tests + #------------------------------------------------------------- + + # TODO(smazzola): + # TEST_UIMM6_SRC1_EQ_DEST + + #------------------------------------------------------------- + # Bypassing tests + #------------------------------------------------------------- + + # TODO(smazzola): + # TEST_UIMM6_DEST_BYPASS, TEST_UIMM6_SRC1_BYPASS, + # TEST_UIMM6_ZEROSRC1, TEST_UIMM6_ZERODEST + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/pv_max.S b/apps/riscv-tests/isa/rv32uxpulpimg/pv_max.S new file mode 100644 index 000000000..20f4c69f8 --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/pv_max.S @@ -0,0 +1,74 @@ +# See LICENSE for license details. + +#***************************************************************************** +# pv_max.S +#----------------------------------------------------------------------------- +# +# Test pv.max instructions. 
+# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Arithmetic tests + #------------------------------------------------------------- + + # pv.max.h + TEST_RR_OP( 2, pv.max.h, 0x731E1846, 0xF4D3B4D4, 0x731E1846 ); + TEST_RR_OP( 3, pv.max.h, 0x0E5963C7, 0x0E5963C7, 0xC078A04B ); + TEST_RR_OP( 4, pv.max.h, 0x10AF37F3, 0x10AF37F3, 0xA4DA964F ); + # pv.max.sc.h + TEST_RR_OP( 5, pv.max.sc.h, 0xFDCCFDCC, 0xC86DA7A4, 0x5AC6FDCC ); + TEST_RR_OP( 6, pv.max.sc.h, 0x6F096F09, 0x1EBE021F, 0xAEAF6F09 ); + TEST_RR_OP( 7, pv.max.sc.h, 0x72AF72AF, 0xCD6ACE5B, 0xA0D172AF ); + # pv.max.sci.h + TEST_SIMM6_OP( 8, pv.max.sci.h, 0x000B1FDD, 0xAD6D1FDD, 11 ); + TEST_SIMM6_OP( 9, pv.max.sci.h, 0x000B000B, 0xAAF6EBB0, 11 ); + TEST_SIMM6_OP( 10, pv.max.sci.h, 0x252B000B, 0x252BC255, 11 ); + # pv.max.b + TEST_RR_OP( 11, pv.max.b, 0xF2402D09, 0xB040FD9D, 0xF2842D09 ); + TEST_RR_OP( 12, pv.max.b, 0x78652008, 0xD749FDBE, 0x78652008 ); + TEST_RR_OP( 13, pv.max.b, 0xC5755F6A, 0xC5755F6A, 0xAD1CD088 ); + # pv.max.sc.b + TEST_RR_OP( 14, pv.max.sc.b, 0x75757575, 0x01B6C06B, 0xC1698275 ); + TEST_RR_OP( 15, pv.max.sc.b, 0x7B7B7B7B, 0x4A547B78, 0xCD4D377B ); + TEST_RR_OP( 16, pv.max.sc.b, 0x5D7B5D5F, 0x027B0E5F, 0x595E995D ); + # pv.max.sci.b + TEST_SIMM6_OP( 17, pv.max.sci.b, 0x0B0B0B0B, 0xEB06FBAB, 11 ); + TEST_SIMM6_OP( 18, pv.max.sci.b, 0x56240B26, 0x56249726, 11 ); + TEST_SIMM6_OP( 19, pv.max.sci.b, 0x5F32211E, 0x5F32211E, 11 ); + + #------------------------------------------------------------- + # Source/Destination tests + #------------------------------------------------------------- + + # TODO(smazzola): + # for register-register instructions TEST_RR_SRC1_EQ_DEST, + # TEST_RR_SRC2_EQ_DEST, TEST_RR_SRC12_EQ_DEST + # for register-simm6 instructions TEST_SIMM6_SRC1_EQ_DEST + + #------------------------------------------------------------- + # Bypassing tests + 
#------------------------------------------------------------- + + # TODO(smazzola): + # for register-register instructions TEST_RR_DEST_BYPASS, + # TEST_RR_SRC12_BYPASS, TEST_RR_SRC21_BYPASS, TEST_RR_ZEROSRC1, + # TEST_RR_ZEROSRC2, TEST_RR_ZEROSRC12, TEST_RR_ZERODEST + # for register-simm6 instructions TEST_SIMM6_DEST_BYPASS, + # TEST_SIMM6_SRC1_BYPASS, TEST_SIMM6_ZEROSRC1, TEST_SIMM6_ZERODEST + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/pv_maxu.S b/apps/riscv-tests/isa/rv32uxpulpimg/pv_maxu.S new file mode 100644 index 000000000..e5451559f --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/pv_maxu.S @@ -0,0 +1,74 @@ +# See LICENSE for license details. + +#***************************************************************************** +# pv_maxu.S +#----------------------------------------------------------------------------- +# +# Test pv.maxu instructions. +# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Arithmetic tests + #------------------------------------------------------------- + + # pv.maxu.h + TEST_RR_OP( 2, pv.maxu.h, 0xBA529136, 0x3C369136, 0xBA524CAE ); + TEST_RR_OP( 3, pv.maxu.h, 0xC9E65AD2, 0xC9E60677, 0x00145AD2 ); + TEST_RR_OP( 4, pv.maxu.h, 0x42D67990, 0x42D67990, 0x244A0E31 ); + # pv.maxu.sc.h + TEST_RR_OP( 5, pv.maxu.sc.h, 0x36D0CB1F, 0x36D0CB1F, 0x426D0434 ); + TEST_RR_OP( 6, pv.maxu.sc.h, 0xAE6DE3C7, 0xAE6DE3C7, 0x6ACB58AD ); + TEST_RR_OP( 7, pv.maxu.sc.h, 0xB6CDD3B0, 0xB6CDD3B0, 0x1CF29759 ); + # pv.maxu.sci.h + TEST_UIMM6_OP( 8, pv.maxu.sci.h, 0xF503CA6A, 0xF503CA6A, 11 ); + TEST_UIMM6_OP( 9, pv.maxu.sci.h, 0x6781179C, 0x6781179C, 11 ); + TEST_UIMM6_OP( 10, pv.maxu.sci.h, 0xB778D8A3, 0xB778D8A3, 11 ); + # pv.maxu.b + TEST_RR_OP( 11, pv.maxu.b, 0x17F9C1D2, 0x0DF91FD2, 0x1703C18D ); + TEST_RR_OP( 12, pv.maxu.b, 0xD04FEFA4, 
0x434FEFA4, 0xD032B42E ); + TEST_RR_OP( 13, pv.maxu.b, 0x8A95BFF2, 0x56955708, 0x8A08BFF2 ); + # pv.maxu.sc.b + TEST_RR_OP( 14, pv.maxu.sc.b, 0xE8E8E8E8, 0x318C6A64, 0x82B8BEE8 ); + TEST_RR_OP( 15, pv.maxu.sc.b, 0xDFE73926, 0xDFE73909, 0xBFC58126 ); + TEST_RR_OP( 16, pv.maxu.sc.b, 0x9DF09D9D, 0x6FF07641, 0x5689109D ); + # pv.maxu.sci.b + TEST_UIMM6_OP( 17, pv.maxu.sci.b, 0x0B7062D8, 0x0A7062D8, 11 ); + TEST_UIMM6_OP( 18, pv.maxu.sci.b, 0x0B469D5D, 0x07469D5D, 11 ); + TEST_UIMM6_OP( 19, pv.maxu.sci.b, 0x4E875E27, 0x4E875E27, 11 ); + + #------------------------------------------------------------- + # Source/Destination tests + #------------------------------------------------------------- + + # TODO(smazzola): + # for register-register instructions TEST_RR_SRC1_EQ_DEST, + # TEST_RR_SRC2_EQ_DEST, TEST_RR_SRC12_EQ_DEST + # for register-uimm6 instructions TEST_UIMM6_SRC1_EQ_DEST + + #------------------------------------------------------------- + # Bypassing tests + #------------------------------------------------------------- + + # TODO(smazzola): + # for register-register instructions TEST_RR_DEST_BYPASS, + # TEST_RR_SRC12_BYPASS, TEST_RR_SRC21_BYPASS, TEST_RR_ZEROSRC1, + # TEST_RR_ZEROSRC2, TEST_RR_ZEROSRC12, TEST_RR_ZERODEST + # for register-uimm6 instructions TEST_UIMM6_DEST_BYPASS, + # TEST_UIMM6_SRC1_BYPASS, TEST_UIMM6_ZEROSRC1, TEST_UIMM6_ZERODEST + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/pv_min.S b/apps/riscv-tests/isa/rv32uxpulpimg/pv_min.S new file mode 100644 index 000000000..c402ffad2 --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/pv_min.S @@ -0,0 +1,74 @@ +# See LICENSE for license details. + +#***************************************************************************** +# pv_min.S +#----------------------------------------------------------------------------- +# +# Test pv.min instructions.
+# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Arithmetic tests + #------------------------------------------------------------- + + # pv.min.h + TEST_RR_OP( 2, pv.min.h, 0x82E7A6AB, 0x8A92A6AB, 0x82E77B73 ); + TEST_RR_OP( 3, pv.min.h, 0xE3770915, 0xE37773E8, 0x44330915 ); + TEST_RR_OP( 4, pv.min.h, 0x85B0BA95, 0x85B0E12E, 0x6CA1BA95 ); + # pv.min.sc.h + TEST_RR_OP( 5, pv.min.sc.h, 0x33A388EB, 0x33A388EB, 0xA73B6225 ); + TEST_RR_OP( 6, pv.min.sc.h, 0xB2D1B2D1, 0x6C255634, 0xC06DB2D1 ); + TEST_RR_OP( 7, pv.min.sc.h, 0xED24CC32, 0xED24CC32, 0x20DD20AE ); + # pv.min.sci.h + TEST_SIMM6_OP( 8, pv.min.sci.h, 0x000BABA8, 0x3116ABA8, 11 ); + TEST_SIMM6_OP( 9, pv.min.sci.h, 0xF270FB23, 0xF270FB23, 11 ); + TEST_SIMM6_OP( 10, pv.min.sci.h, 0xF45DE902, 0xF45DE902, 11 ); + # pv.min.b + TEST_RR_OP( 11, pv.min.b, 0x3BD1A58C, 0x3BF5A5CD, 0x59D1618C ); + TEST_RR_OP( 12, pv.min.b, 0x99C52CBA, 0xF4D42C6F, 0x99C57ABA ); + TEST_RR_OP( 13, pv.min.b, 0x13CB8AE9, 0x13CB8AE9, 0x47F8D538 ); + # pv.min.sc.b + TEST_RR_OP( 14, pv.min.sc.b, 0xDCC161BB, 0xDCC161BB, 0x41A0EA7B ); + TEST_RR_OP( 15, pv.min.sc.b, 0x3059A553, 0x3059A553, 0xB80EA978 ); + TEST_RR_OP( 16, pv.min.sc.b, 0x97ECEFEF, 0x97EC4211, 0x8059FEEF ); + # pv.min.sci.b + TEST_SIMM6_OP( 17, pv.min.sci.b, 0x0B0BB986, 0x732DB986, 11 ); + TEST_SIMM6_OP( 18, pv.min.sci.b, 0xF40B0BE7, 0xF47567E7, 11 ); + TEST_SIMM6_OP( 19, pv.min.sci.b, 0x0BF70B0B, 0x7DF77268, 11 ); + + #------------------------------------------------------------- + # Source/Destination tests + #------------------------------------------------------------- + + # TODO(smazzola): + # for register-register instructions TEST_RR_SRC1_EQ_DEST, + # TEST_RR_SRC2_EQ_DEST, TEST_RR_SRC12_EQ_DEST + # for register-simm6 instructions TEST_SIMM6_SRC1_EQ_DEST + + #------------------------------------------------------------- + # Bypassing tests + 
#------------------------------------------------------------- + + # TODO(smazzola): + # for register-register instructions TEST_RR_DEST_BYPASS, + # TEST_RR_SRC12_BYPASS, TEST_RR_SRC21_BYPASS, TEST_RR_ZEROSRC1, + # TEST_RR_ZEROSRC2, TEST_RR_ZEROSRC12, TEST_RR_ZERODEST + # for register-simm6 instructions TEST_SIMM6_DEST_BYPASS, + # TEST_SIMM6_SRC1_BYPASS, TEST_SIMM6_ZEROSRC1, TEST_SIMM6_ZERODEST + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/pv_minu.S b/apps/riscv-tests/isa/rv32uxpulpimg/pv_minu.S new file mode 100644 index 000000000..4c875e427 --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/pv_minu.S @@ -0,0 +1,74 @@ +# See LICENSE for license details. + +#***************************************************************************** +# pv_minu.S +#----------------------------------------------------------------------------- +# +# Test pv.minu instructions. +# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Arithmetic tests + #------------------------------------------------------------- + + # pv.minu.h + TEST_RR_OP( 2, pv.minu.h, 0x6A212A68, 0xE2B42A68, 0x6A212B4A ); + TEST_RR_OP( 3, pv.minu.h, 0x50AA637D, 0xC219637D, 0x50AA84CC ); + TEST_RR_OP( 4, pv.minu.h, 0x579039EE, 0x92C439EE, 0x5790DCDC ); + # pv.minu.sc.h + TEST_RR_OP( 5, pv.minu.sc.h, 0x7EF4A8D0, 0x7EF4D67D, 0x268CA8D0 ); + TEST_RR_OP( 6, pv.minu.sc.h, 0x8C9F3E8A, 0x9A9F3E8A, 0x2E6A8C9F ); + TEST_RR_OP( 7, pv.minu.sc.h, 0x844C6178, 0x844C6178, 0x7A819ECF ); + # pv.minu.sci.h + TEST_UIMM6_OP( 8, pv.minu.sci.h, 0x000B000B, 0x2E8024BF, 11 ); + TEST_UIMM6_OP( 9, pv.minu.sci.h, 0x000B000B, 0x7070C7D7, 11 ); + TEST_UIMM6_OP( 10, pv.minu.sci.h, 0x000B000B, 0x6955494F, 11 ); + # pv.minu.b + TEST_RR_OP( 11, pv.minu.b, 0x46A4170C, 0x46F51795, 0xAAA4C60C ); + TEST_RR_OP( 12, pv.minu.b, 0xA33FAB2C, 
0xA33FAB2C, 0xE140C044 ); + TEST_RR_OP( 13, pv.minu.b, 0x43015111, 0xF49B5111, 0x43018736 ); + # pv.minu.sc.b + TEST_RR_OP( 14, pv.minu.sc.b, 0x3EB0291F, 0x3EBF291F, 0x909B9AB0 ); + TEST_RR_OP( 15, pv.minu.sc.b, 0x000C0C0C, 0x00555837, 0x7F61610C ); + TEST_RR_OP( 16, pv.minu.sc.b, 0x01010101, 0x47AB06B4, 0x0E0F9001 ); + # pv.minu.sci.b + TEST_UIMM6_OP( 17, pv.minu.sci.b, 0x0B0B0B0B, 0xBEAF5AAB, 11 ); + TEST_UIMM6_OP( 18, pv.minu.sci.b, 0x0B0B0B0B, 0xDC152410, 11 ); + TEST_UIMM6_OP( 19, pv.minu.sci.b, 0x0B0B0B0B, 0x1DAD56C8, 11 ); + + #------------------------------------------------------------- + # Source/Destination tests + #------------------------------------------------------------- + + # TODO(smazzola): + # for register-register instructions TEST_RR_SRC1_EQ_DEST, + # TEST_RR_SRC2_EQ_DEST, TEST_RR_SRC12_EQ_DEST + # for register-uimm6 instructions TEST_UIMM6_SRC1_EQ_DEST + + #------------------------------------------------------------- + # Bypassing tests + #------------------------------------------------------------- + + # TODO(smazzola): + # for register-register instructions TEST_RR_DEST_BYPASS, + # TEST_RR_SRC12_BYPASS, TEST_RR_SRC21_BYPASS, TEST_RR_ZEROSRC1, + # TEST_RR_ZEROSRC2, TEST_RR_ZEROSRC12, TEST_RR_ZERODEST + # for register-uimm6 instructions TEST_UIMM6_DEST_BYPASS, + # TEST_UIMM6_SRC1_BYPASS, TEST_UIMM6_ZEROSRC1, TEST_UIMM6_ZERODEST + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/pv_or.S b/apps/riscv-tests/isa/rv32uxpulpimg/pv_or.S new file mode 100644 index 000000000..821377c14 --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/pv_or.S @@ -0,0 +1,74 @@ +# See LICENSE for license details. + +#***************************************************************************** +# pv_or.S +#----------------------------------------------------------------------------- +# +# Test pv.or instructions.
+# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Arithmetic tests + #------------------------------------------------------------- + + # pv.or.h + TEST_RR_OP( 2, pv.or.h, 0xDFEFB3F3, 0x9E678370, 0x418EB1F3 ); + TEST_RR_OP( 3, pv.or.h, 0x7EFFDD7D, 0x727D5079, 0x7C968D05 ); + TEST_RR_OP( 4, pv.or.h, 0x5FAEEFD9, 0x4BAEA991, 0x1604EFD9 ); + # pv.or.sc.h + TEST_RR_OP( 5, pv.or.sc.h, 0x7FB2EFF1, 0x5782C951, 0x886D2FB0 ); + TEST_RR_OP( 6, pv.or.sc.h, 0x7F9E6FF9, 0x5E0E04F9, 0x248B6F98 ); + TEST_RR_OP( 7, pv.or.sc.h, 0xBD9BBD4B, 0x85998C42, 0xD1F2B90B ); + # pv.or.sci.h + TEST_UIMM6_OP( 8, pv.or.sci.h, 0x4F6F5CBF, 0x4F645CB5, 11 ); + TEST_UIMM6_OP( 9, pv.or.sci.h, 0x34DF2B7B, 0x34DD2B73, 11 ); + TEST_UIMM6_OP( 10, pv.or.sci.h, 0xE73F5DEF, 0xE73D5DE5, 11 ); + # pv.or.b + TEST_RR_OP( 11, pv.or.b, 0xFDFAFC34, 0x25AA9830, 0xD8706434 ); + TEST_RR_OP( 12, pv.or.b, 0x9C7BF5EF, 0x9C41746A, 0x003BC1ED ); + TEST_RR_OP( 13, pv.or.b, 0x7BEBAEFF, 0x7B4BA8E2, 0x1BA3263F ); + # pv.or.sc.b + TEST_RR_OP( 14, pv.or.sc.b, 0xFFF7B7B7, 0xFDE72320, 0x26977A97 ); + TEST_RR_OP( 15, pv.or.sc.b, 0xFFD5F5FD, 0xEA55E02C, 0x0D23AFD5 ); + TEST_RR_OP( 16, pv.or.sc.b, 0xDBDFFBD3, 0x9B8E6BC3, 0xCF31CDD3 ); + # pv.or.sci.b + TEST_UIMM6_OP( 17, pv.or.sci.b, 0xCBCF2F9B, 0xC2CC2499, 11 ); + TEST_UIMM6_OP( 18, pv.or.sci.b, 0x3BEF1BEB, 0x33EE13E0, 11 ); + TEST_UIMM6_OP( 19, pv.or.sci.b, 0xFB5B5BEF, 0xF05252EE, 11 ); + + #------------------------------------------------------------- + # Source/Destination tests + #------------------------------------------------------------- + + # TODO(smazzola): + # for register-register instructions TEST_RR_SRC1_EQ_DEST, + # TEST_RR_SRC2_EQ_DEST, TEST_RR_SRC12_EQ_DEST + # for register-uimm6 instructions TEST_UIMM6_SRC1_EQ_DEST + + #------------------------------------------------------------- + # Bypassing tests +
#------------------------------------------------------------- + + # TODO(smazzola): + # for register-register instructions TEST_RR_DEST_BYPASS, + # TEST_RR_SRC12_BYPASS, TEST_RR_SRC21_BYPASS, TEST_RR_ZEROSRC1, + # TEST_RR_ZEROSRC2, TEST_RR_ZEROSRC12, TEST_RR_ZERODEST + # for register-uimm6 instructions TEST_UIMM6_DEST_BYPASS, + # TEST_UIMM6_SRC1_BYPASS, TEST_UIMM6_ZEROSRC1, TEST_UIMM6_ZERODEST + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/pv_sll.S b/apps/riscv-tests/isa/rv32uxpulpimg/pv_sll.S new file mode 100644 index 000000000..3e44223bd --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/pv_sll.S @@ -0,0 +1,74 @@ +# See LICENSE for license details. + +#***************************************************************************** +# pv_sll.S +#----------------------------------------------------------------------------- +# +# Test pv.sll instructions. +# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Arithmetic tests + #------------------------------------------------------------- + + # pv.sll.h + TEST_RR_OP( 2, pv.sll.h, 0x40000E80, 0xCC85D0E8, 0x000E0004 ); + TEST_RR_OP( 3, pv.sll.h, 0xF0802C00, 0x83E1502C, 0x00070008 ); + TEST_RR_OP( 4, pv.sll.h, 0x20005470, 0x8AA1551C, 0x000D0002 ); + # pv.sll.sc.h + TEST_RR_OP( 5, pv.sll.sc.h, 0x81F03608, 0x103E26C1, 0x000A0003 ); + TEST_RR_OP( 6, pv.sll.sc.h, 0x1B800F00, 0x0437CE1E, 0x00080007 ); + TEST_RR_OP( 7, pv.sll.sc.h, 0xC7002900, 0xE5C75029, 0x000D0008 ); + # pv.sll.sci.h + TEST_UIMM6_OP( 8, pv.sll.sci.h, 0x46002600, 0x48233B93, 9 ); + TEST_UIMM6_OP( 9, pv.sll.sci.h, 0x9600AC00, 0x344B9356, 9 ); + TEST_UIMM6_OP( 10, pv.sll.sci.h, 0x40002E00, 0xB2A0E417, 9 ); + # pv.sll.b + TEST_RR_OP( 11, pv.sll.b, 0x7EE05CA8, 0x3FCE5C2A, 0x01040002 ); + TEST_RR_OP( 12, pv.sll.b, 0xA45E8034, 0xE95E5934, 0x02000700 ); +
TEST_RR_OP( 13, pv.sll.b, 0xB0780068, 0xFB8FA8B4, 0x04030501 ); + # pv.sll.sc.b + TEST_RR_OP( 14, pv.sll.sc.b, 0xF05000C0, 0x5FF510FC, 0x02020004 ); + TEST_RR_OP( 15, pv.sll.sc.b, 0xA0C0B0A0, 0x2A6CFB1A, 0x01000604 ); + TEST_RR_OP( 16, pv.sll.sc.b, 0x047E94F8, 0x823FCAFC, 0x07010601 ); + # pv.sll.sci.b + TEST_UIMM6_OP( 17, pv.sll.sci.b, 0x60002020, 0x8BA0A901, 5 ); + TEST_UIMM6_OP( 18, pv.sll.sci.b, 0x604020E0, 0x83CAE947, 5 ); + TEST_UIMM6_OP( 19, pv.sll.sci.b, 0x0020A060, 0x98F185C3, 5 ); + + #------------------------------------------------------------- + # Source/Destination tests + #------------------------------------------------------------- + + # TODO(smazzola): + # for register-register instructions TEST_RR_SRC1_EQ_DEST, + # TEST_RR_SRC2_EQ_DEST, TEST_RR_SRC12_EQ_DEST + # for register-uimm6 instructions TEST_UIMM6_SRC1_EQ_DEST + + #------------------------------------------------------------- + # Bypassing tests + #------------------------------------------------------------- + + # TODO(smazzola): + # for register-register instructions TEST_RR_DEST_BYPASS, + # TEST_RR_SRC12_BYPASS, TEST_RR_SRC21_BYPASS, TEST_RR_ZEROSRC1, + # TEST_RR_ZEROSRC2, TEST_RR_ZEROSRC12, TEST_RR_ZERODEST + # for register-uimm6 instructions TEST_UIMM6_DEST_BYPASS, + # TEST_UIMM6_SRC1_BYPASS, TEST_UIMM6_ZEROSRC1, TEST_UIMM6_ZERODEST + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/pv_sra.S b/apps/riscv-tests/isa/rv32uxpulpimg/pv_sra.S new file mode 100644 index 000000000..d0bfe0d89 --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/pv_sra.S @@ -0,0 +1,74 @@ +# See LICENSE for license details. + +#***************************************************************************** +# pv_sra.S +#----------------------------------------------------------------------------- +# +# Test pv.sra instructions.
+# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Arithmetic tests + #------------------------------------------------------------- + + # pv.sra.h + TEST_RR_OP( 2, pv.sra.h, 0x2C4BFFF8, 0x5896C0A3, 0x0001000B ); + TEST_RR_OP( 3, pv.sra.h, 0x005E760F, 0x5E43760F, 0x00080000 ); + TEST_RR_OP( 4, pv.sra.h, 0x0059F46B, 0x166BA35F, 0x00060003 ); + # pv.sra.sc.h + TEST_RR_OP( 5, pv.sra.sc.h, 0x00000005, 0x080A5F54, 0x000D000C ); + TEST_RR_OP( 6, pv.sra.sc.h, 0xFF0400FB, 0xE0871F6D, 0x00020005 ); + TEST_RR_OP( 7, pv.sra.sc.h, 0x00010000, 0x40FF2C98, 0x000B000E ); + # pv.sra.sci.h + TEST_UIMM6_OP( 8, pv.sra.sci.h, 0xFFE70029, 0xCEB053F9, 9 ); + TEST_UIMM6_OP( 9, pv.sra.sci.h, 0xFFD5FFF8, 0xAA4AF03F, 9 ); + TEST_UIMM6_OP( 10, pv.sra.sci.h, 0x00340008, 0x68E511A2, 9 ); + # pv.sra.b + TEST_RR_OP( 11, pv.sra.b, 0x08FF00F9, 0x11F61B9D, 0x01050704 ); + TEST_RR_OP( 12, pv.sra.b, 0xFCFCFFF3, 0x8FE3F89C, 0x05030603 ); + TEST_RR_OP( 13, pv.sra.b, 0x0096FFA3, 0x0296E1A3, 0x02000600 ); + # pv.sra.sc.b + TEST_RR_OP( 14, pv.sra.sc.b, 0x66F6E2A1, 0x66F6E2A1, 0x01050600 ); + TEST_RR_OP( 15, pv.sra.sc.b, 0x0DFD1A13, 0x36F56B4D, 0x03040302 ); + TEST_RR_OP( 16, pv.sra.sc.b, 0x00000201, 0x16135625, 0x01040205 ); + # pv.sra.sci.b + TEST_UIMM6_OP( 17, pv.sra.sci.b, 0xFCFC02FD, 0x848B57AD, 5 ); + TEST_UIMM6_OP( 18, pv.sra.sci.b, 0x02FE02FC, 0x40CD5290, 5 ); + TEST_UIMM6_OP( 19, pv.sra.sci.b, 0x02FCFF01, 0x549FFD20, 5 ); + + #------------------------------------------------------------- + # Source/Destination tests + #------------------------------------------------------------- + + # TODO(smazzola): + # for register-register instructions TEST_RR_SRC1_EQ_DEST, + # TEST_RR_SRC2_EQ_DEST, TEST_RR_SRC12_EQ_DEST + # for register-uimm6 instructions TEST_UIMM6_SRC1_EQ_DEST + + #------------------------------------------------------------- + # Bypassing tests +
#------------------------------------------------------------- + + # TODO(smazzola): + # for register-register instructions TEST_RR_DEST_BYPASS, + # TEST_RR_SRC12_BYPASS, TEST_RR_SRC21_BYPASS, TEST_RR_ZEROSRC1, + # TEST_RR_ZEROSRC2, TEST_RR_ZEROSRC12, TEST_RR_ZERODEST + # for register-uimm6 instructions TEST_UIMM6_DEST_BYPASS, + # TEST_UIMM6_SRC1_BYPASS, TEST_UIMM6_ZEROSRC1, TEST_UIMM6_ZERODEST + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/pv_srl.S b/apps/riscv-tests/isa/rv32uxpulpimg/pv_srl.S new file mode 100644 index 000000000..47ebe0e49 --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/pv_srl.S @@ -0,0 +1,74 @@ +# See LICENSE for license details. + +#***************************************************************************** +# pv_srl.S +#----------------------------------------------------------------------------- +# +# Test pv.srl instructions. +# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Arithmetic tests + #------------------------------------------------------------- + + # pv.srl.h + TEST_RR_OP( 2, pv.srl.h, 0x06030067, 0xC076CE34, 0x00050009 ); + TEST_RR_OP( 3, pv.srl.h, 0x00A40047, 0xA41723DF, 0x00080007 ); + TEST_RR_OP( 4, pv.srl.h, 0x00142A49, 0x52ADA926, 0x000A0002 ); + # pv.srl.sc.h + TEST_RR_OP( 5, pv.srl.sc.h, 0x1EE01053, 0xF706829F, 0x00080003 ); + TEST_RR_OP( 6, pv.srl.sc.h, 0x00000001, 0x3BC79528, 0x000E000F ); + TEST_RR_OP( 7, pv.srl.sc.h, 0x001001BE, 0x04236F94, 0x000D0006 ); + # pv.srl.sci.h + TEST_UIMM6_OP( 8, pv.srl.sci.h, 0x00450077, 0x8AA9EF3A, 9 ); + TEST_UIMM6_OP( 9, pv.srl.sci.h, 0x0049006B, 0x93A9D63A, 9 ); + TEST_UIMM6_OP( 10, pv.srl.sci.h, 0x003F0040, 0x7E0D81AF, 9 ); + # pv.srl.b + TEST_RR_OP( 11, pv.srl.b, 0x030C6A01, 0xFAC8D4F6, 0x06040107 ); + TEST_RR_OP( 12, pv.srl.b, 0x07000105, 0x3F0B94B5, 0x03050705 ); +
TEST_RR_OP( 13, pv.srl.b, 0x00311065, 0x29C54065, 0x07020200 ); + # pv.srl.sc.b + TEST_RR_OP( 14, pv.srl.sc.b, 0x3A37353E, 0x746E6A7C, 0x02010701 ); + TEST_RR_OP( 15, pv.srl.sc.b, 0x0A080A0D, 0xAA82A5D6, 0x01030204 ); + TEST_RR_OP( 16, pv.srl.sc.b, 0x6F5D6D75, 0xDEBBDAEB, 0x03040701 ); + # pv.srl.sci.b + TEST_UIMM6_OP( 17, pv.srl.sci.b, 0x06010704, 0xCD2DE193, 5 ); + TEST_UIMM6_OP( 18, pv.srl.sci.b, 0x00030507, 0x0B64B9E8, 5 ); + TEST_UIMM6_OP( 19, pv.srl.sci.b, 0x02070306, 0x50E572CB, 5 ); + + #------------------------------------------------------------- + # Source/Destination tests + #------------------------------------------------------------- + + # TODO(smazzola): + # for register-register instructions TEST_RR_SRC1_EQ_DEST, + # TEST_RR_SRC2_EQ_DEST, TEST_RR_SRC12_EQ_DEST + # for register-uimm6 instructions TEST_UIMM6_SRC1_EQ_DEST + + #------------------------------------------------------------- + # Bypassing tests + #------------------------------------------------------------- + + # TODO(smazzola): + # for register-register instructions TEST_RR_DEST_BYPASS, + # TEST_RR_SRC12_BYPASS, TEST_RR_SRC21_BYPASS, TEST_RR_ZEROSRC1, + # TEST_RR_ZEROSRC2, TEST_RR_ZEROSRC12, TEST_RR_ZERODEST + # for register-uimm6 instructions TEST_UIMM6_DEST_BYPASS, + # TEST_UIMM6_SRC1_BYPASS, TEST_UIMM6_ZEROSRC1, TEST_UIMM6_ZERODEST + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/pv_sub.S b/apps/riscv-tests/isa/rv32uxpulpimg/pv_sub.S new file mode 100644 index 000000000..72336e0d9 --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/pv_sub.S @@ -0,0 +1,74 @@ +# See LICENSE for license details. + +#***************************************************************************** +# pv_sub.S +#----------------------------------------------------------------------------- +# +# Test pv.sub instructions.
+# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Arithmetic tests + #------------------------------------------------------------- + + # pv.sub.h + TEST_RR_OP( 2, pv.sub.h, 0x21549541, 0xC037A04F, 0x9EE30B0E ); + TEST_RR_OP( 3, pv.sub.h, 0x0A8F87AF, 0xA6011B6C, 0x9B7293BD ); + TEST_RR_OP( 4, pv.sub.h, 0xE3DFCB44, 0x2BD6C73C, 0x47F7FBF8 ); + # pv.sub.sc.h + TEST_RR_OP( 5, pv.sub.sc.h, 0x005D10C7, 0xA7DFB849, 0xA0DAA782 ); + TEST_RR_OP( 6, pv.sub.sc.h, 0x915182F6, 0xEB54DCF9, 0xE0AB5A03 ); + TEST_RR_OP( 7, pv.sub.sc.h, 0x8ADB0963, 0xC72645AE, 0x44033C4B ); + # pv.sub.sci.h + TEST_SIMM6_OP( 8, pv.sub.sci.h, 0x5F6A01D4, 0x5F7501DF, 11 ); + TEST_SIMM6_OP( 9, pv.sub.sci.h, 0xEDEBEE05, 0xEDF6EE10, 11 ); + TEST_SIMM6_OP( 10, pv.sub.sci.h, 0x5254F633, 0x525FF63E, 11 ); + # pv.sub.b + TEST_RR_OP( 11, pv.sub.b, 0xCFA312C4, 0xD6B51AA7, 0x071208E3 ); + TEST_RR_OP( 12, pv.sub.b, 0x399B9FC4, 0x273CF552, 0xEEA1568E ); + TEST_RR_OP( 13, pv.sub.b, 0x75B1BB23, 0x3E6DD37D, 0xC9BC185A ); + # pv.sub.sc.b + TEST_RR_OP( 14, pv.sub.sc.b, 0x85D1880A, 0xCE1AD153, 0x11D9D249 ); + TEST_RR_OP( 15, pv.sub.sc.b, 0xF53BE607, 0xB0F6A1C2, 0xA4990EBB ); + TEST_RR_OP( 16, pv.sub.sc.b, 0xB890FAF0, 0x4A228C82, 0x3DEA1692 ); + # pv.sub.sci.b + TEST_SIMM6_OP( 17, pv.sub.sci.b, 0x5282B987, 0x5D8DC492, 11 ); + TEST_SIMM6_OP( 18, pv.sub.sci.b, 0x12D59C9F, 0x1DE0A7AA, 11 ); + TEST_SIMM6_OP( 19, pv.sub.sci.b, 0x6C6D5D05, 0x77786810, 11 ); + + #------------------------------------------------------------- + # Source/Destination tests + #------------------------------------------------------------- + + # TODO(smazzola): + # for register-register instructions TEST_RR_SRC1_EQ_DEST, + # TEST_RR_SRC2_EQ_DEST, TEST_RR_SRC12_EQ_DEST + # for register-simm6 instructions TEST_SIMM6_SRC1_EQ_DEST + + #------------------------------------------------------------- + # Bypassing tests + 
#------------------------------------------------------------- + + # TODO(smazzola): + # for register-register instructions TEST_RR_DEST_BYPASS, + # TEST_RR_SRC12_BYPASS, TEST_RR_SRC21_BYPASS, TEST_RR_ZEROSRC1, + # TEST_RR_ZEROSRC2, TEST_RR_ZEROSRC12, TEST_RR_ZERODEST + # for register-simm6 instructions TEST_SIMM6_DEST_BYPASS, + # TEST_SIMM6_SRC1_BYPASS, TEST_SIMM6_ZEROSRC1, TEST_SIMM6_ZERODEST + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/pv_xor.S b/apps/riscv-tests/isa/rv32uxpulpimg/pv_xor.S new file mode 100644 index 000000000..75fee565c --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/pv_xor.S @@ -0,0 +1,74 @@ +# See LICENSE for license details. + +#***************************************************************************** +# pv_xor.S +#----------------------------------------------------------------------------- +# +# Test pv.xor instructions. +# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Arithmetic tests + #------------------------------------------------------------- + + # pv.xor.h + TEST_RR_OP( 2, pv.xor.h, 0x66F696DC, 0x5FCE4AD5, 0x3938DC09 ); + TEST_RR_OP( 3, pv.xor.h, 0x58A5BD3D, 0x672A5F61, 0x3F8FE25C ); + TEST_RR_OP( 4, pv.xor.h, 0x339E302C, 0xE468E8F4, 0xD7F6D8D8 ); + # pv.xor.sc.h + TEST_RR_OP( 5, pv.xor.sc.h, 0x5FB150BC, 0xC4A5CBA8, 0x43CC9B14 ); + TEST_RR_OP( 6, pv.xor.sc.h, 0x48030479, 0xD7F09B8A, 0xCB019FF3 ); + TEST_RR_OP( 7, pv.xor.sc.h, 0x0465D51A, 0x40CF91B0, 0x55DB44AA ); + # pv.xor.sci.h + TEST_UIMM6_OP( 8, pv.xor.sci.h, 0x0F43E04C, 0x0F48E047, 11 ); + TEST_UIMM6_OP( 9, pv.xor.sci.h, 0xEC22101C, 0xEC291017, 11 ); + TEST_UIMM6_OP( 10, pv.xor.sci.h, 0x137F208C, 0x13742087, 11 ); + # pv.xor.b + TEST_RR_OP( 11, pv.xor.b, 0x6A9EC5B4, 0x13518603, 0x79CF43B7 ); + TEST_RR_OP( 12, pv.xor.b, 0xEE0CDAEA, 0x59CAB02D, 0xB7C66AC7 ); 
+ TEST_RR_OP( 13, pv.xor.b, 0x5B6E4CC1, 0x8B61A064, 0xD00FECA5 ); + # pv.xor.sc.b + TEST_RR_OP( 14, pv.xor.sc.b, 0x40CF2054, 0x0F806F1B, 0x7CD0414F ); + TEST_RR_OP( 15, pv.xor.sc.b, 0x89E5AA00, 0x127E319B, 0xC919409B ); + TEST_RR_OP( 16, pv.xor.sc.b, 0xFC7E17F9, 0xAE2C45AB, 0xB9254252 ); + # pv.xor.sci.b + TEST_UIMM6_OP( 17, pv.xor.sci.b, 0x2D2D131C, 0x26261817, 11 ); + TEST_UIMM6_OP( 18, pv.xor.sci.b, 0x23EC42D8, 0x28E749D3, 11 ); + TEST_UIMM6_OP( 19, pv.xor.sci.b, 0xCAA811C9, 0xC1A31AC2, 11 ); + + #------------------------------------------------------------- + # Source/Destination tests + #------------------------------------------------------------- + + # TODO(smazzola): + # for register-register instructions TEST_RR_SRC1_EQ_DEST, + # TEST_RR_SRC2_EQ_DEST, TEST_RR_SRC12_EQ_DEST + # for register-simm6 instructions TEST_UIMM6_SRC1_EQ_DEST + + #------------------------------------------------------------- + # Bypassing tests + #------------------------------------------------------------- + + # TODO(smazzola): + # for register-register instructions TEST_RR_DEST_BYPASS, + # TEST_RR_SRC12_BYPASS, TEST_RR_SRC21_BYPASS, TEST_RR_ZEROSRC1, + # TEST_RR_ZEROSRC2, TEST_RR_ZEROSRC12, TEST_RR_ZERODEST + # for register-simm6 instructions TEST_UIMM6_DEST_BYPASS, + # TEST_UIMM6_SRC1_BYPASS, TEST_UIMM6_ZEROSRC1, TEST_UIMM6_ZERODEST + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/snitch_isa.mk b/apps/riscv-tests/isa/snitch_isa.mk index 4b66b2d53..4cfca3257 100644 --- a/apps/riscv-tests/isa/snitch_isa.mk +++ b/apps/riscv-tests/isa/snitch_isa.mk @@ -49,6 +49,20 @@ ifeq ($(xpulpimg),1) p_clipr p_clipur \ p_beqimm p_bneimm \ p_mac p_msu \ + pv_add \ + pv_sub \ + pv_avg pv_avgu \ + pv_min pv_minu \ + pv_max pv_maxu \ + pv_srl \ + pv_sra \ + pv_sll \ + pv_or \ + pv_xor \ + pv_and \ + pv_abs \ + pv_extract pv_extractu \ + pv_insert \ endif From dca5d41a42da9e7315fc856ec86e146f14a2eef4 Mon Sep 17 
00:00:00 2001 From: Sergio Mazzola Date: Tue, 12 Jan 2021 11:46:38 +0100 Subject: [PATCH 21/65] [snitch] Implement Xpulpv2 SIMD arithmetical ops in IPU Added instructions: SIMD add, sub, avg, min, max, shifts, logicals, abs, extract, insert --- hardware/deps/snitch/src/snitch.sv | 175 ++++- hardware/deps/snitch/src/snitch_ipu.sv | 843 ++++++++++++++++++++++++- 2 files changed, 963 insertions(+), 55 deletions(-) diff --git a/hardware/deps/snitch/src/snitch.sv b/hardware/deps/snitch/src/snitch.sv index d29442e9a..08025fcaa 100644 --- a/hardware/deps/snitch/src/snitch.sv +++ b/hardware/deps/snitch/src/snitch.sv @@ -1075,62 +1075,175 @@ module snitch #( illegal_inst = 1'b1; end end - - // Generic ALU operations + // Immediate branching + riscv_instr::P_BEQIMM: begin // Xpulpimg: p.beqimm + if (snitch_pkg::XPULPIMG) begin + is_branch = 1'b1; + write_rd = 1'b0; + alu_op = Eq; + opa_select = Reg; + opb_select = PBImmediate; + end else begin + illegal_inst = 1'b1; + end + end + riscv_instr::P_BNEIMM: begin // Xpulpimg: p.bneimm + if (snitch_pkg::XPULPIMG) begin + is_branch = 1'b1; + write_rd = 1'b0; + alu_op = Neq; + opa_select = Reg; + opb_select = PBImmediate; + end else begin + illegal_inst = 1'b1; + end + end // Off-load to IPU coprocessor - riscv_instr::P_ABS, // Xpulpimg: p.abs - riscv_instr::P_SLET, // Xpulpimg: p.slet - riscv_instr::P_SLETU, // Xpulpimg: p.sletu - riscv_instr::P_MIN, // Xpulpimg: p.min - riscv_instr::P_MINU, // Xpulpimg: p.minu - riscv_instr::P_MAX, // Xpulpimg: p.max - riscv_instr::P_MAXU, // Xpulpimg: p.maxu - riscv_instr::P_EXTHS, // Xpulpimg: p.exths - riscv_instr::P_EXTHZ, // Xpulpimg: p.exthz - riscv_instr::P_EXTBS, // Xpulpimg: p.extbs - riscv_instr::P_EXTBZ, // Xpulpimg: p.extbz - riscv_instr::P_CLIP, // Xpulpimg: p.clip - riscv_instr::P_CLIPU, // Xpulpimg: p.clipu - riscv_instr::P_CLIPR, // Xpulpimg: p.clipr - riscv_instr::P_CLIPUR: begin // Xpulpimg: p.clipur + // 1 source register (rs1) + riscv_instr::P_ABS, // Xpulpimg: p.abs + 
riscv_instr::P_EXTHS, // Xpulpimg: p.exths + riscv_instr::P_EXTHZ, // Xpulpimg: p.exthz + riscv_instr::P_EXTBS, // Xpulpimg: p.extbs + riscv_instr::P_EXTBZ, // Xpulpimg: p.extbz + riscv_instr::P_CLIP, // Xpulpimg: p.clip + riscv_instr::P_CLIPU, // Xpulpimg: p.clipu + riscv_instr::PV_ADD_SCI_H, // Xpulpimg: pv.add.sci.h + riscv_instr::PV_ADD_SCI_B, // Xpulpimg: pv.add.sci.b + riscv_instr::PV_SUB_SCI_H, // Xpulpimg: pv.sub.sci.h + riscv_instr::PV_SUB_SCI_B, // Xpulpimg: pv.sub.sci.b + riscv_instr::PV_AVG_SCI_H, // Xpulpimg: pv.avg.sci.h + riscv_instr::PV_AVG_SCI_B, // Xpulpimg: pv.avg.sci.b + riscv_instr::PV_AVGU_SCI_H, // Xpulpimg: pv.avgu.sci.h + riscv_instr::PV_AVGU_SCI_B, // Xpulpimg: pv.avgu.sci.b + riscv_instr::PV_MIN_SCI_H, // Xpulpimg: pv.min.sci.h + riscv_instr::PV_MIN_SCI_B, // Xpulpimg: pv.min.sci.b + riscv_instr::PV_MINU_SCI_H, // Xpulpimg: pv.minu.sci.h + riscv_instr::PV_MINU_SCI_B, // Xpulpimg: pv.minu.sci.b + riscv_instr::PV_MAX_SCI_H, // Xpulpimg: pv.max.sci.h + riscv_instr::PV_MAX_SCI_B, // Xpulpimg: pv.max.sci.b + riscv_instr::PV_MAXU_SCI_H, // Xpulpimg: pv.maxu.sci.h + riscv_instr::PV_MAXU_SCI_B, // Xpulpimg: pv.maxu.sci.b + riscv_instr::PV_SRL_SCI_H, // Xpulpimg: pv.srl.sci.h + riscv_instr::PV_SRL_SCI_B, // Xpulpimg: pv.srl.sci.b + riscv_instr::PV_SRA_SCI_H, // Xpulpimg: pv.sra.sci.h + riscv_instr::PV_SRA_SCI_B, // Xpulpimg: pv.sra.sci.b + riscv_instr::PV_SLL_SCI_H, // Xpulpimg: pv.sll.sci.h + riscv_instr::PV_SLL_SCI_B, // Xpulpimg: pv.sll.sci.b + riscv_instr::PV_OR_SCI_H, // Xpulpimg: pv.or.sci.h + riscv_instr::PV_OR_SCI_B, // Xpulpimg: pv.or.sci.b + riscv_instr::PV_XOR_SCI_H, // Xpulpimg: pv.xor.sci.h + riscv_instr::PV_XOR_SCI_B, // Xpulpimg: pv.xor.sci.b + riscv_instr::PV_AND_SCI_B, // Xpulpimg: pv.and.sci.b + riscv_instr::PV_AND_SCI_H, // Xpulpimg: pv.and.sci.h + riscv_instr::PV_ABS_H, // Xpulpimg: pv.abs.h + riscv_instr::PV_ABS_B, // Xpulpimg: pv.abs.b + riscv_instr::PV_EXTRACT_H, // Xpulpimg: pv.extract.h + riscv_instr::PV_EXTRACT_B, // 
Xpulpimg: pv.extract.b + riscv_instr::PV_EXTRACTU_H, // Xpulpimg: pv.extractu.h + riscv_instr::PV_EXTRACTU_B: begin // Xpulpimg: pv.extractu.b if (snitch_pkg::XPULPIMG) begin write_rd = 1'b0; uses_rd = 1'b1; acc_qvalid_o = valid_instr; opa_select = Reg; - opb_select = Reg; acc_register_rd = 1'b1; end else begin illegal_inst = 1'b1; end end - // Immediate branching - riscv_instr::P_BEQIMM: begin // Xpulpimg: p.beqimm + // 2 source registers (rs1, rs2) + riscv_instr::P_SLET, // Xpulpimg: p.slet + riscv_instr::P_SLETU, // Xpulpimg: p.sletu + riscv_instr::P_MIN, // Xpulpimg: p.min + riscv_instr::P_MINU, // Xpulpimg: p.minu + riscv_instr::P_MAX, // Xpulpimg: p.max + riscv_instr::P_MAXU, // Xpulpimg: p.maxu + riscv_instr::P_CLIPR, // Xpulpimg: p.clipr + riscv_instr::P_CLIPUR, // Xpulpimg: p.clipur + riscv_instr::PV_ADD_H, // Xpulpimg: pv.add.h + riscv_instr::PV_ADD_SC_H, // Xpulpimg: pv.add.sc.h + riscv_instr::PV_ADD_B, // Xpulpimg: pv.add.b + riscv_instr::PV_ADD_SC_B, // Xpulpimg: pv.add.sc.b + riscv_instr::PV_SUB_H, // Xpulpimg: pv.sub.h + riscv_instr::PV_SUB_SC_H, // Xpulpimg: pv.sub.sc.h + riscv_instr::PV_SUB_B, // Xpulpimg: pv.sub.b + riscv_instr::PV_SUB_SC_B, // Xpulpimg: pv.sub.sc.b + riscv_instr::PV_AVG_H, // Xpulpimg: pv.avg.h + riscv_instr::PV_AVG_SC_H, // Xpulpimg: pv.avg.sc.h + riscv_instr::PV_AVG_B, // Xpulpimg: pv.avg.b + riscv_instr::PV_AVG_SC_B, // Xpulpimg: pv.avg.sc.b + riscv_instr::PV_AVGU_H, // Xpulpimg: pv.avgu.h + riscv_instr::PV_AVGU_SC_H, // Xpulpimg: pv.avgu.sc.h + riscv_instr::PV_AVGU_B, // Xpulpimg: pv.avgu.b + riscv_instr::PV_AVGU_SC_B, // Xpulpimg: pv.avgu.sc.b + riscv_instr::PV_MIN_H, // Xpulpimg: pv.min.h + riscv_instr::PV_MIN_SC_H, // Xpulpimg: pv.min.sc.h + riscv_instr::PV_MIN_B, // Xpulpimg: pv.min.b + riscv_instr::PV_MIN_SC_B, // Xpulpimg: pv.min.sc.b + riscv_instr::PV_MINU_H, // Xpulpimg: pv.minu.h + riscv_instr::PV_MINU_SC_H, // Xpulpimg: pv.minu.sc.h + riscv_instr::PV_MINU_B, // Xpulpimg: pv.minu.b + riscv_instr::PV_MINU_SC_B, // 
Xpulpimg: pv.minu.sc.b + riscv_instr::PV_MAX_H, // Xpulpimg: pv.max.h + riscv_instr::PV_MAX_SC_H, // Xpulpimg: pv.max.sc.h + riscv_instr::PV_MAX_B, // Xpulpimg: pv.max.b + riscv_instr::PV_MAX_SC_B, // Xpulpimg: pv.max.sc.b + riscv_instr::PV_MAXU_H, // Xpulpimg: pv.maxu.h + riscv_instr::PV_MAXU_SC_H, // Xpulpimg: pv.maxu.sc.h + riscv_instr::PV_MAXU_B, // Xpulpimg: pv.maxu.b + riscv_instr::PV_MAXU_SC_B, // Xpulpimg: pv.maxu.sc.b + riscv_instr::PV_SRL_H, // Xpulpimg: pv.srl.h + riscv_instr::PV_SRL_SC_H, // Xpulpimg: pv.srl.sc.h + riscv_instr::PV_SRL_B, // Xpulpimg: pv.srl.b + riscv_instr::PV_SRL_SC_B, // Xpulpimg: pv.srl.sc.b + riscv_instr::PV_SRA_H, // Xpulpimg: pv.sra.h + riscv_instr::PV_SRA_SC_H, // Xpulpimg: pv.sra.sc.h + riscv_instr::PV_SRA_B, // Xpulpimg: pv.sra.b + riscv_instr::PV_SRA_SC_B, // Xpulpimg: pv.sra.sc.b + riscv_instr::PV_SLL_H, // Xpulpimg: pv.sll.h + riscv_instr::PV_SLL_SC_H, // Xpulpimg: pv.sll.sc.h + riscv_instr::PV_SLL_B, // Xpulpimg: pv.sll.b + riscv_instr::PV_SLL_SC_B, // Xpulpimg: pv.sll.sc.b + riscv_instr::PV_OR_H, // Xpulpimg: pv.or.h + riscv_instr::PV_OR_SC_H, // Xpulpimg: pv.or.sc.h + riscv_instr::PV_OR_B, // Xpulpimg: pv.or.b + riscv_instr::PV_OR_SC_B, // Xpulpimg: pv.or.sc.b + riscv_instr::PV_XOR_H, // Xpulpimg: pv.xor.h + riscv_instr::PV_XOR_SC_H, // Xpulpimg: pv.xor.sc.h + riscv_instr::PV_XOR_B, // Xpulpimg: pv.xor.b + riscv_instr::PV_XOR_SC_B, // Xpulpimg: pv.xor.sc.b + riscv_instr::PV_AND_H, // Xpulpimg: pv.and.h + riscv_instr::PV_AND_SC_H, // Xpulpimg: pv.and.sc.h + riscv_instr::PV_AND_B, // Xpulpimg: pv.and.b + riscv_instr::PV_AND_SC_B: begin // Xpulpimg: pv.and.sc.b if (snitch_pkg::XPULPIMG) begin - is_branch = 1'b1; write_rd = 1'b0; - alu_op = Eq; + uses_rd = 1'b1; + acc_qvalid_o = valid_instr; opa_select = Reg; - opb_select = PBImmediate; + opb_select = Reg; + acc_register_rd = 1'b1; end else begin illegal_inst = 1'b1; end end - riscv_instr::P_BNEIMM: begin // Xpulpimg: p.bneimm + // 2 source registers (rs1, rd) + 
riscv_instr::PV_INSERT_H, // Xpulpimg: pv.insert.h + riscv_instr::PV_INSERT_B: begin // Xpulpimg: pv.insert.b if (snitch_pkg::XPULPIMG) begin - is_branch = 1'b1; write_rd = 1'b0; - alu_op = Neq; + uses_rd = 1'b1; + acc_qvalid_o = valid_instr; opa_select = Reg; - opb_select = PBImmediate; + opc_select = Reg; + acc_register_rd = 1'b1; end else begin illegal_inst = 1'b1; end end - // MAC operations - // Off-load to IPU coprocessor - riscv_instr::P_MAC, // Xpulpimg: p.mac - riscv_instr::P_MSU: begin // Xpulpimg: p.msu + // 3 source registers (rs1, rs2, rd) + riscv_instr::P_MAC, // Xpulpimg: p.mac + riscv_instr::P_MSU: begin // Xpulpimg: p.msu if (snitch_pkg::XPULPIMG) begin write_rd = 1'b0; uses_rd = 1'b1; diff --git a/hardware/deps/snitch/src/snitch_ipu.sv b/hardware/deps/snitch/src/snitch_ipu.sv index 6192513ba..64064a257 100644 --- a/hardware/deps/snitch/src/snitch_ipu.sv +++ b/hardware/deps/snitch/src/snitch_ipu.sv @@ -66,23 +66,115 @@ module snitch_ipu #( div_valid_op = acc_qvalid_i; acc_qready_o = div_ready_op; end - riscv_instr::P_ABS, // Xpulpimg: p.abs - riscv_instr::P_SLET, // Xpulpimg: p.slet - riscv_instr::P_SLETU, // Xpulpimg: p.sletu - riscv_instr::P_MIN, // Xpulpimg: p.min - riscv_instr::P_MINU, // Xpulpimg: p.minu - riscv_instr::P_MAX, // Xpulpimg: p.max - riscv_instr::P_MAXU, // Xpulpimg: p.maxu - riscv_instr::P_EXTHS, // Xpulpimg: p.exths - riscv_instr::P_EXTHZ, // Xpulpimg: p.exthz - riscv_instr::P_EXTBS, // Xpulpimg: p.extbs - riscv_instr::P_EXTBZ, // Xpulpimg: p.extbz - riscv_instr::P_CLIP, // Xpulpimg: p.clip - riscv_instr::P_CLIPU, // Xpulpimg: p.clipu - riscv_instr::P_CLIPR, // Xpulpimg: p.clipr - riscv_instr::P_CLIPUR, // Xpulpimg: p.clipur - riscv_instr::P_MAC, // Xpulpimg: p.mac - riscv_instr::P_MSU: begin // Xpulpimg: p.msu + riscv_instr::P_ABS, // Xpulpimg: p.abs + riscv_instr::P_SLET, // Xpulpimg: p.slet + riscv_instr::P_SLETU, // Xpulpimg: p.sletu + riscv_instr::P_MIN, // Xpulpimg: p.min + riscv_instr::P_MINU, // Xpulpimg: p.minu + 
riscv_instr::P_MAX, // Xpulpimg: p.max + riscv_instr::P_MAXU, // Xpulpimg: p.maxu + riscv_instr::P_EXTHS, // Xpulpimg: p.exths + riscv_instr::P_EXTHZ, // Xpulpimg: p.exthz + riscv_instr::P_EXTBS, // Xpulpimg: p.extbs + riscv_instr::P_EXTBZ, // Xpulpimg: p.extbz + riscv_instr::P_CLIP, // Xpulpimg: p.clip + riscv_instr::P_CLIPU, // Xpulpimg: p.clipu + riscv_instr::P_CLIPR, // Xpulpimg: p.clipr + riscv_instr::P_CLIPUR, // Xpulpimg: p.clipur + riscv_instr::P_MAC, // Xpulpimg: p.mac + riscv_instr::P_MSU, // Xpulpimg: p.msu + riscv_instr::PV_ADD_H, // Xpulpimg: pv.add.h + riscv_instr::PV_ADD_SC_H, // Xpulpimg: pv.add.sc.h + riscv_instr::PV_ADD_SCI_H, // Xpulpimg: pv.add.sci.h + riscv_instr::PV_ADD_B, // Xpulpimg: pv.add.b + riscv_instr::PV_ADD_SC_B, // Xpulpimg: pv.add.sc.b + riscv_instr::PV_ADD_SCI_B, // Xpulpimg: pv.add.sci.b + riscv_instr::PV_SUB_H, // Xpulpimg: pv.sub.h + riscv_instr::PV_SUB_SC_H, // Xpulpimg: pv.sub.sc.h + riscv_instr::PV_SUB_SCI_H, // Xpulpimg: pv.sub.sci.h + riscv_instr::PV_SUB_B, // Xpulpimg: pv.sub.b + riscv_instr::PV_SUB_SC_B, // Xpulpimg: pv.sub.sc.b + riscv_instr::PV_SUB_SCI_B, // Xpulpimg: pv.sub.sci.b + riscv_instr::PV_AVG_H, // Xpulpimg: pv.avg.h + riscv_instr::PV_AVG_SC_H, // Xpulpimg: pv.avg.sc.h + riscv_instr::PV_AVG_SCI_H, // Xpulpimg: pv.avg.sci.h + riscv_instr::PV_AVG_B, // Xpulpimg: pv.avg.b + riscv_instr::PV_AVG_SC_B, // Xpulpimg: pv.avg.sc.b + riscv_instr::PV_AVG_SCI_B, // Xpulpimg: pv.avg.sci.b + riscv_instr::PV_AVGU_H, // Xpulpimg: pv.avgu.h + riscv_instr::PV_AVGU_SC_H, // Xpulpimg: pv.avgu.sc.h + riscv_instr::PV_AVGU_SCI_H, // Xpulpimg: pv.avgu.sci.h + riscv_instr::PV_AVGU_B, // Xpulpimg: pv.avgu.b + riscv_instr::PV_AVGU_SC_B, // Xpulpimg: pv.avgu.sc.b + riscv_instr::PV_AVGU_SCI_B, // Xpulpimg: pv.avgu.sci.b + riscv_instr::PV_MIN_H, // Xpulpimg: pv.min.h + riscv_instr::PV_MIN_SC_H, // Xpulpimg: pv.min.sc.h + riscv_instr::PV_MIN_SCI_H, // Xpulpimg: pv.min.sci.h + riscv_instr::PV_MIN_B, // Xpulpimg: pv.min.b + 
riscv_instr::PV_MIN_SC_B, // Xpulpimg: pv.min.sc.b + riscv_instr::PV_MIN_SCI_B, // Xpulpimg: pv.min.sci.b + riscv_instr::PV_MINU_H, // Xpulpimg: pv.minu.h + riscv_instr::PV_MINU_SC_H, // Xpulpimg: pv.minu.sc.h + riscv_instr::PV_MINU_SCI_H, // Xpulpimg: pv.minu.sci.h + riscv_instr::PV_MINU_B, // Xpulpimg: pv.minu.b + riscv_instr::PV_MINU_SC_B, // Xpulpimg: pv.minu.sc.b + riscv_instr::PV_MINU_SCI_B, // Xpulpimg: pv.minu.sci.b + riscv_instr::PV_MAX_H, // Xpulpimg: pv.max.h + riscv_instr::PV_MAX_SC_H, // Xpulpimg: pv.max.sc.h + riscv_instr::PV_MAX_SCI_H, // Xpulpimg: pv.max.sci.h + riscv_instr::PV_MAX_B, // Xpulpimg: pv.max.b + riscv_instr::PV_MAX_SC_B, // Xpulpimg: pv.max.sc.b + riscv_instr::PV_MAX_SCI_B, // Xpulpimg: pv.max.sci.b + riscv_instr::PV_MAXU_H, // Xpulpimg: pv.maxu.h + riscv_instr::PV_MAXU_SC_H, // Xpulpimg: pv.maxu.sc.h + riscv_instr::PV_MAXU_SCI_H, // Xpulpimg: pv.maxu.sci.h + riscv_instr::PV_MAXU_B, // Xpulpimg: pv.maxu.b + riscv_instr::PV_MAXU_SC_B, // Xpulpimg: pv.maxu.sc.b + riscv_instr::PV_MAXU_SCI_B, // Xpulpimg: pv.maxu.sci.b + riscv_instr::PV_SRL_H, // Xpulpimg: pv.srl.h + riscv_instr::PV_SRL_SC_H, // Xpulpimg: pv.srl.sc.h + riscv_instr::PV_SRL_SCI_H, // Xpulpimg: pv.srl.sci.h + riscv_instr::PV_SRL_B, // Xpulpimg: pv.srl.b + riscv_instr::PV_SRL_SC_B, // Xpulpimg: pv.srl.sc.b + riscv_instr::PV_SRL_SCI_B, // Xpulpimg: pv.srl.sci.b + riscv_instr::PV_SRA_H, // Xpulpimg: pv.sra.h + riscv_instr::PV_SRA_SC_H, // Xpulpimg: pv.sra.sc.h + riscv_instr::PV_SRA_SCI_H, // Xpulpimg: pv.sra.sci.h + riscv_instr::PV_SRA_B, // Xpulpimg: pv.sra.b + riscv_instr::PV_SRA_SC_B, // Xpulpimg: pv.sra.sc.b + riscv_instr::PV_SRA_SCI_B, // Xpulpimg: pv.sra.sci.b + riscv_instr::PV_SLL_H, // Xpulpimg: pv.sll.h + riscv_instr::PV_SLL_SC_H, // Xpulpimg: pv.sll.sc.h + riscv_instr::PV_SLL_SCI_H, // Xpulpimg: pv.sll.sci.h + riscv_instr::PV_SLL_B, // Xpulpimg: pv.sll.b + riscv_instr::PV_SLL_SC_B, // Xpulpimg: pv.sll.sc.b + riscv_instr::PV_SLL_SCI_B, // Xpulpimg: pv.sll.sci.b + 
riscv_instr::PV_OR_H, // Xpulpimg: pv.or.h + riscv_instr::PV_OR_SC_H, // Xpulpimg: pv.or.sc.h + riscv_instr::PV_OR_SCI_H, // Xpulpimg: pv.or.sci.h + riscv_instr::PV_OR_B, // Xpulpimg: pv.or.b + riscv_instr::PV_OR_SC_B, // Xpulpimg: pv.or.sc.b + riscv_instr::PV_OR_SCI_B, // Xpulpimg: pv.or.sci.b + riscv_instr::PV_XOR_H, // Xpulpimg: pv.xor.h + riscv_instr::PV_XOR_SC_H, // Xpulpimg: pv.xor.sc.h + riscv_instr::PV_XOR_SCI_H, // Xpulpimg: pv.xor.sci.h + riscv_instr::PV_XOR_B, // Xpulpimg: pv.xor.b + riscv_instr::PV_XOR_SC_B, // Xpulpimg: pv.xor.sc.b + riscv_instr::PV_XOR_SCI_B, // Xpulpimg: pv.xor.sci.b + riscv_instr::PV_AND_H, // Xpulpimg: pv.and.h + riscv_instr::PV_AND_SC_H, // Xpulpimg: pv.and.sc.h + riscv_instr::PV_AND_SCI_H, // Xpulpimg: pv.and.sci.h + riscv_instr::PV_AND_B, // Xpulpimg: pv.and.b + riscv_instr::PV_AND_SC_B, // Xpulpimg: pv.and.sc.b + riscv_instr::PV_AND_SCI_B, // Xpulpimg: pv.and.sci.b + riscv_instr::PV_ABS_H, // Xpulpimg: pv.abs.h + riscv_instr::PV_ABS_B, // Xpulpimg: pv.abs.b + riscv_instr::PV_EXTRACT_H, // Xpulpimg: pv.extract.h + riscv_instr::PV_EXTRACT_B, // Xpulpimg: pv.extract.b + riscv_instr::PV_EXTRACTU_H, // Xpulpimg: pv.extractu.h + riscv_instr::PV_EXTRACTU_B, // Xpulpimg: pv.extractu.b + riscv_instr::PV_INSERT_H, // Xpulpimg: pv.insert.h + riscv_instr::PV_INSERT_B: begin // Xpulpimg: pv.insert.b if (snitch_pkg::XPULPIMG) begin dsp_valid_op = acc_qvalid_i; acc_qready_o = dsp_ready_op; @@ -212,8 +304,10 @@ module dspu #( assign id_o = id_i; // Decoded fields - logic [4:0] ximm; - assign ximm = operator_i[24:20]; + logic [4:0] imm5; + logic [5:0] imm6; + assign imm5 = operator_i[24:20]; + assign imm6 = {operator_i[24:20], operator_i[25]}; // Internal control signals logic cmp_signed; // comparator operation is signed @@ -221,7 +315,7 @@ module dspu #( None, Reg, Zero, ClipBound } cmp_op_b_sel; // selection of shared comparator operands logic clip_unsigned; // clip operation has "0" as lower bound - logic clip_register; // if 1 clip 
operation uses rs2, else ximm + logic clip_register; // if 1 clip operation uses rs2, else imm5 enum logic [1:0] { NoMul, MulLow, MulHigh, MulMac } mul_op; // type of multiplication operation @@ -229,9 +323,20 @@ module dspu #( logic mul_op_a_sign; // sign of multiplier operand a logic mac_op_b_sign; // sign of multiplier operand b enum logic [3:0] { - Nop, Abs, Sle, Min, Max, Exths, Exthz, Extbs, Extbz, Clip, Mac + Nop, Abs, Sle, Min, Max, Exths, Exthz, Extbs, Extbz, Clip, Mac, Simd } res_sel; // result selection + enum logic [0:3] { + SimdNop, SimdAdd, SimdSub, SimdAvg, SimdMin, SimdMax, SimdSrl, SimdSra, SimdSll, SimdOr, SimdXor, SimdAnd, SimdAbs, SimdExt, SimdIns + } simd_op; + enum logic { + HalfWord, Byte + } simd_size; // SIMD granularity + enum logic [0:1] { + Vect, Sc, Sci + } simd_mode; // SIMD mode + logic simd_signed; // SIMD operation is signed and uses sign-extended imm6 + // -------------------- // Decoder // -------------------- @@ -246,6 +351,10 @@ module dspu #( mul_op_a_sign = 1'b0; mac_op_b_sign = 1'b0; res_sel = Nop; + simd_op = SimdNop; + simd_size = HalfWord; + simd_mode = Vect; + simd_signed = 1; unique casez (operator_i) // Multiplications from M extension riscv_instr::MUL: begin @@ -350,6 +459,532 @@ module dspu #( mac_op_b_sign = 1'b1; res_sel = Mac; end + riscv_instr::PV_ADD_H: begin + simd_op = SimdAdd; + res_sel = Simd; + end + riscv_instr::PV_ADD_SC_H: begin + simd_op = SimdAdd; + simd_mode = Sc; + res_sel = Simd; + end + riscv_instr::PV_ADD_SCI_H: begin + simd_op = SimdAdd; + simd_mode = Sci; + res_sel = Simd; + end + riscv_instr::PV_ADD_B: begin + simd_op = SimdAdd; + simd_size = Byte; + res_sel = Simd; + end + riscv_instr::PV_ADD_SC_B: begin + simd_op = SimdAdd; + simd_size = Byte; + simd_mode = Sc; + res_sel = Simd; + end + riscv_instr::PV_ADD_SCI_B: begin + simd_op = SimdAdd; + simd_size = Byte; + simd_mode = Sci; + res_sel = Simd; + end + riscv_instr::PV_SUB_H: begin + simd_op = SimdSub; + res_sel = Simd; + end + 
riscv_instr::PV_SUB_SC_H: begin + simd_op = SimdSub; + simd_mode = Sc; + res_sel = Simd; + end + riscv_instr::PV_SUB_SCI_H: begin + simd_op = SimdSub; + simd_mode = Sci; + res_sel = Simd; + end + riscv_instr::PV_SUB_B: begin + simd_op = SimdSub; + simd_size = Byte; + res_sel = Simd; + end + riscv_instr::PV_SUB_SC_B: begin + simd_op = SimdSub; + simd_size = Byte; + simd_mode = Sc; + res_sel = Simd; + end + riscv_instr::PV_SUB_SCI_B: begin + simd_op = SimdSub; + simd_size = Byte; + simd_mode = Sci; + res_sel = Simd; + end + riscv_instr::PV_AVG_H: begin + simd_op = SimdAvg; + res_sel = Simd; + end + riscv_instr::PV_AVG_SC_H: begin + simd_op = SimdAvg; + simd_mode = Sc; + res_sel = Simd; + end + riscv_instr::PV_AVG_SCI_H: begin + simd_op = SimdAvg; + simd_mode = Sci; + res_sel = Simd; + end + riscv_instr::PV_AVG_B: begin + simd_op = SimdAvg; + simd_size = Byte; + res_sel = Simd; + end + riscv_instr::PV_AVG_SC_B: begin + simd_op = SimdAvg; + simd_size = Byte; + simd_mode = Sc; + res_sel = Simd; + end + riscv_instr::PV_AVG_SCI_B: begin + simd_op = SimdAvg; + simd_size = Byte; + simd_mode = Sci; + res_sel = Simd; + end + riscv_instr::PV_AVGU_H: begin + simd_op = SimdAvg; + simd_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_AVGU_SC_H: begin + simd_op = SimdAvg; + simd_mode = Sc; + simd_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_AVGU_SCI_H: begin + simd_op = SimdAvg; + simd_mode = Sci; + simd_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_AVGU_B: begin + simd_op = SimdAvg; + simd_size = Byte; + simd_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_AVGU_SC_B: begin + simd_op = SimdAvg; + simd_size = Byte; + simd_mode = Sc; + simd_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_AVGU_SCI_B: begin + simd_op = SimdAvg; + simd_size = Byte; + simd_mode = Sci; + simd_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_MIN_H: begin + simd_op = SimdMin; + res_sel = Simd; + end + riscv_instr::PV_MIN_SC_H: begin + simd_op = SimdMin; + simd_mode = 
Sc; + res_sel = Simd; + end + riscv_instr::PV_MIN_SCI_H: begin + simd_op = SimdMin; + simd_mode = Sci; + res_sel = Simd; + end + riscv_instr::PV_MIN_B: begin + simd_op = SimdMin; + simd_size = Byte; + res_sel = Simd; + end + riscv_instr::PV_MIN_SC_B: begin + simd_op = SimdMin; + simd_size = Byte; + simd_mode = Sc; + res_sel = Simd; + end + riscv_instr::PV_MIN_SCI_B: begin + simd_op = SimdMin; + simd_size = Byte; + simd_mode = Sci; + res_sel = Simd; + end + riscv_instr::PV_MINU_H: begin + simd_op = SimdMin; + simd_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_MINU_SC_H: begin + simd_op = SimdMin; + simd_mode = Sc; + simd_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_MINU_SCI_H: begin + simd_op = SimdMin; + simd_mode = Sci; + simd_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_MINU_B: begin + simd_op = SimdMin; + simd_size = Byte; + simd_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_MINU_SC_B: begin + simd_op = SimdMin; + simd_size = Byte; + simd_mode = Sc; + simd_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_MINU_SCI_B: begin + simd_op = SimdMin; + simd_size = Byte; + simd_mode = Sci; + simd_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_MAX_H: begin + simd_op = SimdMax; + res_sel = Simd; + end + riscv_instr::PV_MAX_SC_H: begin + simd_op = SimdMax; + simd_mode = Sc; + res_sel = Simd; + end + riscv_instr::PV_MAX_SCI_H: begin + simd_op = SimdMax; + simd_mode = Sci; + res_sel = Simd; + end + riscv_instr::PV_MAX_B: begin + simd_op = SimdMax; + simd_size = Byte; + res_sel = Simd; + end + riscv_instr::PV_MAX_SC_B: begin + simd_op = SimdMax; + simd_size = Byte; + simd_mode = Sc; + res_sel = Simd; + end + riscv_instr::PV_MAX_SCI_B: begin + simd_op = SimdMax; + simd_size = Byte; + simd_mode = Sci; + res_sel = Simd; + end + riscv_instr::PV_MAXU_H: begin + simd_op = SimdMax; + simd_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_MAXU_SC_H: begin + simd_op = SimdMax; + simd_mode = Sc; + simd_signed = 0; + res_sel = Simd; + end 
+ riscv_instr::PV_MAXU_SCI_H: begin + simd_op = SimdMax; + simd_mode = Sci; + simd_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_MAXU_B: begin + simd_op = SimdMax; + simd_size = Byte; + simd_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_MAXU_SC_B: begin + simd_op = SimdMax; + simd_size = Byte; + simd_mode = Sc; + simd_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_MAXU_SCI_B: begin + simd_op = SimdMax; + simd_size = Byte; + simd_mode = Sci; + simd_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_SRL_H: begin + simd_op = SimdSrl; + simd_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_SRL_SC_H: begin + simd_op = SimdSrl; + simd_mode = Sc; + simd_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_SRL_SCI_H: begin + simd_op = SimdSrl; + simd_mode = Sci; + simd_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_SRL_B: begin + simd_op = SimdSrl; + simd_size = Byte; + simd_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_SRL_SC_B: begin + simd_op = SimdSrl; + simd_size = Byte; + simd_mode = Sc; + simd_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_SRL_SCI_B: begin + simd_op = SimdSrl; + simd_size = Byte; + simd_mode = Sci; + simd_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_SRA_H: begin + simd_op = SimdSra; + simd_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_SRA_SC_H: begin + simd_op = SimdSra; + simd_mode = Sc; + simd_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_SRA_SCI_H: begin + simd_op = SimdSra; + simd_mode = Sci; + simd_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_SRA_B: begin + simd_op = SimdSra; + simd_size = Byte; + simd_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_SRA_SC_B: begin + simd_op = SimdSra; + simd_size = Byte; + simd_mode = Sc; + simd_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_SRA_SCI_B: begin + simd_op = SimdSra; + simd_size = Byte; + simd_mode = Sci; + simd_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_SLL_H: begin + simd_op = SimdSll; 
+ simd_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_SLL_SC_H: begin + simd_op = SimdSll; + simd_mode = Sc; + simd_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_SLL_SCI_H: begin + simd_op = SimdSll; + simd_mode = Sci; + simd_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_SLL_B: begin + simd_op = SimdSll; + simd_size = Byte; + simd_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_SLL_SC_B: begin + simd_op = SimdSll; + simd_size = Byte; + simd_mode = Sc; + simd_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_SLL_SCI_B: begin + simd_op = SimdSll; + simd_size = Byte; + simd_mode = Sci; + simd_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_OR_H: begin + simd_op = SimdOr; + simd_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_OR_SC_H: begin + simd_op = SimdOr; + simd_mode = Sc; + simd_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_OR_SCI_H: begin + simd_op = SimdOr; + simd_mode = Sci; + simd_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_OR_B: begin + simd_op = SimdOr; + simd_size = Byte; + simd_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_OR_SC_B: begin + simd_op = SimdOr; + simd_size = Byte; + simd_mode = Sc; + simd_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_OR_SCI_B: begin + simd_op = SimdOr; + simd_size = Byte; + simd_mode = Sci; + simd_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_XOR_H: begin + simd_op = SimdXor; + simd_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_XOR_SC_H: begin + simd_op = SimdXor; + simd_mode = Sc; + simd_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_XOR_SCI_H: begin + simd_op = SimdXor; + simd_mode = Sci; + simd_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_XOR_B: begin + simd_op = SimdXor; + simd_size = Byte; + simd_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_XOR_SC_B: begin + simd_op = SimdXor; + simd_size = Byte; + simd_mode = Sc; + simd_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_XOR_SCI_B: begin + simd_op = 
SimdXor; + simd_size = Byte; + simd_mode = Sci; + simd_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_AND_H: begin + simd_op = SimdAnd; + simd_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_AND_SC_H: begin + simd_op = SimdAnd; + simd_mode = Sc; + simd_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_AND_SCI_H: begin + simd_op = SimdAnd; + simd_mode = Sci; + simd_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_AND_B: begin + simd_op = SimdAnd; + simd_size = Byte; + simd_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_AND_SC_B: begin + simd_op = SimdAnd; + simd_size = Byte; + simd_mode = Sc; + simd_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_AND_SCI_B: begin + simd_op = SimdAnd; + simd_size = Byte; + simd_mode = Sci; + simd_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_ABS_H: begin + simd_op = SimdAbs; + res_sel = Simd; + end + riscv_instr::PV_ABS_B: begin + simd_op = SimdAbs; + simd_size = Byte; + res_sel = Simd; + end + riscv_instr::PV_EXTRACT_H: begin + simd_op = SimdExt; + res_sel = Simd; + end + riscv_instr::PV_EXTRACT_B: begin + simd_op = SimdExt; + simd_size = Byte; + res_sel = Simd; + end + riscv_instr::PV_EXTRACTU_H: begin + simd_op = SimdExt; + simd_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_EXTRACTU_B: begin + simd_op = SimdExt; + simd_size = Byte; + simd_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_INSERT_H: begin + simd_op = SimdIns; + res_sel = Simd; + end + riscv_instr::PV_INSERT_B: begin + simd_op = SimdIns; + simd_size = Byte; + res_sel = Simd; + end default: ; endcase end @@ -368,8 +1003,8 @@ module dspu #( logic [Width-1:0] clip_lower; logic [Width-1:0] clip_comp; - // Generate -2^(ximm-1), 2^(ximm-1)-1 for clip/clipu and -rs2-1, rs2 for clipr, clipur - assign clip_lower = ({(Width+1){1'b1}} << $unsigned(ximm)) >> 1; + // Generate -2^(imm5-1), 2^(imm5-1)-1 for clip/clipu and -rs2-1, rs2 for clipr, clipur + assign clip_lower = ({(Width+1){1'b1}} << $unsigned(imm5)) >> 1; assign 
clip_op_b_n = clip_unsigned ? 'b0 : (clip_register ? ~op_b_i : clip_lower); assign clip_op_b = clip_register ? op_b_i : ~clip_lower; @@ -379,7 +1014,7 @@ module dspu #( // Select operand to use in comparison for clip operations: clips would need two comparisons // to clamp the result between the two bounds; but one comparison is enough if we select the // second operand basing on op_a and clip_op_b signs (i.e. rs1 and clip upper bound, being - // either rs2 or 2^(ximm-1)-1) + // either rs2 or 2^(imm5-1)-1) assign clip_comp = clip_use_n_bound ? clip_op_b_n : clip_op_b; // -------------------- @@ -426,6 +1061,165 @@ module dspu #( endcase end + // -------------------- + // SIMD operations + // -------------------- + + logic [1:0][15:0] simd_h_op_a, simd_h_op_b, simd_h_res; + logic [3:0][7:0] simd_b_op_a, simd_b_op_b, simd_b_res; + logic [15:0] simd_h_imm; + logic [7:0] simd_b_imm; + logic [Width-1:0] simd_rd; + logic [Width-1:0] simd_result; + + // imm6 extensions: sign-extend iff simd_signed (a mixed-sign ?: would always zero-extend) + assign simd_h_imm = {{10{imm6[5] & simd_signed}}, imm6}; + assign simd_b_imm = {{2{imm6[5] & simd_signed}}, imm6}; + + // half-word granularity operands + assign simd_h_op_a[0] = op_a_i[15:0]; + assign simd_h_op_a[1] = op_a_i[31:16]; + assign simd_h_op_b[0] = (simd_mode == Vect) ? op_b_i[15:0] : ((simd_mode == Sc) ? op_b_i[15:0] : simd_h_imm); + assign simd_h_op_b[1] = (simd_mode == Vect) ? op_b_i[31:16] : ((simd_mode == Sc) ? op_b_i[15:0] : simd_h_imm); + + // byte granularity operands + assign simd_b_op_a[0] = op_a_i[7:0]; + assign simd_b_op_a[1] = op_a_i[15:8]; + assign simd_b_op_a[2] = op_a_i[23:16]; + assign simd_b_op_a[3] = op_a_i[31:24]; + assign simd_b_op_b[0] = (simd_mode == Vect) ? op_b_i[7:0] : ((simd_mode == Sc) ? op_b_i[7:0] : simd_b_imm); + assign simd_b_op_b[1] = (simd_mode == Vect) ? op_b_i[15:8] : ((simd_mode == Sc) ? op_b_i[7:0] : simd_b_imm); + assign simd_b_op_b[2] = (simd_mode == Vect) ? op_b_i[23:16] : ((simd_mode == Sc) ?
op_b_i[7:0] : simd_b_imm); + assign simd_b_op_b[3] = (simd_mode == Vect) ? op_b_i[31:24] : ((simd_mode == Sc) ? op_b_i[7:0] : simd_b_imm); + + always_comb begin + simd_h_res = 'b0; + simd_b_res = 'b0; + unique case (simd_size) + // half-word granularity + HalfWord: begin + unique case (simd_op) + SimdAdd: begin + for (int i = 0; i < Width/16; i++) + simd_h_res[i] = simd_h_op_a[i] + simd_h_op_b[i]; + end + SimdSub: begin + for (int i = 0; i < Width/16; i++) + simd_h_res[i] = simd_h_op_a[i] - simd_h_op_b[i]; + end + SimdAvg: begin + for (int i = 0; i < Width/16; i++) + simd_h_res[i] = simd_signed ? (simd_h_op_a[i] + simd_h_op_b[i]) >>> 1 : + (simd_h_op_a[i] + simd_h_op_b[i]) >> 1; + end + SimdMin: begin + for (int i = 0; i < Width/16; i++) + simd_h_res[i] = $signed({simd_h_op_a[i][15] & simd_signed, simd_h_op_a[i]}) <= + $signed({simd_h_op_b[i][15] & simd_signed, simd_h_op_b[i]}) ? + simd_h_op_a[i] : simd_h_op_b[i]; + end + SimdMax: begin + for (int i = 0; i < Width/16; i++) + simd_h_res[i] = $signed({simd_h_op_a[i][15] & simd_signed, simd_h_op_a[i]}) > + $signed({simd_h_op_b[i][15] & simd_signed, simd_h_op_b[i]}) ? + simd_h_op_a[i] : simd_h_op_b[i]; + end + SimdSrl: begin + for (int i = 0; i < Width/16; i++) + simd_h_res[i] = simd_h_op_a[i] >> simd_h_op_b[i]; + end + SimdSra: begin + for (int i = 0; i < Width/16; i++) + simd_h_res[i] = $signed(simd_h_op_a[i]) >>> simd_h_op_b[i]; + end + SimdSll: begin + for (int i = 0; i < Width/16; i++) + simd_h_res[i] = simd_h_op_a[i] << simd_h_op_b[i]; + end + SimdOr: simd_h_res = simd_h_op_a | simd_h_op_b; + SimdXor: simd_h_res = simd_h_op_a ^ simd_h_op_b; + SimdAnd: simd_h_res = simd_h_op_a & simd_h_op_b; + SimdAbs: begin + for (int i = 0; i < Width/16; i++) + simd_h_res[i] = $signed(simd_h_op_a[i]) > 0 ? 
simd_h_op_a[i] : -$signed(simd_h_op_a[i]); + end + SimdExt: begin + simd_h_res[0] = simd_h_op_a[imm6[0]]; + // sign extend if needed + simd_h_res[1] = {16{simd_h_op_a[imm6[0]][15] & simd_signed}}; + end + SimdIns: begin + simd_h_res = op_c_i; + simd_h_res[imm6[0]] = simd_h_op_a[0]; + end + default: ; + endcase + end + // byte granularity + Byte: begin + unique case (simd_op) + SimdAdd: begin + for (int i = 0; i < Width/8; i++) + simd_b_res[i] = simd_b_op_a[i] + simd_b_op_b[i]; + end + SimdSub: begin + for (int i = 0; i < Width/8; i++) + simd_b_res[i] = simd_b_op_a[i] - simd_b_op_b[i]; + end + SimdAvg: begin + for (int i = 0; i < Width/8; i++) + simd_b_res[i] = simd_signed ? (simd_b_op_a[i] + simd_b_op_b[i]) >>> 1 : + (simd_b_op_a[i] + simd_b_op_b[i]) >> 1; + end + SimdMin: begin + for (int i = 0; i < Width/8; i++) + simd_b_res[i] = $signed({simd_b_op_a[i][7] & simd_signed, simd_b_op_a[i]}) <= + $signed({simd_b_op_b[i][7] & simd_signed, simd_b_op_b[i]}) ? + simd_b_op_a[i] : simd_b_op_b[i]; + end + SimdMax: begin + for (int i = 0; i < Width/8; i++) + simd_b_res[i] = $signed({simd_b_op_a[i][7] & simd_signed, simd_b_op_a[i]}) > + $signed({simd_b_op_b[i][7] & simd_signed, simd_b_op_b[i]}) ? + simd_b_op_a[i] : simd_b_op_b[i]; + end + SimdSrl: begin + for (int i = 0; i < Width/8; i++) + simd_b_res[i] = simd_b_op_a[i] >> simd_b_op_b[i]; + end + SimdSra: begin + for (int i = 0; i < Width/8; i++) + simd_b_res[i] = $signed(simd_b_op_a[i]) >>> simd_b_op_b[i]; + end + SimdSll: begin + for (int i = 0; i < Width/8; i++) + simd_b_res[i] = simd_b_op_a[i] << simd_b_op_b[i]; + end + SimdOr: simd_b_res = simd_b_op_a | simd_b_op_b; + SimdXor: simd_b_res = simd_b_op_a ^ simd_b_op_b; + SimdAnd: simd_b_res = simd_b_op_a & simd_b_op_b; + SimdAbs: begin + for (int i = 0; i < Width/8; i++) + simd_b_res[i] = $signed(simd_b_op_a[i]) > 0 ? 
simd_b_op_a[i] : -$signed(simd_b_op_a[i]); + end + SimdExt: begin + simd_b_res[0] = simd_b_op_a[imm6[1:0]]; + // sign extend if needed + simd_b_res[1] = {8{simd_b_op_a[imm6[1:0]][7] & simd_signed}}; + simd_b_res[2] = {8{simd_b_op_a[imm6[1:0]][7] & simd_signed}}; + simd_b_res[3] = {8{simd_b_op_a[imm6[1:0]][7] & simd_signed}}; + end + SimdIns: begin + simd_b_res = op_c_i; + simd_b_res[imm6[1:0]] = simd_b_op_a[0]; + end + default: ; + endcase + end + default: ; + endcase + end + // -------------------- // Result generation // -------------------- @@ -457,6 +1251,7 @@ module dspu #( // be clipped to the upper bound since rs1 > clip_op_b Clip: result_o = cmp_result ? (clip_use_n_bound ? clip_op_b_n : op_a_i) : (op_a_i[Width-1] ? op_a_i : clip_op_b); Mac: result_o = mac_result; + Simd: result_o = simd_result; default: result_o = '0; endcase end From c5d5c3b0ec9309784bc715bd6f081b57d7c74038 Mon Sep 17 00:00:00 2001 From: Sergio Mazzola Date: Tue, 12 Jan 2021 14:14:52 +0100 Subject: [PATCH 22/65] [riscv-opcodes] Add Xpulpv2 SIMD dot-product instructions Added instructions: SIMD dotup, dotusp, dotsp, sdotup, sdotusp, sdotsp --- toolchain/riscv-opcodes/encoding_out.h | 108 ++++++++++++++++++ toolchain/riscv-opcodes/inst.sverilog | 36 ++++++ toolchain/riscv-opcodes/opcodes-rvv | 44 +++---- .../riscv-opcodes/opcodes-xpulpimg_CUSTOM | 42 +++++++ 4 files changed, 208 insertions(+), 22 deletions(-) diff --git a/toolchain/riscv-opcodes/encoding_out.h b/toolchain/riscv-opcodes/encoding_out.h index 85eca9d1e..f52fabf59 100644 --- a/toolchain/riscv-opcodes/encoding_out.h +++ b/toolchain/riscv-opcodes/encoding_out.h @@ -2114,6 +2114,78 @@ #define MASK_PV_INSERT_H 0xfc00707f #define MATCH_PV_INSERT_B 0xb0007057 #define MASK_PV_INSERT_B 0xfc00707f +#define MATCH_PV_DOTUP_H 0x80000057 +#define MASK_PV_DOTUP_H 0xfe00707f +#define MATCH_PV_DOTUP_SC_H 0x80004057 +#define MASK_PV_DOTUP_SC_H 0xfe00707f +#define MATCH_PV_DOTUP_SCI_H 0x80006057 +#define MASK_PV_DOTUP_SCI_H 0xfc00707f +#define 
MATCH_PV_DOTUP_B 0x80001057 +#define MASK_PV_DOTUP_B 0xfe00707f +#define MATCH_PV_DOTUP_SC_B 0x80005057 +#define MASK_PV_DOTUP_SC_B 0xfe00707f +#define MATCH_PV_DOTUP_SCI_B 0x80007057 +#define MASK_PV_DOTUP_SCI_B 0xfc00707f +#define MATCH_PV_DOTUSP_H 0x88000057 +#define MASK_PV_DOTUSP_H 0xfe00707f +#define MATCH_PV_DOTUSP_SC_H 0x88004057 +#define MASK_PV_DOTUSP_SC_H 0xfe00707f +#define MATCH_PV_DOTUSP_SCI_H 0x88006057 +#define MASK_PV_DOTUSP_SCI_H 0xfc00707f +#define MATCH_PV_DOTUSP_B 0x88001057 +#define MASK_PV_DOTUSP_B 0xfe00707f +#define MATCH_PV_DOTUSP_SC_B 0x88005057 +#define MASK_PV_DOTUSP_SC_B 0xfe00707f +#define MATCH_PV_DOTUSP_SCI_B 0x88007057 +#define MASK_PV_DOTUSP_SCI_B 0xfc00707f +#define MATCH_PV_DOTSP_H 0x98000057 +#define MASK_PV_DOTSP_H 0xfe00707f +#define MATCH_PV_DOTSP_SC_H 0x98004057 +#define MASK_PV_DOTSP_SC_H 0xfe00707f +#define MATCH_PV_DOTSP_SCI_H 0x98006057 +#define MASK_PV_DOTSP_SCI_H 0xfc00707f +#define MATCH_PV_DOTSP_B 0x98001057 +#define MASK_PV_DOTSP_B 0xfe00707f +#define MATCH_PV_DOTSP_SC_B 0x98005057 +#define MASK_PV_DOTSP_SC_B 0xfe00707f +#define MATCH_PV_DOTSP_SCI_B 0x98007057 +#define MASK_PV_DOTSP_SCI_B 0xfc00707f +#define MATCH_PV_SDOTUP_H 0xa0000057 +#define MASK_PV_SDOTUP_H 0xfe00707f +#define MATCH_PV_SDOTUP_SC_H 0xa0004057 +#define MASK_PV_SDOTUP_SC_H 0xfe00707f +#define MATCH_PV_SDOTUP_SCI_H 0xa0006057 +#define MASK_PV_SDOTUP_SCI_H 0xfc00707f +#define MATCH_PV_SDOTUP_B 0xa0001057 +#define MASK_PV_SDOTUP_B 0xfe00707f +#define MATCH_PV_SDOTUP_SC_B 0xa0005057 +#define MASK_PV_SDOTUP_SC_B 0xfe00707f +#define MATCH_PV_SDOTUP_SCI_B 0xa0007057 +#define MASK_PV_SDOTUP_SCI_B 0xfc00707f +#define MATCH_PV_SDOTUSP_H 0xa8000057 +#define MASK_PV_SDOTUSP_H 0xfe00707f +#define MATCH_PV_SDOTUSP_SC_H 0xa8004057 +#define MASK_PV_SDOTUSP_SC_H 0xfe00707f +#define MATCH_PV_SDOTUSP_SCI_H 0xa8006057 +#define MASK_PV_SDOTUSP_SCI_H 0xfc00707f +#define MATCH_PV_SDOTUSP_B 0xa8001057 +#define MASK_PV_SDOTUSP_B 0xfe00707f +#define MATCH_PV_SDOTUSP_SC_B 
0xa8005057 +#define MASK_PV_SDOTUSP_SC_B 0xfe00707f +#define MATCH_PV_SDOTUSP_SCI_B 0xa8007057 +#define MASK_PV_SDOTUSP_SCI_B 0xfc00707f +#define MATCH_PV_SDOTSP_H 0xb8000057 +#define MASK_PV_SDOTSP_H 0xfe00707f +#define MATCH_PV_SDOTSP_SC_H 0xb8004057 +#define MASK_PV_SDOTSP_SC_H 0xfe00707f +#define MATCH_PV_SDOTSP_SCI_H 0xb8006057 +#define MASK_PV_SDOTSP_SCI_H 0xfc00707f +#define MATCH_PV_SDOTSP_B 0xb8001057 +#define MASK_PV_SDOTSP_B 0xfe00707f +#define MATCH_PV_SDOTSP_SC_B 0xb8005057 +#define MASK_PV_SDOTSP_SC_B 0xfe00707f +#define MATCH_PV_SDOTSP_SCI_B 0xb8007057 +#define MASK_PV_SDOTSP_SCI_B 0xfc00707f #define MATCH_FLAH 0x1007 #define MASK_FLAH 0x707f #define MATCH_FSAH 0x1027 @@ -3976,6 +4048,42 @@ DECLARE_INSN(pv_extractu_h, MATCH_PV_EXTRACTU_H, MASK_PV_EXTRACTU_H) DECLARE_INSN(pv_extractu_b, MATCH_PV_EXTRACTU_B, MASK_PV_EXTRACTU_B) DECLARE_INSN(pv_insert_h, MATCH_PV_INSERT_H, MASK_PV_INSERT_H) DECLARE_INSN(pv_insert_b, MATCH_PV_INSERT_B, MASK_PV_INSERT_B) +DECLARE_INSN(pv_dotup_h, MATCH_PV_DOTUP_H, MASK_PV_DOTUP_H) +DECLARE_INSN(pv_dotup_sc_h, MATCH_PV_DOTUP_SC_H, MASK_PV_DOTUP_SC_H) +DECLARE_INSN(pv_dotup_sci_h, MATCH_PV_DOTUP_SCI_H, MASK_PV_DOTUP_SCI_H) +DECLARE_INSN(pv_dotup_b, MATCH_PV_DOTUP_B, MASK_PV_DOTUP_B) +DECLARE_INSN(pv_dotup_sc_b, MATCH_PV_DOTUP_SC_B, MASK_PV_DOTUP_SC_B) +DECLARE_INSN(pv_dotup_sci_b, MATCH_PV_DOTUP_SCI_B, MASK_PV_DOTUP_SCI_B) +DECLARE_INSN(pv_dotusp_h, MATCH_PV_DOTUSP_H, MASK_PV_DOTUSP_H) +DECLARE_INSN(pv_dotusp_sc_h, MATCH_PV_DOTUSP_SC_H, MASK_PV_DOTUSP_SC_H) +DECLARE_INSN(pv_dotusp_sci_h, MATCH_PV_DOTUSP_SCI_H, MASK_PV_DOTUSP_SCI_H) +DECLARE_INSN(pv_dotusp_b, MATCH_PV_DOTUSP_B, MASK_PV_DOTUSP_B) +DECLARE_INSN(pv_dotusp_sc_b, MATCH_PV_DOTUSP_SC_B, MASK_PV_DOTUSP_SC_B) +DECLARE_INSN(pv_dotusp_sci_b, MATCH_PV_DOTUSP_SCI_B, MASK_PV_DOTUSP_SCI_B) +DECLARE_INSN(pv_dotsp_h, MATCH_PV_DOTSP_H, MASK_PV_DOTSP_H) +DECLARE_INSN(pv_dotsp_sc_h, MATCH_PV_DOTSP_SC_H, MASK_PV_DOTSP_SC_H) +DECLARE_INSN(pv_dotsp_sci_h, MATCH_PV_DOTSP_SCI_H, 
MASK_PV_DOTSP_SCI_H) +DECLARE_INSN(pv_dotsp_b, MATCH_PV_DOTSP_B, MASK_PV_DOTSP_B) +DECLARE_INSN(pv_dotsp_sc_b, MATCH_PV_DOTSP_SC_B, MASK_PV_DOTSP_SC_B) +DECLARE_INSN(pv_dotsp_sci_b, MATCH_PV_DOTSP_SCI_B, MASK_PV_DOTSP_SCI_B) +DECLARE_INSN(pv_sdotup_h, MATCH_PV_SDOTUP_H, MASK_PV_SDOTUP_H) +DECLARE_INSN(pv_sdotup_sc_h, MATCH_PV_SDOTUP_SC_H, MASK_PV_SDOTUP_SC_H) +DECLARE_INSN(pv_sdotup_sci_h, MATCH_PV_SDOTUP_SCI_H, MASK_PV_SDOTUP_SCI_H) +DECLARE_INSN(pv_sdotup_b, MATCH_PV_SDOTUP_B, MASK_PV_SDOTUP_B) +DECLARE_INSN(pv_sdotup_sc_b, MATCH_PV_SDOTUP_SC_B, MASK_PV_SDOTUP_SC_B) +DECLARE_INSN(pv_sdotup_sci_b, MATCH_PV_SDOTUP_SCI_B, MASK_PV_SDOTUP_SCI_B) +DECLARE_INSN(pv_sdotusp_h, MATCH_PV_SDOTUSP_H, MASK_PV_SDOTUSP_H) +DECLARE_INSN(pv_sdotusp_sc_h, MATCH_PV_SDOTUSP_SC_H, MASK_PV_SDOTUSP_SC_H) +DECLARE_INSN(pv_sdotusp_sci_h, MATCH_PV_SDOTUSP_SCI_H, MASK_PV_SDOTUSP_SCI_H) +DECLARE_INSN(pv_sdotusp_b, MATCH_PV_SDOTUSP_B, MASK_PV_SDOTUSP_B) +DECLARE_INSN(pv_sdotusp_sc_b, MATCH_PV_SDOTUSP_SC_B, MASK_PV_SDOTUSP_SC_B) +DECLARE_INSN(pv_sdotusp_sci_b, MATCH_PV_SDOTUSP_SCI_B, MASK_PV_SDOTUSP_SCI_B) +DECLARE_INSN(pv_sdotsp_h, MATCH_PV_SDOTSP_H, MASK_PV_SDOTSP_H) +DECLARE_INSN(pv_sdotsp_sc_h, MATCH_PV_SDOTSP_SC_H, MASK_PV_SDOTSP_SC_H) +DECLARE_INSN(pv_sdotsp_sci_h, MATCH_PV_SDOTSP_SCI_H, MASK_PV_SDOTSP_SCI_H) +DECLARE_INSN(pv_sdotsp_b, MATCH_PV_SDOTSP_B, MASK_PV_SDOTSP_B) +DECLARE_INSN(pv_sdotsp_sc_b, MATCH_PV_SDOTSP_SC_B, MASK_PV_SDOTSP_SC_B) +DECLARE_INSN(pv_sdotsp_sci_b, MATCH_PV_SDOTSP_SCI_B, MASK_PV_SDOTSP_SCI_B) DECLARE_INSN(flah, MATCH_FLAH, MASK_FLAH) DECLARE_INSN(fsah, MATCH_FSAH, MASK_FSAH) DECLARE_INSN(fmadd_ah, MATCH_FMADD_AH, MASK_FMADD_AH) diff --git a/toolchain/riscv-opcodes/inst.sverilog b/toolchain/riscv-opcodes/inst.sverilog index 8918e2c6b..f8a6ccfe7 100644 --- a/toolchain/riscv-opcodes/inst.sverilog +++ b/toolchain/riscv-opcodes/inst.sverilog @@ -918,6 +918,42 @@ package riscv_instr; localparam [31:0] PV_EXTRACTU_B = 32'b100100???????????111?????1010111; localparam 
[31:0] PV_INSERT_H = 32'b101100???????????110?????1010111; localparam [31:0] PV_INSERT_B = 32'b101100???????????111?????1010111; + localparam [31:0] PV_DOTUP_H = 32'b1000000??????????000?????1010111; + localparam [31:0] PV_DOTUP_SC_H = 32'b1000000??????????100?????1010111; + localparam [31:0] PV_DOTUP_SCI_H = 32'b100000???????????110?????1010111; + localparam [31:0] PV_DOTUP_B = 32'b1000000??????????001?????1010111; + localparam [31:0] PV_DOTUP_SC_B = 32'b1000000??????????101?????1010111; + localparam [31:0] PV_DOTUP_SCI_B = 32'b100000???????????111?????1010111; + localparam [31:0] PV_DOTUSP_H = 32'b1000100??????????000?????1010111; + localparam [31:0] PV_DOTUSP_SC_H = 32'b1000100??????????100?????1010111; + localparam [31:0] PV_DOTUSP_SCI_H = 32'b100010???????????110?????1010111; + localparam [31:0] PV_DOTUSP_B = 32'b1000100??????????001?????1010111; + localparam [31:0] PV_DOTUSP_SC_B = 32'b1000100??????????101?????1010111; + localparam [31:0] PV_DOTUSP_SCI_B = 32'b100010???????????111?????1010111; + localparam [31:0] PV_DOTSP_H = 32'b1001100??????????000?????1010111; + localparam [31:0] PV_DOTSP_SC_H = 32'b1001100??????????100?????1010111; + localparam [31:0] PV_DOTSP_SCI_H = 32'b100110???????????110?????1010111; + localparam [31:0] PV_DOTSP_B = 32'b1001100??????????001?????1010111; + localparam [31:0] PV_DOTSP_SC_B = 32'b1001100??????????101?????1010111; + localparam [31:0] PV_DOTSP_SCI_B = 32'b100110???????????111?????1010111; + localparam [31:0] PV_SDOTUP_H = 32'b1010000??????????000?????1010111; + localparam [31:0] PV_SDOTUP_SC_H = 32'b1010000??????????100?????1010111; + localparam [31:0] PV_SDOTUP_SCI_H = 32'b101000???????????110?????1010111; + localparam [31:0] PV_SDOTUP_B = 32'b1010000??????????001?????1010111; + localparam [31:0] PV_SDOTUP_SC_B = 32'b1010000??????????101?????1010111; + localparam [31:0] PV_SDOTUP_SCI_B = 32'b101000???????????111?????1010111; + localparam [31:0] PV_SDOTUSP_H = 32'b1010100??????????000?????1010111; + localparam [31:0] 
PV_SDOTUSP_SC_H = 32'b1010100??????????100?????1010111; + localparam [31:0] PV_SDOTUSP_SCI_H = 32'b101010???????????110?????1010111; + localparam [31:0] PV_SDOTUSP_B = 32'b1010100??????????001?????1010111; + localparam [31:0] PV_SDOTUSP_SC_B = 32'b1010100??????????101?????1010111; + localparam [31:0] PV_SDOTUSP_SCI_B = 32'b101010???????????111?????1010111; + localparam [31:0] PV_SDOTSP_H = 32'b1011100??????????000?????1010111; + localparam [31:0] PV_SDOTSP_SC_H = 32'b1011100??????????100?????1010111; + localparam [31:0] PV_SDOTSP_SCI_H = 32'b101110???????????110?????1010111; + localparam [31:0] PV_SDOTSP_B = 32'b1011100??????????001?????1010111; + localparam [31:0] PV_SDOTSP_SC_B = 32'b1011100??????????101?????1010111; + localparam [31:0] PV_SDOTSP_SCI_B = 32'b101110???????????111?????1010111; localparam [31:0] FLAH = 32'b?????????????????001?????0000111; localparam [31:0] FSAH = 32'b?????????????????001?????0100111; localparam [31:0] FMADD_AH = 32'b?????10??????????101?????1000011; diff --git a/toolchain/riscv-opcodes/opcodes-rvv b/toolchain/riscv-opcodes/opcodes-rvv index 613bd5be7..95b99ccd1 100644 --- a/toolchain/riscv-opcodes/opcodes-rvv +++ b/toolchain/riscv-opcodes/opcodes-rvv @@ -9,7 +9,7 @@ # configuration setting # https://github.com/riscv/riscv-v-spec/blob/master/vcfg-format.adoc @vsetvli 31=0 zimm11 rs1 14..12=0x7 rd 6..0=0x57 -vsetvl 31=1 30..25=0x0 rs2 rs1 14..12=0x7 rd 6..0=0x57 +@vsetvl 31=1 30..25=0x0 rs2 rs1 14..12=0x7 rd 6..0=0x57 # # Vector Loads and Store @@ -138,17 +138,17 @@ vmfne.vf 31..26=0x1c vm vs2 rs1 14..12=0x5 vd 6..0=0x57 vmfgt.vf 31..26=0x1d vm vs2 rs1 14..12=0x5 vd 6..0=0x57 vmfge.vf 31..26=0x1f vm vs2 rs1 14..12=0x5 vd 6..0=0x57 -vfdiv.vf 31..26=0x20 vm vs2 rs1 14..12=0x5 vd 6..0=0x57 +@vfdiv.vf 31..26=0x20 vm vs2 rs1 14..12=0x5 vd 6..0=0x57 vfrdiv.vf 31..26=0x21 vm vs2 rs1 14..12=0x5 vd 6..0=0x57 vfmul.vf 31..26=0x24 vm vs2 rs1 14..12=0x5 vd 6..0=0x57 vfrsub.vf 31..26=0x27 vm vs2 rs1 14..12=0x5 vd 6..0=0x57 -vfmadd.vf 31..26=0x28 
vm vs2 rs1 14..12=0x5 vd 6..0=0x57 +@vfmadd.vf 31..26=0x28 vm vs2 rs1 14..12=0x5 vd 6..0=0x57 vfnmadd.vf 31..26=0x29 vm vs2 rs1 14..12=0x5 vd 6..0=0x57 -vfmsub.vf 31..26=0x2a vm vs2 rs1 14..12=0x5 vd 6..0=0x57 +@vfmsub.vf 31..26=0x2a vm vs2 rs1 14..12=0x5 vd 6..0=0x57 vfnmsub.vf 31..26=0x2b vm vs2 rs1 14..12=0x5 vd 6..0=0x57 vfmacc.vf 31..26=0x2c vm vs2 rs1 14..12=0x5 vd 6..0=0x57 vfnmacc.vf 31..26=0x2d vm vs2 rs1 14..12=0x5 vd 6..0=0x57 -vfmsac.vf 31..26=0x2e vm vs2 rs1 14..12=0x5 vd 6..0=0x57 +@vfmsac.vf 31..26=0x2e vm vs2 rs1 14..12=0x5 vd 6..0=0x57 vfnmsac.vf 31..26=0x2f vm vs2 rs1 14..12=0x5 vd 6..0=0x57 vfwadd.vf 31..26=0x30 vm vs2 rs1 14..12=0x5 vd 6..0=0x57 @@ -180,15 +180,15 @@ vmfle.vv 31..26=0x19 vm vs2 vs1 14..12=0x1 vd 6..0=0x57 vmflt.vv 31..26=0x1b vm vs2 vs1 14..12=0x1 vd 6..0=0x57 @vmfne.vv 31..26=0x1c vm vs2 vs1 14..12=0x1 vd 6..0=0x57 -vfdiv.vv 31..26=0x20 vm vs2 vs1 14..12=0x1 vd 6..0=0x57 +@vfdiv.vv 31..26=0x20 vm vs2 vs1 14..12=0x1 vd 6..0=0x57 vfmul.vv 31..26=0x24 vm vs2 vs1 14..12=0x1 vd 6..0=0x57 -vfmadd.vv 31..26=0x28 vm vs2 vs1 14..12=0x1 vd 6..0=0x57 +@vfmadd.vv 31..26=0x28 vm vs2 vs1 14..12=0x1 vd 6..0=0x57 vfnmadd.vv 31..26=0x29 vm vs2 vs1 14..12=0x1 vd 6..0=0x57 -vfmsub.vv 31..26=0x2a vm vs2 vs1 14..12=0x1 vd 6..0=0x57 +@vfmsub.vv 31..26=0x2a vm vs2 vs1 14..12=0x1 vd 6..0=0x57 vfnmsub.vv 31..26=0x2b vm vs2 vs1 14..12=0x1 vd 6..0=0x57 vfmacc.vv 31..26=0x2c vm vs2 vs1 14..12=0x1 vd 6..0=0x57 vfnmacc.vv 31..26=0x2d vm vs2 vs1 14..12=0x1 vd 6..0=0x57 -vfmsac.vv 31..26=0x2e vm vs2 vs1 14..12=0x1 vd 6..0=0x57 +@vfmsac.vv 31..26=0x2e vm vs2 vs1 14..12=0x1 vd 6..0=0x57 vfnmsac.vv 31..26=0x2f vm vs2 vs1 14..12=0x1 vd 6..0=0x57 @vfcvt.xu.f.v 31..26=0x12 vm vs2 19..15=0x00 14..12=0x1 vd 6..0=0x57 @@ -263,19 +263,19 @@ vmsle.vx 31..26=0x1d vm vs2 rs1 14..12=0x4 vd 6..0=0x57 vmsgtu.vx 31..26=0x1e vm vs2 rs1 14..12=0x4 vd 6..0=0x57 vmsgt.vx 31..26=0x1f vm vs2 rs1 14..12=0x4 vd 6..0=0x57 -vsaddu.vx 31..26=0x20 vm vs2 rs1 14..12=0x4 vd 6..0=0x57 
+@vsaddu.vx 31..26=0x20 vm vs2 rs1 14..12=0x4 vd 6..0=0x57 vsadd.vx 31..26=0x21 vm vs2 rs1 14..12=0x4 vd 6..0=0x57 -vssubu.vx 31..26=0x22 vm vs2 rs1 14..12=0x4 vd 6..0=0x57 +@vssubu.vx 31..26=0x22 vm vs2 rs1 14..12=0x4 vd 6..0=0x57 vssub.vx 31..26=0x23 vm vs2 rs1 14..12=0x4 vd 6..0=0x57 vsll.vx 31..26=0x25 vm vs2 rs1 14..12=0x4 vd 6..0=0x57 vsmul.vx 31..26=0x27 vm vs2 rs1 14..12=0x4 vd 6..0=0x57 -vsrl.vx 31..26=0x28 vm vs2 rs1 14..12=0x4 vd 6..0=0x57 +@vsrl.vx 31..26=0x28 vm vs2 rs1 14..12=0x4 vd 6..0=0x57 vsra.vx 31..26=0x29 vm vs2 rs1 14..12=0x4 vd 6..0=0x57 -vssrl.vx 31..26=0x2a vm vs2 rs1 14..12=0x4 vd 6..0=0x57 +@vssrl.vx 31..26=0x2a vm vs2 rs1 14..12=0x4 vd 6..0=0x57 vssra.vx 31..26=0x2b vm vs2 rs1 14..12=0x4 vd 6..0=0x57 vnsrl.wx 31..26=0x2c vm vs2 rs1 14..12=0x4 vd 6..0=0x57 vnsra.wx 31..26=0x2d vm vs2 rs1 14..12=0x4 vd 6..0=0x57 -vnclipu.wx 31..26=0x2e vm vs2 rs1 14..12=0x4 vd 6..0=0x57 +@vnclipu.wx 31..26=0x2e vm vs2 rs1 14..12=0x4 vd 6..0=0x57 vnclip.wx 31..26=0x2f vm vs2 rs1 14..12=0x4 vd 6..0=0x57 vqmaccu.vx 31..26=0x3c vm vs2 rs1 14..12=0x4 vd 6..0=0x57 @@ -309,19 +309,19 @@ vmslt.vv 31..26=0x1b vm vs2 vs1 14..12=0x0 vd 6..0=0x57 @vmsleu.vv 31..26=0x1c vm vs2 vs1 14..12=0x0 vd 6..0=0x57 vmsle.vv 31..26=0x1d vm vs2 vs1 14..12=0x0 vd 6..0=0x57 -vsaddu.vv 31..26=0x20 vm vs2 vs1 14..12=0x0 vd 6..0=0x57 +@vsaddu.vv 31..26=0x20 vm vs2 vs1 14..12=0x0 vd 6..0=0x57 vsadd.vv 31..26=0x21 vm vs2 vs1 14..12=0x0 vd 6..0=0x57 -vssubu.vv 31..26=0x22 vm vs2 vs1 14..12=0x0 vd 6..0=0x57 +@vssubu.vv 31..26=0x22 vm vs2 vs1 14..12=0x0 vd 6..0=0x57 vssub.vv 31..26=0x23 vm vs2 vs1 14..12=0x0 vd 6..0=0x57 vsll.vv 31..26=0x25 vm vs2 vs1 14..12=0x0 vd 6..0=0x57 vsmul.vv 31..26=0x27 vm vs2 vs1 14..12=0x0 vd 6..0=0x57 -vsrl.vv 31..26=0x28 vm vs2 vs1 14..12=0x0 vd 6..0=0x57 +@vsrl.vv 31..26=0x28 vm vs2 vs1 14..12=0x0 vd 6..0=0x57 vsra.vv 31..26=0x29 vm vs2 vs1 14..12=0x0 vd 6..0=0x57 -vssrl.vv 31..26=0x2a vm vs2 vs1 14..12=0x0 vd 6..0=0x57 +@vssrl.vv 31..26=0x2a vm vs2 vs1 
14..12=0x0 vd 6..0=0x57 vssra.vv 31..26=0x2b vm vs2 vs1 14..12=0x0 vd 6..0=0x57 vnsrl.wv 31..26=0x2c vm vs2 vs1 14..12=0x0 vd 6..0=0x57 vnsra.wv 31..26=0x2d vm vs2 vs1 14..12=0x0 vd 6..0=0x57 -vnclipu.wv 31..26=0x2e vm vs2 vs1 14..12=0x0 vd 6..0=0x57 +@vnclipu.wv 31..26=0x2e vm vs2 vs1 14..12=0x0 vd 6..0=0x57 vnclip.wv 31..26=0x2f vm vs2 vs1 14..12=0x0 vd 6..0=0x57 vwredsumu.vs 31..26=0x30 vm vs2 vs1 14..12=0x0 vd 6..0=0x57 @@ -451,13 +451,13 @@ vasub.vx 31..26=0x0b vm vs2 rs1 14..12=0x6 vd 6..0=0x57 @vslide1up.vx 31..26=0x0e vm vs2 rs1 14..12=0x6 vd 6..0=0x57 vslide1down.vx 31..26=0x0f vm vs2 rs1 14..12=0x6 vd 6..0=0x57 -vdivu.vx 31..26=0x20 vm vs2 rs1 14..12=0x6 vd 6..0=0x57 +@vdivu.vx 31..26=0x20 vm vs2 rs1 14..12=0x6 vd 6..0=0x57 vdiv.vx 31..26=0x21 vm vs2 rs1 14..12=0x6 vd 6..0=0x57 -vremu.vx 31..26=0x22 vm vs2 rs1 14..12=0x6 vd 6..0=0x57 +@vremu.vx 31..26=0x22 vm vs2 rs1 14..12=0x6 vd 6..0=0x57 vrem.vx 31..26=0x23 vm vs2 rs1 14..12=0x6 vd 6..0=0x57 @vmulhu.vx 31..26=0x24 vm vs2 rs1 14..12=0x6 vd 6..0=0x57 vmul.vx 31..26=0x25 vm vs2 rs1 14..12=0x6 vd 6..0=0x57 -vmulhsu.vx 31..26=0x26 vm vs2 rs1 14..12=0x6 vd 6..0=0x57 +@vmulhsu.vx 31..26=0x26 vm vs2 rs1 14..12=0x6 vd 6..0=0x57 vmulh.vx 31..26=0x27 vm vs2 rs1 14..12=0x6 vd 6..0=0x57 vmadd.vx 31..26=0x29 vm vs2 rs1 14..12=0x6 vd 6..0=0x57 vnmsub.vx 31..26=0x2b vm vs2 rs1 14..12=0x6 vd 6..0=0x57 diff --git a/toolchain/riscv-opcodes/opcodes-xpulpimg_CUSTOM b/toolchain/riscv-opcodes/opcodes-xpulpimg_CUSTOM index 94e364bde..17a8b6bf7 100644 --- a/toolchain/riscv-opcodes/opcodes-xpulpimg_CUSTOM +++ b/toolchain/riscv-opcodes/opcodes-xpulpimg_CUSTOM @@ -162,3 +162,45 @@ pv.extractu.h rd rs1 imm6 31..27=18 26=0 14..12=6 6..2=0x15 1..0=3 pv.extractu.b rd rs1 imm6 31..27=18 26=0 14..12=7 6..2=0x15 1..0=3 pv.insert.h rd rs1 imm6 31..27=22 26=0 14..12=6 6..2=0x15 1..0=3 pv.insert.b rd rs1 imm6 31..27=22 26=0 14..12=7 6..2=0x15 1..0=3 + +pv.dotup.h rd rs1 rs2 31..27=16 26=0 25=0 14..12=0 6..2=0x15 1..0=3 +pv.dotup.sc.h rd 
rs1 rs2 31..27=16 26=0 25=0 14..12=4 6..2=0x15 1..0=3 +pv.dotup.sci.h rd rs1 imm6 31..27=16 26=0 14..12=6 6..2=0x15 1..0=3 +pv.dotup.b rd rs1 rs2 31..27=16 26=0 25=0 14..12=1 6..2=0x15 1..0=3 +pv.dotup.sc.b rd rs1 rs2 31..27=16 26=0 25=0 14..12=5 6..2=0x15 1..0=3 +pv.dotup.sci.b rd rs1 imm6 31..27=16 26=0 14..12=7 6..2=0x15 1..0=3 + +pv.dotusp.h rd rs1 rs2 31..27=17 26=0 25=0 14..12=0 6..2=0x15 1..0=3 +pv.dotusp.sc.h rd rs1 rs2 31..27=17 26=0 25=0 14..12=4 6..2=0x15 1..0=3 +pv.dotusp.sci.h rd rs1 imm6 31..27=17 26=0 14..12=6 6..2=0x15 1..0=3 +pv.dotusp.b rd rs1 rs2 31..27=17 26=0 25=0 14..12=1 6..2=0x15 1..0=3 +pv.dotusp.sc.b rd rs1 rs2 31..27=17 26=0 25=0 14..12=5 6..2=0x15 1..0=3 +pv.dotusp.sci.b rd rs1 imm6 31..27=17 26=0 14..12=7 6..2=0x15 1..0=3 + +pv.dotsp.h rd rs1 rs2 31..27=19 26=0 25=0 14..12=0 6..2=0x15 1..0=3 +pv.dotsp.sc.h rd rs1 rs2 31..27=19 26=0 25=0 14..12=4 6..2=0x15 1..0=3 +pv.dotsp.sci.h rd rs1 imm6 31..27=19 26=0 14..12=6 6..2=0x15 1..0=3 +pv.dotsp.b rd rs1 rs2 31..27=19 26=0 25=0 14..12=1 6..2=0x15 1..0=3 +pv.dotsp.sc.b rd rs1 rs2 31..27=19 26=0 25=0 14..12=5 6..2=0x15 1..0=3 +pv.dotsp.sci.b rd rs1 imm6 31..27=19 26=0 14..12=7 6..2=0x15 1..0=3 + +pv.sdotup.h rd rs1 rs2 31..27=20 26=0 25=0 14..12=0 6..2=0x15 1..0=3 +pv.sdotup.sc.h rd rs1 rs2 31..27=20 26=0 25=0 14..12=4 6..2=0x15 1..0=3 +pv.sdotup.sci.h rd rs1 imm6 31..27=20 26=0 14..12=6 6..2=0x15 1..0=3 +pv.sdotup.b rd rs1 rs2 31..27=20 26=0 25=0 14..12=1 6..2=0x15 1..0=3 +pv.sdotup.sc.b rd rs1 rs2 31..27=20 26=0 25=0 14..12=5 6..2=0x15 1..0=3 +pv.sdotup.sci.b rd rs1 imm6 31..27=20 26=0 14..12=7 6..2=0x15 1..0=3 + +pv.sdotusp.h rd rs1 rs2 31..27=21 26=0 25=0 14..12=0 6..2=0x15 1..0=3 +pv.sdotusp.sc.h rd rs1 rs2 31..27=21 26=0 25=0 14..12=4 6..2=0x15 1..0=3 +pv.sdotusp.sci.h rd rs1 imm6 31..27=21 26=0 14..12=6 6..2=0x15 1..0=3 +pv.sdotusp.b rd rs1 rs2 31..27=21 26=0 25=0 14..12=1 6..2=0x15 1..0=3 +pv.sdotusp.sc.b rd rs1 rs2 31..27=21 26=0 25=0 14..12=5 6..2=0x15 1..0=3 +pv.sdotusp.sci.b rd rs1 
imm6 31..27=21 26=0 14..12=7 6..2=0x15 1..0=3 + +pv.sdotsp.h rd rs1 rs2 31..27=23 26=0 25=0 14..12=0 6..2=0x15 1..0=3 +pv.sdotsp.sc.h rd rs1 rs2 31..27=23 26=0 25=0 14..12=4 6..2=0x15 1..0=3 +pv.sdotsp.sci.h rd rs1 imm6 31..27=23 26=0 14..12=6 6..2=0x15 1..0=3 +pv.sdotsp.b rd rs1 rs2 31..27=23 26=0 25=0 14..12=1 6..2=0x15 1..0=3 +pv.sdotsp.sc.b rd rs1 rs2 31..27=23 26=0 25=0 14..12=5 6..2=0x15 1..0=3 +pv.sdotsp.sci.b rd rs1 imm6 31..27=23 26=0 14..12=7 6..2=0x15 1..0=3 From 2e8a89da9e6f4a84d9e84d61b724a613f7e28aff Mon Sep 17 00:00:00 2001 From: Sergio Mazzola Date: Tue, 12 Jan 2021 15:45:50 +0100 Subject: [PATCH 23/65] [riscv-isa-sim] Add Xpulpv2 SIMD dot-product instructions Added instructions: SIMD dotup, dotusp, dotsp, sdotup, sdotusp, sdotsp --- toolchain/riscv-isa-sim/disasm/disasm.cc | 37 +++++++++++++++++++ .../riscv-isa-sim/riscv/insns/pv_dotsp_b.h | 6 +++ .../riscv-isa-sim/riscv/insns/pv_dotsp_h.h | 6 +++ .../riscv-isa-sim/riscv/insns/pv_dotsp_sc_b.h | 6 +++ .../riscv-isa-sim/riscv/insns/pv_dotsp_sc_h.h | 6 +++ .../riscv/insns/pv_dotsp_sci_b.h | 6 +++ .../riscv/insns/pv_dotsp_sci_h.h | 6 +++ .../riscv-isa-sim/riscv/insns/pv_dotup_b.h | 6 +++ .../riscv-isa-sim/riscv/insns/pv_dotup_h.h | 6 +++ .../riscv-isa-sim/riscv/insns/pv_dotup_sc_b.h | 6 +++ .../riscv-isa-sim/riscv/insns/pv_dotup_sc_h.h | 6 +++ .../riscv/insns/pv_dotup_sci_b.h | 6 +++ .../riscv/insns/pv_dotup_sci_h.h | 6 +++ .../riscv-isa-sim/riscv/insns/pv_dotusp_b.h | 6 +++ .../riscv-isa-sim/riscv/insns/pv_dotusp_h.h | 6 +++ .../riscv/insns/pv_dotusp_sc_b.h | 6 +++ .../riscv/insns/pv_dotusp_sc_h.h | 6 +++ .../riscv/insns/pv_dotusp_sci_b.h | 6 +++ .../riscv/insns/pv_dotusp_sci_h.h | 6 +++ .../riscv-isa-sim/riscv/insns/pv_sdotsp_b.h | 6 +++ .../riscv-isa-sim/riscv/insns/pv_sdotsp_h.h | 6 +++ .../riscv/insns/pv_sdotsp_sc_b.h | 6 +++ .../riscv/insns/pv_sdotsp_sc_h.h | 6 +++ .../riscv/insns/pv_sdotsp_sci_b.h | 6 +++ .../riscv/insns/pv_sdotsp_sci_h.h | 6 +++ .../riscv-isa-sim/riscv/insns/pv_sdotup_b.h | 6 
+++ .../riscv-isa-sim/riscv/insns/pv_sdotup_h.h | 6 +++ .../riscv/insns/pv_sdotup_sc_b.h | 6 +++ .../riscv/insns/pv_sdotup_sc_h.h | 6 +++ .../riscv/insns/pv_sdotup_sci_b.h | 6 +++ .../riscv/insns/pv_sdotup_sci_h.h | 6 +++ .../riscv-isa-sim/riscv/insns/pv_sdotusp_b.h | 6 +++ .../riscv-isa-sim/riscv/insns/pv_sdotusp_h.h | 6 +++ .../riscv/insns/pv_sdotusp_sc_b.h | 6 +++ .../riscv/insns/pv_sdotusp_sc_h.h | 6 +++ .../riscv/insns/pv_sdotusp_sci_b.h | 6 +++ .../riscv/insns/pv_sdotusp_sci_h.h | 6 +++ toolchain/riscv-isa-sim/riscv/riscv.mk.in | 36 ++++++++++++++++++ 38 files changed, 289 insertions(+) create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_dotsp_b.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_dotsp_h.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_dotsp_sc_b.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_dotsp_sc_h.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_dotsp_sci_b.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_dotsp_sci_h.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_dotup_b.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_dotup_h.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_dotup_sc_b.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_dotup_sc_h.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_dotup_sci_b.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_dotup_sci_h.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_dotusp_b.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_dotusp_h.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_dotusp_sc_b.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_dotusp_sc_h.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_dotusp_sci_b.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_dotusp_sci_h.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_sdotsp_b.h create mode 100644 
toolchain/riscv-isa-sim/riscv/insns/pv_sdotsp_h.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_sdotsp_sc_b.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_sdotsp_sc_h.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_sdotsp_sci_b.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_sdotsp_sci_h.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_sdotup_b.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_sdotup_h.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_sdotup_sc_b.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_sdotup_sc_h.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_sdotup_sci_b.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_sdotup_sci_h.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_sdotusp_b.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_sdotusp_h.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_sdotusp_sc_b.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_sdotusp_sc_h.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_sdotusp_sci_b.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_sdotusp_sci_h.h diff --git a/toolchain/riscv-isa-sim/disasm/disasm.cc b/toolchain/riscv-isa-sim/disasm/disasm.cc index 8277ab077..42a39dcdc 100644 --- a/toolchain/riscv-isa-sim/disasm/disasm.cc +++ b/toolchain/riscv-isa-sim/disasm/disasm.cc @@ -1471,6 +1471,43 @@ disassembler_t::disassembler_t(int xlen) DEFINE_PI1ZTYPE(pv_insert_h); DEFINE_PI1ZTYPE(pv_insert_b); + DEFINE_RTYPE(pv_dotup_h); + DEFINE_RTYPE(pv_dotup_sc_h); + DEFINE_PI1ZTYPE(pv_dotup_sci_h); + DEFINE_RTYPE(pv_dotup_b); + DEFINE_RTYPE(pv_dotup_sc_b); + DEFINE_PI1ZTYPE(pv_dotup_sci_b); + DEFINE_RTYPE(pv_dotusp_h); + DEFINE_RTYPE(pv_dotusp_sc_h); + DEFINE_PI1ZTYPE(pv_dotusp_sci_h); + DEFINE_RTYPE(pv_dotusp_b); + DEFINE_RTYPE(pv_dotusp_sc_b); + DEFINE_PI1ZTYPE(pv_dotusp_sci_b); + DEFINE_RTYPE(pv_dotsp_h); + DEFINE_RTYPE(pv_dotsp_sc_h); + 
DEFINE_PI1ZTYPE(pv_dotsp_sci_h); + DEFINE_RTYPE(pv_dotsp_b); + DEFINE_RTYPE(pv_dotsp_sc_b); + DEFINE_PI1ZTYPE(pv_dotsp_sci_b); + DEFINE_RTYPE(pv_sdotup_h); + DEFINE_RTYPE(pv_sdotup_sc_h); + DEFINE_PI1ZTYPE(pv_sdotup_sci_h); + DEFINE_RTYPE(pv_sdotup_b); + DEFINE_RTYPE(pv_sdotup_sc_b); + DEFINE_PI1ZTYPE(pv_sdotup_sci_b); + DEFINE_RTYPE(pv_sdotusp_h); + DEFINE_RTYPE(pv_sdotusp_sc_h); + DEFINE_PI1ZTYPE(pv_sdotusp_sci_h); + DEFINE_RTYPE(pv_sdotusp_b); + DEFINE_RTYPE(pv_sdotusp_sc_b); + DEFINE_PI1ZTYPE(pv_sdotusp_sci_b); + DEFINE_RTYPE(pv_sdotsp_h); + DEFINE_RTYPE(pv_sdotsp_sc_h); + DEFINE_PI1ZTYPE(pv_sdotsp_sci_h); + DEFINE_RTYPE(pv_sdotsp_b); + DEFINE_RTYPE(pv_sdotsp_sc_b); + DEFINE_PI1ZTYPE(pv_sdotsp_sci_b); + // provide a default disassembly for all instructions as a fallback #define DECLARE_INSN(code, match, mask) \ add_insn(new disasm_insn_t(#code " (args unknown)", match, mask, {})); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_dotsp_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_dotsp_b.h new file mode 100644 index 000000000..93b7233cc --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_dotsp_b.h @@ -0,0 +1,6 @@ +int32_t acc = 0; + +for(int i = xlen/8 - 1; i >= 0; i--) + acc += sext8(RS1_B(i)) * sext8(RS2_B(i)); + +WRITE_RD(sext_xlen(acc)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_dotsp_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_dotsp_h.h new file mode 100644 index 000000000..9feed35ef --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_dotsp_h.h @@ -0,0 +1,6 @@ +int32_t acc = 0; + +for(int i = xlen/16 - 1; i >= 0; i--) + acc += sext16(RS1_H(i)) * sext16(RS2_H(i)); + +WRITE_RD(sext_xlen(acc)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_dotsp_sc_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_dotsp_sc_b.h new file mode 100644 index 000000000..cef11d5e7 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_dotsp_sc_b.h @@ -0,0 +1,6 @@ +int32_t acc = 0; + +for(int i = xlen/8 - 1; i >= 0; i--) + acc += sext8(RS1_B(i)) * 
sext8(RS2_B(0)); + +WRITE_RD(sext_xlen(acc)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_dotsp_sc_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_dotsp_sc_h.h new file mode 100644 index 000000000..ef558d39f --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_dotsp_sc_h.h @@ -0,0 +1,6 @@ +int32_t acc = 0; + +for(int i = xlen/16 - 1; i >= 0; i--) + acc += sext16(RS1_H(i)) * sext16(RS2_H(0)); + +WRITE_RD(sext_xlen(acc)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_dotsp_sci_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_dotsp_sci_b.h new file mode 100644 index 000000000..3470fd55a --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_dotsp_sci_b.h @@ -0,0 +1,6 @@ +int32_t acc = 0; + +for(int i = xlen/8 - 1; i >= 0; i--) + acc += sext8(RS1_B(i)) * insn.p_simm6(); + +WRITE_RD(sext_xlen(acc)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_dotsp_sci_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_dotsp_sci_h.h new file mode 100644 index 000000000..97e30eb29 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_dotsp_sci_h.h @@ -0,0 +1,6 @@ +int32_t acc = 0; + +for(int i = xlen/16 - 1; i >= 0; i--) + acc += sext16(RS1_H(i)) * insn.p_simm6(); + +WRITE_RD(sext_xlen(acc)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_dotup_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_dotup_b.h new file mode 100644 index 000000000..a6b9fb1a9 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_dotup_b.h @@ -0,0 +1,6 @@ +uint32_t acc = 0; + +for(int i = xlen/8 - 1; i >= 0; i--) + acc += zext8(RS1_B(i)) * zext8(RS2_B(i)); + +WRITE_RD(zext_xlen(acc)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_dotup_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_dotup_h.h new file mode 100644 index 000000000..c59ea6d49 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_dotup_h.h @@ -0,0 +1,6 @@ +uint32_t acc = 0; + +for(int i = xlen/16 - 1; i >= 0; i--) + acc += zext16(RS1_H(i)) * zext16(RS2_H(i)); + +WRITE_RD(zext_xlen(acc)); diff --git 
a/toolchain/riscv-isa-sim/riscv/insns/pv_dotup_sc_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_dotup_sc_b.h new file mode 100644 index 000000000..ab99cb0fb --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_dotup_sc_b.h @@ -0,0 +1,6 @@ +uint32_t acc = 0; + +for(int i = xlen/8 - 1; i >= 0; i--) + acc += zext8(RS1_B(i)) * zext8(RS2_B(0)); + +WRITE_RD(zext_xlen(acc)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_dotup_sc_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_dotup_sc_h.h new file mode 100644 index 000000000..63384d8b2 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_dotup_sc_h.h @@ -0,0 +1,6 @@ +uint32_t acc = 0; + +for(int i = xlen/16 - 1; i >= 0; i--) + acc += zext16(RS1_H(i)) * zext16(RS2_H(0)); + +WRITE_RD(zext_xlen(acc)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_dotup_sci_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_dotup_sci_b.h new file mode 100644 index 000000000..dafa7e8be --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_dotup_sci_b.h @@ -0,0 +1,6 @@ +uint32_t acc = 0; + +for(int i = xlen/8 - 1; i >= 0; i--) + acc += zext8(RS1_B(i)) * insn.p_zimm6(); + +WRITE_RD(zext_xlen(acc)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_dotup_sci_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_dotup_sci_h.h new file mode 100644 index 000000000..e38162320 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_dotup_sci_h.h @@ -0,0 +1,6 @@ +uint32_t acc = 0; + +for(int i = xlen/16 - 1; i >= 0; i--) + acc += zext16(RS1_H(i)) * insn.p_zimm6(); + +WRITE_RD(zext_xlen(acc)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_dotusp_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_dotusp_b.h new file mode 100644 index 000000000..1cdfc2f2c --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_dotusp_b.h @@ -0,0 +1,6 @@ +int32_t acc = 0; + +for(int i = xlen/8 - 1; i >= 0; i--) + acc += sreg_t(zext8(RS1_B(i))) * sext8(RS2_B(i)); + +WRITE_RD(sext_xlen(acc)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_dotusp_h.h 
b/toolchain/riscv-isa-sim/riscv/insns/pv_dotusp_h.h new file mode 100644 index 000000000..81968a14d --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_dotusp_h.h @@ -0,0 +1,6 @@ +int32_t acc = 0; + +for(int i = xlen/16 - 1; i >= 0; i--) + acc += sreg_t(zext16(RS1_H(i))) * sext16(RS2_H(i)); + +WRITE_RD(sext_xlen(acc)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_dotusp_sc_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_dotusp_sc_b.h new file mode 100644 index 000000000..d562a7d4d --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_dotusp_sc_b.h @@ -0,0 +1,6 @@ +int32_t acc = 0; + +for(int i = xlen/8 - 1; i >= 0; i--) + acc += sreg_t(zext8(RS1_B(i))) * sext8(RS2_B(0)); + +WRITE_RD(sext_xlen(acc)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_dotusp_sc_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_dotusp_sc_h.h new file mode 100644 index 000000000..3815c3721 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_dotusp_sc_h.h @@ -0,0 +1,6 @@ +int32_t acc = 0; + +for(int i = xlen/16 - 1; i >= 0; i--) + acc += sreg_t(zext16(RS1_H(i))) * sext16(RS2_H(0)); + +WRITE_RD(sext_xlen(acc)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_dotusp_sci_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_dotusp_sci_b.h new file mode 100644 index 000000000..92c229540 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_dotusp_sci_b.h @@ -0,0 +1,6 @@ +int32_t acc = 0; + +for(int i = xlen/8 - 1; i >= 0; i--) + acc += sreg_t(zext8(RS1_B(i))) * insn.p_simm6(); + +WRITE_RD(sext_xlen(acc)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_dotusp_sci_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_dotusp_sci_h.h new file mode 100644 index 000000000..8f91a89a3 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_dotusp_sci_h.h @@ -0,0 +1,6 @@ +int32_t acc = 0; + +for(int i = xlen/16 - 1; i >= 0; i--) + acc += sreg_t(zext16(RS1_H(i))) * insn.p_simm6(); + +WRITE_RD(sext_xlen(acc)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_sdotsp_b.h 
b/toolchain/riscv-isa-sim/riscv/insns/pv_sdotsp_b.h new file mode 100644 index 000000000..812e3d436 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_sdotsp_b.h @@ -0,0 +1,6 @@ +int32_t acc = RD; + +for(int i = xlen/8 - 1; i >= 0; i--) + acc += sext8(RS1_B(i)) * sext8(RS2_B(i)); + +WRITE_RD(sext_xlen(acc)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_sdotsp_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_sdotsp_h.h new file mode 100644 index 000000000..9ccfae939 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_sdotsp_h.h @@ -0,0 +1,6 @@ +int32_t acc = RD; + +for(int i = xlen/16 - 1; i >= 0; i--) + acc += sext16(RS1_H(i)) * sext16(RS2_H(i)); + +WRITE_RD(sext_xlen(acc)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_sdotsp_sc_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_sdotsp_sc_b.h new file mode 100644 index 000000000..e665a669f --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_sdotsp_sc_b.h @@ -0,0 +1,6 @@ +int32_t acc = RD; + +for(int i = xlen/8 - 1; i >= 0; i--) + acc += sext8(RS1_B(i)) * sext8(RS2_B(0)); + +WRITE_RD(sext_xlen(acc)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_sdotsp_sc_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_sdotsp_sc_h.h new file mode 100644 index 000000000..fa1ca93fe --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_sdotsp_sc_h.h @@ -0,0 +1,6 @@ +int32_t acc = RD; + +for(int i = xlen/16 - 1; i >= 0; i--) + acc += sext16(RS1_H(i)) * sext16(RS2_H(0)); + +WRITE_RD(sext_xlen(acc)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_sdotsp_sci_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_sdotsp_sci_b.h new file mode 100644 index 000000000..31aab1fe5 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_sdotsp_sci_b.h @@ -0,0 +1,6 @@ +int32_t acc = RD; + +for(int i = xlen/8 - 1; i >= 0; i--) + acc += sext8(RS1_B(i)) * insn.p_simm6(); + +WRITE_RD(sext_xlen(acc)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_sdotsp_sci_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_sdotsp_sci_h.h 
new file mode 100644 index 000000000..151d16a2e --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_sdotsp_sci_h.h @@ -0,0 +1,6 @@ +int32_t acc = RD; + +for(int i = xlen/16 - 1; i >= 0; i--) + acc += sext16(RS1_H(i)) * insn.p_simm6(); + +WRITE_RD(sext_xlen(acc)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_sdotup_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_sdotup_b.h new file mode 100644 index 000000000..b5fe24fd1 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_sdotup_b.h @@ -0,0 +1,6 @@ +uint32_t acc = RD; + +for(int i = xlen/8 - 1; i >= 0; i--) + acc += zext8(RS1_B(i)) * zext8(RS2_B(i)); + +WRITE_RD(zext_xlen(acc)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_sdotup_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_sdotup_h.h new file mode 100644 index 000000000..f552bb7b5 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_sdotup_h.h @@ -0,0 +1,6 @@ +uint32_t acc = RD; + +for(int i = xlen/16 - 1; i >= 0; i--) + acc += zext16(RS1_H(i)) * zext16(RS2_H(i)); + +WRITE_RD(zext_xlen(acc)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_sdotup_sc_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_sdotup_sc_b.h new file mode 100644 index 000000000..ed41df260 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_sdotup_sc_b.h @@ -0,0 +1,6 @@ +uint32_t acc = RD; + +for(int i = xlen/8 - 1; i >= 0; i--) + acc += zext8(RS1_B(i)) * zext8(RS2_B(0)); + +WRITE_RD(zext_xlen(acc)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_sdotup_sc_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_sdotup_sc_h.h new file mode 100644 index 000000000..ad1e27e62 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_sdotup_sc_h.h @@ -0,0 +1,6 @@ +uint32_t acc = RD; + +for(int i = xlen/16 - 1; i >= 0; i--) + acc += zext16(RS1_H(i)) * zext16(RS2_H(0)); + +WRITE_RD(zext_xlen(acc)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_sdotup_sci_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_sdotup_sci_b.h new file mode 100644 index 000000000..7a947b4b3 --- 
/dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_sdotup_sci_b.h @@ -0,0 +1,6 @@ +uint32_t acc = RD; + +for(int i = xlen/8 - 1; i >= 0; i--) + acc += zext8(RS1_B(i)) * insn.p_zimm6(); + +WRITE_RD(zext_xlen(acc)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_sdotup_sci_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_sdotup_sci_h.h new file mode 100644 index 000000000..90550b66d --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_sdotup_sci_h.h @@ -0,0 +1,6 @@ +uint32_t acc = RD; + +for(int i = xlen/16 - 1; i >= 0; i--) + acc += zext16(RS1_H(i)) * insn.p_zimm6(); + +WRITE_RD(zext_xlen(acc)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_sdotusp_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_sdotusp_b.h new file mode 100644 index 000000000..05d268ed2 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_sdotusp_b.h @@ -0,0 +1,6 @@ +int32_t acc = RD; + +for(int i = xlen/8 - 1; i >= 0; i--) + acc += sreg_t(zext8(RS1_B(i))) * sext8(RS2_B(i)); + +WRITE_RD(sext_xlen(acc)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_sdotusp_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_sdotusp_h.h new file mode 100644 index 000000000..fdc550db1 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_sdotusp_h.h @@ -0,0 +1,6 @@ +int32_t acc = RD; + +for(int i = xlen/16 - 1; i >= 0; i--) + acc += sreg_t(zext16(RS1_H(i))) * sext16(RS2_H(i)); + +WRITE_RD(sext_xlen(acc)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_sdotusp_sc_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_sdotusp_sc_b.h new file mode 100644 index 000000000..2840cd148 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_sdotusp_sc_b.h @@ -0,0 +1,6 @@ +int32_t acc = RD; + +for(int i = xlen/8 - 1; i >= 0; i--) + acc += sreg_t(zext8(RS1_B(i))) * sext8(RS2_B(0)); + +WRITE_RD(sext_xlen(acc)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_sdotusp_sc_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_sdotusp_sc_h.h new file mode 100644 index 000000000..ca4c25ac1 --- /dev/null +++ 
b/toolchain/riscv-isa-sim/riscv/insns/pv_sdotusp_sc_h.h @@ -0,0 +1,6 @@ +int32_t acc = RD; + +for(int i = xlen/16 - 1; i >= 0; i--) + acc += sreg_t(zext16(RS1_H(i))) * sext16(RS2_H(0)); + +WRITE_RD(sext_xlen(acc)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_sdotusp_sci_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_sdotusp_sci_b.h new file mode 100644 index 000000000..d6823f83a --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_sdotusp_sci_b.h @@ -0,0 +1,6 @@ +int32_t acc = RD; + +for(int i = xlen/8 - 1; i >= 0; i--) + acc += sreg_t(zext8(RS1_B(i))) * insn.p_simm6(); + +WRITE_RD(sext_xlen(acc)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_sdotusp_sci_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_sdotusp_sci_h.h new file mode 100644 index 000000000..42c4fbe88 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_sdotusp_sci_h.h @@ -0,0 +1,6 @@ +int32_t acc = RD; + +for(int i = xlen/16 - 1; i >= 0; i--) + acc += sreg_t(zext16(RS1_H(i))) * insn.p_simm6(); + +WRITE_RD(sext_xlen(acc)); diff --git a/toolchain/riscv-isa-sim/riscv/riscv.mk.in b/toolchain/riscv-isa-sim/riscv/riscv.mk.in index a6ece4fb4..d49a98a8e 100644 --- a/toolchain/riscv-isa-sim/riscv/riscv.mk.in +++ b/toolchain/riscv-isa-sim/riscv/riscv.mk.in @@ -888,6 +888,42 @@ riscv_insn_ext_xpulpimg = \ pv_extractu_b \ pv_insert_h \ pv_insert_b \ + pv_dotup_h \ + pv_dotup_sc_h \ + pv_dotup_sci_h \ + pv_dotup_b \ + pv_dotup_sc_b \ + pv_dotup_sci_b \ + pv_dotusp_h \ + pv_dotusp_sc_h \ + pv_dotusp_sci_h \ + pv_dotusp_b \ + pv_dotusp_sc_b \ + pv_dotusp_sci_b \ + pv_dotsp_h \ + pv_dotsp_sc_h \ + pv_dotsp_sci_h \ + pv_dotsp_b \ + pv_dotsp_sc_b \ + pv_dotsp_sci_b \ + pv_sdotup_h \ + pv_sdotup_sc_h \ + pv_sdotup_sci_h \ + pv_sdotup_b \ + pv_sdotup_sc_b \ + pv_sdotup_sci_b \ + pv_sdotusp_h \ + pv_sdotusp_sc_h \ + pv_sdotusp_sci_h \ + pv_sdotusp_b \ + pv_sdotusp_sc_b \ + pv_sdotusp_sci_b \ + pv_sdotsp_h \ + pv_sdotsp_sc_h \ + pv_sdotsp_sci_h \ + pv_sdotsp_b \ + pv_sdotsp_sc_b \ + pv_sdotsp_sci_b \ 
riscv_insn_ext_h = \ hfence_gvma \ From 6c87127b0cf6640fac1635e41d26f35c5a2ece82 Mon Sep 17 00:00:00 2001 From: Sergio Mazzola Date: Wed, 13 Jan 2021 11:21:20 +0100 Subject: [PATCH 24/65] [riscv-tests] Add basic tests for Xpulpv2 SIMD dotp Added instructions: SIMD dotup, dotusp, dotsp, sdotup, sdotusp, sdotsp --- .../isa/macros/scalar/test_macros.h | 28 +++++++ apps/riscv-tests/isa/rv32uxpulpimg/Makefrag | 6 ++ apps/riscv-tests/isa/rv32uxpulpimg/pv_dotsp.S | 74 +++++++++++++++++++ apps/riscv-tests/isa/rv32uxpulpimg/pv_dotup.S | 74 +++++++++++++++++++ .../riscv-tests/isa/rv32uxpulpimg/pv_dotusp.S | 74 +++++++++++++++++++ .../riscv-tests/isa/rv32uxpulpimg/pv_sdotsp.S | 74 +++++++++++++++++++ .../riscv-tests/isa/rv32uxpulpimg/pv_sdotup.S | 74 +++++++++++++++++++ .../isa/rv32uxpulpimg/pv_sdotusp.S | 74 +++++++++++++++++++ apps/riscv-tests/isa/snitch_isa.mk | 6 ++ 9 files changed, 484 insertions(+) create mode 100644 apps/riscv-tests/isa/rv32uxpulpimg/pv_dotsp.S create mode 100644 apps/riscv-tests/isa/rv32uxpulpimg/pv_dotup.S create mode 100644 apps/riscv-tests/isa/rv32uxpulpimg/pv_dotusp.S create mode 100644 apps/riscv-tests/isa/rv32uxpulpimg/pv_sdotsp.S create mode 100644 apps/riscv-tests/isa/rv32uxpulpimg/pv_sdotup.S create mode 100644 apps/riscv-tests/isa/rv32uxpulpimg/pv_sdotusp.S diff --git a/apps/riscv-tests/isa/macros/scalar/test_macros.h b/apps/riscv-tests/isa/macros/scalar/test_macros.h index f904ae67a..26d11ee6d 100644 --- a/apps/riscv-tests/isa/macros/scalar/test_macros.h +++ b/apps/riscv-tests/isa/macros/scalar/test_macros.h @@ -495,6 +495,34 @@ test_ ## testnum: \ inst x0, x1, x2; \ ) +#----------------------------------------------------------------------- +# Tests for Xpulpimg instructions with 2 register operands (rd and rs1) +# and a 6-bit unsigned immediate input +#----------------------------------------------------------------------- + +#define TEST_RR_UIMM6_OP( testnum, inst, result, val1, imm, val3 ) \ + TEST_CASE( testnum, x14, result, \ + li x1, 
MASK_XLEN(val1); \ + li x14, MASK_XLEN(val3); \ + inst x14, x1, ZEXT_UIMM6(imm); \ + ) + +# TODO(smazzola): finish to write macros to cover all tests types + +#----------------------------------------------------------------------- +# Tests for Xpulpimg instructions with 2 register operands (rd and rs1) +# and a 6-bit signed immediate input +#----------------------------------------------------------------------- + +#define TEST_RR_SIMM6_OP( testnum, inst, result, val1, imm, val3 ) \ + TEST_CASE( testnum, x14, result, \ + li x1, MASK_XLEN(val1); \ + li x14, MASK_XLEN(val3); \ + inst x14, x1, SEXT_IMM6(imm); \ + ) + +# TODO(smazzola): finish to write macros to cover all tests types + #----------------------------------------------------------------------- # Test memory instructions (immediate offset) #----------------------------------------------------------------------- diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/Makefrag b/apps/riscv-tests/isa/rv32uxpulpimg/Makefrag index f66419a9b..0e97997c2 100644 --- a/apps/riscv-tests/isa/rv32uxpulpimg/Makefrag +++ b/apps/riscv-tests/isa/rv32uxpulpimg/Makefrag @@ -33,6 +33,12 @@ rv32uxpulpimg_sc_tests = \ pv_abs \ pv_extract pv_extractu \ pv_insert \ + pv_dotup \ + pv_dotusp \ + pv_dotsp \ + pv_sdotup \ + pv_sdotusp \ + pv_sdotsp \ rv32uxpulpimg_p_tests = $(addprefix rv32uxpulpimg-p-, $(rv32uxpulpimg_sc_tests)) rv32uxpulpimg_v_tests = $(addprefix rv32uxpulpimg-v-, $(rv32uxpulpimg_sc_tests)) diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/pv_dotsp.S b/apps/riscv-tests/isa/rv32uxpulpimg/pv_dotsp.S new file mode 100644 index 000000000..9242c8bb9 --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/pv_dotsp.S @@ -0,0 +1,74 @@ +# See LICENSE for license details. + +#***************************************************************************** +# pv_dotsp.S +#----------------------------------------------------------------------------- +# +# Test pv.dotsp instructions. 
+# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Arithmetic tests + #------------------------------------------------------------- + + # pv.dotsp.h + TEST_RR_OP( 2, pv.dotsp.h, 0x0023A592, 0xFE67FB1A, 0x50E4DE57 ); + TEST_RR_OP( 3, pv.dotsp.h, 0x1EC63DAA, 0xADBC1E09, 0xA2C806FA ); + TEST_RR_OP( 4, pv.dotsp.h, 0x222B210B, 0x3FDAEFE7, 0x7BA5CB0F ); + # pv.dotsp.sc.h + TEST_RR_OP( 5, pv.dotsp.sc.h, 0xDE3EBCF3, 0x5E5C31BF, 0xCB24C409 ); + TEST_RR_OP( 6, pv.dotsp.sc.h, 0x03F34EE4, 0xEC042250, 0x230A4695 ); + TEST_RR_OP( 7, pv.dotsp.sc.h, 0x047909E6, 0x6BF5D085, 0x9AB012EF ); + # pv.dotsp.sci.h + TEST_SIMM6_OP( 8, pv.dotsp.sci.h, 0xFFFD1338, 0x36D2FEAA, -14 ); + TEST_SIMM6_OP( 9, pv.dotsp.sci.h, 0xFFFC68FB, 0x6752FECB, -9 ); + TEST_SIMM6_OP( 10, pv.dotsp.sci.h, 0x000098C4, 0x9747CFF5, -1 ); + # pv.dotsp.b + TEST_RR_OP( 11, pv.dotsp.b, 0x000003DA, 0xEB8A58F5, 0xCAECEE54 ); + TEST_RR_OP( 12, pv.dotsp.b, 0xFFFFAD05, 0x47665939, 0x9E989665 ); + TEST_RR_OP( 13, pv.dotsp.b, 0x00005335, 0x79D072B4, 0x5B8B4327 ); + # pv.dotsp.sc.b + TEST_RR_OP( 14, pv.dotsp.sc.b, 0x000059EF, 0x6F622436, 0x1E1E694D ); + TEST_RR_OP( 15, pv.dotsp.sc.b, 0x00001BDA, 0x77B8759A, 0xC1056E73 ); + TEST_RR_OP( 16, pv.dotsp.sc.b, 0x00002238, 0x74740933, 0xF898DF1E ); + # pv.dotsp.sci.b + TEST_SIMM6_OP( 17, pv.dotsp.sci.b, 0x0000006E, 0x4CD92920, 1 ); + TEST_SIMM6_OP( 18, pv.dotsp.sci.b, 0xFFFFFE20, 0xAFCE7172, -5 ); + TEST_SIMM6_OP( 19, pv.dotsp.sci.b, 0xFFFFF9FD, 0xDB25ABAA, 9 ); + + #------------------------------------------------------------- + # Source/Destination tests + #------------------------------------------------------------- + + # TODO(smazzola): + # for register-register instructions TEST_RR_SRC1_EQ_DEST, + # TEST_RR_SRC2_EQ_DEST, TEST_RR_SRC12_EQ_DEST + # for register-simm6 instructions TEST_SIMM6_SRC1_EQ_DEST + + #------------------------------------------------------------- + # 
Bypassing tests + #------------------------------------------------------------- + + # TODO(smazzola): + # for register-register instructions TEST_RR_DEST_BYPASS, + # TEST_RR_SRC12_BYPASS, TEST_RR_SRC21_BYPASS, TEST_RR_ZEROSRC1, + # TEST_RR_ZEROSRC2, TEST_RR_ZEROSRC12, TEST_RR_ZERODEST + # for register-simm6 instructions TEST_SIMM6_DEST_BYPASS, + # TEST_SIMM6_SRC1_BYPASS, TEST_SIMM6_ZEROSRC1, TEST_SIMM6_ZERODEST + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/pv_dotup.S b/apps/riscv-tests/isa/rv32uxpulpimg/pv_dotup.S new file mode 100644 index 000000000..71d3e470d --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/pv_dotup.S @@ -0,0 +1,74 @@ +# See LICENSE for license details. + +#***************************************************************************** +# pv_dotup.S +#----------------------------------------------------------------------------- +# +# Test pv.dotup instructions. +# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Arithmetic tests + #------------------------------------------------------------- + + # pv.dotup.h + TEST_RR_OP( 2, pv.dotup.h, 0x2A78A592, 0xFE67FB1A, 0x50E4DE57 ); + TEST_RR_OP( 3, pv.dotup.h, 0x6F4A3DAA, 0xADBC1E09, 0xA2C806FA ); + TEST_RR_OP( 4, pv.dotup.h, 0xDD21210B, 0x3FDAEFE7, 0x7BA5CB0F ); + # pv.dotup.sc.h + TEST_RR_OP( 5, pv.dotup.sc.h, 0x6E59BCF3, 0x5E5C31BF, 0xCB24C409 ); + TEST_RR_OP( 6, pv.dotup.sc.h, 0x4A884EE4, 0xEC042250, 0x230A4695 ); + TEST_RR_OP( 7, pv.dotup.sc.h, 0x176809E6, 0x6BF5D085, 0x9AB012EF ); + # pv.dotup.sci.h + TEST_UIMM6_OP( 8, pv.dotup.sci.h, 0x00148D3C, 0x36D2FEAA, 17 ); + TEST_UIMM6_OP( 9, pv.dotup.sci.h, 0x0002CC3A, 0x6752FECB, 2 ); + TEST_UIMM6_OP( 10, pv.dotup.sci.h, 0x000F6F94, 0x9747CFF5, 11 ); + # pv.dotup.b + TEST_RR_OP( 11, pv.dotup.b, 0x0001DADA, 0xEB8A58F5, 0xCAECEE54 ); + 
TEST_RR_OP( 12, pv.dotup.b, 0x0000B305, 0x47665939, 0x9E989665 ); + TEST_RR_OP( 13, pv.dotup.b, 0x0000D535, 0x79D072B4, 0x5B8B4327 ); + # pv.dotup.sc.b + TEST_RR_OP( 14, pv.dotup.sc.b, 0x000059EF, 0x6F622436, 0x1E1E694D ); + TEST_RR_OP( 15, pv.dotup.sc.b, 0x000101DA, 0x77B8759A, 0xC1056E73 ); + TEST_RR_OP( 16, pv.dotup.sc.b, 0x00002238, 0x74740933, 0xF898DF1E ); + # pv.dotup.sci.b + TEST_UIMM6_OP( 17, pv.dotup.sci.b, 0x00000FBA, 0x4CD92920, 11 ); + TEST_UIMM6_OP( 18, pv.dotup.sci.b, 0x00002140, 0xAFCE7172, 14 ); + TEST_UIMM6_OP( 19, pv.dotup.sci.b, 0x00001053, 0xDB25ABAA, 7 ); + + #------------------------------------------------------------- + # Source/Destination tests + #------------------------------------------------------------- + + # TODO(smazzola): + # for register-register instructions TEST_RR_SRC1_EQ_DEST, + # TEST_RR_SRC2_EQ_DEST, TEST_RR_SRC12_EQ_DEST + # for register-uimm6 instructions TEST_UIMM6_SRC1_EQ_DEST + + #------------------------------------------------------------- + # Bypassing tests + #------------------------------------------------------------- + + # TODO(smazzola): + # for register-register instructions TEST_RR_DEST_BYPASS, + # TEST_RR_SRC12_BYPASS, TEST_RR_SRC21_BYPASS, TEST_RR_ZEROSRC1, + # TEST_RR_ZEROSRC2, TEST_RR_ZEROSRC12, TEST_RR_ZERODEST + # for register-uimm6 instructions TEST_UIMM6_DEST_BYPASS, + # TEST_UIMM6_SRC1_BYPASS, TEST_UIMM6_ZEROSRC1, TEST_UIMM6_ZERODEST + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/pv_dotusp.S b/apps/riscv-tests/isa/rv32uxpulpimg/pv_dotusp.S new file mode 100644 index 000000000..17a92e165 --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/pv_dotusp.S @@ -0,0 +1,74 @@ +# See LICENSE for license details. 
+ +#***************************************************************************** +# pv_dotusp.S +#----------------------------------------------------------------------------- +# +# Test pv.dotusp instructions. +# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Arithmetic tests + #------------------------------------------------------------- + + # pv.dotusp.h + TEST_RR_OP( 2, pv.dotusp.h, 0x2F5EA592, 0xFE67FB1A, 0x50E4DE57 ); + TEST_RR_OP( 3, pv.dotusp.h, 0xC18E3DAA, 0xADBC1E09, 0xA2C806FA ); + TEST_RR_OP( 4, pv.dotusp.h, 0xED3A210B, 0x3FDAEFE7, 0x7BA5CB0F ); + # pv.dotusp.sc.h + TEST_RR_OP( 5, pv.dotusp.sc.h, 0xDE3EBCF3, 0x5E5C31BF, 0xCB24C409 ); + TEST_RR_OP( 6, pv.dotusp.sc.h, 0x4A884EE4, 0xEC042250, 0x230A4695 ); + TEST_RR_OP( 7, pv.dotusp.sc.h, 0x176809E6, 0x6BF5D085, 0x9AB012EF ); + # pv.dotusp.sci.h + TEST_SIMM6_OP( 8, pv.dotusp.sci.h, 0xFFEF1338, 0x36D2FEAA, -14 ); + TEST_SIMM6_OP( 9, pv.dotusp.sci.h, 0xFFF368FB, 0x6752FECB, -9 ); + TEST_SIMM6_OP( 10, pv.dotusp.sci.h, 0xFFFE98C4, 0x9747CFF5, -1 ); + # pv.dotusp.b + TEST_RR_OP( 11, pv.dotusp.b, 0x00000DDA, 0xEB8A58F5, 0xCAECEE54 ); + TEST_RR_OP( 12, pv.dotusp.b, 0xFFFFAD05, 0x47665939, 0x9E989665 ); + TEST_RR_OP( 13, pv.dotusp.b, 0x00000535, 0x79D072B4, 0x5B8B4327 ); + # pv.dotusp.sc.b + TEST_RR_OP( 14, pv.dotusp.sc.b, 0x000059EF, 0x6F622436, 0x1E1E694D ); + TEST_RR_OP( 15, pv.dotusp.sc.b, 0x000101DA, 0x77B8759A, 0xC1056E73 ); + TEST_RR_OP( 16, pv.dotusp.sc.b, 0x00002238, 0x74740933, 0xF898DF1E ); + # pv.dotusp.sci.b + TEST_SIMM6_OP( 17, pv.dotusp.sci.b, 0x0000016E, 0x4CD92920, 1 ); + TEST_SIMM6_OP( 18, pv.dotusp.sci.b, 0xFFFFF420, 0xAFCE7172, -5 ); + TEST_SIMM6_OP( 19, pv.dotusp.sci.b, 0x000014FD, 0xDB25ABAA, 9 ); + + #------------------------------------------------------------- + # Source/Destination tests + #------------------------------------------------------------- + + # TODO(smazzola): + # 
for register-register instructions TEST_RR_SRC1_EQ_DEST, + # TEST_RR_SRC2_EQ_DEST, TEST_RR_SRC12_EQ_DEST + # for register-simm6 instructions TEST_SIMM6_SRC1_EQ_DEST + + #------------------------------------------------------------- + # Bypassing tests + #------------------------------------------------------------- + + # TODO(smazzola): + # for register-register instructions TEST_RR_DEST_BYPASS, + # TEST_RR_SRC12_BYPASS, TEST_RR_SRC21_BYPASS, TEST_RR_ZEROSRC1, + # TEST_RR_ZEROSRC2, TEST_RR_ZEROSRC12, TEST_RR_ZERODEST + # for register-simm6 instructions TEST_SIMM6_DEST_BYPASS, + # TEST_SIMM6_SRC1_BYPASS, TEST_SIMM6_ZEROSRC1, TEST_SIMM6_ZERODEST + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/pv_sdotsp.S b/apps/riscv-tests/isa/rv32uxpulpimg/pv_sdotsp.S new file mode 100644 index 000000000..c41784263 --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/pv_sdotsp.S @@ -0,0 +1,74 @@ +# See LICENSE for license details. + +#***************************************************************************** +# pv_sdotsp.S +#----------------------------------------------------------------------------- +# +# Test pv.sdotsp instructions. 
+# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Arithmetic tests + #------------------------------------------------------------- + + # pv.sdotsp.h + TEST_RRR_OP( 2, pv.sdotsp.h, 0x8588AF48, 0xFE67FB1A, 0x50E4DE57, 0x856509B6 ); + TEST_RRR_OP( 3, pv.sdotsp.h, 0xA5102DA6, 0xADBC1E09, 0xA2C806FA, 0x8649EFFC ); + TEST_RRR_OP( 4, pv.sdotsp.h, 0xB6C05945, 0x3FDAEFE7, 0x7BA5CB0F, 0x9495383A ); + # pv.sdotsp.sc.h + TEST_RRR_OP( 5, pv.sdotsp.sc.h, 0x76464853, 0x5E5C31BF, 0xCB24C409, 0x98078B60 ); + TEST_RRR_OP( 6, pv.sdotsp.sc.h, 0xBAB1856D, 0xEC042250, 0x230A4695, 0xB6BE3689 ); + TEST_RRR_OP( 7, pv.sdotsp.sc.h, 0xA318DEC3, 0x6BF5D085, 0x9AB012EF, 0x9E9FD4DD ); + # pv.sdotsp.sci.h + TEST_RR_SIMM6_OP( 8, pv.sdotsp.sci.h, 0x6AA9C4BB, 0x36D2FEAA, -14, 0x6AACB183 ); + TEST_RR_SIMM6_OP( 9, pv.sdotsp.sci.h, 0xA61C8356, 0x6752FECB, -9, 0xA6201A5B ); + TEST_RR_SIMM6_OP( 10, pv.sdotsp.sci.h, 0x968EF09B, 0x9747CFF5, -1, 0x968E57D7 ); + # pv.sdotsp.b + TEST_RRR_OP( 11, pv.sdotsp.b, 0x6BF81516, 0xEB8A58F5, 0xCAECEE54, 0x6BF8113C ); + TEST_RRR_OP( 12, pv.sdotsp.b, 0x5D238DA6, 0x47665939, 0x9E989665, 0x5D23E0A1 ); + TEST_RRR_OP( 13, pv.sdotsp.b, 0xC511714F, 0x79D072B4, 0x5B8B4327, 0xC5111E1A ); + # pv.sdotsp.sc.b + TEST_RRR_OP( 14, pv.sdotsp.sc.b, 0x7C691AEB, 0x6F622436, 0x1E1E694D, 0x7C68C0FC ); + TEST_RRR_OP( 15, pv.sdotsp.sc.b, 0xAC521CE2, 0x77B8759A, 0xC1056E73, 0xAC520108 ); + TEST_RRR_OP( 16, pv.sdotsp.sc.b, 0xAEA211C3, 0x74740933, 0xF898DF1E, 0xAEA1EF8B ); + # pv.sdotsp.sci.b + TEST_RR_SIMM6_OP( 17, pv.sdotsp.sci.b, 0x86CD84EE, 0x4CD92920, 1, 0x86CD8480 ); + TEST_RR_SIMM6_OP( 18, pv.sdotsp.sci.b, 0x82399E03, 0xAFCE7172, -5, 0x82399FE3 ); + TEST_RR_SIMM6_OP( 19, pv.sdotsp.sci.b, 0x3F752492, 0xDB25ABAA, 9, 0x3F752A95 ); + + #------------------------------------------------------------- + # Source/Destination tests + 
#------------------------------------------------------------- + + # TODO(smazzola): + # for reg-reg-reg instructions TEST_RRR_SRC1_EQ_DEST, + # TEST_RRR_SRC2_EQ_DEST, TEST_RRR_SRC12_EQ_DEST + # for reg-simm6-reg instructions *macros still to be written* + + #------------------------------------------------------------- + # Bypassing tests + #------------------------------------------------------------- + + # TODO(smazzola): + # for reg-reg-reg instructions TEST_RRR_DEST_BYPASS, + # TEST_RRR_SRC12_BYPASS, TEST_RRR_SRC21_BYPASS, TEST_RRR_SRC3_BYPASS, + # TEST_RRR_ZEROSRC1, TEST_RRR_ZEROSRC2, TEST_RRR_ZEROSRC3, + # TEST_RRR_ZEROSRC12, TEST_RRR_ZEROSRC123, TEST_RRR_ZERODEST + # for reg-simm6-reg instructions *macros still to be written* + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/pv_sdotup.S b/apps/riscv-tests/isa/rv32uxpulpimg/pv_sdotup.S new file mode 100644 index 000000000..7e99c6415 --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/pv_sdotup.S @@ -0,0 +1,74 @@ +# See LICENSE for license details. + +#***************************************************************************** +# pv_sdotup.S +#----------------------------------------------------------------------------- +# +# Test pv.sdotup instructions. 
+# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Arithmetic tests + #------------------------------------------------------------- + + # pv.sdotup.h + TEST_RRR_OP( 2, pv.sdotup.h, 0xAFDDAF48, 0xFE67FB1A, 0x50E4DE57, 0x856509B6 ); + TEST_RRR_OP( 3, pv.sdotup.h, 0xF5942DA6, 0xADBC1E09, 0xA2C806FA, 0x8649EFFC ); + TEST_RRR_OP( 4, pv.sdotup.h, 0x71B65945, 0x3FDAEFE7, 0x7BA5CB0F, 0x9495383A ); + # pv.sdotup.sc.h + TEST_RRR_OP( 5, pv.sdotup.sc.h, 0x06614853, 0x5E5C31BF, 0xCB24C409, 0x98078B60 ); + TEST_RRR_OP( 6, pv.sdotup.sc.h, 0x0146856D, 0xEC042250, 0x230A4695, 0xB6BE3689 ); + TEST_RRR_OP( 7, pv.sdotup.sc.h, 0xB607DEC3, 0x6BF5D085, 0x9AB012EF, 0x9E9FD4DD ); + # pv.sdotup.sci.h + TEST_RR_UIMM6_OP( 8, pv.sdotup.sci.h, 0x6AC13EBF, 0x36D2FEAA, 17, 0x6AACB183 ); + TEST_RR_UIMM6_OP( 9, pv.sdotup.sci.h, 0xA622E695, 0x6752FECB, 2, 0xA6201A5B ); + TEST_RR_UIMM6_OP( 10, pv.sdotup.sci.h, 0x969DC76B, 0x9747CFF5, 11, 0x968E57D7 ); + # pv.sdotup.b + TEST_RRR_OP( 11, pv.sdotup.b, 0x6BF9EC16, 0xEB8A58F5, 0xCAECEE54, 0x6BF8113C ); + TEST_RRR_OP( 12, pv.sdotup.b, 0x5D2493A6, 0x47665939, 0x9E989665, 0x5D23E0A1 ); + TEST_RRR_OP( 13, pv.sdotup.b, 0xC511F34F, 0x79D072B4, 0x5B8B4327, 0xC5111E1A ); + # pv.sdotup.sc.b + TEST_RRR_OP( 14, pv.sdotup.sc.b, 0x7C691AEB, 0x6F622436, 0x1E1E694D, 0x7C68C0FC ); + TEST_RRR_OP( 15, pv.sdotup.sc.b, 0xAC5302E2, 0x77B8759A, 0xC1056E73, 0xAC520108 ); + TEST_RRR_OP( 16, pv.sdotup.sc.b, 0xAEA211C3, 0x74740933, 0xF898DF1E, 0xAEA1EF8B ); + # pv.sdotup.sci.b + TEST_RR_UIMM6_OP( 17, pv.sdotup.sci.b, 0x86CD943A, 0x4CD92920, 11, 0x86CD8480 ); + TEST_RR_UIMM6_OP( 18, pv.sdotup.sci.b, 0x8239C123, 0xAFCE7172, 14, 0x82399FE3 ); + TEST_RR_UIMM6_OP( 19, pv.sdotup.sci.b, 0x3F753AE8, 0xDB25ABAA, 7, 0x3F752A95 ); + + #------------------------------------------------------------- + # Source/Destination tests + 
#------------------------------------------------------------- + + # TODO(smazzola): + # for reg-reg-reg instructions TEST_RRR_SRC1_EQ_DEST, + # TEST_RRR_SRC2_EQ_DEST, TEST_RRR_SRC12_EQ_DEST + # for reg-uimm6-reg instructions *macros still to be written* + + #------------------------------------------------------------- + # Bypassing tests + #------------------------------------------------------------- + + # TODO(smazzola): + # for reg-reg-reg instructions TEST_RRR_DEST_BYPASS, + # TEST_RRR_SRC12_BYPASS, TEST_RRR_SRC21_BYPASS, TEST_RRR_SRC3_BYPASS, + # TEST_RRR_ZEROSRC1, TEST_RRR_ZEROSRC2, TEST_RRR_ZEROSRC3, + # TEST_RRR_ZEROSRC12, TEST_RRR_ZEROSRC123, TEST_RRR_ZERODEST + # for reg-uimm6-reg instructions *macros still to be written* + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/pv_sdotusp.S b/apps/riscv-tests/isa/rv32uxpulpimg/pv_sdotusp.S new file mode 100644 index 000000000..30f30ba24 --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/pv_sdotusp.S @@ -0,0 +1,74 @@ +# See LICENSE for license details. + +#***************************************************************************** +# pv_sdotusp.S +#----------------------------------------------------------------------------- +# +# Test pv.sdotusp instructions. 
+# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Arithmetic tests + #------------------------------------------------------------- + + # pv.sdotusp.h + TEST_RRR_OP( 2, pv.sdotusp.h, 0xB4C3AF48, 0xFE67FB1A, 0x50E4DE57, 0x856509B6 ); + TEST_RRR_OP( 3, pv.sdotusp.h, 0x47D82DA6, 0xADBC1E09, 0xA2C806FA, 0x8649EFFC ); + TEST_RRR_OP( 4, pv.sdotusp.h, 0x81CF5945, 0x3FDAEFE7, 0x7BA5CB0F, 0x9495383A ); + # pv.sdotusp.sc.h + TEST_RRR_OP( 5, pv.sdotusp.sc.h, 0x76464853, 0x5E5C31BF, 0xCB24C409, 0x98078B60 ); + TEST_RRR_OP( 6, pv.sdotusp.sc.h, 0x0146856D, 0xEC042250, 0x230A4695, 0xB6BE3689 ); + TEST_RRR_OP( 7, pv.sdotusp.sc.h, 0xB607DEC3, 0x6BF5D085, 0x9AB012EF, 0x9E9FD4DD ); + # pv.sdotusp.sci.h + TEST_RR_SIMM6_OP( 8, pv.sdotusp.sci.h, 0x6A9BC4BB, 0x36D2FEAA, -14, 0x6AACB183 ); + TEST_RR_SIMM6_OP( 9, pv.sdotusp.sci.h, 0xA6138356, 0x6752FECB, -9, 0xA6201A5B ); + TEST_RR_SIMM6_OP( 10, pv.sdotusp.sci.h, 0x968CF09B, 0x9747CFF5, -1, 0x968E57D7 ); + # pv.sdotusp.b + TEST_RRR_OP( 11, pv.sdotusp.b, 0x6BF81F16, 0xEB8A58F5, 0xCAECEE54, 0x6BF8113C ); + TEST_RRR_OP( 12, pv.sdotusp.b, 0x5D238DA6, 0x47665939, 0x9E989665, 0x5D23E0A1 ); + TEST_RRR_OP( 13, pv.sdotusp.b, 0xC511234F, 0x79D072B4, 0x5B8B4327, 0xC5111E1A ); + # pv.sdotusp.sc.b + TEST_RRR_OP( 14, pv.sdotusp.sc.b, 0x7C691AEB, 0x6F622436, 0x1E1E694D, 0x7C68C0FC ); + TEST_RRR_OP( 15, pv.sdotusp.sc.b, 0xAC5302E2, 0x77B8759A, 0xC1056E73, 0xAC520108 ); + TEST_RRR_OP( 16, pv.sdotusp.sc.b, 0xAEA211C3, 0x74740933, 0xF898DF1E, 0xAEA1EF8B ); + # pv.sdotusp.sci.b + TEST_RR_SIMM6_OP( 17, pv.sdotusp.sci.b, 0x86CD85EE, 0x4CD92920, 1, 0x86CD8480 ); + TEST_RR_SIMM6_OP( 18, pv.sdotusp.sci.b, 0x82399403, 0xAFCE7172, -5, 0x82399FE3 ); + TEST_RR_SIMM6_OP( 19, pv.sdotusp.sci.b, 0x3F753F92, 0xDB25ABAA, 9, 0x3F752A95 ); + + #------------------------------------------------------------- + # Source/Destination tests + 
#------------------------------------------------------------- + + # TODO(smazzola): + # for reg-reg-reg instructions TEST_RRR_SRC1_EQ_DEST, + # TEST_RRR_SRC2_EQ_DEST, TEST_RRR_SRC12_EQ_DEST + # for reg-simm6-reg instructions *macros still to be written* + + #------------------------------------------------------------- + # Bypassing tests + #------------------------------------------------------------- + + # TODO(smazzola): + # for reg-reg-reg instructions TEST_RRR_DEST_BYPASS, + # TEST_RRR_SRC12_BYPASS, TEST_RRR_SRC21_BYPASS, TEST_RRR_SRC3_BYPASS, + # TEST_RRR_ZEROSRC1, TEST_RRR_ZEROSRC2, TEST_RRR_ZEROSRC3, + # TEST_RRR_ZEROSRC12, TEST_RRR_ZEROSRC123, TEST_RRR_ZERODEST + # for reg-simm6-reg instructions *macros still to be written* + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/snitch_isa.mk b/apps/riscv-tests/isa/snitch_isa.mk index 4cfca3257..0cf3c91ed 100644 --- a/apps/riscv-tests/isa/snitch_isa.mk +++ b/apps/riscv-tests/isa/snitch_isa.mk @@ -63,6 +63,12 @@ ifeq ($(xpulpimg),1) pv_abs \ pv_extract pv_extractu \ pv_insert \ + pv_dotup \ + pv_dotusp \ + pv_dotsp \ + pv_sdotup \ + pv_sdotusp \ + pv_sdotsp \ endif From 660c4236c692d77dac63e2c81d812c17e8179b5c Mon Sep 17 00:00:00 2001 From: Sergio Mazzola Date: Mon, 18 Jan 2021 16:37:42 +0100 Subject: [PATCH 25/65] [riscv-opcodes] Disable V instructions overlapping with Xpulpv2 SIMD Disabled V extension instructions: vasubu_vx, vslide1up_vx, vaaddu_vx, vadc_vvm, vadc_vxm, vsbc_vvm, vsbc_vxm, vmulhu_vx, vdivu_vx, vmulhsu_vx, vfcvt_x_f_v, vsetvl --- toolchain/riscv-opcodes/encoding_out.h | 36 -------------------------- toolchain/riscv-opcodes/inst.sverilog | 12 --------- toolchain/riscv-opcodes/opcodes-rvv | 24 ++++++++--------- 3 files changed, 12 insertions(+), 60 deletions(-) diff --git a/toolchain/riscv-opcodes/encoding_out.h b/toolchain/riscv-opcodes/encoding_out.h index f52fabf59..5de9e96d9 100644 --- 
a/toolchain/riscv-opcodes/encoding_out.h +++ b/toolchain/riscv-opcodes/encoding_out.h @@ -926,8 +926,6 @@ #define MASK_C_FSWSP 0xe003 #define MATCH_VSETVLI 0x7057 #define MASK_VSETVLI 0x8000707f -#define MATCH_VSETVL 0x80007057 -#define MASK_VSETVL 0xfe00707f #define MATCH_VLE8_V 0x7 #define MASK_VLE8_V 0x1df0707f #define MATCH_VLE16_V 0x5007 @@ -1228,8 +1226,6 @@ #define MASK_VFNMSAC_VV 0xfc00707f #define MATCH_VFCVT_XU_F_V 0x48001057 #define MASK_VFCVT_XU_F_V 0xfc0ff07f -#define MATCH_VFCVT_X_F_V 0x48009057 -#define MASK_VFCVT_X_F_V 0xfc0ff07f #define MATCH_VFCVT_F_XU_V 0x48011057 #define MASK_VFCVT_F_XU_V 0xfc0ff07f #define MATCH_VFCVT_F_X_V 0x48019057 @@ -1326,12 +1322,8 @@ #define MASK_VSLIDEUP_VX 0xfc00707f #define MATCH_VSLIDEDOWN_VX 0x3c004057 #define MASK_VSLIDEDOWN_VX 0xfc00707f -#define MATCH_VADC_VXM 0x40004057 -#define MASK_VADC_VXM 0xfe00707f #define MATCH_VMADC_VXM 0x44004057 #define MASK_VMADC_VXM 0xfc00707f -#define MATCH_VSBC_VXM 0x48004057 -#define MASK_VSBC_VXM 0xfe00707f #define MATCH_VMSBC_VXM 0x4c004057 #define MASK_VMSBC_VXM 0xfc00707f #define MATCH_VMERGE_VXM 0x5c004057 @@ -1412,12 +1404,8 @@ #define MASK_VRGATHER_VV 0xfc00707f #define MATCH_VRGATHEREI16_VV 0x38000057 #define MASK_VRGATHEREI16_VV 0xfc00707f -#define MATCH_VADC_VVM 0x40000057 -#define MASK_VADC_VVM 0xfe00707f #define MATCH_VMADC_VVM 0x44000057 #define MASK_VMADC_VVM 0xfc00707f -#define MATCH_VSBC_VVM 0x48000057 -#define MASK_VSBC_VVM 0xfe00707f #define MATCH_VMSBC_VVM 0x4c000057 #define MASK_VMSBC_VVM 0xfc00707f #define MATCH_VMERGE_VVM 0x5c000057 @@ -1666,34 +1654,22 @@ #define MASK_VWMACC_VV 0xfc00707f #define MATCH_VWMACCSU_VV 0xfc002057 #define MASK_VWMACCSU_VV 0xfc00707f -#define MATCH_VAADDU_VX 0x20006057 -#define MASK_VAADDU_VX 0xfc00707f #define MATCH_VAADD_VX 0x24006057 #define MASK_VAADD_VX 0xfc00707f -#define MATCH_VASUBU_VX 0x28006057 -#define MASK_VASUBU_VX 0xfc00707f #define MATCH_VASUB_VX 0x2c006057 #define MASK_VASUB_VX 0xfc00707f #define MATCH_VMV_S_X 
0x42006057 #define MASK_VMV_S_X 0xfff0707f -#define MATCH_VSLIDE1UP_VX 0x38006057 -#define MASK_VSLIDE1UP_VX 0xfc00707f #define MATCH_VSLIDE1DOWN_VX 0x3c006057 #define MASK_VSLIDE1DOWN_VX 0xfc00707f -#define MATCH_VDIVU_VX 0x80006057 -#define MASK_VDIVU_VX 0xfc00707f #define MATCH_VDIV_VX 0x84006057 #define MASK_VDIV_VX 0xfc00707f #define MATCH_VREMU_VX 0x88006057 #define MASK_VREMU_VX 0xfc00707f #define MATCH_VREM_VX 0x8c006057 #define MASK_VREM_VX 0xfc00707f -#define MATCH_VMULHU_VX 0x90006057 -#define MASK_VMULHU_VX 0xfc00707f #define MATCH_VMUL_VX 0x94006057 #define MASK_VMUL_VX 0xfc00707f -#define MATCH_VMULHSU_VX 0x98006057 -#define MASK_VMULHSU_VX 0xfc00707f #define MATCH_VMULH_VX 0x9c006057 #define MASK_VMULH_VX 0xfc00707f #define MATCH_VMADD_VX 0xa4006057 @@ -3454,7 +3430,6 @@ DECLARE_INSN(c_fsdsp, MATCH_C_FSDSP, MASK_C_FSDSP) DECLARE_INSN(c_swsp, MATCH_C_SWSP, MASK_C_SWSP) DECLARE_INSN(c_fswsp, MATCH_C_FSWSP, MASK_C_FSWSP) DECLARE_INSN(vsetvli, MATCH_VSETVLI, MASK_VSETVLI) -DECLARE_INSN(vsetvl, MATCH_VSETVL, MASK_VSETVL) DECLARE_INSN(vle8_v, MATCH_VLE8_V, MASK_VLE8_V) DECLARE_INSN(vle16_v, MATCH_VLE16_V, MASK_VLE16_V) DECLARE_INSN(vle32_v, MATCH_VLE32_V, MASK_VLE32_V) @@ -3605,7 +3580,6 @@ DECLARE_INSN(vfnmacc_vv, MATCH_VFNMACC_VV, MASK_VFNMACC_VV) DECLARE_INSN(vfmsac_vv, MATCH_VFMSAC_VV, MASK_VFMSAC_VV) DECLARE_INSN(vfnmsac_vv, MATCH_VFNMSAC_VV, MASK_VFNMSAC_VV) DECLARE_INSN(vfcvt_xu_f_v, MATCH_VFCVT_XU_F_V, MASK_VFCVT_XU_F_V) -DECLARE_INSN(vfcvt_x_f_v, MATCH_VFCVT_X_F_V, MASK_VFCVT_X_F_V) DECLARE_INSN(vfcvt_f_xu_v, MATCH_VFCVT_F_XU_V, MASK_VFCVT_F_XU_V) DECLARE_INSN(vfcvt_f_x_v, MATCH_VFCVT_F_X_V, MASK_VFCVT_F_X_V) DECLARE_INSN(vfcvt_rtz_xu_f_v, MATCH_VFCVT_RTZ_XU_F_V, MASK_VFCVT_RTZ_XU_F_V) @@ -3654,9 +3628,7 @@ DECLARE_INSN(vxor_vx, MATCH_VXOR_VX, MASK_VXOR_VX) DECLARE_INSN(vrgather_vx, MATCH_VRGATHER_VX, MASK_VRGATHER_VX) DECLARE_INSN(vslideup_vx, MATCH_VSLIDEUP_VX, MASK_VSLIDEUP_VX) DECLARE_INSN(vslidedown_vx, MATCH_VSLIDEDOWN_VX, 
MASK_VSLIDEDOWN_VX) -DECLARE_INSN(vadc_vxm, MATCH_VADC_VXM, MASK_VADC_VXM) DECLARE_INSN(vmadc_vxm, MATCH_VMADC_VXM, MASK_VMADC_VXM) -DECLARE_INSN(vsbc_vxm, MATCH_VSBC_VXM, MASK_VSBC_VXM) DECLARE_INSN(vmsbc_vxm, MATCH_VMSBC_VXM, MASK_VMSBC_VXM) DECLARE_INSN(vmerge_vxm, MATCH_VMERGE_VXM, MASK_VMERGE_VXM) DECLARE_INSN(vmv_v_x, MATCH_VMV_V_X, MASK_VMV_V_X) @@ -3697,9 +3669,7 @@ DECLARE_INSN(vor_vv, MATCH_VOR_VV, MASK_VOR_VV) DECLARE_INSN(vxor_vv, MATCH_VXOR_VV, MASK_VXOR_VV) DECLARE_INSN(vrgather_vv, MATCH_VRGATHER_VV, MASK_VRGATHER_VV) DECLARE_INSN(vrgatherei16_vv, MATCH_VRGATHEREI16_VV, MASK_VRGATHEREI16_VV) -DECLARE_INSN(vadc_vvm, MATCH_VADC_VVM, MASK_VADC_VVM) DECLARE_INSN(vmadc_vvm, MATCH_VMADC_VVM, MASK_VMADC_VVM) -DECLARE_INSN(vsbc_vvm, MATCH_VSBC_VVM, MASK_VSBC_VVM) DECLARE_INSN(vmsbc_vvm, MATCH_VMSBC_VVM, MASK_VMSBC_VVM) DECLARE_INSN(vmerge_vvm, MATCH_VMERGE_VVM, MASK_VMERGE_VVM) DECLARE_INSN(vmv_v_v, MATCH_VMV_V_V, MASK_VMV_V_V) @@ -3824,20 +3794,14 @@ DECLARE_INSN(vwmul_vv, MATCH_VWMUL_VV, MASK_VWMUL_VV) DECLARE_INSN(vwmaccu_vv, MATCH_VWMACCU_VV, MASK_VWMACCU_VV) DECLARE_INSN(vwmacc_vv, MATCH_VWMACC_VV, MASK_VWMACC_VV) DECLARE_INSN(vwmaccsu_vv, MATCH_VWMACCSU_VV, MASK_VWMACCSU_VV) -DECLARE_INSN(vaaddu_vx, MATCH_VAADDU_VX, MASK_VAADDU_VX) DECLARE_INSN(vaadd_vx, MATCH_VAADD_VX, MASK_VAADD_VX) -DECLARE_INSN(vasubu_vx, MATCH_VASUBU_VX, MASK_VASUBU_VX) DECLARE_INSN(vasub_vx, MATCH_VASUB_VX, MASK_VASUB_VX) DECLARE_INSN(vmv_s_x, MATCH_VMV_S_X, MASK_VMV_S_X) -DECLARE_INSN(vslide1up_vx, MATCH_VSLIDE1UP_VX, MASK_VSLIDE1UP_VX) DECLARE_INSN(vslide1down_vx, MATCH_VSLIDE1DOWN_VX, MASK_VSLIDE1DOWN_VX) -DECLARE_INSN(vdivu_vx, MATCH_VDIVU_VX, MASK_VDIVU_VX) DECLARE_INSN(vdiv_vx, MATCH_VDIV_VX, MASK_VDIV_VX) DECLARE_INSN(vremu_vx, MATCH_VREMU_VX, MASK_VREMU_VX) DECLARE_INSN(vrem_vx, MATCH_VREM_VX, MASK_VREM_VX) -DECLARE_INSN(vmulhu_vx, MATCH_VMULHU_VX, MASK_VMULHU_VX) DECLARE_INSN(vmul_vx, MATCH_VMUL_VX, MASK_VMUL_VX) -DECLARE_INSN(vmulhsu_vx, MATCH_VMULHSU_VX, 
MASK_VMULHSU_VX) DECLARE_INSN(vmulh_vx, MATCH_VMULH_VX, MASK_VMULH_VX) DECLARE_INSN(vmadd_vx, MATCH_VMADD_VX, MASK_VMADD_VX) DECLARE_INSN(vnmsub_vx, MATCH_VNMSUB_VX, MASK_VNMSUB_VX) diff --git a/toolchain/riscv-opcodes/inst.sverilog b/toolchain/riscv-opcodes/inst.sverilog index f8a6ccfe7..9f2bb6612 100644 --- a/toolchain/riscv-opcodes/inst.sverilog +++ b/toolchain/riscv-opcodes/inst.sverilog @@ -324,7 +324,6 @@ package riscv_instr; localparam [31:0] C_SWSP = 32'b????????????????110???????????10; localparam [31:0] C_FSWSP = 32'b????????????????111???????????10; localparam [31:0] VSETVLI = 32'b0????????????????111?????1010111; - localparam [31:0] VSETVL = 32'b1000000??????????111?????1010111; localparam [31:0] VLE8_V = 32'b???000?00000?????000?????0000111; localparam [31:0] VLE16_V = 32'b???000?00000?????101?????0000111; localparam [31:0] VLE32_V = 32'b???000?00000?????110?????0000111; @@ -475,7 +474,6 @@ package riscv_instr; localparam [31:0] VFMSAC_VV = 32'b101110???????????001?????1010111; localparam [31:0] VFNMSAC_VV = 32'b101111???????????001?????1010111; localparam [31:0] VFCVT_XU_F_V = 32'b010010??????00000001?????1010111; - localparam [31:0] VFCVT_X_F_V = 32'b010010??????00001001?????1010111; localparam [31:0] VFCVT_F_XU_V = 32'b010010??????00010001?????1010111; localparam [31:0] VFCVT_F_X_V = 32'b010010??????00011001?????1010111; localparam [31:0] VFCVT_RTZ_XU_F_V = 32'b010010??????00110001?????1010111; @@ -524,9 +522,7 @@ package riscv_instr; localparam [31:0] VRGATHER_VX = 32'b001100???????????100?????1010111; localparam [31:0] VSLIDEUP_VX = 32'b001110???????????100?????1010111; localparam [31:0] VSLIDEDOWN_VX = 32'b001111???????????100?????1010111; - localparam [31:0] VADC_VXM = 32'b0100000??????????100?????1010111; localparam [31:0] VMADC_VXM = 32'b010001???????????100?????1010111; - localparam [31:0] VSBC_VXM = 32'b0100100??????????100?????1010111; localparam [31:0] VMSBC_VXM = 32'b010011???????????100?????1010111; localparam [31:0] VMERGE_VXM = 
32'b0101110??????????100?????1010111; localparam [31:0] VMV_V_X = 32'b010111100000?????100?????1010111; @@ -567,9 +563,7 @@ package riscv_instr; localparam [31:0] VXOR_VV = 32'b001011???????????000?????1010111; localparam [31:0] VRGATHER_VV = 32'b001100???????????000?????1010111; localparam [31:0] VRGATHEREI16_VV = 32'b001110???????????000?????1010111; - localparam [31:0] VADC_VVM = 32'b0100000??????????000?????1010111; localparam [31:0] VMADC_VVM = 32'b010001???????????000?????1010111; - localparam [31:0] VSBC_VVM = 32'b0100100??????????000?????1010111; localparam [31:0] VMSBC_VVM = 32'b010011???????????000?????1010111; localparam [31:0] VMERGE_VVM = 32'b0101110??????????000?????1010111; localparam [31:0] VMV_V_V = 32'b010111100000?????000?????1010111; @@ -694,20 +688,14 @@ package riscv_instr; localparam [31:0] VWMACCU_VV = 32'b111100???????????010?????1010111; localparam [31:0] VWMACC_VV = 32'b111101???????????010?????1010111; localparam [31:0] VWMACCSU_VV = 32'b111111???????????010?????1010111; - localparam [31:0] VAADDU_VX = 32'b001000???????????110?????1010111; localparam [31:0] VAADD_VX = 32'b001001???????????110?????1010111; - localparam [31:0] VASUBU_VX = 32'b001010???????????110?????1010111; localparam [31:0] VASUB_VX = 32'b001011???????????110?????1010111; localparam [31:0] VMV_S_X = 32'b010000100000?????110?????1010111; - localparam [31:0] VSLIDE1UP_VX = 32'b001110???????????110?????1010111; localparam [31:0] VSLIDE1DOWN_VX = 32'b001111???????????110?????1010111; - localparam [31:0] VDIVU_VX = 32'b100000???????????110?????1010111; localparam [31:0] VDIV_VX = 32'b100001???????????110?????1010111; localparam [31:0] VREMU_VX = 32'b100010???????????110?????1010111; localparam [31:0] VREM_VX = 32'b100011???????????110?????1010111; - localparam [31:0] VMULHU_VX = 32'b100100???????????110?????1010111; localparam [31:0] VMUL_VX = 32'b100101???????????110?????1010111; - localparam [31:0] VMULHSU_VX = 32'b100110???????????110?????1010111; localparam [31:0] 
VMULH_VX = 32'b100111???????????110?????1010111; localparam [31:0] VMADD_VX = 32'b101001???????????110?????1010111; localparam [31:0] VNMSUB_VX = 32'b101011???????????110?????1010111; diff --git a/toolchain/riscv-opcodes/opcodes-rvv b/toolchain/riscv-opcodes/opcodes-rvv index 95b99ccd1..d961215bd 100644 --- a/toolchain/riscv-opcodes/opcodes-rvv +++ b/toolchain/riscv-opcodes/opcodes-rvv @@ -9,7 +9,7 @@ # configuration setting # https://github.com/riscv/riscv-v-spec/blob/master/vcfg-format.adoc @vsetvli 31=0 zimm11 rs1 14..12=0x7 rd 6..0=0x57 -@vsetvl 31=1 30..25=0x0 rs2 rs1 14..12=0x7 rd 6..0=0x57 +#vsetvl 31=1 30..25=0x0 rs2 rs1 14..12=0x7 rd 6..0=0x57 # # Vector Loads and Store @@ -192,7 +192,7 @@ vfnmacc.vv 31..26=0x2d vm vs2 vs1 14..12=0x1 vd 6..0=0x57 vfnmsac.vv 31..26=0x2f vm vs2 vs1 14..12=0x1 vd 6..0=0x57 @vfcvt.xu.f.v 31..26=0x12 vm vs2 19..15=0x00 14..12=0x1 vd 6..0=0x57 -@vfcvt.x.f.v 31..26=0x12 vm vs2 19..15=0x01 14..12=0x1 vd 6..0=0x57 +#vfcvt.x.f.v 31..26=0x12 vm vs2 19..15=0x01 14..12=0x1 vd 6..0=0x57 @vfcvt.f.xu.v 31..26=0x12 vm vs2 19..15=0x02 14..12=0x1 vd 6..0=0x57 @vfcvt.f.x.v 31..26=0x12 vm vs2 19..15=0x03 14..12=0x1 vd 6..0=0x57 @vfcvt.rtz.xu.f.v 31..26=0x12 vm vs2 19..15=0x06 14..12=0x1 vd 6..0=0x57 @@ -248,9 +248,9 @@ vxor.vx 31..26=0x0b vm vs2 rs1 14..12=0x4 vd 6..0=0x57 @vslideup.vx 31..26=0x0e vm vs2 rs1 14..12=0x4 vd 6..0=0x57 vslidedown.vx 31..26=0x0f vm vs2 rs1 14..12=0x4 vd 6..0=0x57 -@vadc.vxm 31..26=0x10 25=0 vs2 rs1 14..12=0x4 vd 6..0=0x57 +#vadc.vxm 31..26=0x10 25=0 vs2 rs1 14..12=0x4 vd 6..0=0x57 vmadc.vxm 31..26=0x11 vm vs2 rs1 14..12=0x4 vd 6..0=0x57 -@vsbc.vxm 31..26=0x12 25=0 vs2 rs1 14..12=0x4 vd 6..0=0x57 +#vsbc.vxm 31..26=0x12 25=0 vs2 rs1 14..12=0x4 vd 6..0=0x57 vmsbc.vxm 31..26=0x13 vm vs2 rs1 14..12=0x4 vd 6..0=0x57 @vmerge.vxm 31..26=0x17 25=0 vs2 rs1 14..12=0x4 vd 6..0=0x57 @vmv.v.x 31..26=0x17 25=1 24..20=0 rs1 14..12=0x4 vd 6..0=0x57 @@ -296,9 +296,9 @@ vxor.vv 31..26=0x0b vm vs2 vs1 14..12=0x0 vd 6..0=0x57 
@vrgather.vv 31..26=0x0c vm vs2 vs1 14..12=0x0 vd 6..0=0x57 @vrgatherei16.vv 31..26=0x0e vm vs2 vs1 14..12=0x0 vd 6..0=0x57 -@vadc.vvm 31..26=0x10 25=0 vs2 vs1 14..12=0x0 vd 6..0=0x57 +#vadc.vvm 31..26=0x10 25=0 vs2 vs1 14..12=0x0 vd 6..0=0x57 vmadc.vvm 31..26=0x11 vm vs2 vs1 14..12=0x0 vd 6..0=0x57 -@vsbc.vvm 31..26=0x12 25=0 vs2 vs1 14..12=0x0 vd 6..0=0x57 +#vsbc.vvm 31..26=0x12 25=0 vs2 vs1 14..12=0x0 vd 6..0=0x57 vmsbc.vvm 31..26=0x13 vm vs2 vs1 14..12=0x0 vd 6..0=0x57 vmerge.vvm 31..26=0x17 25=0 vs2 vs1 14..12=0x0 vd 6..0=0x57 vmv.v.v 31..26=0x17 25=1 24..20=0 vs1 14..12=0x0 vd 6..0=0x57 @@ -442,22 +442,22 @@ vwmacc.vv 31..26=0x3d vm vs2 vs1 14..12=0x2 vd 6..0=0x57 vwmaccsu.vv 31..26=0x3f vm vs2 vs1 14..12=0x2 vd 6..0=0x57 # OPMVX -@vaaddu.vx 31..26=0x08 vm vs2 rs1 14..12=0x6 vd 6..0=0x57 +#vaaddu.vx 31..26=0x08 vm vs2 rs1 14..12=0x6 vd 6..0=0x57 vaadd.vx 31..26=0x09 vm vs2 rs1 14..12=0x6 vd 6..0=0x57 -@vasubu.vx 31..26=0x0a vm vs2 rs1 14..12=0x6 vd 6..0=0x57 +#vasubu.vx 31..26=0x0a vm vs2 rs1 14..12=0x6 vd 6..0=0x57 vasub.vx 31..26=0x0b vm vs2 rs1 14..12=0x6 vd 6..0=0x57 @vmv.s.x 31..26=0x10 25=1 24..20=0 rs1 14..12=0x6 vd 6..0=0x57 -@vslide1up.vx 31..26=0x0e vm vs2 rs1 14..12=0x6 vd 6..0=0x57 +#vslide1up.vx 31..26=0x0e vm vs2 rs1 14..12=0x6 vd 6..0=0x57 vslide1down.vx 31..26=0x0f vm vs2 rs1 14..12=0x6 vd 6..0=0x57 -@vdivu.vx 31..26=0x20 vm vs2 rs1 14..12=0x6 vd 6..0=0x57 +#vdivu.vx 31..26=0x20 vm vs2 rs1 14..12=0x6 vd 6..0=0x57 vdiv.vx 31..26=0x21 vm vs2 rs1 14..12=0x6 vd 6..0=0x57 @vremu.vx 31..26=0x22 vm vs2 rs1 14..12=0x6 vd 6..0=0x57 vrem.vx 31..26=0x23 vm vs2 rs1 14..12=0x6 vd 6..0=0x57 -@vmulhu.vx 31..26=0x24 vm vs2 rs1 14..12=0x6 vd 6..0=0x57 +#vmulhu.vx 31..26=0x24 vm vs2 rs1 14..12=0x6 vd 6..0=0x57 vmul.vx 31..26=0x25 vm vs2 rs1 14..12=0x6 vd 6..0=0x57 -@vmulhsu.vx 31..26=0x26 vm vs2 rs1 14..12=0x6 vd 6..0=0x57 +#vmulhsu.vx 31..26=0x26 vm vs2 rs1 14..12=0x6 vd 6..0=0x57 vmulh.vx 31..26=0x27 vm vs2 rs1 14..12=0x6 vd 6..0=0x57 vmadd.vx 31..26=0x29 vm 
vs2 rs1 14..12=0x6 vd 6..0=0x57 vnmsub.vx 31..26=0x2b vm vs2 rs1 14..12=0x6 vd 6..0=0x57 From 3ab30617457a58a77a260fbd9f67ca3ea426f285 Mon Sep 17 00:00:00 2001 From: Sergio Mazzola Date: Mon, 18 Jan 2021 16:39:01 +0100 Subject: [PATCH 26/65] [riscv-isa-sim] Verify Spike implementation of Xpulpv2 SIMD Also disable instructions from V extension overlapping with SIMD opcodes, creating errors in Spike simulations. Disabled V extension instructions: vasubu_vx, vslide1up_vx, vaaddu_vx, vadc_vvm, vadc_vxm, vsbc_vvm, vsbc_vxm, vmulhu_vx, vdivu_vx, vmulhsu_vx, vfcvt_x_f_v, vsetvl --- toolchain/riscv-isa-sim/disasm/disasm.cc | 20 ++++++------- toolchain/riscv-isa-sim/riscv/decode.h | 2 -- .../riscv-isa-sim/riscv/insns/pv_and_b.h | 2 +- .../riscv-isa-sim/riscv/insns/pv_and_h.h | 2 +- .../riscv-isa-sim/riscv/insns/pv_and_sc_b.h | 2 +- .../riscv-isa-sim/riscv/insns/pv_and_sc_h.h | 2 +- .../riscv-isa-sim/riscv/insns/pv_and_sci_b.h | 4 +-- .../riscv-isa-sim/riscv/insns/pv_and_sci_h.h | 6 ++-- .../riscv-isa-sim/riscv/insns/pv_avg_b.h | 2 +- .../riscv-isa-sim/riscv/insns/pv_avg_h.h | 2 +- .../riscv-isa-sim/riscv/insns/pv_avg_sc_b.h | 2 +- .../riscv-isa-sim/riscv/insns/pv_avg_sc_h.h | 2 +- .../riscv-isa-sim/riscv/insns/pv_avg_sci_b.h | 2 +- .../riscv-isa-sim/riscv/insns/pv_avg_sci_h.h | 2 +- .../riscv-isa-sim/riscv/insns/pv_avgu_b.h | 4 +-- .../riscv-isa-sim/riscv/insns/pv_avgu_h.h | 4 +-- .../riscv-isa-sim/riscv/insns/pv_avgu_sc_b.h | 4 +-- .../riscv-isa-sim/riscv/insns/pv_avgu_sc_h.h | 4 +-- .../riscv-isa-sim/riscv/insns/pv_avgu_sci_b.h | 4 +-- .../riscv-isa-sim/riscv/insns/pv_avgu_sci_h.h | 4 +-- .../riscv-isa-sim/riscv/insns/pv_dotup_b.h | 2 +- .../riscv-isa-sim/riscv/insns/pv_dotup_h.h | 2 +- .../riscv-isa-sim/riscv/insns/pv_dotup_sc_b.h | 2 +- .../riscv-isa-sim/riscv/insns/pv_dotup_sc_h.h | 2 +- .../riscv/insns/pv_dotup_sci_b.h | 2 +- .../riscv/insns/pv_dotup_sci_h.h | 2 +- .../riscv-isa-sim/riscv/insns/pv_extract_b.h | 2 +- .../riscv-isa-sim/riscv/insns/pv_extract_h.h | 2 +- 
.../riscv-isa-sim/riscv/insns/pv_extractu_b.h | 2 +- .../riscv-isa-sim/riscv/insns/pv_extractu_h.h | 2 +- .../riscv-isa-sim/riscv/insns/pv_insert_b.h | 7 ++++- .../riscv-isa-sim/riscv/insns/pv_insert_h.h | 7 ++++- .../riscv-isa-sim/riscv/insns/pv_maxu_b.h | 2 +- .../riscv-isa-sim/riscv/insns/pv_maxu_h.h | 2 +- .../riscv-isa-sim/riscv/insns/pv_maxu_sc_b.h | 2 +- .../riscv-isa-sim/riscv/insns/pv_maxu_sc_h.h | 2 +- .../riscv-isa-sim/riscv/insns/pv_maxu_sci_b.h | 2 +- .../riscv-isa-sim/riscv/insns/pv_maxu_sci_h.h | 2 +- .../riscv-isa-sim/riscv/insns/pv_minu_b.h | 2 +- .../riscv-isa-sim/riscv/insns/pv_minu_h.h | 2 +- .../riscv-isa-sim/riscv/insns/pv_minu_sc_b.h | 2 +- .../riscv-isa-sim/riscv/insns/pv_minu_sc_h.h | 2 +- .../riscv-isa-sim/riscv/insns/pv_minu_sci_b.h | 2 +- .../riscv-isa-sim/riscv/insns/pv_minu_sci_h.h | 2 +- toolchain/riscv-isa-sim/riscv/insns/pv_or_b.h | 2 +- toolchain/riscv-isa-sim/riscv/insns/pv_or_h.h | 2 +- .../riscv-isa-sim/riscv/insns/pv_or_sc_b.h | 2 +- .../riscv-isa-sim/riscv/insns/pv_or_sc_h.h | 2 +- .../riscv-isa-sim/riscv/insns/pv_or_sci_b.h | 4 +-- .../riscv-isa-sim/riscv/insns/pv_or_sci_h.h | 4 +-- .../riscv-isa-sim/riscv/insns/pv_sdotup_b.h | 2 +- .../riscv-isa-sim/riscv/insns/pv_sdotup_h.h | 2 +- .../riscv/insns/pv_sdotup_sc_b.h | 2 +- .../riscv/insns/pv_sdotup_sc_h.h | 2 +- .../riscv/insns/pv_sdotup_sci_b.h | 2 +- .../riscv/insns/pv_sdotup_sci_h.h | 2 +- .../riscv-isa-sim/riscv/insns/pv_sll_b.h | 4 +-- .../riscv-isa-sim/riscv/insns/pv_sll_h.h | 4 +-- .../riscv-isa-sim/riscv/insns/pv_sll_sc_b.h | 4 +-- .../riscv-isa-sim/riscv/insns/pv_sll_sc_h.h | 4 +-- .../riscv-isa-sim/riscv/insns/pv_sll_sci_b.h | 4 +-- .../riscv-isa-sim/riscv/insns/pv_sll_sci_h.h | 4 +-- .../riscv-isa-sim/riscv/insns/pv_sra_b.h | 2 +- .../riscv-isa-sim/riscv/insns/pv_sra_h.h | 2 +- .../riscv-isa-sim/riscv/insns/pv_sra_sc_b.h | 2 +- .../riscv-isa-sim/riscv/insns/pv_sra_sc_h.h | 2 +- .../riscv-isa-sim/riscv/insns/pv_sra_sci_b.h | 2 +- 
.../riscv-isa-sim/riscv/insns/pv_sra_sci_h.h | 2 +- .../riscv-isa-sim/riscv/insns/pv_srl_b.h | 4 +-- .../riscv-isa-sim/riscv/insns/pv_srl_h.h | 4 +-- .../riscv-isa-sim/riscv/insns/pv_srl_sc_b.h | 4 +-- .../riscv-isa-sim/riscv/insns/pv_srl_sc_h.h | 4 +-- .../riscv-isa-sim/riscv/insns/pv_srl_sci_b.h | 4 +-- .../riscv-isa-sim/riscv/insns/pv_srl_sci_h.h | 4 +-- .../riscv-isa-sim/riscv/insns/pv_xor_b.h | 2 +- .../riscv-isa-sim/riscv/insns/pv_xor_h.h | 2 +- .../riscv-isa-sim/riscv/insns/pv_xor_sc_b.h | 2 +- .../riscv-isa-sim/riscv/insns/pv_xor_sc_h.h | 2 +- .../riscv-isa-sim/riscv/insns/pv_xor_sci_b.h | 4 +-- .../riscv-isa-sim/riscv/insns/pv_xor_sci_h.h | 4 +-- toolchain/riscv-isa-sim/riscv/riscv.mk.in | 30 +++++++++++-------- 81 files changed, 141 insertions(+), 127 deletions(-) diff --git a/toolchain/riscv-isa-sim/disasm/disasm.cc b/toolchain/riscv-isa-sim/disasm/disasm.cc index 42a39dcdc..0e4e41806 100644 --- a/toolchain/riscv-isa-sim/disasm/disasm.cc +++ b/toolchain/riscv-isa-sim/disasm/disasm.cc @@ -809,7 +809,7 @@ disassembler_t::disassembler_t(int xlen) DISASM_INSN("c.fsdsp", c_fsdsp, 0, {&rvc_fp_rs2, &rvc_sdsp_address}); DISASM_INSN("vsetvli", vsetvli, 0, {&xrd, &xrs1, &v_vtype}); - DISASM_INSN("vsetvl", vsetvl, 0, {&xrd, &xrs1, &xrs2}); + //DISASM_INSN("vsetvl", vsetvl, 0, {&xrd, &xrs1, &xrs2}); #define DISASM_VMEM_INSN(name, fmt, ff) \ add_insn(new disasm_insn_t(#name "8" #ff ".v", match_##name##8##ff##_v, mask_##name##8##ff##_v | mask_nf, fmt)); \ @@ -1020,9 +1020,9 @@ disassembler_t::disassembler_t(int xlen) DISASM_OPIV__XI_INSN(vslidedown, 0); //0b01_0000 - DISASM_OPIV_VXIM_INSN(vadc, 1, 0); + //DISASM_OPIV_VXIM_INSN(vadc, 1, 0); DISASM_OPIV_VXIM_INSN(vmadc, 1, 1); - DISASM_OPIV_VX_M_INSN(vsbc, 1, 0); + //DISASM_OPIV_VX_M_INSN(vsbc, 1, 0); DISASM_OPIV_VX_M_INSN(vmsbc, 1, 1); DISASM_OPIV_VXIM_INSN(vmerge, 1, 0); DISASM_INSN("vmv.v.i", vmv_v_i, 0, {&vd, &v_simm5}); @@ -1065,9 +1065,9 @@ disassembler_t::disassembler_t(int xlen) //OPMVV/OPMVX //0b00_0000 - 
DISASM_OPIV_VX__INSN(vaaddu, 0); + //DISASM_OPIV_VX__INSN(vaaddu, 0); DISASM_OPIV_VX__INSN(vaadd, 0); - DISASM_OPIV_VX__INSN(vasubu, 0); + //DISASM_OPIV_VX__INSN(vasubu, 0); DISASM_OPIV_VX__INSN(vasub, 0); DISASM_OPIV_S___INSN(vredsum, 1); @@ -1078,7 +1078,7 @@ disassembler_t::disassembler_t(int xlen) DISASM_OPIV_S___INSN(vredmin, 1); DISASM_OPIV_S___INSN(vredmaxu, 0); DISASM_OPIV_S___INSN(vredmax, 1); - DISASM_OPIV__X__INSN(vslide1up, 1); + //DISASM_OPIV__X__INSN(vslide1up, 1); DISASM_OPIV__X__INSN(vslide1down,1); //0b01_0000 @@ -1119,13 +1119,13 @@ disassembler_t::disassembler_t(int xlen) DISASM_OPIV_M___INSN(vmxnor, 1); //0b10_0000 - DISASM_OPIV_VX__INSN(vdivu, 0); + //DISASM_OPIV_VX__INSN(vdivu, 0); DISASM_OPIV_VX__INSN(vdiv, 1); DISASM_OPIV_VX__INSN(vremu, 0); DISASM_OPIV_VX__INSN(vrem, 1); - DISASM_OPIV_VX__INSN(vmulhu, 0); + //DISASM_OPIV_VX__INSN(vmulhu, 0); DISASM_OPIV_VX__INSN(vmul, 1); - DISASM_OPIV_VX__INSN(vmulhsu, 0); + //DISASM_OPIV_VX__INSN(vmulhsu, 0); DISASM_OPIV_VX__INSN(vmulh, 1); DISASM_OPIV_VX__INSN(vmadd, 1); DISASM_OPIV_VX__INSN(vnmsub, 1); @@ -1237,7 +1237,7 @@ disassembler_t::disassembler_t(int xlen) DISASM_OPIV__F_INSN(vfrdiv); //vfunary0 - DISASM_VFUNARY0_INSN(vf, v); + //DISASM_VFUNARY0_INSN(vf, v); DISASM_VFUNARY0_INSN(vfw, v); DISASM_INSN("vfwcvt.f.f.v", vfwcvt_f_f_v, 0, {&vd, &vs2, &opt, &vm}); diff --git a/toolchain/riscv-isa-sim/riscv/decode.h b/toolchain/riscv-isa-sim/riscv/decode.h index 11e711963..9f0739995 100644 --- a/toolchain/riscv-isa-sim/riscv/decode.h +++ b/toolchain/riscv-isa-sim/riscv/decode.h @@ -295,8 +295,6 @@ class regfile_t #define RS1_B(i) ((RS1 >> ((xlen >> 2) * (i & 0x3))) & 0xFF) /* select rs1 byte: i should only be from 0 to 3 */ #define RS2_H(i) ((RS2 >> ((xlen >> 1) * (i & 0x1))) & 0xFFFF) /* select rs2 half: i should only be 0 or 1 */ #define RS2_B(i) ((RS2 >> ((xlen >> 2) * (i & 0x3))) & 0xFF) /* select rs2 byte: i should only be from 0 to 3 */ -#define WRITE_RD_H(i, value) WRITE_RD((RD & ~(0xFFFF << 
((xlen >> 1) * (i & 0x1)))) | ((value & 0xFFFF) << ((xlen >> 1) * (i & 0x1)))) /* select to which rd half to write the 16-bit value */ -#define WRITE_RD_B(i, value) WRITE_RD((RD & ~(0xFF << ((xlen >> 2) * (i & 0x3)))) | ((value & 0xFF) << ((xlen >> 2) * (i & 0x3)))) /* select to which rd byte to write the 8-bit value */ #define sext32(x) ((sreg_t)(int32_t)(x)) diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_and_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_and_b.h index e5c584083..d3711b762 100644 --- a/toolchain/riscv-isa-sim/riscv/insns/pv_and_b.h +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_and_b.h @@ -6,4 +6,4 @@ for(int i = xlen/8 - 1; i >= 0; i--){ simd_rd <<= 8; simd_rd += (uint32_t)temp & 0x000000FF; } -WRITE_RD(zext_xlen(simd_rd)); +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_and_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_and_h.h index 8447455f5..8bae35685 100644 --- a/toolchain/riscv-isa-sim/riscv/insns/pv_and_h.h +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_and_h.h @@ -6,4 +6,4 @@ for(int i = xlen/16 - 1; i >= 0; i--){ simd_rd <<= 16; simd_rd += (uint32_t)temp & 0x0000FFFF; } -WRITE_RD(zext_xlen(simd_rd)); +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_and_sc_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_and_sc_b.h index 5dfeca4d5..b1e6c865e 100644 --- a/toolchain/riscv-isa-sim/riscv/insns/pv_and_sc_b.h +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_and_sc_b.h @@ -6,4 +6,4 @@ for(int i = xlen/8 - 1; i >= 0; i--){ simd_rd <<= 8; simd_rd += (uint32_t)temp & 0x000000FF; } -WRITE_RD(zext_xlen(simd_rd)); +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_and_sc_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_and_sc_h.h index 07d035cd0..2389d11e1 100644 --- a/toolchain/riscv-isa-sim/riscv/insns/pv_and_sc_h.h +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_and_sc_h.h @@ -6,4 +6,4 @@ for(int i = xlen/16 - 1; i >= 0; i--){ simd_rd <<= 16; simd_rd += 
(uint32_t)temp & 0x0000FFFF; } -WRITE_RD(zext_xlen(simd_rd)); +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_and_sci_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_and_sci_b.h index 8501c70cd..7e4e9e0ac 100644 --- a/toolchain/riscv-isa-sim/riscv/insns/pv_and_sci_b.h +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_and_sci_b.h @@ -2,8 +2,8 @@ uint8_t temp; uint32_t simd_rd = 0; for(int i = xlen/8 - 1; i >= 0; i--){ - temp = RS1_B(i) & insn.p_zimm6(); + temp = RS1_B(i) & insn.p_simm6(); simd_rd <<= 8; simd_rd += (uint32_t)temp & 0x000000FF; } -WRITE_RD(zext_xlen(simd_rd)); +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_and_sci_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_and_sci_h.h index 80c617e65..fbd57d116 100644 --- a/toolchain/riscv-isa-sim/riscv/insns/pv_and_sci_h.h +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_and_sci_h.h @@ -2,8 +2,8 @@ uint8_t temp; uint32_t simd_rd = 0; for(int i = xlen/8 - 1; i >= 0; i--){ - temp = RS1_H(i) & insn.p_zimm6(); - simd_rd <<= 8; + temp = RS1_H(i) & insn.p_simm6(); + simd_rd <<= 16; simd_rd += (uint32_t)temp & 0x000000FF; } -WRITE_RD(zext_xlen(simd_rd)); +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_avg_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_avg_b.h index e35d95cfd..3d5d6d472 100644 --- a/toolchain/riscv-isa-sim/riscv/insns/pv_avg_b.h +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_avg_b.h @@ -2,7 +2,7 @@ int8_t temp; uint32_t simd_rd = 0; for(int i = xlen/8 - 1; i >= 0; i--){ - temp = (sext8(RS1_B(i)) + sext8(RS2_B(i))) >> 1; + temp = sext8(sext8(RS1_B(i)) + sext8(RS2_B(i))) >> 1; simd_rd <<= 8; simd_rd += (uint32_t)temp & 0x000000FF; } diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_avg_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_avg_h.h index 084b978c5..725f2f2e0 100644 --- a/toolchain/riscv-isa-sim/riscv/insns/pv_avg_h.h +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_avg_h.h @@ -2,7 +2,7 @@ int16_t temp; uint32_t 
simd_rd = 0; for(int i = xlen/16 - 1; i >= 0; i--){ - temp = (sext16(RS1_H(i)) + sext16(RS2_H(i))) >> 1; + temp = sext16(sext16(RS1_H(i)) + sext16(RS2_H(i))) >> 1; simd_rd <<= 16; simd_rd += (uint32_t)temp & 0x0000FFFF; } diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_avg_sc_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_avg_sc_b.h index 044f8450b..0b7d2f8d2 100644 --- a/toolchain/riscv-isa-sim/riscv/insns/pv_avg_sc_b.h +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_avg_sc_b.h @@ -2,7 +2,7 @@ int8_t temp; uint32_t simd_rd = 0; for(int i = xlen/8 - 1; i >= 0; i--){ - temp = (sext8(RS1_B(i)) + sext8(RS2_B(0))) >> 1; + temp = sext8(sext8(RS1_B(i)) + sext8(RS2_B(0))) >> 1; simd_rd <<= 8; simd_rd += (uint32_t)temp & 0x000000FF; } diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_avg_sc_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_avg_sc_h.h index f4877484a..8a6cb5e50 100644 --- a/toolchain/riscv-isa-sim/riscv/insns/pv_avg_sc_h.h +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_avg_sc_h.h @@ -2,7 +2,7 @@ int16_t temp; uint32_t simd_rd = 0; for(int i = xlen/16 - 1; i >= 0; i--){ - temp = (sext16(RS1_H(i)) + sext16(RS2_H(0))) >> 1; + temp = sext16(sext16(RS1_H(i)) + sext16(RS2_H(0))) >> 1; simd_rd <<= 16; simd_rd += (uint32_t)temp & 0x0000FFFF; } diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_avg_sci_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_avg_sci_b.h index 1c313fb2e..ff67065e3 100644 --- a/toolchain/riscv-isa-sim/riscv/insns/pv_avg_sci_b.h +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_avg_sci_b.h @@ -2,7 +2,7 @@ int8_t temp; uint32_t simd_rd = 0; for(int i = xlen/8 - 1; i >= 0; i--){ - temp = (sext8(RS1_B(i)) + insn.p_simm6()) >> 1; + temp = sext8(sext8(RS1_B(i)) + insn.p_simm6()) >> 1; simd_rd <<= 8; simd_rd += (uint32_t)temp & 0x000000FF; } diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_avg_sci_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_avg_sci_h.h index 7b9a834e6..f7deefd25 100644 --- a/toolchain/riscv-isa-sim/riscv/insns/pv_avg_sci_h.h +++ 
b/toolchain/riscv-isa-sim/riscv/insns/pv_avg_sci_h.h @@ -2,7 +2,7 @@ int16_t temp; uint32_t simd_rd = 0; for(int i = xlen/16 - 1; i >= 0; i--){ - temp = (sext16(RS1_H(i)) + insn.p_simm6()) >> 1; + temp = sext16(sext16(RS1_H(i)) + insn.p_simm6()) >> 1; simd_rd <<= 16; simd_rd += (uint32_t)temp & 0x0000FFFF; } diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_avgu_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_avgu_b.h index ca173f6c9..435c4d22c 100644 --- a/toolchain/riscv-isa-sim/riscv/insns/pv_avgu_b.h +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_avgu_b.h @@ -2,8 +2,8 @@ uint8_t temp; uint32_t simd_rd = 0; for(int i = xlen/8 - 1; i >= 0; i--){ - temp = (zext8(RS1_B(i)) + zext8(RS2_B(i))) >> 1; + temp = zext8(zext8(RS1_B(i)) + zext8(RS2_B(i))) >> 1; simd_rd <<= 8; simd_rd += (uint32_t)temp & 0x000000FF; } -WRITE_RD(zext_xlen(simd_rd)); +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_avgu_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_avgu_h.h index e1f517e5b..3fdbaf4dd 100644 --- a/toolchain/riscv-isa-sim/riscv/insns/pv_avgu_h.h +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_avgu_h.h @@ -2,8 +2,8 @@ uint16_t temp; uint32_t simd_rd = 0; for(int i = xlen/16 - 1; i >= 0; i--){ - temp = (zext16(RS1_H(i)) + zext16(RS2_H(i))) >> 1; + temp = zext16(zext16(RS1_H(i)) + zext16(RS2_H(i))) >> 1; simd_rd <<= 16; simd_rd += (uint32_t)temp & 0x0000FFFF; } -WRITE_RD(zext_xlen(simd_rd)); +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_avgu_sc_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_avgu_sc_b.h index cc8b5e1a0..47ca3888b 100644 --- a/toolchain/riscv-isa-sim/riscv/insns/pv_avgu_sc_b.h +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_avgu_sc_b.h @@ -2,8 +2,8 @@ uint8_t temp; uint32_t simd_rd = 0; for(int i = xlen/8 - 1; i >= 0; i--){ - temp = (zext8(RS1_B(i)) + zext8(RS2_B(0))) >> 1; + temp = zext8(zext8(RS1_B(i)) + zext8(RS2_B(0))) >> 1; simd_rd <<= 8; simd_rd += (uint32_t)temp & 0x000000FF; } 
-WRITE_RD(zext_xlen(simd_rd)); +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_avgu_sc_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_avgu_sc_h.h index 78173d664..0bf92f93b 100644 --- a/toolchain/riscv-isa-sim/riscv/insns/pv_avgu_sc_h.h +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_avgu_sc_h.h @@ -2,8 +2,8 @@ uint16_t temp; uint32_t simd_rd = 0; for(int i = xlen/16 - 1; i >= 0; i--){ - temp = (zext16(RS1_H(i)) + zext16(RS2_H(0))) >> 1; + temp = zext16(zext16(RS1_H(i)) + zext16(RS2_H(0))) >> 1; simd_rd <<= 16; simd_rd += (uint32_t)temp & 0x0000FFFF; } -WRITE_RD(zext_xlen(simd_rd)); +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_avgu_sci_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_avgu_sci_b.h index 86db408ee..fbc0dff92 100644 --- a/toolchain/riscv-isa-sim/riscv/insns/pv_avgu_sci_b.h +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_avgu_sci_b.h @@ -2,8 +2,8 @@ uint8_t temp; uint32_t simd_rd = 0; for(int i = xlen/8 - 1; i >= 0; i--){ - temp = (zext8(RS1_B(i)) + insn.p_zimm6()) >> 1; + temp = zext8(zext8(RS1_B(i)) + insn.p_zimm6()) >> 1; simd_rd <<= 8; simd_rd += (uint32_t)temp & 0x000000FF; } -WRITE_RD(zext_xlen(simd_rd)); +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_avgu_sci_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_avgu_sci_h.h index 13072157e..dd8cd3544 100644 --- a/toolchain/riscv-isa-sim/riscv/insns/pv_avgu_sci_h.h +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_avgu_sci_h.h @@ -2,8 +2,8 @@ uint16_t temp; uint32_t simd_rd = 0; for(int i = xlen/16 - 1; i >= 0; i--){ - temp = (zext16(RS1_H(i)) + insn.p_zimm6()) >> 1; + temp = zext16(zext16(RS1_H(i)) + insn.p_zimm6()) >> 1; simd_rd <<= 16; simd_rd += (uint32_t)temp & 0x0000FFFF; } -WRITE_RD(zext_xlen(simd_rd)); +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_dotup_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_dotup_b.h index a6b9fb1a9..fa77f3667 100644 --- 
a/toolchain/riscv-isa-sim/riscv/insns/pv_dotup_b.h +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_dotup_b.h @@ -3,4 +3,4 @@ uint32_t acc = 0; for(int i = xlen/8 - 1; i >= 0; i--) acc += zext8(RS1_B(i)) * zext8(RS2_B(i)); -WRITE_RD(zext_xlen(acc)); +WRITE_RD(sext_xlen(acc)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_dotup_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_dotup_h.h index c59ea6d49..4e170b238 100644 --- a/toolchain/riscv-isa-sim/riscv/insns/pv_dotup_h.h +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_dotup_h.h @@ -3,4 +3,4 @@ uint32_t acc = 0; for(int i = xlen/16 - 1; i >= 0; i--) acc += zext16(RS1_H(i)) * zext16(RS2_H(i)); -WRITE_RD(zext_xlen(acc)); +WRITE_RD(sext_xlen(acc)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_dotup_sc_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_dotup_sc_b.h index ab99cb0fb..a581d0162 100644 --- a/toolchain/riscv-isa-sim/riscv/insns/pv_dotup_sc_b.h +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_dotup_sc_b.h @@ -3,4 +3,4 @@ uint32_t acc = 0; for(int i = xlen/8 - 1; i >= 0; i--) acc += zext8(RS1_B(i)) * zext8(RS2_B(0)); -WRITE_RD(zext_xlen(acc)); +WRITE_RD(sext_xlen(acc)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_dotup_sc_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_dotup_sc_h.h index 63384d8b2..b78762a87 100644 --- a/toolchain/riscv-isa-sim/riscv/insns/pv_dotup_sc_h.h +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_dotup_sc_h.h @@ -3,4 +3,4 @@ uint32_t acc = 0; for(int i = xlen/16 - 1; i >= 0; i--) acc += zext16(RS1_H(i)) * zext16(RS2_H(0)); -WRITE_RD(zext_xlen(acc)); +WRITE_RD(sext_xlen(acc)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_dotup_sci_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_dotup_sci_b.h index dafa7e8be..0dedb1caf 100644 --- a/toolchain/riscv-isa-sim/riscv/insns/pv_dotup_sci_b.h +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_dotup_sci_b.h @@ -3,4 +3,4 @@ uint32_t acc = 0; for(int i = xlen/8 - 1; i >= 0; i--) acc += zext8(RS1_B(i)) * insn.p_zimm6(); -WRITE_RD(zext_xlen(acc)); 
+WRITE_RD(sext_xlen(acc)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_dotup_sci_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_dotup_sci_h.h index e38162320..64a36d569 100644 --- a/toolchain/riscv-isa-sim/riscv/insns/pv_dotup_sci_h.h +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_dotup_sci_h.h @@ -3,4 +3,4 @@ uint32_t acc = 0; for(int i = xlen/16 - 1; i >= 0; i--) acc += zext16(RS1_H(i)) * insn.p_zimm6(); -WRITE_RD(zext_xlen(acc)); +WRITE_RD(sext_xlen(acc)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_extract_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_extract_b.h index c7711e189..fce80bbb6 100644 --- a/toolchain/riscv-isa-sim/riscv/insns/pv_extract_b.h +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_extract_b.h @@ -1 +1 @@ -WRITE_RD(sext8(RS1_B(insn.p_zimm6() & 0x1))); +WRITE_RD(sext8(RS1_B(insn.p_zimm6() & 0x03))); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_extract_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_extract_h.h index dc4d6b9a5..ee35393d4 100644 --- a/toolchain/riscv-isa-sim/riscv/insns/pv_extract_h.h +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_extract_h.h @@ -1 +1 @@ -WRITE_RD(sext16(RS1_H(insn.p_zimm6() & 0x3))); +WRITE_RD(sext16(RS1_H(insn.p_zimm6() & 0x01))); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_extractu_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_extractu_b.h index 4c72191ea..c24023387 100644 --- a/toolchain/riscv-isa-sim/riscv/insns/pv_extractu_b.h +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_extractu_b.h @@ -1 +1 @@ -WRITE_RD(zext8(RS1_B(insn.p_zimm6() & 0x1))); +WRITE_RD(zext8(RS1_B(insn.p_zimm6() & 0x03))); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_extractu_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_extractu_h.h index f1192d1ef..90b679afd 100644 --- a/toolchain/riscv-isa-sim/riscv/insns/pv_extractu_h.h +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_extractu_h.h @@ -1 +1 @@ -WRITE_RD(zext16(RS1_H(insn.p_zimm6() & 0x3))); +WRITE_RD(zext16(RS1_H(insn.p_zimm6() & 0x01))); diff --git 
a/toolchain/riscv-isa-sim/riscv/insns/pv_insert_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_insert_b.h index 5c47662fb..5575e7967 100644 --- a/toolchain/riscv-isa-sim/riscv/insns/pv_insert_b.h +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_insert_b.h @@ -1 +1,6 @@ -WRITE_RD_B(insn.p_zimm6() & 0x1, RS1_B(0)); +uint32_t ins_rd = RD; +uint8_t i = insn.p_zimm6() & 0x03; /* select to which rd half to write the 16-bit value */ + +ins_rd = (ins_rd & ~(0xFF << ((xlen >> 2) * i))) | ((RS1_H(0) & 0xFF) << ((xlen >> 2) * i)); + +WRITE_RD(sext_xlen(ins_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_insert_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_insert_h.h index 094d1fe1d..eccb0eda6 100644 --- a/toolchain/riscv-isa-sim/riscv/insns/pv_insert_h.h +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_insert_h.h @@ -1 +1,6 @@ -WRITE_RD_H(insn.p_zimm6() & 0x3, RS1_H(0)); +uint32_t ins_rd = RD; +uint8_t i = insn.p_zimm6() & 0x01; /* select to which rd half to write the 16-bit value */ + +ins_rd = (ins_rd & ~(0xFFFF << ((xlen >> 1) * i))) | ((RS1_H(0) & 0xFFFF) << ((xlen >> 1) * i)); + +WRITE_RD(sext_xlen(ins_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_maxu_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_maxu_b.h index bd22a57e2..5821c1726 100644 --- a/toolchain/riscv-isa-sim/riscv/insns/pv_maxu_b.h +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_maxu_b.h @@ -6,4 +6,4 @@ for(int i = xlen/8 - 1; i >= 0; i--){ simd_rd <<= 8; simd_rd += (uint32_t)temp & 0x000000FF; } -WRITE_RD(zext_xlen(simd_rd)); +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_maxu_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_maxu_h.h index 898052c24..3e587c3c9 100644 --- a/toolchain/riscv-isa-sim/riscv/insns/pv_maxu_h.h +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_maxu_h.h @@ -6,4 +6,4 @@ for(int i = xlen/16 - 1; i >= 0; i--){ simd_rd <<= 16; simd_rd += (uint32_t)temp & 0x0000FFFF; } -WRITE_RD(zext_xlen(simd_rd)); +WRITE_RD(sext_xlen(simd_rd)); diff --git 
a/toolchain/riscv-isa-sim/riscv/insns/pv_maxu_sc_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_maxu_sc_b.h index 3c8b341b1..c297b87ab 100644 --- a/toolchain/riscv-isa-sim/riscv/insns/pv_maxu_sc_b.h +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_maxu_sc_b.h @@ -6,4 +6,4 @@ for(int i = xlen/8 - 1; i >= 0; i--){ simd_rd <<= 8; simd_rd += (uint32_t)temp & 0x000000FF; } -WRITE_RD(zext_xlen(simd_rd)); +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_maxu_sc_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_maxu_sc_h.h index 67cf56374..fbb5c7feb 100644 --- a/toolchain/riscv-isa-sim/riscv/insns/pv_maxu_sc_h.h +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_maxu_sc_h.h @@ -6,4 +6,4 @@ for(int i = xlen/16 - 1; i >= 0; i--){ simd_rd <<= 16; simd_rd += (uint32_t)temp & 0x0000FFFF; } -WRITE_RD(zext_xlen(simd_rd)); +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_maxu_sci_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_maxu_sci_b.h index 108883d46..ab5f6e5f9 100644 --- a/toolchain/riscv-isa-sim/riscv/insns/pv_maxu_sci_b.h +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_maxu_sci_b.h @@ -6,5 +6,5 @@ for(int i = xlen/8 - 1; i >= 0; i--){ simd_rd <<= 8; simd_rd += (uint32_t)temp & 0x000000FF; } -WRITE_RD(zext_xlen(simd_rd)); +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_maxu_sci_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_maxu_sci_h.h index 3ff763dd0..9aaf9effc 100644 --- a/toolchain/riscv-isa-sim/riscv/insns/pv_maxu_sci_h.h +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_maxu_sci_h.h @@ -6,4 +6,4 @@ for(int i = xlen/16 - 1; i >= 0; i--){ simd_rd <<= 16; simd_rd += (uint32_t)temp & 0x0000FFFF; } -WRITE_RD(zext_xlen(simd_rd)); +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_minu_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_minu_b.h index 606dcdefd..bbb92ca55 100644 --- a/toolchain/riscv-isa-sim/riscv/insns/pv_minu_b.h +++ 
b/toolchain/riscv-isa-sim/riscv/insns/pv_minu_b.h @@ -6,4 +6,4 @@ for(int i = xlen/8 - 1; i >= 0; i--){ simd_rd <<= 8; simd_rd += (uint32_t)temp & 0x000000FF; } -WRITE_RD(zext_xlen(simd_rd)); +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_minu_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_minu_h.h index 287cf9322..fa7b0a4e2 100644 --- a/toolchain/riscv-isa-sim/riscv/insns/pv_minu_h.h +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_minu_h.h @@ -6,4 +6,4 @@ for(int i = xlen/16 - 1; i >= 0; i--){ simd_rd <<= 16; simd_rd += (uint32_t)temp & 0x0000FFFF; } -WRITE_RD(zext_xlen(simd_rd)); +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_minu_sc_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_minu_sc_b.h index a8210a751..566bcce6d 100644 --- a/toolchain/riscv-isa-sim/riscv/insns/pv_minu_sc_b.h +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_minu_sc_b.h @@ -6,4 +6,4 @@ for(int i = xlen/8 - 1; i >= 0; i--){ simd_rd <<= 8; simd_rd += (uint32_t)temp & 0x000000FF; } -WRITE_RD(zext_xlen(simd_rd)); +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_minu_sc_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_minu_sc_h.h index 0c5d317f4..7471d9678 100644 --- a/toolchain/riscv-isa-sim/riscv/insns/pv_minu_sc_h.h +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_minu_sc_h.h @@ -6,4 +6,4 @@ for(int i = xlen/16 - 1; i >= 0; i--){ simd_rd <<= 16; simd_rd += (uint32_t)temp & 0x0000FFFF; } -WRITE_RD(zext_xlen(simd_rd)); +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_minu_sci_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_minu_sci_b.h index c2cacdf9a..75c43787c 100644 --- a/toolchain/riscv-isa-sim/riscv/insns/pv_minu_sci_b.h +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_minu_sci_b.h @@ -6,4 +6,4 @@ for(int i = xlen/8 - 1; i >= 0; i--){ simd_rd <<= 8; simd_rd += (uint32_t)temp & 0x000000FF; } -WRITE_RD(zext_xlen(simd_rd)); +WRITE_RD(sext_xlen(simd_rd)); diff --git 
a/toolchain/riscv-isa-sim/riscv/insns/pv_minu_sci_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_minu_sci_h.h index 90a1c2333..c665e92f4 100644 --- a/toolchain/riscv-isa-sim/riscv/insns/pv_minu_sci_h.h +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_minu_sci_h.h @@ -6,4 +6,4 @@ for(int i = xlen/16 - 1; i >= 0; i--){ simd_rd <<= 16; simd_rd += (uint32_t)temp & 0x0000FFFF; } -WRITE_RD(zext_xlen(simd_rd)); +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_or_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_or_b.h index a47117ee1..d27a6e5d1 100644 --- a/toolchain/riscv-isa-sim/riscv/insns/pv_or_b.h +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_or_b.h @@ -6,4 +6,4 @@ for(int i = xlen/8 - 1; i >= 0; i--){ simd_rd <<= 8; simd_rd += (uint32_t)temp & 0x000000FF; } -WRITE_RD(zext_xlen(simd_rd)); +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_or_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_or_h.h index df156074d..65b112893 100644 --- a/toolchain/riscv-isa-sim/riscv/insns/pv_or_h.h +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_or_h.h @@ -6,4 +6,4 @@ for(int i = xlen/16 - 1; i >= 0; i--){ simd_rd <<= 16; simd_rd += (uint32_t)temp & 0x0000FFFF; } -WRITE_RD(zext_xlen(simd_rd)); +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_or_sc_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_or_sc_b.h index be527e067..cac508744 100644 --- a/toolchain/riscv-isa-sim/riscv/insns/pv_or_sc_b.h +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_or_sc_b.h @@ -6,4 +6,4 @@ for(int i = xlen/8 - 1; i >= 0; i--){ simd_rd <<= 8; simd_rd += (uint32_t)temp & 0x000000FF; } -WRITE_RD(zext_xlen(simd_rd)); +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_or_sc_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_or_sc_h.h index 4ef4e475a..e6f567cf3 100644 --- a/toolchain/riscv-isa-sim/riscv/insns/pv_or_sc_h.h +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_or_sc_h.h @@ -6,4 +6,4 @@ for(int i = xlen/16 - 1; 
i >= 0; i--){ simd_rd <<= 16; simd_rd += (uint32_t)temp & 0x0000FFFF; } -WRITE_RD(zext_xlen(simd_rd)); +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_or_sci_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_or_sci_b.h index c5abaed13..0cb7b5cb6 100644 --- a/toolchain/riscv-isa-sim/riscv/insns/pv_or_sci_b.h +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_or_sci_b.h @@ -2,8 +2,8 @@ uint8_t temp; uint32_t simd_rd = 0; for(int i = xlen/8 - 1; i >= 0; i--){ - temp = RS1_B(i) | insn.p_zimm6(); + temp = RS1_B(i) | insn.p_simm6(); simd_rd <<= 8; simd_rd += (uint32_t)temp & 0x000000FF; } -WRITE_RD(zext_xlen(simd_rd)); +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_or_sci_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_or_sci_h.h index d9e383675..e95922e1e 100644 --- a/toolchain/riscv-isa-sim/riscv/insns/pv_or_sci_h.h +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_or_sci_h.h @@ -2,8 +2,8 @@ uint16_t temp; uint32_t simd_rd = 0; for(int i = xlen/16 - 1; i >= 0; i--){ - temp = RS1_H(i) | insn.p_zimm6(); + temp = RS1_H(i) | insn.p_simm6(); simd_rd <<= 16; simd_rd += (uint32_t)temp & 0x0000FFFF; } -WRITE_RD(zext_xlen(simd_rd)); +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_sdotup_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_sdotup_b.h index b5fe24fd1..82e47b4f8 100644 --- a/toolchain/riscv-isa-sim/riscv/insns/pv_sdotup_b.h +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_sdotup_b.h @@ -3,4 +3,4 @@ uint32_t acc = RD; for(int i = xlen/8 - 1; i >= 0; i--) acc += zext8(RS1_B(i)) * zext8(RS2_B(i)); -WRITE_RD(zext_xlen(acc)); +WRITE_RD(sext_xlen(acc)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_sdotup_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_sdotup_h.h index f552bb7b5..de77009a0 100644 --- a/toolchain/riscv-isa-sim/riscv/insns/pv_sdotup_h.h +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_sdotup_h.h @@ -3,4 +3,4 @@ uint32_t acc = RD; for(int i = xlen/16 - 1; i >= 0; i--) acc += 
zext16(RS1_H(i)) * zext16(RS2_H(i)); -WRITE_RD(zext_xlen(acc)); +WRITE_RD(sext_xlen(acc)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_sdotup_sc_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_sdotup_sc_b.h index ed41df260..717fffc11 100644 --- a/toolchain/riscv-isa-sim/riscv/insns/pv_sdotup_sc_b.h +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_sdotup_sc_b.h @@ -3,4 +3,4 @@ uint32_t acc = RD; for(int i = xlen/8 - 1; i >= 0; i--) acc += zext8(RS1_B(i)) * zext8(RS2_B(0)); -WRITE_RD(zext_xlen(acc)); +WRITE_RD(sext_xlen(acc)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_sdotup_sc_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_sdotup_sc_h.h index ad1e27e62..ecf048566 100644 --- a/toolchain/riscv-isa-sim/riscv/insns/pv_sdotup_sc_h.h +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_sdotup_sc_h.h @@ -3,4 +3,4 @@ uint32_t acc = RD; for(int i = xlen/16 - 1; i >= 0; i--) acc += zext16(RS1_H(i)) * zext16(RS2_H(0)); -WRITE_RD(zext_xlen(acc)); +WRITE_RD(sext_xlen(acc)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_sdotup_sci_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_sdotup_sci_b.h index 7a947b4b3..bd4d850e6 100644 --- a/toolchain/riscv-isa-sim/riscv/insns/pv_sdotup_sci_b.h +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_sdotup_sci_b.h @@ -3,4 +3,4 @@ uint32_t acc = RD; for(int i = xlen/8 - 1; i >= 0; i--) acc += zext8(RS1_B(i)) * insn.p_zimm6(); -WRITE_RD(zext_xlen(acc)); +WRITE_RD(sext_xlen(acc)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_sdotup_sci_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_sdotup_sci_h.h index 90550b66d..145e73717 100644 --- a/toolchain/riscv-isa-sim/riscv/insns/pv_sdotup_sci_h.h +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_sdotup_sci_h.h @@ -3,4 +3,4 @@ uint32_t acc = RD; for(int i = xlen/16 - 1; i >= 0; i--) acc += zext16(RS1_H(i)) * insn.p_zimm6(); -WRITE_RD(zext_xlen(acc)); +WRITE_RD(sext_xlen(acc)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_sll_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_sll_b.h index 
3d16dfc38..ca8bcd688 100644 --- a/toolchain/riscv-isa-sim/riscv/insns/pv_sll_b.h +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_sll_b.h @@ -2,8 +2,8 @@ uint8_t temp; uint32_t simd_rd = 0; for(int i = xlen/8 - 1; i >= 0; i--){ - temp = zext8(RS1_B(i)) << zext8(RS2_B(i)); + temp = zext8(RS1_B(i)) << (zext8(RS2_B(i)) & 0x07); simd_rd <<= 8; simd_rd += (uint32_t)temp & 0x000000FF; } -WRITE_RD(zext_xlen(simd_rd)); +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_sll_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_sll_h.h index 129ccc90c..cb9200cac 100644 --- a/toolchain/riscv-isa-sim/riscv/insns/pv_sll_h.h +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_sll_h.h @@ -2,8 +2,8 @@ uint16_t temp; uint32_t simd_rd = 0; for(int i = xlen/16 - 1; i >= 0; i--){ - temp = zext16(RS1_H(i)) << zext16(RS2_H(i)); + temp = zext16(RS1_H(i)) << (zext16(RS2_H(i)) & 0x0F); simd_rd <<= 16; simd_rd += (uint32_t)temp & 0x0000FFFF; } -WRITE_RD(zext_xlen(simd_rd)); +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_sll_sc_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_sll_sc_b.h index 13b2205b5..d32051998 100644 --- a/toolchain/riscv-isa-sim/riscv/insns/pv_sll_sc_b.h +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_sll_sc_b.h @@ -2,8 +2,8 @@ uint8_t temp; uint32_t simd_rd = 0; for(int i = xlen/8 - 1; i >= 0; i--){ - temp = zext8(RS1_B(i)) << zext8(RS2_B(0)); + temp = zext8(RS1_B(i)) << (zext8(RS2_B(0)) & 0x07); simd_rd <<= 8; simd_rd += (uint32_t)temp & 0x000000FF; } -WRITE_RD(zext_xlen(simd_rd)); +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_sll_sc_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_sll_sc_h.h index a5e40c80e..e84cf0214 100644 --- a/toolchain/riscv-isa-sim/riscv/insns/pv_sll_sc_h.h +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_sll_sc_h.h @@ -2,8 +2,8 @@ uint16_t temp; uint32_t simd_rd = 0; for(int i = xlen/16 - 1; i >= 0; i--){ - temp = zext16(RS1_H(i)) << zext16(RS2_H(0)); + temp = 
zext16(RS1_H(i)) << (zext16(RS2_H(0)) & 0x0F); simd_rd <<= 16; simd_rd += (uint32_t)temp & 0x0000FFFF; } -WRITE_RD(zext_xlen(simd_rd)); +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_sll_sci_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_sll_sci_b.h index 03379e44a..8e637bea8 100644 --- a/toolchain/riscv-isa-sim/riscv/insns/pv_sll_sci_b.h +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_sll_sci_b.h @@ -2,8 +2,8 @@ uint8_t temp; uint32_t simd_rd = 0; for(int i = xlen/8 - 1; i >= 0; i--){ - temp = zext8(RS1_B(i)) << insn.p_zimm6(); + temp = zext8(RS1_B(i)) << (insn.p_simm6() & 0x07); simd_rd <<= 8; simd_rd += (uint32_t)temp & 0x000000FF; } -WRITE_RD(zext_xlen(simd_rd)); +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_sll_sci_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_sll_sci_h.h index 6a0945df9..ec94a2e28 100644 --- a/toolchain/riscv-isa-sim/riscv/insns/pv_sll_sci_h.h +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_sll_sci_h.h @@ -2,8 +2,8 @@ uint16_t temp; uint32_t simd_rd = 0; for(int i = xlen/16 - 1; i >= 0; i--){ - temp = zext16(RS1_H(i)) << insn.p_zimm6(); + temp = zext16(RS1_H(i)) << (insn.p_simm6() & 0x0F); simd_rd <<= 16; simd_rd += (uint32_t)temp & 0x0000FFFF; } -WRITE_RD(zext_xlen(simd_rd)); +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_sra_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_sra_b.h index e69ef07e7..9525a0afc 100644 --- a/toolchain/riscv-isa-sim/riscv/insns/pv_sra_b.h +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_sra_b.h @@ -2,7 +2,7 @@ int8_t temp; uint32_t simd_rd = 0; for(int i = xlen/8 - 1; i >= 0; i--){ - temp = sext8(RS1_B(i)) >> zext8(RS2_B(i)); + temp = sext8(RS1_B(i)) >> (zext8(RS2_B(i)) & 0x07); simd_rd <<= 8; simd_rd += (uint32_t)temp & 0x000000FF; } diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_sra_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_sra_h.h index bf3c7cb75..b3e8a0b94 100644 --- 
a/toolchain/riscv-isa-sim/riscv/insns/pv_sra_h.h +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_sra_h.h @@ -2,7 +2,7 @@ int16_t temp; uint32_t simd_rd = 0; for(int i = xlen/16 - 1; i >= 0; i--){ - temp = sext8(RS1_H(i)) >> zext16(RS2_H(i)); + temp = sext16(RS1_H(i)) >> (zext16(RS2_H(i)) & 0x0F); simd_rd <<= 16; simd_rd += (uint32_t)temp & 0x0000FFFF; } diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_sra_sc_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_sra_sc_b.h index cd0e4409e..9442d9280 100644 --- a/toolchain/riscv-isa-sim/riscv/insns/pv_sra_sc_b.h +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_sra_sc_b.h @@ -2,7 +2,7 @@ int8_t temp; uint32_t simd_rd = 0; for(int i = xlen/8 - 1; i >= 0; i--){ - temp = sext8(RS1_B(i)) >> zext8(RS2_B(0)); + temp = sext8(RS1_B(i)) >> (zext8(RS2_B(0)) & 0x07); simd_rd <<= 8; simd_rd += (uint32_t)temp & 0x000000FF; } diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_sra_sc_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_sra_sc_h.h index 1a0e38d07..1e012f750 100644 --- a/toolchain/riscv-isa-sim/riscv/insns/pv_sra_sc_h.h +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_sra_sc_h.h @@ -2,7 +2,7 @@ int16_t temp; uint32_t simd_rd = 0; for(int i = xlen/16 - 1; i >= 0; i--){ - temp = sext8(RS1_H(i)) >> zext16(RS2_H(0)); + temp = sext16(RS1_H(i)) >> (zext16(RS2_H(0)) & 0x0F); simd_rd <<= 16; simd_rd += (uint32_t)temp & 0x0000FFFF; } diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_sra_sci_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_sra_sci_b.h index 2cf433784..3dafb3cb5 100644 --- a/toolchain/riscv-isa-sim/riscv/insns/pv_sra_sci_b.h +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_sra_sci_b.h @@ -2,7 +2,7 @@ int8_t temp; uint32_t simd_rd = 0; for(int i = xlen/8 - 1; i >= 0; i--){ - temp = sext8(RS1_B(i)) >> insn.p_zimm6(); + temp = sext8(RS1_B(i)) >> (insn.p_simm6() & 0x07); simd_rd <<= 8; simd_rd += (uint32_t)temp & 0x000000FF; } diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_sra_sci_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_sra_sci_h.h 
index 687b54608..4f56d0e5e 100644 --- a/toolchain/riscv-isa-sim/riscv/insns/pv_sra_sci_h.h +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_sra_sci_h.h @@ -2,7 +2,7 @@ int16_t temp; uint32_t simd_rd = 0; for(int i = xlen/16 - 1; i >= 0; i--){ - temp = sext8(RS1_H(i)) >> insn.p_zimm6(); + temp = sext16(RS1_H(i)) >> (insn.p_simm6() & 0x0F); simd_rd <<= 16; simd_rd += (uint32_t)temp & 0x0000FFFF; } diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_srl_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_srl_b.h index fc14f81ba..37be2e23a 100644 --- a/toolchain/riscv-isa-sim/riscv/insns/pv_srl_b.h +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_srl_b.h @@ -2,8 +2,8 @@ uint8_t temp; uint32_t simd_rd = 0; for(int i = xlen/8 - 1; i >= 0; i--){ - temp = zext8(RS1_B(i)) >> zext8(RS2_B(i)); + temp = zext8(RS1_B(i)) >> (zext8(RS2_B(i)) & 0x07); simd_rd <<= 8; simd_rd += (uint32_t)temp & 0x000000FF; } -WRITE_RD(zext_xlen(simd_rd)); +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_srl_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_srl_h.h index 5942550bb..1b35116d3 100644 --- a/toolchain/riscv-isa-sim/riscv/insns/pv_srl_h.h +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_srl_h.h @@ -2,8 +2,8 @@ uint16_t temp; uint32_t simd_rd = 0; for(int i = xlen/16 - 1; i >= 0; i--){ - temp = zext16(RS1_H(i)) >> zext16(RS2_H(i)); + temp = zext16(RS1_H(i)) >> (zext16(RS2_H(i)) & 0x0F); simd_rd <<= 16; simd_rd += (uint32_t)temp & 0x0000FFFF; } -WRITE_RD(zext_xlen(simd_rd)); +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_srl_sc_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_srl_sc_b.h index f12f828de..4b04ab6f7 100644 --- a/toolchain/riscv-isa-sim/riscv/insns/pv_srl_sc_b.h +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_srl_sc_b.h @@ -2,8 +2,8 @@ uint8_t temp; uint32_t simd_rd = 0; for(int i = xlen/8 - 1; i >= 0; i--){ - temp = zext8(RS1_B(i)) >> zext8(RS2_B(0)); + temp = zext8(RS1_B(i)) >> (zext8(RS2_B(0)) & 0x07); simd_rd <<= 8; simd_rd += 
(uint32_t)temp & 0x000000FF; } -WRITE_RD(zext_xlen(simd_rd)); +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_srl_sc_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_srl_sc_h.h index 477b1df98..f49f784db 100644 --- a/toolchain/riscv-isa-sim/riscv/insns/pv_srl_sc_h.h +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_srl_sc_h.h @@ -2,8 +2,8 @@ uint16_t temp; uint32_t simd_rd = 0; for(int i = xlen/16 - 1; i >= 0; i--){ - temp = zext16(RS1_H(i)) >> zext16(RS2_H(0)); + temp = zext16(RS1_H(i)) >> (zext16(RS2_H(0)) & 0x0F); simd_rd <<= 16; simd_rd += (uint32_t)temp & 0x0000FFFF; } -WRITE_RD(zext_xlen(simd_rd)); +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_srl_sci_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_srl_sci_b.h index 9edaac0b8..b0b38f2a9 100644 --- a/toolchain/riscv-isa-sim/riscv/insns/pv_srl_sci_b.h +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_srl_sci_b.h @@ -2,8 +2,8 @@ uint8_t temp; uint32_t simd_rd = 0; for(int i = xlen/8 - 1; i >= 0; i--){ - temp = zext8(RS1_B(i)) >> insn.p_zimm6(); + temp = zext8(RS1_B(i)) >> (insn.p_simm6() & 0x07); simd_rd <<= 8; simd_rd += (uint32_t)temp & 0x000000FF; } -WRITE_RD(zext_xlen(simd_rd)); +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_srl_sci_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_srl_sci_h.h index 12755f62a..5aba29cc9 100644 --- a/toolchain/riscv-isa-sim/riscv/insns/pv_srl_sci_h.h +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_srl_sci_h.h @@ -2,8 +2,8 @@ uint16_t temp; uint32_t simd_rd = 0; for(int i = xlen/16 - 1; i >= 0; i--){ - temp = zext16(RS1_H(i)) >> insn.p_zimm6(); + temp = zext16(RS1_H(i)) >> (insn.p_simm6() & 0x0F); simd_rd <<= 16; simd_rd += (uint32_t)temp & 0x0000FFFF; } -WRITE_RD(zext_xlen(simd_rd)); +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_xor_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_xor_b.h index 307beb5b3..2fc203b4d 100644 --- 
a/toolchain/riscv-isa-sim/riscv/insns/pv_xor_b.h +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_xor_b.h @@ -6,4 +6,4 @@ for(int i = xlen/8 - 1; i >= 0; i--){ simd_rd <<= 8; simd_rd += (uint32_t)temp & 0x000000FF; } -WRITE_RD(zext_xlen(simd_rd)); +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_xor_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_xor_h.h index 30b0be448..56cf0b7c9 100644 --- a/toolchain/riscv-isa-sim/riscv/insns/pv_xor_h.h +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_xor_h.h @@ -6,4 +6,4 @@ for(int i = xlen/16 - 1; i >= 0; i--){ simd_rd <<= 16; simd_rd += (uint32_t)temp & 0x0000FFFF; } -WRITE_RD(zext_xlen(simd_rd)); +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_xor_sc_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_xor_sc_b.h index f2e53bf82..ed3d5075a 100644 --- a/toolchain/riscv-isa-sim/riscv/insns/pv_xor_sc_b.h +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_xor_sc_b.h @@ -6,4 +6,4 @@ for(int i = xlen/8 - 1; i >= 0; i--){ simd_rd <<= 8; simd_rd += (uint32_t)temp & 0x000000FF; } -WRITE_RD(zext_xlen(simd_rd)); +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_xor_sc_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_xor_sc_h.h index 69bbeae9b..9d632f367 100644 --- a/toolchain/riscv-isa-sim/riscv/insns/pv_xor_sc_h.h +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_xor_sc_h.h @@ -6,4 +6,4 @@ for(int i = xlen/16 - 1; i >= 0; i--){ simd_rd <<= 16; simd_rd += (uint32_t)temp & 0x0000FFFF; } -WRITE_RD(zext_xlen(simd_rd)); +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_xor_sci_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_xor_sci_b.h index 5f765e609..7ecbf94fc 100644 --- a/toolchain/riscv-isa-sim/riscv/insns/pv_xor_sci_b.h +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_xor_sci_b.h @@ -2,8 +2,8 @@ uint8_t temp; uint32_t simd_rd = 0; for(int i = xlen/8 - 1; i >= 0; i--){ - temp = RS1_B(i) ^ insn.p_zimm6(); + temp = RS1_B(i) ^ insn.p_simm6(); 
simd_rd <<= 8; simd_rd += (uint32_t)temp & 0x000000FF; } -WRITE_RD(zext_xlen(simd_rd)); +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_xor_sci_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_xor_sci_h.h index ed0c46e06..0a02ced60 100644 --- a/toolchain/riscv-isa-sim/riscv/insns/pv_xor_sci_h.h +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_xor_sci_h.h @@ -2,8 +2,8 @@ uint16_t temp; uint32_t simd_rd = 0; for(int i = xlen/16 - 1; i >= 0; i--){ - temp = RS1_H(i) ^ insn.p_zimm6(); + temp = RS1_H(i) ^ insn.p_simm6(); simd_rd <<= 16; simd_rd += (uint32_t)temp & 0x0000FFFF; } -WRITE_RD(zext_xlen(simd_rd)); +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/riscv.mk.in b/toolchain/riscv-isa-sim/riscv/riscv.mk.in index d49a98a8e..ec3468723 100644 --- a/toolchain/riscv-isa-sim/riscv/riscv.mk.in +++ b/toolchain/riscv-isa-sim/riscv/riscv.mk.in @@ -333,14 +333,23 @@ riscv_insn_ext_q = \ fsqrt_q \ fsub_q \ +# Disabled riscv_insn_ext_v_alu_int instructions for opcode overlap: +#vasubu_vx +#vslide1up_vx +#vaaddu_vx +#vadc_vvm +#vadc_vxm +#vsbc_vvm +#vsbc_vxm +#vmulhu_vx +#vdivu_vx +#vmulhsu_vx + riscv_insn_ext_v_alu_int = \ vaadd_vv \ vaaddu_vv \ vaadd_vx \ - vaaddu_vx \ vadc_vim \ - vadc_vvm \ - vadc_vxm \ vadd_vi \ vadd_vv \ vadd_vx \ @@ -350,12 +359,10 @@ riscv_insn_ext_v_alu_int = \ vasub_vv \ vasubu_vv \ vasub_vx \ - vasubu_vx \ vcompress_vm \ vdiv_vv \ vdiv_vx \ vdivu_vv \ - vdivu_vx \ vdot_vv \ vdotu_vv \ vid_v \ @@ -416,9 +423,7 @@ riscv_insn_ext_v_alu_int = \ vmulh_vv \ vmulh_vx \ vmulhsu_vv \ - vmulhsu_vx \ vmulhu_vv \ - vmulhu_vx \ vmv_s_x \ vmv_v_i \ vmv_v_v \ @@ -473,13 +478,10 @@ riscv_insn_ext_v_alu_int = \ vsaddu_vi \ vsaddu_vv \ vsaddu_vx \ - vsbc_vvm \ - vsbc_vxm \ vsext_vf2 \ vsext_vf4 \ vsext_vf8 \ vslide1down_vx \ - vslide1up_vx \ vslidedown_vi \ vslidedown_vx \ vslideup_vi \ @@ -545,6 +547,9 @@ riscv_insn_ext_v_alu_int = \ vzext_vf4 \ vzext_vf8 \ +# Disabled riscv_insn_ext_v_alu_fp instructions for opcode 
overlap: +#vfcvt_x_f_v + riscv_insn_ext_v_alu_fp = \ vfadd_vf \ vfadd_vv \ @@ -553,7 +558,6 @@ riscv_insn_ext_v_alu_fp = \ vfcvt_f_xu_v \ vfcvt_rtz_x_f_v \ vfcvt_rtz_xu_f_v \ - vfcvt_x_f_v \ vfcvt_xu_f_v \ vfdiv_vf \ vfdiv_vv \ @@ -741,9 +745,11 @@ riscv_insn_ext_v_ldst = \ vs4r_v \ vs8r_v \ +# Disabled riscv_insn_ext_v_ctrl instructions for opcode overlap: +#vsetvl + riscv_insn_ext_v_ctrl = \ vsetvli \ - vsetvl \ riscv_insn_ext_v = \ $(riscv_insn_ext_v_alu_fp) \ From 3422de434563a51675650d1ae8fb9a3e94c802e6 Mon Sep 17 00:00:00 2001 From: Sergio Mazzola Date: Mon, 18 Jan 2021 16:39:32 +0100 Subject: [PATCH 27/65] [riscv-tests] Verify Xpulpv2 SIMD test cases --- apps/riscv-tests/isa/rv32uxpulpimg/pv_avg.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/pv_avg.S b/apps/riscv-tests/isa/rv32uxpulpimg/pv_avg.S index 030fa69d5..0a7e1ede3 100644 --- a/apps/riscv-tests/isa/rv32uxpulpimg/pv_avg.S +++ b/apps/riscv-tests/isa/rv32uxpulpimg/pv_avg.S @@ -20,7 +20,7 @@ RVTEST_CODE_BEGIN # pv.avg.h TEST_RR_OP( 2, pv.avg.h, 0xDFA53D57, 0x2C5F4D25, 0x92EC2D89 ); TEST_RR_OP( 3, pv.avg.h, 0x18A2C49C, 0xD09FBFB6, 0x60A5C983 ); - TEST_RR_OP( 4, pv.avg.h, 0xD290A560, 0xE37F8F8F, 0xC1A2BB32 ); + TEST_RR_OP( 4, pv.avg.h, 0xD2902560, 0xE37F8F8F, 0xC1A2BB32 ); # pv.avg.sc.h TEST_RR_OP( 5, pv.avg.sc.h, 0xF8B0DF51, 0x6A263768, 0xD18D873A ); TEST_RR_OP( 6, pv.avg.sc.h, 0x29B50628, 0xDA3A9320, 0xDB667930 ); From 62043833c3b4061ff06fe111c3067e020de4041e Mon Sep 17 00:00:00 2001 From: Sergio Mazzola Date: Mon, 18 Jan 2021 21:06:08 +0100 Subject: [PATCH 28/65] [snitch] Implement SIMD dotp and verify all SIMD instructions --- hardware/deps/snitch/src/snitch.sv | 254 ++++---- hardware/deps/snitch/src/snitch_ipu.sv | 792 ++++++++++++++++--------- 2 files changed, 670 insertions(+), 376 deletions(-) diff --git a/hardware/deps/snitch/src/snitch.sv b/hardware/deps/snitch/src/snitch.sv index 08025fcaa..27eef254a 100644 --- 
a/hardware/deps/snitch/src/snitch.sv +++ b/hardware/deps/snitch/src/snitch.sv @@ -1100,47 +1100,53 @@ module snitch #( end // Off-load to IPU coprocessor // 1 source register (rs1) - riscv_instr::P_ABS, // Xpulpimg: p.abs - riscv_instr::P_EXTHS, // Xpulpimg: p.exths - riscv_instr::P_EXTHZ, // Xpulpimg: p.exthz - riscv_instr::P_EXTBS, // Xpulpimg: p.extbs - riscv_instr::P_EXTBZ, // Xpulpimg: p.extbz - riscv_instr::P_CLIP, // Xpulpimg: p.clip - riscv_instr::P_CLIPU, // Xpulpimg: p.clipu - riscv_instr::PV_ADD_SCI_H, // Xpulpimg: pv.add.sci.h - riscv_instr::PV_ADD_SCI_B, // Xpulpimg: pv.add.sci.b - riscv_instr::PV_SUB_SCI_H, // Xpulpimg: pv.sub.sci.h - riscv_instr::PV_SUB_SCI_B, // Xpulpimg: pv.sub.sci.b - riscv_instr::PV_AVG_SCI_H, // Xpulpimg: pv.avg.sci.h - riscv_instr::PV_AVG_SCI_B, // Xpulpimg: pv.avg.sci.b - riscv_instr::PV_AVGU_SCI_H, // Xpulpimg: pv.avgu.sci.h - riscv_instr::PV_AVGU_SCI_B, // Xpulpimg: pv.avgu.sci.b - riscv_instr::PV_MIN_SCI_H, // Xpulpimg: pv.min.sci.h - riscv_instr::PV_MIN_SCI_B, // Xpulpimg: pv.min.sci.b - riscv_instr::PV_MINU_SCI_H, // Xpulpimg: pv.minu.sci.h - riscv_instr::PV_MINU_SCI_B, // Xpulpimg: pv.minu.sci.b - riscv_instr::PV_MAX_SCI_H, // Xpulpimg: pv.max.sci.h - riscv_instr::PV_MAX_SCI_B, // Xpulpimg: pv.max.sci.b - riscv_instr::PV_MAXU_SCI_H, // Xpulpimg: pv.maxu.sci.h - riscv_instr::PV_MAXU_SCI_B, // Xpulpimg: pv.maxu.sci.b - riscv_instr::PV_SRL_SCI_H, // Xpulpimg: pv.srl.sci.h - riscv_instr::PV_SRL_SCI_B, // Xpulpimg: pv.srl.sci.b - riscv_instr::PV_SRA_SCI_H, // Xpulpimg: pv.sra.sci.h - riscv_instr::PV_SRA_SCI_B, // Xpulpimg: pv.sra.sci.b - riscv_instr::PV_SLL_SCI_H, // Xpulpimg: pv.sll.sci.h - riscv_instr::PV_SLL_SCI_B, // Xpulpimg: pv.sll.sci.b - riscv_instr::PV_OR_SCI_H, // Xpulpimg: pv.or.sci.h - riscv_instr::PV_OR_SCI_B, // Xpulpimg: pv.or.sci.b - riscv_instr::PV_XOR_SCI_H, // Xpulpimg: pv.xor.sci.h - riscv_instr::PV_XOR_SCI_B, // Xpulpimg: pv.xor.sci.b - riscv_instr::PV_AND_SCI_B, // Xpulpimg: pv.and.sci.b - 
riscv_instr::PV_AND_SCI_H, // Xpulpimg: pv.and.sci.h - riscv_instr::PV_ABS_H, // Xpulpimg: pv.abs.h - riscv_instr::PV_ABS_B, // Xpulpimg: pv.abs.b - riscv_instr::PV_EXTRACT_H, // Xpulpimg: pv.extract.h - riscv_instr::PV_EXTRACT_B, // Xpulpimg: pv.extract.b - riscv_instr::PV_EXTRACTU_H, // Xpulpimg: pv.extractu.h - riscv_instr::PV_EXTRACTU_B: begin // Xpulpimg: pv.extractu.b + riscv_instr::P_ABS, // Xpulpimg: p.abs + riscv_instr::P_EXTHS, // Xpulpimg: p.exths + riscv_instr::P_EXTHZ, // Xpulpimg: p.exthz + riscv_instr::P_EXTBS, // Xpulpimg: p.extbs + riscv_instr::P_EXTBZ, // Xpulpimg: p.extbz + riscv_instr::P_CLIP, // Xpulpimg: p.clip + riscv_instr::P_CLIPU, // Xpulpimg: p.clipu + riscv_instr::PV_ADD_SCI_H, // Xpulpimg: pv.add.sci.h + riscv_instr::PV_ADD_SCI_B, // Xpulpimg: pv.add.sci.b + riscv_instr::PV_SUB_SCI_H, // Xpulpimg: pv.sub.sci.h + riscv_instr::PV_SUB_SCI_B, // Xpulpimg: pv.sub.sci.b + riscv_instr::PV_AVG_SCI_H, // Xpulpimg: pv.avg.sci.h + riscv_instr::PV_AVG_SCI_B, // Xpulpimg: pv.avg.sci.b + riscv_instr::PV_AVGU_SCI_H, // Xpulpimg: pv.avgu.sci.h + riscv_instr::PV_AVGU_SCI_B, // Xpulpimg: pv.avgu.sci.b + riscv_instr::PV_MIN_SCI_H, // Xpulpimg: pv.min.sci.h + riscv_instr::PV_MIN_SCI_B, // Xpulpimg: pv.min.sci.b + riscv_instr::PV_MINU_SCI_H, // Xpulpimg: pv.minu.sci.h + riscv_instr::PV_MINU_SCI_B, // Xpulpimg: pv.minu.sci.b + riscv_instr::PV_MAX_SCI_H, // Xpulpimg: pv.max.sci.h + riscv_instr::PV_MAX_SCI_B, // Xpulpimg: pv.max.sci.b + riscv_instr::PV_MAXU_SCI_H, // Xpulpimg: pv.maxu.sci.h + riscv_instr::PV_MAXU_SCI_B, // Xpulpimg: pv.maxu.sci.b + riscv_instr::PV_SRL_SCI_H, // Xpulpimg: pv.srl.sci.h + riscv_instr::PV_SRL_SCI_B, // Xpulpimg: pv.srl.sci.b + riscv_instr::PV_SRA_SCI_H, // Xpulpimg: pv.sra.sci.h + riscv_instr::PV_SRA_SCI_B, // Xpulpimg: pv.sra.sci.b + riscv_instr::PV_SLL_SCI_H, // Xpulpimg: pv.sll.sci.h + riscv_instr::PV_SLL_SCI_B, // Xpulpimg: pv.sll.sci.b + riscv_instr::PV_OR_SCI_H, // Xpulpimg: pv.or.sci.h + riscv_instr::PV_OR_SCI_B, // 
Xpulpimg: pv.or.sci.b + riscv_instr::PV_XOR_SCI_H, // Xpulpimg: pv.xor.sci.h + riscv_instr::PV_XOR_SCI_B, // Xpulpimg: pv.xor.sci.b + riscv_instr::PV_AND_SCI_B, // Xpulpimg: pv.and.sci.b + riscv_instr::PV_AND_SCI_H, // Xpulpimg: pv.and.sci.h + riscv_instr::PV_ABS_H, // Xpulpimg: pv.abs.h + riscv_instr::PV_ABS_B, // Xpulpimg: pv.abs.b + riscv_instr::PV_EXTRACT_H, // Xpulpimg: pv.extract.h + riscv_instr::PV_EXTRACT_B, // Xpulpimg: pv.extract.b + riscv_instr::PV_EXTRACTU_H, // Xpulpimg: pv.extractu.h + riscv_instr::PV_EXTRACTU_B, // Xpulpimg: pv.extractu.b + riscv_instr::PV_DOTUP_SCI_H, // Xpulpimg: pv.dotup.sci.h + riscv_instr::PV_DOTUP_SCI_B, // Xpulpimg: pv.dotup.sci.b + riscv_instr::PV_DOTUSP_SCI_H, // Xpulpimg: pv.dotusp.sci.h + riscv_instr::PV_DOTUSP_SCI_B, // Xpulpimg: pv.dotusp.sci.b + riscv_instr::PV_DOTSP_SCI_H, // Xpulpimg: pv.dotsp.sci.h + riscv_instr::PV_DOTSP_SCI_B: begin // Xpulpimg: pv.dotsp.sci.b if (snitch_pkg::XPULPIMG) begin write_rd = 1'b0; uses_rd = 1'b1; @@ -1152,70 +1158,82 @@ module snitch #( end end // 2 source registers (rs1, rs2) - riscv_instr::P_SLET, // Xpulpimg: p.slet - riscv_instr::P_SLETU, // Xpulpimg: p.sletu - riscv_instr::P_MIN, // Xpulpimg: p.min - riscv_instr::P_MINU, // Xpulpimg: p.minu - riscv_instr::P_MAX, // Xpulpimg: p.max - riscv_instr::P_MAXU, // Xpulpimg: p.maxu - riscv_instr::P_CLIPR, // Xpulpimg: p.clipr - riscv_instr::P_CLIPUR, // Xpulpimg: p.clipur - riscv_instr::PV_ADD_H, // Xpulpimg: pv.add.h - riscv_instr::PV_ADD_SC_H, // Xpulpimg: pv.add.sc.h - riscv_instr::PV_ADD_B, // Xpulpimg: pv.add.b - riscv_instr::PV_ADD_SC_B, // Xpulpimg: pv.add.sc.b - riscv_instr::PV_SUB_H, // Xpulpimg: pv.sub.h - riscv_instr::PV_SUB_SC_H, // Xpulpimg: pv.sub.sc.h - riscv_instr::PV_SUB_B, // Xpulpimg: pv.sub.b - riscv_instr::PV_SUB_SC_B, // Xpulpimg: pv.sub.sc.b - riscv_instr::PV_AVG_H, // Xpulpimg: pv.avg.h - riscv_instr::PV_AVG_SC_H, // Xpulpimg: pv.avg.sc.h - riscv_instr::PV_AVG_B, // Xpulpimg: pv.avg.b - riscv_instr::PV_AVG_SC_B, // 
Xpulpimg: pv.avg.sc.b - riscv_instr::PV_AVGU_H, // Xpulpimg: pv.avgu.h - riscv_instr::PV_AVGU_SC_H, // Xpulpimg: pv.avgu.sc.h - riscv_instr::PV_AVGU_B, // Xpulpimg: pv.avgu.b - riscv_instr::PV_AVGU_SC_B, // Xpulpimg: pv.avgu.sc.b - riscv_instr::PV_MIN_H, // Xpulpimg: pv.min.h - riscv_instr::PV_MIN_SC_H, // Xpulpimg: pv.min.sc.h - riscv_instr::PV_MIN_B, // Xpulpimg: pv.min.b - riscv_instr::PV_MIN_SC_B, // Xpulpimg: pv.min.sc.b - riscv_instr::PV_MINU_H, // Xpulpimg: pv.minu.h - riscv_instr::PV_MINU_SC_H, // Xpulpimg: pv.minu.sc.h - riscv_instr::PV_MINU_B, // Xpulpimg: pv.minu.b - riscv_instr::PV_MINU_SC_B, // Xpulpimg: pv.minu.sc.b - riscv_instr::PV_MAX_H, // Xpulpimg: pv.max.h - riscv_instr::PV_MAX_SC_H, // Xpulpimg: pv.max.sc.h - riscv_instr::PV_MAX_B, // Xpulpimg: pv.max.b - riscv_instr::PV_MAX_SC_B, // Xpulpimg: pv.max.sc.b - riscv_instr::PV_MAXU_H, // Xpulpimg: pv.maxu.h - riscv_instr::PV_MAXU_SC_H, // Xpulpimg: pv.maxu.sc.h - riscv_instr::PV_MAXU_B, // Xpulpimg: pv.maxu.b - riscv_instr::PV_MAXU_SC_B, // Xpulpimg: pv.maxu.sc.b - riscv_instr::PV_SRL_H, // Xpulpimg: pv.srl.h - riscv_instr::PV_SRL_SC_H, // Xpulpimg: pv.srl.sc.h - riscv_instr::PV_SRL_B, // Xpulpimg: pv.srl.b - riscv_instr::PV_SRL_SC_B, // Xpulpimg: pv.srl.sc.b - riscv_instr::PV_SRA_H, // Xpulpimg: pv.sra.h - riscv_instr::PV_SRA_SC_H, // Xpulpimg: pv.sra.sc.h - riscv_instr::PV_SRA_B, // Xpulpimg: pv.sra.b - riscv_instr::PV_SRA_SC_B, // Xpulpimg: pv.sra.sc.b - riscv_instr::PV_SLL_H, // Xpulpimg: pv.sll.h - riscv_instr::PV_SLL_SC_H, // Xpulpimg: pv.sll.sc.h - riscv_instr::PV_SLL_B, // Xpulpimg: pv.sll.b - riscv_instr::PV_SLL_SC_B, // Xpulpimg: pv.sll.sc.b - riscv_instr::PV_OR_H, // Xpulpimg: pv.or.h - riscv_instr::PV_OR_SC_H, // Xpulpimg: pv.or.sc.h - riscv_instr::PV_OR_B, // Xpulpimg: pv.or.b - riscv_instr::PV_OR_SC_B, // Xpulpimg: pv.or.sc.b - riscv_instr::PV_XOR_H, // Xpulpimg: pv.xor.h - riscv_instr::PV_XOR_SC_H, // Xpulpimg: pv.xor.sc.h - riscv_instr::PV_XOR_B, // Xpulpimg: pv.xor.b - 
riscv_instr::PV_XOR_SC_B, // Xpulpimg: pv.xor.sc.b - riscv_instr::PV_AND_H, // Xpulpimg: pv.and.h - riscv_instr::PV_AND_SC_H, // Xpulpimg: pv.and.sc.h - riscv_instr::PV_AND_B, // Xpulpimg: pv.and.b - riscv_instr::PV_AND_SC_B: begin // Xpulpimg: pv.and.sc.b + riscv_instr::P_SLET, // Xpulpimg: p.slet + riscv_instr::P_SLETU, // Xpulpimg: p.sletu + riscv_instr::P_MIN, // Xpulpimg: p.min + riscv_instr::P_MINU, // Xpulpimg: p.minu + riscv_instr::P_MAX, // Xpulpimg: p.max + riscv_instr::P_MAXU, // Xpulpimg: p.maxu + riscv_instr::P_CLIPR, // Xpulpimg: p.clipr + riscv_instr::P_CLIPUR, // Xpulpimg: p.clipur + riscv_instr::PV_ADD_H, // Xpulpimg: pv.add.h + riscv_instr::PV_ADD_SC_H, // Xpulpimg: pv.add.sc.h + riscv_instr::PV_ADD_B, // Xpulpimg: pv.add.b + riscv_instr::PV_ADD_SC_B, // Xpulpimg: pv.add.sc.b + riscv_instr::PV_SUB_H, // Xpulpimg: pv.sub.h + riscv_instr::PV_SUB_SC_H, // Xpulpimg: pv.sub.sc.h + riscv_instr::PV_SUB_B, // Xpulpimg: pv.sub.b + riscv_instr::PV_SUB_SC_B, // Xpulpimg: pv.sub.sc.b + riscv_instr::PV_AVG_H, // Xpulpimg: pv.avg.h + riscv_instr::PV_AVG_SC_H, // Xpulpimg: pv.avg.sc.h + riscv_instr::PV_AVG_B, // Xpulpimg: pv.avg.b + riscv_instr::PV_AVG_SC_B, // Xpulpimg: pv.avg.sc.b + riscv_instr::PV_AVGU_H, // Xpulpimg: pv.avgu.h + riscv_instr::PV_AVGU_SC_H, // Xpulpimg: pv.avgu.sc.h + riscv_instr::PV_AVGU_B, // Xpulpimg: pv.avgu.b + riscv_instr::PV_AVGU_SC_B, // Xpulpimg: pv.avgu.sc.b + riscv_instr::PV_MIN_H, // Xpulpimg: pv.min.h + riscv_instr::PV_MIN_SC_H, // Xpulpimg: pv.min.sc.h + riscv_instr::PV_MIN_B, // Xpulpimg: pv.min.b + riscv_instr::PV_MIN_SC_B, // Xpulpimg: pv.min.sc.b + riscv_instr::PV_MINU_H, // Xpulpimg: pv.minu.h + riscv_instr::PV_MINU_SC_H, // Xpulpimg: pv.minu.sc.h + riscv_instr::PV_MINU_B, // Xpulpimg: pv.minu.b + riscv_instr::PV_MINU_SC_B, // Xpulpimg: pv.minu.sc.b + riscv_instr::PV_MAX_H, // Xpulpimg: pv.max.h + riscv_instr::PV_MAX_SC_H, // Xpulpimg: pv.max.sc.h + riscv_instr::PV_MAX_B, // Xpulpimg: pv.max.b + riscv_instr::PV_MAX_SC_B, // 
Xpulpimg: pv.max.sc.b + riscv_instr::PV_MAXU_H, // Xpulpimg: pv.maxu.h + riscv_instr::PV_MAXU_SC_H, // Xpulpimg: pv.maxu.sc.h + riscv_instr::PV_MAXU_B, // Xpulpimg: pv.maxu.b + riscv_instr::PV_MAXU_SC_B, // Xpulpimg: pv.maxu.sc.b + riscv_instr::PV_SRL_H, // Xpulpimg: pv.srl.h + riscv_instr::PV_SRL_SC_H, // Xpulpimg: pv.srl.sc.h + riscv_instr::PV_SRL_B, // Xpulpimg: pv.srl.b + riscv_instr::PV_SRL_SC_B, // Xpulpimg: pv.srl.sc.b + riscv_instr::PV_SRA_H, // Xpulpimg: pv.sra.h + riscv_instr::PV_SRA_SC_H, // Xpulpimg: pv.sra.sc.h + riscv_instr::PV_SRA_B, // Xpulpimg: pv.sra.b + riscv_instr::PV_SRA_SC_B, // Xpulpimg: pv.sra.sc.b + riscv_instr::PV_SLL_H, // Xpulpimg: pv.sll.h + riscv_instr::PV_SLL_SC_H, // Xpulpimg: pv.sll.sc.h + riscv_instr::PV_SLL_B, // Xpulpimg: pv.sll.b + riscv_instr::PV_SLL_SC_B, // Xpulpimg: pv.sll.sc.b + riscv_instr::PV_OR_H, // Xpulpimg: pv.or.h + riscv_instr::PV_OR_SC_H, // Xpulpimg: pv.or.sc.h + riscv_instr::PV_OR_B, // Xpulpimg: pv.or.b + riscv_instr::PV_OR_SC_B, // Xpulpimg: pv.or.sc.b + riscv_instr::PV_XOR_H, // Xpulpimg: pv.xor.h + riscv_instr::PV_XOR_SC_H, // Xpulpimg: pv.xor.sc.h + riscv_instr::PV_XOR_B, // Xpulpimg: pv.xor.b + riscv_instr::PV_XOR_SC_B, // Xpulpimg: pv.xor.sc.b + riscv_instr::PV_AND_H, // Xpulpimg: pv.and.h + riscv_instr::PV_AND_SC_H, // Xpulpimg: pv.and.sc.h + riscv_instr::PV_AND_B, // Xpulpimg: pv.and.b + riscv_instr::PV_AND_SC_B, // Xpulpimg: pv.and.sc.b + riscv_instr::PV_DOTUP_H, // Xpulpimg: pv.dotup.h + riscv_instr::PV_DOTUP_SC_H, // Xpulpimg: pv.dotup.sc.h + riscv_instr::PV_DOTUP_B, // Xpulpimg: pv.dotup.b + riscv_instr::PV_DOTUP_SC_B, // Xpulpimg: pv.dotup.sc.b + riscv_instr::PV_DOTUSP_H, // Xpulpimg: pv.dotusp.h + riscv_instr::PV_DOTUSP_SC_H, // Xpulpimg: pv.dotusp.sc.h + riscv_instr::PV_DOTUSP_B, // Xpulpimg: pv.dotusp.b + riscv_instr::PV_DOTUSP_SC_B, // Xpulpimg: pv.dotusp.sc.b + riscv_instr::PV_DOTSP_H, // Xpulpimg: pv.dotsp.h + riscv_instr::PV_DOTSP_SC_H, // Xpulpimg: pv.dotsp.sc.h + riscv_instr::PV_DOTSP_B, // 
Xpulpimg: pv.dotsp.b + riscv_instr::PV_DOTSP_SC_B: begin // Xpulpimg: pv.dotsp.sc.b if (snitch_pkg::XPULPIMG) begin write_rd = 1'b0; uses_rd = 1'b1; @@ -1228,8 +1246,14 @@ module snitch #( end end // 2 source registers (rs1, rd) - riscv_instr::PV_INSERT_H, // Xpulpimg: pv.insert.h - riscv_instr::PV_INSERT_B: begin // Xpulpimg: pv.insert.b + riscv_instr::PV_INSERT_H, // Xpulpimg: pv.insert.h + riscv_instr::PV_INSERT_B, // Xpulpimg: pv.insert.b + riscv_instr::PV_SDOTUP_SCI_H, // Xpulpimg: pv.sdotup.sci.h + riscv_instr::PV_SDOTUP_SCI_B, // Xpulpimg: pv.sdotup.sci.b + riscv_instr::PV_SDOTUSP_SCI_H, // Xpulpimg: pv.sdotusp.sci.h + riscv_instr::PV_SDOTUSP_SCI_B, // Xpulpimg: pv.sdotusp.sci.b + riscv_instr::PV_SDOTSP_SCI_H, // Xpulpimg: pv.sdotsp.sci.h + riscv_instr::PV_SDOTSP_SCI_B: begin // Xpulpimg: pv.sdotsp.sci.b if (snitch_pkg::XPULPIMG) begin write_rd = 1'b0; uses_rd = 1'b1; @@ -1242,8 +1266,20 @@ module snitch #( end end // 3 source registers (rs1, rs2, rd) - riscv_instr::P_MAC, // Xpulpimg: p.mac - riscv_instr::P_MSU: begin // Xpulpimg: p.msu + riscv_instr::P_MAC, // Xpulpimg: p.mac + riscv_instr::P_MSU, // Xpulpimg: p.msu + riscv_instr::PV_SDOTUP_H, // Xpulpimg: pv.sdotup.h + riscv_instr::PV_SDOTUP_SC_H, // Xpulpimg: pv.sdotup.sc.h + riscv_instr::PV_SDOTUP_B, // Xpulpimg: pv.sdotup.b + riscv_instr::PV_SDOTUP_SC_B, // Xpulpimg: pv.sdotup.sc.b + riscv_instr::PV_SDOTUSP_H, // Xpulpimg: pv.sdotusp.h + riscv_instr::PV_SDOTUSP_SC_H, // Xpulpimg: pv.sdotusp.sc.h + riscv_instr::PV_SDOTUSP_B, // Xpulpimg: pv.sdotusp.b + riscv_instr::PV_SDOTUSP_SC_B, // Xpulpimg: pv.sdotusp.sc.b + riscv_instr::PV_SDOTSP_H, // Xpulpimg: pv.sdotsp.h + riscv_instr::PV_SDOTSP_SC_H, // Xpulpimg: pv.sdotsp.sc.h + riscv_instr::PV_SDOTSP_B, // Xpulpimg: pv.sdotsp.b + riscv_instr::PV_SDOTSP_SC_B: begin // Xpulpimg: pv.sdotsp.sc.b if (snitch_pkg::XPULPIMG) begin write_rd = 1'b0; uses_rd = 1'b1; diff --git a/hardware/deps/snitch/src/snitch_ipu.sv b/hardware/deps/snitch/src/snitch_ipu.sv index 
64064a257..ea52da070 100644 --- a/hardware/deps/snitch/src/snitch_ipu.sv +++ b/hardware/deps/snitch/src/snitch_ipu.sv @@ -66,115 +66,151 @@ module snitch_ipu #( div_valid_op = acc_qvalid_i; acc_qready_o = div_ready_op; end - riscv_instr::P_ABS, // Xpulpimg: p.abs - riscv_instr::P_SLET, // Xpulpimg: p.slet - riscv_instr::P_SLETU, // Xpulpimg: p.sletu - riscv_instr::P_MIN, // Xpulpimg: p.min - riscv_instr::P_MINU, // Xpulpimg: p.minu - riscv_instr::P_MAX, // Xpulpimg: p.max - riscv_instr::P_MAXU, // Xpulpimg: p.maxu - riscv_instr::P_EXTHS, // Xpulpimg: p.exths - riscv_instr::P_EXTHZ, // Xpulpimg: p.exthz - riscv_instr::P_EXTBS, // Xpulpimg: p.extbs - riscv_instr::P_EXTBZ, // Xpulpimg: p.extbz - riscv_instr::P_CLIP, // Xpulpimg: p.clip - riscv_instr::P_CLIPU, // Xpulpimg: p.clipu - riscv_instr::P_CLIPR, // Xpulpimg: p.clipr - riscv_instr::P_CLIPUR, // Xpulpimg: p.clipur - riscv_instr::P_MAC, // Xpulpimg: p.mac - riscv_instr::P_MSU, // Xpulpimg: p.msu - riscv_instr::PV_ADD_H, // Xpulpimg: pv.add.h - riscv_instr::PV_ADD_SC_H, // Xpulpimg: pv.add.sc.h - riscv_instr::PV_ADD_SCI_H, // Xpulpimg: pv.add.sci.h - riscv_instr::PV_ADD_B, // Xpulpimg: pv.add.b - riscv_instr::PV_ADD_SC_B, // Xpulpimg: pv.add.sc.b - riscv_instr::PV_ADD_SCI_B, // Xpulpimg: pv.add.sci.b - riscv_instr::PV_SUB_H, // Xpulpimg: pv.sub.h - riscv_instr::PV_SUB_SC_H, // Xpulpimg: pv.sub.sc.h - riscv_instr::PV_SUB_SCI_H, // Xpulpimg: pv.sub.sci.h - riscv_instr::PV_SUB_B, // Xpulpimg: pv.sub.b - riscv_instr::PV_SUB_SC_B, // Xpulpimg: pv.sub.sc.b - riscv_instr::PV_SUB_SCI_B, // Xpulpimg: pv.sub.sci.b - riscv_instr::PV_AVG_H, // Xpulpimg: pv.avg.h - riscv_instr::PV_AVG_SC_H, // Xpulpimg: pv.avg.sc.h - riscv_instr::PV_AVG_SCI_H, // Xpulpimg: pv.avg.sci.h - riscv_instr::PV_AVG_B, // Xpulpimg: pv.avg.b - riscv_instr::PV_AVG_SC_B, // Xpulpimg: pv.avg.sc.b - riscv_instr::PV_AVG_SCI_B, // Xpulpimg: pv.avg.sci.b - riscv_instr::PV_AVGU_H, // Xpulpimg: pv.avgu.h - riscv_instr::PV_AVGU_SC_H, // Xpulpimg: pv.avgu.sc.h - 
riscv_instr::PV_AVGU_SCI_H, // Xpulpimg: pv.avgu.sci.h - riscv_instr::PV_AVGU_B, // Xpulpimg: pv.avgu.b - riscv_instr::PV_AVGU_SC_B, // Xpulpimg: pv.avgu.sc.b - riscv_instr::PV_AVGU_SCI_B, // Xpulpimg: pv.avgu.sci.b - riscv_instr::PV_MIN_H, // Xpulpimg: pv.min.h - riscv_instr::PV_MIN_SC_H, // Xpulpimg: pv.min.sc.h - riscv_instr::PV_MIN_SCI_H, // Xpulpimg: pv.min.sci.h - riscv_instr::PV_MIN_B, // Xpulpimg: pv.min.b - riscv_instr::PV_MIN_SC_B, // Xpulpimg: pv.min.sc.b - riscv_instr::PV_MIN_SCI_B, // Xpulpimg: pv.min.sci.b - riscv_instr::PV_MINU_H, // Xpulpimg: pv.minu.h - riscv_instr::PV_MINU_SC_H, // Xpulpimg: pv.minu.sc.h - riscv_instr::PV_MINU_SCI_H, // Xpulpimg: pv.minu.sci.h - riscv_instr::PV_MINU_B, // Xpulpimg: pv.minu.b - riscv_instr::PV_MINU_SC_B, // Xpulpimg: pv.minu.sc.b - riscv_instr::PV_MINU_SCI_B, // Xpulpimg: pv.minu.sci.b - riscv_instr::PV_MAX_H, // Xpulpimg: pv.max.h - riscv_instr::PV_MAX_SC_H, // Xpulpimg: pv.max.sc.h - riscv_instr::PV_MAX_SCI_H, // Xpulpimg: pv.max.sci.h - riscv_instr::PV_MAX_B, // Xpulpimg: pv.max.b - riscv_instr::PV_MAX_SC_B, // Xpulpimg: pv.max.sc.b - riscv_instr::PV_MAX_SCI_B, // Xpulpimg: pv.max.sci.b - riscv_instr::PV_MAXU_H, // Xpulpimg: pv.maxu.h - riscv_instr::PV_MAXU_SC_H, // Xpulpimg: pv.maxu.sc.h - riscv_instr::PV_MAXU_SCI_H, // Xpulpimg: pv.maxu.sci.h - riscv_instr::PV_MAXU_B, // Xpulpimg: pv.maxu.b - riscv_instr::PV_MAXU_SC_B, // Xpulpimg: pv.maxu.sc.b - riscv_instr::PV_MAXU_SCI_B, // Xpulpimg: pv.maxu.sci.b - riscv_instr::PV_SRL_H, // Xpulpimg: pv.srl.h - riscv_instr::PV_SRL_SC_H, // Xpulpimg: pv.srl.sc.h - riscv_instr::PV_SRL_SCI_H, // Xpulpimg: pv.srl.sci.h - riscv_instr::PV_SRL_B, // Xpulpimg: pv.srl.b - riscv_instr::PV_SRL_SC_B, // Xpulpimg: pv.srl.sc.b - riscv_instr::PV_SRL_SCI_B, // Xpulpimg: pv.srl.sci.b - riscv_instr::PV_SRA_H, // Xpulpimg: pv.sra.h - riscv_instr::PV_SRA_SC_H, // Xpulpimg: pv.sra.sc.h - riscv_instr::PV_SRA_SCI_H, // Xpulpimg: pv.sra.sci.h - riscv_instr::PV_SRA_B, // Xpulpimg: pv.sra.b - 
riscv_instr::PV_SRA_SC_B, // Xpulpimg: pv.sra.sc.b - riscv_instr::PV_SRA_SCI_B, // Xpulpimg: pv.sra.sci.b - riscv_instr::PV_SLL_H, // Xpulpimg: pv.sll.h - riscv_instr::PV_SLL_SC_H, // Xpulpimg: pv.sll.sc.h - riscv_instr::PV_SLL_SCI_H, // Xpulpimg: pv.sll.sci.h - riscv_instr::PV_SLL_B, // Xpulpimg: pv.sll.b - riscv_instr::PV_SLL_SC_B, // Xpulpimg: pv.sll.sc.b - riscv_instr::PV_SLL_SCI_B, // Xpulpimg: pv.sll.sci.b - riscv_instr::PV_OR_H, // Xpulpimg: pv.or.h - riscv_instr::PV_OR_SC_H, // Xpulpimg: pv.or.sc.h - riscv_instr::PV_OR_SCI_H, // Xpulpimg: pv.or.sci.h - riscv_instr::PV_OR_B, // Xpulpimg: pv.or.b - riscv_instr::PV_OR_SC_B, // Xpulpimg: pv.or.sc.b - riscv_instr::PV_OR_SCI_B, // Xpulpimg: pv.or.sci.b - riscv_instr::PV_XOR_H, // Xpulpimg: pv.xor.h - riscv_instr::PV_XOR_SC_H, // Xpulpimg: pv.xor.sc.h - riscv_instr::PV_XOR_SCI_H, // Xpulpimg: pv.xor.sci.h - riscv_instr::PV_XOR_B, // Xpulpimg: pv.xor.b - riscv_instr::PV_XOR_SC_B, // Xpulpimg: pv.xor.sc.b - riscv_instr::PV_XOR_SCI_B, // Xpulpimg: pv.xor.sci.b - riscv_instr::PV_AND_H, // Xpulpimg: pv.and.h - riscv_instr::PV_AND_SC_H, // Xpulpimg: pv.and.sc.h - riscv_instr::PV_AND_SCI_H, // Xpulpimg: pv.and.sci.h - riscv_instr::PV_AND_B, // Xpulpimg: pv.and.b - riscv_instr::PV_AND_SC_B, // Xpulpimg: pv.and.sc.b - riscv_instr::PV_AND_SCI_B, // Xpulpimg: pv.and.sci.b - riscv_instr::PV_ABS_H, // Xpulpimg: pv.abs.h - riscv_instr::PV_ABS_B, // Xpulpimg: pv.abs.b - riscv_instr::PV_EXTRACT_H, // Xpulpimg: pv.extract.h - riscv_instr::PV_EXTRACT_B, // Xpulpimg: pv.extract.b - riscv_instr::PV_EXTRACTU_H, // Xpulpimg: pv.extractu.h - riscv_instr::PV_EXTRACTU_B, // Xpulpimg: pv.extractu.b - riscv_instr::PV_INSERT_H, // Xpulpimg: pv.insert.h - riscv_instr::PV_INSERT_B: begin // Xpulpimg: pv.insert.b + riscv_instr::P_ABS, // Xpulpimg: p.abs + riscv_instr::P_SLET, // Xpulpimg: p.slet + riscv_instr::P_SLETU, // Xpulpimg: p.sletu + riscv_instr::P_MIN, // Xpulpimg: p.min + riscv_instr::P_MINU, // Xpulpimg: p.minu + riscv_instr::P_MAX, 
// Xpulpimg: p.max + riscv_instr::P_MAXU, // Xpulpimg: p.maxu + riscv_instr::P_EXTHS, // Xpulpimg: p.exths + riscv_instr::P_EXTHZ, // Xpulpimg: p.exthz + riscv_instr::P_EXTBS, // Xpulpimg: p.extbs + riscv_instr::P_EXTBZ, // Xpulpimg: p.extbz + riscv_instr::P_CLIP, // Xpulpimg: p.clip + riscv_instr::P_CLIPU, // Xpulpimg: p.clipu + riscv_instr::P_CLIPR, // Xpulpimg: p.clipr + riscv_instr::P_CLIPUR, // Xpulpimg: p.clipur + riscv_instr::P_MAC, // Xpulpimg: p.mac + riscv_instr::P_MSU, // Xpulpimg: p.msu + riscv_instr::PV_ADD_H, // Xpulpimg: pv.add.h + riscv_instr::PV_ADD_SC_H, // Xpulpimg: pv.add.sc.h + riscv_instr::PV_ADD_SCI_H, // Xpulpimg: pv.add.sci.h + riscv_instr::PV_ADD_B, // Xpulpimg: pv.add.b + riscv_instr::PV_ADD_SC_B, // Xpulpimg: pv.add.sc.b + riscv_instr::PV_ADD_SCI_B, // Xpulpimg: pv.add.sci.b + riscv_instr::PV_SUB_H, // Xpulpimg: pv.sub.h + riscv_instr::PV_SUB_SC_H, // Xpulpimg: pv.sub.sc.h + riscv_instr::PV_SUB_SCI_H, // Xpulpimg: pv.sub.sci.h + riscv_instr::PV_SUB_B, // Xpulpimg: pv.sub.b + riscv_instr::PV_SUB_SC_B, // Xpulpimg: pv.sub.sc.b + riscv_instr::PV_SUB_SCI_B, // Xpulpimg: pv.sub.sci.b + riscv_instr::PV_AVG_H, // Xpulpimg: pv.avg.h + riscv_instr::PV_AVG_SC_H, // Xpulpimg: pv.avg.sc.h + riscv_instr::PV_AVG_SCI_H, // Xpulpimg: pv.avg.sci.h + riscv_instr::PV_AVG_B, // Xpulpimg: pv.avg.b + riscv_instr::PV_AVG_SC_B, // Xpulpimg: pv.avg.sc.b + riscv_instr::PV_AVG_SCI_B, // Xpulpimg: pv.avg.sci.b + riscv_instr::PV_AVGU_H, // Xpulpimg: pv.avgu.h + riscv_instr::PV_AVGU_SC_H, // Xpulpimg: pv.avgu.sc.h + riscv_instr::PV_AVGU_SCI_H, // Xpulpimg: pv.avgu.sci.h + riscv_instr::PV_AVGU_B, // Xpulpimg: pv.avgu.b + riscv_instr::PV_AVGU_SC_B, // Xpulpimg: pv.avgu.sc.b + riscv_instr::PV_AVGU_SCI_B, // Xpulpimg: pv.avgu.sci.b + riscv_instr::PV_MIN_H, // Xpulpimg: pv.min.h + riscv_instr::PV_MIN_SC_H, // Xpulpimg: pv.min.sc.h + riscv_instr::PV_MIN_SCI_H, // Xpulpimg: pv.min.sci.h + riscv_instr::PV_MIN_B, // Xpulpimg: pv.min.b + riscv_instr::PV_MIN_SC_B, // Xpulpimg: 
pv.min.sc.b + riscv_instr::PV_MIN_SCI_B, // Xpulpimg: pv.min.sci.b + riscv_instr::PV_MINU_H, // Xpulpimg: pv.minu.h + riscv_instr::PV_MINU_SC_H, // Xpulpimg: pv.minu.sc.h + riscv_instr::PV_MINU_SCI_H, // Xpulpimg: pv.minu.sci.h + riscv_instr::PV_MINU_B, // Xpulpimg: pv.minu.b + riscv_instr::PV_MINU_SC_B, // Xpulpimg: pv.minu.sc.b + riscv_instr::PV_MINU_SCI_B, // Xpulpimg: pv.minu.sci.b + riscv_instr::PV_MAX_H, // Xpulpimg: pv.max.h + riscv_instr::PV_MAX_SC_H, // Xpulpimg: pv.max.sc.h + riscv_instr::PV_MAX_SCI_H, // Xpulpimg: pv.max.sci.h + riscv_instr::PV_MAX_B, // Xpulpimg: pv.max.b + riscv_instr::PV_MAX_SC_B, // Xpulpimg: pv.max.sc.b + riscv_instr::PV_MAX_SCI_B, // Xpulpimg: pv.max.sci.b + riscv_instr::PV_MAXU_H, // Xpulpimg: pv.maxu.h + riscv_instr::PV_MAXU_SC_H, // Xpulpimg: pv.maxu.sc.h + riscv_instr::PV_MAXU_SCI_H, // Xpulpimg: pv.maxu.sci.h + riscv_instr::PV_MAXU_B, // Xpulpimg: pv.maxu.b + riscv_instr::PV_MAXU_SC_B, // Xpulpimg: pv.maxu.sc.b + riscv_instr::PV_MAXU_SCI_B, // Xpulpimg: pv.maxu.sci.b + riscv_instr::PV_SRL_H, // Xpulpimg: pv.srl.h + riscv_instr::PV_SRL_SC_H, // Xpulpimg: pv.srl.sc.h + riscv_instr::PV_SRL_SCI_H, // Xpulpimg: pv.srl.sci.h + riscv_instr::PV_SRL_B, // Xpulpimg: pv.srl.b + riscv_instr::PV_SRL_SC_B, // Xpulpimg: pv.srl.sc.b + riscv_instr::PV_SRL_SCI_B, // Xpulpimg: pv.srl.sci.b + riscv_instr::PV_SRA_H, // Xpulpimg: pv.sra.h + riscv_instr::PV_SRA_SC_H, // Xpulpimg: pv.sra.sc.h + riscv_instr::PV_SRA_SCI_H, // Xpulpimg: pv.sra.sci.h + riscv_instr::PV_SRA_B, // Xpulpimg: pv.sra.b + riscv_instr::PV_SRA_SC_B, // Xpulpimg: pv.sra.sc.b + riscv_instr::PV_SRA_SCI_B, // Xpulpimg: pv.sra.sci.b + riscv_instr::PV_SLL_H, // Xpulpimg: pv.sll.h + riscv_instr::PV_SLL_SC_H, // Xpulpimg: pv.sll.sc.h + riscv_instr::PV_SLL_SCI_H, // Xpulpimg: pv.sll.sci.h + riscv_instr::PV_SLL_B, // Xpulpimg: pv.sll.b + riscv_instr::PV_SLL_SC_B, // Xpulpimg: pv.sll.sc.b + riscv_instr::PV_SLL_SCI_B, // Xpulpimg: pv.sll.sci.b + riscv_instr::PV_OR_H, // Xpulpimg: pv.or.h + 
riscv_instr::PV_OR_SC_H, // Xpulpimg: pv.or.sc.h + riscv_instr::PV_OR_SCI_H, // Xpulpimg: pv.or.sci.h + riscv_instr::PV_OR_B, // Xpulpimg: pv.or.b + riscv_instr::PV_OR_SC_B, // Xpulpimg: pv.or.sc.b + riscv_instr::PV_OR_SCI_B, // Xpulpimg: pv.or.sci.b + riscv_instr::PV_XOR_H, // Xpulpimg: pv.xor.h + riscv_instr::PV_XOR_SC_H, // Xpulpimg: pv.xor.sc.h + riscv_instr::PV_XOR_SCI_H, // Xpulpimg: pv.xor.sci.h + riscv_instr::PV_XOR_B, // Xpulpimg: pv.xor.b + riscv_instr::PV_XOR_SC_B, // Xpulpimg: pv.xor.sc.b + riscv_instr::PV_XOR_SCI_B, // Xpulpimg: pv.xor.sci.b + riscv_instr::PV_AND_H, // Xpulpimg: pv.and.h + riscv_instr::PV_AND_SC_H, // Xpulpimg: pv.and.sc.h + riscv_instr::PV_AND_SCI_H, // Xpulpimg: pv.and.sci.h + riscv_instr::PV_AND_B, // Xpulpimg: pv.and.b + riscv_instr::PV_AND_SC_B, // Xpulpimg: pv.and.sc.b + riscv_instr::PV_AND_SCI_B, // Xpulpimg: pv.and.sci.b + riscv_instr::PV_ABS_H, // Xpulpimg: pv.abs.h + riscv_instr::PV_ABS_B, // Xpulpimg: pv.abs.b + riscv_instr::PV_EXTRACT_H, // Xpulpimg: pv.extract.h + riscv_instr::PV_EXTRACT_B, // Xpulpimg: pv.extract.b + riscv_instr::PV_EXTRACTU_H, // Xpulpimg: pv.extractu.h + riscv_instr::PV_EXTRACTU_B, // Xpulpimg: pv.extractu.b + riscv_instr::PV_INSERT_H, // Xpulpimg: pv.insert.h + riscv_instr::PV_INSERT_B, // Xpulpimg: pv.insert.b + riscv_instr::PV_DOTUP_H, // Xpulpimg: pv.dotup.h + riscv_instr::PV_DOTUP_SC_H, // Xpulpimg: pv.dotup.sc.h + riscv_instr::PV_DOTUP_SCI_H, // Xpulpimg: pv.dotup.sci.h + riscv_instr::PV_DOTUP_B, // Xpulpimg: pv.dotup.b + riscv_instr::PV_DOTUP_SC_B, // Xpulpimg: pv.dotup.sc.b + riscv_instr::PV_DOTUP_SCI_B, // Xpulpimg: pv.dotup.sci.b + riscv_instr::PV_DOTUSP_H, // Xpulpimg: pv.dotusp.h + riscv_instr::PV_DOTUSP_SC_H, // Xpulpimg: pv.dotusp.sc.h + riscv_instr::PV_DOTUSP_SCI_H, // Xpulpimg: pv.dotusp.sci.h + riscv_instr::PV_DOTUSP_B, // Xpulpimg: pv.dotusp.b + riscv_instr::PV_DOTUSP_SC_B, // Xpulpimg: pv.dotusp.sc.b + riscv_instr::PV_DOTUSP_SCI_B, // Xpulpimg: pv.dotusp.sci.b + 
riscv_instr::PV_DOTSP_H, // Xpulpimg: pv.dotsp.h + riscv_instr::PV_DOTSP_SC_H, // Xpulpimg: pv.dotsp.sc.h + riscv_instr::PV_DOTSP_SCI_H, // Xpulpimg: pv.dotsp.sci.h + riscv_instr::PV_DOTSP_B, // Xpulpimg: pv.dotsp.b + riscv_instr::PV_DOTSP_SC_B, // Xpulpimg: pv.dotsp.sc.b + riscv_instr::PV_DOTSP_SCI_B, // Xpulpimg: pv.dotsp.sci.b + riscv_instr::PV_SDOTUP_H, // Xpulpimg: pv.sdotup.h + riscv_instr::PV_SDOTUP_SC_H, // Xpulpimg: pv.sdotup.sc.h + riscv_instr::PV_SDOTUP_SCI_H, // Xpulpimg: pv.sdotup.sci.h + riscv_instr::PV_SDOTUP_B, // Xpulpimg: pv.sdotup.b + riscv_instr::PV_SDOTUP_SC_B, // Xpulpimg: pv.sdotup.sc.b + riscv_instr::PV_SDOTUP_SCI_B, // Xpulpimg: pv.sdotup.sci.b + riscv_instr::PV_SDOTUSP_H, // Xpulpimg: pv.sdotusp.h + riscv_instr::PV_SDOTUSP_SC_H, // Xpulpimg: pv.sdotusp.sc.h + riscv_instr::PV_SDOTUSP_SCI_H, // Xpulpimg: pv.sdotusp.sci.h + riscv_instr::PV_SDOTUSP_B, // Xpulpimg: pv.sdotusp.b + riscv_instr::PV_SDOTUSP_SC_B, // Xpulpimg: pv.sdotusp.sc.b + riscv_instr::PV_SDOTUSP_SCI_B, // Xpulpimg: pv.sdotusp.sci.b + riscv_instr::PV_SDOTSP_H, // Xpulpimg: pv.sdotsp.h + riscv_instr::PV_SDOTSP_SC_H, // Xpulpimg: pv.sdotsp.sc.h + riscv_instr::PV_SDOTSP_SCI_H, // Xpulpimg: pv.sdotsp.sci.h + riscv_instr::PV_SDOTSP_B, // Xpulpimg: pv.sdotsp.b + riscv_instr::PV_SDOTSP_SC_B, // Xpulpimg: pv.sdotsp.sc.b + riscv_instr::PV_SDOTSP_SCI_B: begin // Xpulpimg: pv.sdotsp.sci.b if (snitch_pkg::XPULPIMG) begin dsp_valid_op = acc_qvalid_i; acc_qready_o = dsp_ready_op; @@ -310,32 +346,35 @@ module dspu #( assign imm6 = {operator_i[24:20], operator_i[25]}; // Internal control signals - logic cmp_signed; // comparator operation is signed + logic cmp_signed; // comparator operation is signed enum logic [1:0] { None, Reg, Zero, ClipBound - } cmp_op_b_sel; // selection of shared comparator operands - logic clip_unsigned; // clip operation has "0" as lower bound - logic clip_register; // if 1 clip operation uses rs2, else imm5 + } cmp_op_b_sel; // selection of shared comparator operands 
+ logic clip_unsigned; // clip operation has "0" as lower bound + logic clip_register; // if 1 clip operation uses rs2, else imm5 enum logic [1:0] { NoMul, MulLow, MulHigh, MulMac - } mul_op; // type of multiplication operation - logic mac_msu; // multiplication operation is MSU - logic mul_op_a_sign; // sign of multiplier operand a - logic mac_op_b_sign; // sign of multiplier operand b + } mul_op; // type of multiplication operation + logic mac_msu; // multiplication operation is MSU + logic mul_op_a_sign; // sign of multiplier operand a + logic mac_op_b_sign; // sign of multiplier operand b enum logic [3:0] { Nop, Abs, Sle, Min, Max, Exths, Exthz, Extbs, Extbz, Clip, Mac, Simd - } res_sel; // result selection + } res_sel; // result selection enum logic [0:3] { - SimdNop, SimdAdd, SimdSub, SimdAvg, SimdMin, SimdMax, SimdSrl, SimdSra, SimdSll, SimdOr, SimdXor, SimdAnd, SimdAbs, SimdExt, SimdIns + SimdNop, SimdAdd, SimdSub, SimdAvg, SimdMin, SimdMax, SimdSrl, SimdSra, SimdSll, SimdOr, SimdXor, SimdAnd, SimdAbs, SimdExt, SimdIns, SimdDotp } simd_op; enum logic { HalfWord, Byte - } simd_size; // SIMD granularity + } simd_size; // SIMD granularity enum logic [0:1] { Vect, Sc, Sci - } simd_mode; // SIMD mode - logic simd_signed; // SIMD operation is signed and uses sign-extended imm6 + } simd_mode; // SIMD mode + logic simd_signed; // SIMD operation is signed and uses sign-extended imm6 + logic simd_dotp_op_a_signed; // signedness of SIMD dotp operand a + logic simd_dotp_op_b_signed; // signedness of SIMD dotp operand b + logic simd_dotp_acc; // accumulate result of SIMD dotp on destination reg // -------------------- // Decoder @@ -355,6 +394,9 @@ module dspu #( simd_size = HalfWord; simd_mode = Vect; simd_signed = 1; + simd_dotp_op_a_signed = 1; + simd_dotp_op_b_signed = 1; + simd_dotp_acc = 0; unique casez (operator_i) // Multiplications from M extension riscv_instr::MUL: begin @@ -727,224 +769,188 @@ module dspu #( end riscv_instr::PV_SRL_H: begin simd_op = SimdSrl; 
- simd_signed = 0; res_sel = Simd; end riscv_instr::PV_SRL_SC_H: begin simd_op = SimdSrl; simd_mode = Sc; - simd_signed = 0; res_sel = Simd; end riscv_instr::PV_SRL_SCI_H: begin simd_op = SimdSrl; simd_mode = Sci; - simd_signed = 0; res_sel = Simd; end riscv_instr::PV_SRL_B: begin simd_op = SimdSrl; simd_size = Byte; - simd_signed = 0; res_sel = Simd; end riscv_instr::PV_SRL_SC_B: begin simd_op = SimdSrl; simd_size = Byte; simd_mode = Sc; - simd_signed = 0; res_sel = Simd; end riscv_instr::PV_SRL_SCI_B: begin simd_op = SimdSrl; simd_size = Byte; simd_mode = Sci; - simd_signed = 0; res_sel = Simd; end riscv_instr::PV_SRA_H: begin simd_op = SimdSra; - simd_signed = 0; res_sel = Simd; end riscv_instr::PV_SRA_SC_H: begin simd_op = SimdSra; simd_mode = Sc; - simd_signed = 0; res_sel = Simd; end riscv_instr::PV_SRA_SCI_H: begin simd_op = SimdSra; simd_mode = Sci; - simd_signed = 0; res_sel = Simd; end riscv_instr::PV_SRA_B: begin simd_op = SimdSra; simd_size = Byte; - simd_signed = 0; res_sel = Simd; end riscv_instr::PV_SRA_SC_B: begin simd_op = SimdSra; simd_size = Byte; simd_mode = Sc; - simd_signed = 0; res_sel = Simd; end riscv_instr::PV_SRA_SCI_B: begin simd_op = SimdSra; simd_size = Byte; simd_mode = Sci; - simd_signed = 0; res_sel = Simd; end riscv_instr::PV_SLL_H: begin simd_op = SimdSll; - simd_signed = 0; res_sel = Simd; end riscv_instr::PV_SLL_SC_H: begin simd_op = SimdSll; simd_mode = Sc; - simd_signed = 0; res_sel = Simd; end riscv_instr::PV_SLL_SCI_H: begin simd_op = SimdSll; simd_mode = Sci; - simd_signed = 0; res_sel = Simd; end riscv_instr::PV_SLL_B: begin simd_op = SimdSll; simd_size = Byte; - simd_signed = 0; res_sel = Simd; end riscv_instr::PV_SLL_SC_B: begin simd_op = SimdSll; simd_size = Byte; simd_mode = Sc; - simd_signed = 0; res_sel = Simd; end riscv_instr::PV_SLL_SCI_B: begin simd_op = SimdSll; simd_size = Byte; simd_mode = Sci; - simd_signed = 0; res_sel = Simd; end riscv_instr::PV_OR_H: begin simd_op = SimdOr; - simd_signed = 0; res_sel = 
Simd; end riscv_instr::PV_OR_SC_H: begin simd_op = SimdOr; simd_mode = Sc; - simd_signed = 0; res_sel = Simd; end riscv_instr::PV_OR_SCI_H: begin simd_op = SimdOr; simd_mode = Sci; - simd_signed = 0; res_sel = Simd; end riscv_instr::PV_OR_B: begin simd_op = SimdOr; simd_size = Byte; - simd_signed = 0; res_sel = Simd; end riscv_instr::PV_OR_SC_B: begin simd_op = SimdOr; simd_size = Byte; simd_mode = Sc; - simd_signed = 0; res_sel = Simd; end riscv_instr::PV_OR_SCI_B: begin simd_op = SimdOr; simd_size = Byte; simd_mode = Sci; - simd_signed = 0; res_sel = Simd; end riscv_instr::PV_XOR_H: begin simd_op = SimdXor; - simd_signed = 0; res_sel = Simd; end riscv_instr::PV_XOR_SC_H: begin simd_op = SimdXor; simd_mode = Sc; - simd_signed = 0; res_sel = Simd; end riscv_instr::PV_XOR_SCI_H: begin simd_op = SimdXor; simd_mode = Sci; - simd_signed = 0; res_sel = Simd; end riscv_instr::PV_XOR_B: begin simd_op = SimdXor; simd_size = Byte; - simd_signed = 0; res_sel = Simd; end riscv_instr::PV_XOR_SC_B: begin simd_op = SimdXor; simd_size = Byte; simd_mode = Sc; - simd_signed = 0; res_sel = Simd; end riscv_instr::PV_XOR_SCI_B: begin simd_op = SimdXor; simd_size = Byte; simd_mode = Sci; - simd_signed = 0; res_sel = Simd; end riscv_instr::PV_AND_H: begin simd_op = SimdAnd; - simd_signed = 0; res_sel = Simd; end riscv_instr::PV_AND_SC_H: begin simd_op = SimdAnd; simd_mode = Sc; - simd_signed = 0; res_sel = Simd; end riscv_instr::PV_AND_SCI_H: begin simd_op = SimdAnd; simd_mode = Sci; - simd_signed = 0; res_sel = Simd; end riscv_instr::PV_AND_B: begin simd_op = SimdAnd; simd_size = Byte; - simd_signed = 0; res_sel = Simd; end riscv_instr::PV_AND_SC_B: begin simd_op = SimdAnd; simd_size = Byte; simd_mode = Sc; - simd_signed = 0; res_sel = Simd; end riscv_instr::PV_AND_SCI_B: begin simd_op = SimdAnd; simd_size = Byte; simd_mode = Sci; - simd_signed = 0; res_sel = Simd; end riscv_instr::PV_ABS_H: begin @@ -985,6 +991,258 @@ module dspu #( simd_size = Byte; res_sel = Simd; end + 
riscv_instr::PV_DOTUP_H: begin + simd_op = SimdDotp; + simd_signed = 0; + simd_dotp_op_a_signed = 0; + simd_dotp_op_b_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_DOTUP_SC_H: begin + simd_op = SimdDotp; + simd_mode = Sc; + simd_signed = 0; + simd_dotp_op_a_signed = 0; + simd_dotp_op_b_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_DOTUP_SCI_H: begin + simd_op = SimdDotp; + simd_mode = Sci; + simd_signed = 0; + simd_dotp_op_a_signed = 0; + simd_dotp_op_b_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_DOTUP_B: begin + simd_op = SimdDotp; + simd_size = Byte; + simd_signed = 0; + simd_dotp_op_a_signed = 0; + simd_dotp_op_b_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_DOTUP_SC_B: begin + simd_op = SimdDotp; + simd_size = Byte; + simd_mode = Sc; + simd_signed = 0; + simd_dotp_op_a_signed = 0; + simd_dotp_op_b_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_DOTUP_SCI_B: begin + simd_op = SimdDotp; + simd_size = Byte; + simd_mode = Sci; + simd_signed = 0; + simd_dotp_op_a_signed = 0; + simd_dotp_op_b_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_DOTUSP_H: begin + simd_op = SimdDotp; + simd_dotp_op_a_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_DOTUSP_SC_H: begin + simd_op = SimdDotp; + simd_mode = Sc; + simd_dotp_op_a_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_DOTUSP_SCI_H: begin + simd_op = SimdDotp; + simd_mode = Sci; + simd_dotp_op_a_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_DOTUSP_B: begin + simd_op = SimdDotp; + simd_size = Byte; + simd_dotp_op_a_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_DOTUSP_SC_B: begin + simd_op = SimdDotp; + simd_size = Byte; + simd_mode = Sc; + simd_dotp_op_a_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_DOTUSP_SCI_B: begin + simd_op = SimdDotp; + simd_size = Byte; + simd_mode = Sci; + simd_dotp_op_a_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_DOTSP_H: begin + simd_op = SimdDotp; + res_sel = Simd; + end + riscv_instr::PV_DOTSP_SC_H: begin 
+ simd_op = SimdDotp; + simd_mode = Sc; + res_sel = Simd; + end + riscv_instr::PV_DOTSP_SCI_H: begin + simd_op = SimdDotp; + simd_mode = Sci; + res_sel = Simd; + end + riscv_instr::PV_DOTSP_B: begin + simd_op = SimdDotp; + simd_size = Byte; + res_sel = Simd; + end + riscv_instr::PV_DOTSP_SC_B: begin + simd_op = SimdDotp; + simd_size = Byte; + simd_mode = Sc; + res_sel = Simd; + end + riscv_instr::PV_DOTSP_SCI_B: begin + simd_op = SimdDotp; + simd_size = Byte; + simd_mode = Sci; + res_sel = Simd; + end + riscv_instr::PV_SDOTUP_H: begin + simd_op = SimdDotp; + simd_signed = 0; + simd_dotp_op_a_signed = 0; + simd_dotp_op_b_signed = 0; + simd_dotp_acc = 1; + res_sel = Simd; + end + riscv_instr::PV_SDOTUP_SC_H: begin + simd_op = SimdDotp; + simd_mode = Sc; + simd_signed = 0; + simd_dotp_op_a_signed = 0; + simd_dotp_op_b_signed = 0; + simd_dotp_acc = 1; + res_sel = Simd; + end + riscv_instr::PV_SDOTUP_SCI_H: begin + simd_op = SimdDotp; + simd_mode = Sci; + simd_signed = 0; + simd_dotp_op_a_signed = 0; + simd_dotp_op_b_signed = 0; + simd_dotp_acc = 1; + res_sel = Simd; + end + riscv_instr::PV_SDOTUP_B: begin + simd_op = SimdDotp; + simd_size = Byte; + simd_signed = 0; + simd_dotp_op_a_signed = 0; + simd_dotp_op_b_signed = 0; + simd_dotp_acc = 1; + res_sel = Simd; + end + riscv_instr::PV_SDOTUP_SC_B: begin + simd_op = SimdDotp; + simd_size = Byte; + simd_mode = Sc; + simd_signed = 0; + simd_dotp_op_a_signed = 0; + simd_dotp_op_b_signed = 0; + simd_dotp_acc = 1; + res_sel = Simd; + end + riscv_instr::PV_SDOTUP_SCI_B: begin + simd_op = SimdDotp; + simd_size = Byte; + simd_mode = Sci; + simd_signed = 0; + simd_dotp_op_a_signed = 0; + simd_dotp_op_b_signed = 0; + simd_dotp_acc = 1; + res_sel = Simd; + end + riscv_instr::PV_SDOTUSP_H: begin + simd_op = SimdDotp; + simd_dotp_op_a_signed = 0; + simd_dotp_acc = 1; + res_sel = Simd; + end + riscv_instr::PV_SDOTUSP_SC_H: begin + simd_op = SimdDotp; + simd_mode = Sc; + simd_dotp_op_a_signed = 0; + simd_dotp_acc = 1; + res_sel = Simd; 
+ end + riscv_instr::PV_SDOTUSP_SCI_H: begin + simd_op = SimdDotp; + simd_mode = Sci; + simd_dotp_op_a_signed = 0; + simd_dotp_acc = 1; + res_sel = Simd; + end + riscv_instr::PV_SDOTUSP_B: begin + simd_op = SimdDotp; + simd_size = Byte; + simd_dotp_op_a_signed = 0; + simd_dotp_acc = 1; + res_sel = Simd; + end + riscv_instr::PV_SDOTUSP_SC_B: begin + simd_op = SimdDotp; + simd_size = Byte; + simd_mode = Sc; + simd_dotp_op_a_signed = 0; + simd_dotp_acc = 1; + res_sel = Simd; + end + riscv_instr::PV_SDOTUSP_SCI_B: begin + simd_op = SimdDotp; + simd_size = Byte; + simd_mode = Sci; + simd_dotp_op_a_signed = 0; + simd_dotp_acc = 1; + res_sel = Simd; + end + riscv_instr::PV_SDOTSP_H: begin + simd_op = SimdDotp; + simd_dotp_acc = 1; + res_sel = Simd; + end + riscv_instr::PV_SDOTSP_SC_H: begin + simd_op = SimdDotp; + simd_mode = Sc; + simd_dotp_acc = 1; + res_sel = Simd; + end + riscv_instr::PV_SDOTSP_SCI_H: begin + simd_op = SimdDotp; + simd_mode = Sci; + simd_dotp_acc = 1; + res_sel = Simd; + end + riscv_instr::PV_SDOTSP_B: begin + simd_op = SimdDotp; + simd_size = Byte; + simd_dotp_acc = 1; + res_sel = Simd; + end + riscv_instr::PV_SDOTSP_SC_B: begin + simd_op = SimdDotp; + simd_size = Byte; + simd_mode = Sc; + simd_dotp_acc = 1; + res_sel = Simd; + end + riscv_instr::PV_SDOTSP_SCI_B: begin + simd_op = SimdDotp; + simd_size = Byte; + simd_mode = Sci; + simd_dotp_acc = 1; + res_sel = Simd; + end default: ; endcase end @@ -1065,92 +1323,96 @@ module dspu #( // SIMD operations // -------------------- - logic [1:0][15:0] simd_h_op_a, simd_h_op_b, simd_h_res; - logic [3:0][7:0] simd_b_op_a, simd_b_op_b, simd_b_res; - logic [15:0] simd_h_imm; - logic [7:0] simd_b_imm; - logic [Width-1:0] simd_rd; - logic [Width-1:0] simd_result; + logic [3:0][7:0] simd_op_a, simd_op_b; + logic [1:0][7:0] simd_imm; + logic [3:0][7:0] simd_result; // half-word and byte immediate extensions - assign simd_h_imm = simd_signed ? $signed(imm6) : $unsigned(imm6); - assign simd_b_imm = simd_signed ? 
$signed(imm6) : $unsigned(imm6); + always_comb + if(simd_signed) simd_imm = $signed(imm6); + else simd_imm = $unsigned(imm6); - // half-word granularity operands - assign simd_h_op_a[0] = op_a_i[15:0]; - assign simd_h_op_a[1] = op_a_i[31:16]; - assign simd_h_op_b[0] = (simd_mode == Vect) ? op_b_i[15:0] : ((simd_mode == Sc) ? op_b_i[15:0] : simd_h_imm); - assign simd_h_op_b[1] = (simd_mode == Vect) ? op_b_i[31:16] : ((simd_mode == Sc) ? op_b_i[15:0] : simd_h_imm); - - // byte granularity operands - assign simd_b_op_a[0] = op_a_i[7:0]; - assign simd_b_op_a[1] = op_a_i[15:8]; - assign simd_b_op_a[2] = op_a_i[23:16]; - assign simd_b_op_a[3] = op_a_i[31:24]; - assign simd_b_op_b[0] = (simd_mode == Vect) ? op_b_i[7:0] : ((simd_mode == Sc) ? op_b_i[7:0] : simd_b_imm); - assign simd_b_op_b[1] = (simd_mode == Vect) ? op_b_i[15:8] : ((simd_mode == Sc) ? op_b_i[7:0] : simd_b_imm); - assign simd_b_op_b[2] = (simd_mode == Vect) ? op_b_i[23:16] : ((simd_mode == Sc) ? op_b_i[7:0] : simd_b_imm); - assign simd_b_op_b[3] = (simd_mode == Vect) ? op_b_i[31:24] : ((simd_mode == Sc) ? op_b_i[7:0] : simd_b_imm); + // SIMD operands composition + always_comb begin + simd_op_a = 'b0; + simd_op_b = 'b0; + unique case (simd_size) + // half-word granularity + HalfWord: + for (int i = 0; i < Width/16; i++) begin + simd_op_a[2*i +: 2] = op_a_i[16*i +: 16]; // operands A are the half-words of op_a_i + // operands B are the half-words of op_b_i, replicated lowest half-word of op_b_i or replicated 6-bit immediate + simd_op_b[2*i +: 2] = (simd_mode == Vect) ? op_b_i[16*i +: 16] : ((simd_mode == Sc) ? op_b_i[15:0] : simd_imm); + end + // byte granularity + Byte: + for (int i = 0; i < Width/8; i++) begin + simd_op_a[i] = op_a_i[8*i +: 8]; // operands A are the bytes of op_a_i + // operands B are the bytes of op_b_i, replicated lowest byte of op_b_i or replicated 6-bit immediate + simd_op_b[i] = (simd_mode == Vect) ? op_b_i[8*i +: 8] : ((simd_mode == Sc) ? 
op_b_i[7:0] : simd_imm[0]); + end + default: ; + endcase + end + // SIMD unit always_comb begin - simd_h_res = 'b0; - simd_b_res = 'b0; + simd_result = 'b0; unique case (simd_size) // half-word granularity HalfWord: begin unique case (simd_op) - SimdAdd: begin - for (int i = 0; i < Width/16; i++) - simd_h_res[i] = simd_h_op_a[i] + simd_h_op_b[i]; - end - SimdSub: begin + SimdAdd: for (int i = 0; i < Width/16; i++) - simd_h_res[i] = simd_h_op_a[i] - simd_h_op_b[i]; - end - SimdAvg: begin + simd_result[2*i +: 2] = $signed(simd_op_a[2*i +: 2]) + $signed(simd_op_b[2*i +: 2]); + SimdSub: for (int i = 0; i < Width/16; i++) - simd_h_res[i] = simd_signed ? (simd_h_op_a[i] + simd_h_op_b[i]) >>> 1 : - (simd_h_op_a[i] + simd_h_op_b[i]) >> 1; - end - SimdMin: begin + simd_result[2*i +: 2] = $signed(simd_op_a[2*i +: 2]) - $signed(simd_op_b[2*i +: 2]); + SimdAvg: + for (int i = 0; i < Width/16; i++) begin + simd_result[2*i +: 2] = $signed(simd_op_a[2*i +: 2]) + $signed(simd_op_b[2*i +: 2]); + simd_result[2*i +: 2] = {simd_result[2*i+1][7] & simd_signed, simd_result[2*i +: 2]} >> 1; + end + SimdMin: for (int i = 0; i < Width/16; i++) - simd_h_res[i] = $signed({simd_h_op_a[i][15] & simd_signed, simd_h_op_a[i]}) <= - $signed({simd_h_op_a[i][15] & simd_signed, simd_h_op_b[i]}) ? - simd_h_op_a[i] : simd_h_op_b[i]; - end - SimdMax: begin + simd_result[2*i +: 2] = $signed({simd_op_a[2*i+1][7] & simd_signed, simd_op_a[2*i +: 2]}) <= + $signed({simd_op_b[2*i+1][7] & simd_signed, simd_op_b[2*i +: 2]}) ? + simd_op_a[2*i +: 2] : simd_op_b[2*i +: 2]; + SimdMax: for (int i = 0; i < Width/16; i++) - simd_h_res[i] = $signed({simd_h_op_a[i][15] & simd_signed, simd_h_op_a[i]}) > - $signed({simd_h_op_b[i][15] & simd_signed, simd_h_op_b[i]}) ? - simd_h_op_a[i] : simd_h_op_b[i]; - end - SimdSrl: begin + simd_result[2*i +: 2] = $signed({simd_op_a[2*i+1][7] & simd_signed, simd_op_a[2*i +: 2]}) > + $signed({simd_op_b[2*i+1][7] & simd_signed, simd_op_b[2*i +: 2]}) ? 
+ simd_op_a[2*i +: 2] : simd_op_b[2*i +: 2]; + SimdSrl: for (int i = 0; i < Width/16; i++) - simd_h_res[i] = simd_h_op_a[i] >> simd_h_op_b[i]; - end - SimdSra: begin + simd_result[2*i +: 2] = $unsigned(simd_op_a[2*i +: 2]) >> simd_op_b[2*i][3:0]; + SimdSra: for (int i = 0; i < Width/16; i++) - simd_h_res[i] = simd_h_op_a[i] >>> simd_h_op_b[i]; - end - SimdSll: begin + simd_result[2*i +: 2] = $signed(simd_op_a[2*i +: 2]) >>> simd_op_b[2*i][3:0]; + SimdSll: for (int i = 0; i < Width/16; i++) - simd_h_res[i] = simd_h_op_a[i] << simd_h_op_b[i]; - end - SimdOr: simd_h_res = simd_h_op_a | simd_h_op_b; - SimdXor: simd_h_res = simd_h_op_a ^ simd_h_op_b; - SimdAnd: simd_h_res = simd_h_op_a & simd_h_op_b; - SimdAbs: begin + simd_result[2*i +: 2] = $unsigned(simd_op_a[2*i +: 2]) << simd_op_b[2*i][3:0]; + SimdOr: simd_result = simd_op_a | simd_op_b; + SimdXor: simd_result = simd_op_a ^ simd_op_b; + SimdAnd: simd_result = simd_op_a & simd_op_b; + SimdAbs: for (int i = 0; i < Width/16; i++) - simd_h_res[i] = $signed(simd_h_op_a[i]) > 0 ? simd_h_op_a[i] : -$signed(simd_h_op_a[i]); - end + simd_result[2*i +: 2] = $signed(simd_op_a[2*i +: 2]) > 0 ? 
simd_op_a[2*i +: 2] : -$signed(simd_op_a[2*i +: 2]); SimdExt: begin - simd_h_res[0] = simd_h_op_a[imm6[0]]; - // sign extend if needed - simd_h_res[1] = {16{simd_h_op_a[imm6[0]][15] & simd_signed}}; + simd_result[1:0] = simd_op_a[2*imm6[0] +: 2]; + // sign- or zero-extend + simd_result[3:2] = {16{simd_op_a[2*imm6[0]+1][7] & simd_signed}}; end SimdIns: begin - simd_h_res = op_c_i; - simd_h_res[imm6[0]] = simd_h_op_a[0]; + simd_result = op_c_i; + simd_result[2*imm6[0] +: 2] = simd_op_a[1:0]; + end + SimdDotp: begin + simd_result = op_c_i & {(Width){simd_dotp_acc}}; // accumulate on rd or start from zero + for (int i = 0; i < Width/16; i++) begin + simd_result = $signed(simd_result) + $signed({simd_op_a[2*i+1][7] & simd_dotp_op_a_signed, simd_op_a[2*i +: 2]}) * + $signed({simd_op_b[2*i+1][7] & simd_dotp_op_b_signed, simd_op_b[2*i +: 2]}); + end end default: ; endcase @@ -1158,60 +1420,56 @@ module dspu #( // byte granularity Byte: begin unique case (simd_op) - SimdAdd: begin + SimdAdd: for (int i = 0; i < Width/8; i++) - simd_b_res[i] = simd_b_op_a[i] + simd_b_op_b[i]; - end - SimdSub: begin - for (int i = 0; i < Width/8; i++) - simd_b_res[i] = simd_b_op_a[i] - simd_b_op_b[i]; - end - SimdAvg: begin + simd_result[i] = $signed(simd_op_a[i]) + $signed(simd_op_b[i]); + SimdSub: for (int i = 0; i < Width/8; i++) - simd_b_res[i] = simd_signed ? (simd_b_op_a[i] + simd_b_op_b[i]) >>> 1 : - (simd_b_op_a[i] + simd_b_op_b[i]) >> 1; - end - SimdMin: begin + simd_result[i] = $signed(simd_op_a[i]) - $signed(simd_op_b[i]); + SimdAvg: + for (int i = 0; i < Width/8; i++) begin + simd_result[i] = $signed(simd_op_a[i]) + $signed(simd_op_b[i]); + simd_result[i] = {simd_result[i][7] & simd_signed, simd_result[i]} >> 1; + end + SimdMin: for (int i = 0; i < Width/8; i++) - simd_b_res[i] = $signed({simd_b_op_a[i][7] & simd_signed, simd_b_op_a[i]}) <= - $signed({simd_b_op_a[i][7] & simd_signed, simd_b_op_b[i]}) ? 
- simd_b_op_a[i] : simd_b_op_b[i]; - end - SimdMax: begin + simd_result[i] = $signed({simd_op_a[i][7] & simd_signed, simd_op_a[i]}) <= + $signed({simd_op_b[i][7] & simd_signed, simd_op_b[i]}) ? + simd_op_a[i] : simd_op_b[i]; + SimdMax: for (int i = 0; i < Width/8; i++) - simd_b_res[i] = $signed({simd_b_op_a[i][7] & simd_signed, simd_b_op_a[i]}) > - $signed({simd_b_op_b[i][7] & simd_signed, simd_b_op_b[i]}) ? - simd_b_op_a[i] : simd_b_op_b[i]; - end - SimdSrl: begin + simd_result[i] = $signed({simd_op_a[i][7] & simd_signed, simd_op_a[i]}) > + $signed({simd_op_b[i][7] & simd_signed, simd_op_b[i]}) ? + simd_op_a[i] : simd_op_b[i]; + SimdSrl: for (int i = 0; i < Width/8; i++) - simd_b_res[i] = simd_b_op_a[i] >> simd_b_op_b[i]; - end - SimdSra: begin + simd_result[i] = $unsigned(simd_op_a[i]) >> simd_op_b[i][2:0]; + SimdSra: for (int i = 0; i < Width/8; i++) - simd_b_res[i] = simd_b_op_a[i] >>> simd_b_op_b[i]; - end - SimdSll: begin + simd_result[i] = $signed(simd_op_a[i]) >>> simd_op_b[i][2:0]; + SimdSll: for (int i = 0; i < Width/8; i++) - simd_b_res[i] = simd_b_op_a[i] << simd_b_op_b[i]; - end - SimdOr: simd_b_res = simd_b_op_a | simd_b_op_b; - SimdXor: simd_b_res = simd_b_op_a ^ simd_b_op_b; - SimdAnd: simd_b_res = simd_b_op_a & simd_b_op_b; - SimdAbs: begin + simd_result[i] = $unsigned(simd_op_a[i]) << simd_op_b[i][2:0]; + SimdOr: simd_result = simd_op_a | simd_op_b; + SimdXor: simd_result = simd_op_a ^ simd_op_b; + SimdAnd: simd_result = simd_op_a & simd_op_b; + SimdAbs: for (int i = 0; i < Width/8; i++) - simd_b_res[i] = $signed(simd_b_op_a[i]) > 0 ? simd_b_op_a[i] : -$signed(simd_b_op_a[i]); - end + simd_result[i] = $signed(simd_op_a[i]) > 0 ? 
simd_op_a[i] : -$signed(simd_op_a[i]); SimdExt: begin - simd_b_res[0] = simd_b_op_a[imm6[0]]; - // sign extend if needed - simd_b_res[1] = {8{simd_b_op_a[imm6[0]][7] & simd_signed}}; - simd_b_res[2] = {8{simd_b_op_a[imm6[0]][7] & simd_signed}}; - simd_b_res[3] = {8{simd_b_op_a[imm6[0]][7] & simd_signed}}; + simd_result[0] = simd_op_a[imm6[1:0]]; + // sign- or zero-extend + simd_result[3:1] = {24{simd_op_a[imm6[1:0]][7] & simd_signed}}; end SimdIns: begin - simd_b_res = op_c_i; - simd_b_res[imm6[0]] = simd_b_op_a[0]; + simd_result = op_c_i; + simd_result[imm6[1:0]] = simd_op_a[0]; + end + SimdDotp: begin + simd_result = op_c_i & {(Width){simd_dotp_acc}}; // accumulate on rd or start from zero + for (int i = 0; i < Width/8; i++) + simd_result = $signed(simd_result) + $signed({simd_op_a[i][7] & simd_dotp_op_a_signed, simd_op_a[i]}) * + $signed({simd_op_b[i][7] & simd_dotp_op_b_signed, simd_op_b[i]}); end default: ; endcase From c910a80c2a3742d7876173cd2851a22714176362 Mon Sep 17 00:00:00 2001 From: Sergio Mazzola Date: Sat, 23 Jan 2021 11:32:11 +0100 Subject: [PATCH 29/65] [snitch] :bug: Do not write back L/S post-incremented address if stall This commit fixes a bug in the implementation of the Xpulpimg post-increment load/store instructions: such instructions write back on rs1 the updated address; if the core stalled (for operands not ready or LSU not ready for a new request) in the cycle during which a post-increment instruction was issued, the write-back mechanism updated anyway the new rs1 in the register file, modifying the address on the LSU interface on next cycle, while the request of the memory access was still pending. The fix makes the rs1 write-back stall together with the core when a post-increment instruction is fetched; rs1 is actually written back with the post-incremented address only in the cycle when the LSU actually accepts the new request. 
--- hardware/deps/snitch/src/snitch.sv | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/hardware/deps/snitch/src/snitch.sv b/hardware/deps/snitch/src/snitch.sv index 27eef254a..3b6a5cf27 100644 --- a/hardware/deps/snitch/src/snitch.sv +++ b/hardware/deps/snitch/src/snitch.sv @@ -155,7 +155,6 @@ module snitch #( logic retire_load; // retire a load instruction logic retire_i; // retire the rest of the base instruction set - logic retire_i_rd, retire_i_rs1; // when retire_i = 1, write-back can be on rd or on rs1 logic retire_acc; // retire an instruction we offloaded logic acc_stall; @@ -184,7 +183,7 @@ module snitch #( logic write_rd; // write rd desitnation this cycle logic uses_rd; logic write_rs1; // write rs1 destination this cycle - logic uses_rs1; // useless for now, rs1 always written this cycle + logic uses_rs1; enum logic [1:0] {Consec, Alu, Exception} next_pc; enum logic [1:0] {RdAlu, RdConsecPC, RdBypass} rd_select; @@ -1536,11 +1535,12 @@ module snitch #( assign lsu_qvalid = valid_instr & (is_load | is_store) & ~(ld_addr_misaligned | st_addr_misaligned); + // NOTE(smazzola): write-backs "on rd from non-load or non-acc instructions" and "on rs1 from + // post-increment instructions" in the same cycle should be mutually exclusive + // retire post-incremented address on rs1 if valid postincr instruction and LSU not stalling + assign retire_p = write_rs1 & ~stall & (rs1 != 0); // we can retire if we are not stalling and if the instruction is writing a register - assign retire_i_rd = write_rd & valid_instr & (rd != 0); - assign retire_i_rs1 = write_rs1 & valid_instr & (rs1 != 0); - // NOTE(smazzola): write-backs on rd and rs1 in the same cycle should be mutually exclusive - assign retire_i = retire_i_rd | retire_i_rs1; + assign retire_i = write_rd & valid_instr & (rd != 0); // ----------------------- // Unaligned Address Check @@ -1588,7 +1588,7 @@ module snitch #( gpr_we[0] = 1'b0; // NOTE(smazzola): this works because 
write-backs on rd and rs1 in the same cycle are mutually // exclusive; if this should change, the following statement has to be written in another form - gpr_waddr[0] = write_rs1 ? rs1 : rd; // choose whether to writeback at RF[rs1] for post-increment load/stores + gpr_waddr[0] = retire_p ? rs1 : rd; // choose whether to writeback at RF[rs1] for post-increment load/stores gpr_wdata[0] = alu_writeback; // external interfaces lsu_pready = 1'b0; @@ -1596,7 +1596,7 @@ module snitch #( retire_acc = 1'b0; retire_load = 1'b0; - if (retire_i) begin + if (retire_i | retire_p) begin gpr_we[0] = 1'b1; // if we are not retiring another instruction retire the load now end else if (lsu_pvalid) begin @@ -1618,7 +1618,7 @@ module snitch #( gpr_we[0] = 1'b0; // NOTE(smazzola): this works because write-backs on rd and rs1 in the same cycle are mutually // exclusive; if this should change, the following statement has to be written in another form - gpr_waddr[0] = write_rs1 ? rs1 : rd; // choose whether to writeback at RF[rs1] for post-increment load/stores + gpr_waddr[0] = retire_p ? 
rs1 : rd; // choose whether to writeback at RF[rs1] for post-increment load/stores gpr_wdata[0] = alu_writeback; gpr_we[1] = 1'b0; gpr_waddr[1] = lsu_rd; @@ -1629,7 +1629,7 @@ module snitch #( retire_acc = 1'b0; retire_load = 1'b0; - if (retire_i) begin + if (retire_i | retire_p) begin gpr_we[0] = 1'b1; if (lsu_pvalid) begin retire_load = 1'b1; From a013a13fa2091ea61dabd9acb554960cac457b84 Mon Sep 17 00:00:00 2001 From: Sergio Mazzola Date: Sat, 23 Jan 2021 15:54:46 +0100 Subject: [PATCH 30/65] [riscv-gnu-toolchain] Extend Xpulpimg with Xpulpv2 arithmetical SIMD instructions Added instructions: pv.{add, sub, avg, avgu, min, minu, max, maxu, srl, sra, sll, or, xor, and, dotsp, dotup, dotusp, sdotsp, sdotup, sdotusp}.[sc, sci].{h, b}, pv.{abs, extract, extractu, insert}.{h, b} --- toolchain/riscv-gnu-toolchain | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/toolchain/riscv-gnu-toolchain b/toolchain/riscv-gnu-toolchain index 24f16160f..37d6e53d9 160000 --- a/toolchain/riscv-gnu-toolchain +++ b/toolchain/riscv-gnu-toolchain @@ -1 +1 @@ -Subproject commit 24f16160f9da4c04ddda37003b026ad0e98a8623 +Subproject commit 37d6e53d91cc8ccd038474e5904d59a0aa496285 From bbdfef813f3dee399afb3cef07c24b1649f2613c Mon Sep 17 00:00:00 2001 From: Sergio Mazzola Date: Tue, 26 Jan 2021 15:44:01 +0100 Subject: [PATCH 31/65] [apps] Align loops to 16-byte L0 prefetcher boundary --- apps/common/runtime.mk | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/apps/common/runtime.mk b/apps/common/runtime.mk index a7639e288..a6d6767d2 100644 --- a/apps/common/runtime.mk +++ b/apps/common/runtime.mk @@ -75,7 +75,8 @@ RISCV_LLVM_TARGET ?= --target=$(RISCV_TARGET) --sysroot=$(GCC_INSTALL_DIR)/$(RI RISCV_WARNINGS += -Wunused-variable -Wconversion -Wall -Wextra # -Werror RISCV_FLAGS_COMMON_TESTS ?= -march=$(RISCV_ARCH) -mabi=$(RISCV_ABI) -I$(CURDIR)/common -static RISCV_FLAGS_COMMON ?= $(RISCV_FLAGS_COMMON_TESTS) -std=gnu99 -O3 -ffast-math -fno-common 
-fno-builtin-printf $(DEFINES) $(RISCV_WARNINGS) -RISCV_FLAGS_GCC ?= -mcmodel=medany -Wa,-march=$(RISCV_ARCH_AS) +RISCV_FLAGS_GCC ?= -mcmodel=medany -Wa,-march=$(RISCV_ARCH_AS) -falign-loops=16 -falign-jumps=16 +#RISCV_FLAGS_GCC ?= -mcmodel=medany -Wa,-march=$(RISCV_ARCH_AS) -falign-loops=16 -falign-jumps=16 -funroll-loops RISCV_FLAGS_LLVM ?= -mcmodel=small -mllvm -enable-misched ifeq ($(COMPILER),gcc) From 90d1f2aad9cccf86ab7db7c8f22934f69403ab45 Mon Sep 17 00:00:00 2001 From: Sergio Mazzola Date: Wed, 10 Feb 2021 18:56:36 +0100 Subject: [PATCH 32/65] [riscv-opcodes] Add Xpulpv2 pv.shuffle2.{h,b} instruction --- toolchain/riscv-opcodes/encoding_out.h | 6 ++++++ toolchain/riscv-opcodes/inst.sverilog | 2 ++ toolchain/riscv-opcodes/opcodes-rvv | 2 +- toolchain/riscv-opcodes/opcodes-xpulpimg_CUSTOM | 3 +++ 4 files changed, 12 insertions(+), 1 deletion(-) diff --git a/toolchain/riscv-opcodes/encoding_out.h b/toolchain/riscv-opcodes/encoding_out.h index 5de9e96d9..f78afbd15 100644 --- a/toolchain/riscv-opcodes/encoding_out.h +++ b/toolchain/riscv-opcodes/encoding_out.h @@ -2162,6 +2162,10 @@ #define MASK_PV_SDOTSP_SC_B 0xfe00707f #define MATCH_PV_SDOTSP_SCI_B 0xb8007057 #define MASK_PV_SDOTSP_SCI_B 0xfc00707f +#define MATCH_PV_SHUFFLE2_H 0xc8000057 +#define MASK_PV_SHUFFLE2_H 0xfe00707f +#define MATCH_PV_SHUFFLE2_B 0xc8001057 +#define MASK_PV_SHUFFLE2_B 0xfe00707f #define MATCH_FLAH 0x1007 #define MASK_FLAH 0x707f #define MATCH_FSAH 0x1027 @@ -4048,6 +4052,8 @@ DECLARE_INSN(pv_sdotsp_sci_h, MATCH_PV_SDOTSP_SCI_H, MASK_PV_SDOTSP_SCI_H) DECLARE_INSN(pv_sdotsp_b, MATCH_PV_SDOTSP_B, MASK_PV_SDOTSP_B) DECLARE_INSN(pv_sdotsp_sc_b, MATCH_PV_SDOTSP_SC_B, MASK_PV_SDOTSP_SC_B) DECLARE_INSN(pv_sdotsp_sci_b, MATCH_PV_SDOTSP_SCI_B, MASK_PV_SDOTSP_SCI_B) +DECLARE_INSN(pv_shuffle2_h, MATCH_PV_SHUFFLE2_H, MASK_PV_SHUFFLE2_H) +DECLARE_INSN(pv_shuffle2_b, MATCH_PV_SHUFFLE2_B, MASK_PV_SHUFFLE2_B) DECLARE_INSN(flah, MATCH_FLAH, MASK_FLAH) DECLARE_INSN(fsah, MATCH_FSAH, MASK_FSAH) 
DECLARE_INSN(fmadd_ah, MATCH_FMADD_AH, MASK_FMADD_AH) diff --git a/toolchain/riscv-opcodes/inst.sverilog b/toolchain/riscv-opcodes/inst.sverilog index 9f2bb6612..f50df39ca 100644 --- a/toolchain/riscv-opcodes/inst.sverilog +++ b/toolchain/riscv-opcodes/inst.sverilog @@ -942,6 +942,8 @@ package riscv_instr; localparam [31:0] PV_SDOTSP_B = 32'b1011100??????????001?????1010111; localparam [31:0] PV_SDOTSP_SC_B = 32'b1011100??????????101?????1010111; localparam [31:0] PV_SDOTSP_SCI_B = 32'b101110???????????111?????1010111; + localparam [31:0] PV_SHUFFLE2_H = 32'b1100100??????????000?????1010111; + localparam [31:0] PV_SHUFFLE2_B = 32'b1100100??????????001?????1010111; localparam [31:0] FLAH = 32'b?????????????????001?????0000111; localparam [31:0] FSAH = 32'b?????????????????001?????0100111; localparam [31:0] FMADD_AH = 32'b?????10??????????101?????1000011; diff --git a/toolchain/riscv-opcodes/opcodes-rvv b/toolchain/riscv-opcodes/opcodes-rvv index d961215bd..f2e6ba6bc 100644 --- a/toolchain/riscv-opcodes/opcodes-rvv +++ b/toolchain/riscv-opcodes/opcodes-rvv @@ -222,7 +222,7 @@ vfclass.v 31..26=0x13 vm vs2 19..15=0x10 14..12=0x1 vd 6..0=0x57 vfwadd.vv 31..26=0x30 vm vs2 vs1 14..12=0x1 vd 6..0=0x57 vfwredsum.vs 31..26=0x31 vm vs2 vs1 14..12=0x1 vd 6..0=0x57 -vfwsub.vv 31..26=0x32 vm vs2 vs1 14..12=0x1 vd 6..0=0x57 +@vfwsub.vv 31..26=0x32 vm vs2 vs1 14..12=0x1 vd 6..0=0x57 vfwredosum.vs 31..26=0x33 vm vs2 vs1 14..12=0x1 vd 6..0=0x57 vfwadd.wv 31..26=0x34 vm vs2 vs1 14..12=0x1 vd 6..0=0x57 vfwsub.wv 31..26=0x36 vm vs2 vs1 14..12=0x1 vd 6..0=0x57 diff --git a/toolchain/riscv-opcodes/opcodes-xpulpimg_CUSTOM b/toolchain/riscv-opcodes/opcodes-xpulpimg_CUSTOM index 17a8b6bf7..1e4bc4956 100644 --- a/toolchain/riscv-opcodes/opcodes-xpulpimg_CUSTOM +++ b/toolchain/riscv-opcodes/opcodes-xpulpimg_CUSTOM @@ -204,3 +204,6 @@ pv.sdotsp.sci.h rd rs1 imm6 31..27=23 26=0 14..12=6 6..2=0x15 1..0=3 pv.sdotsp.b rd rs1 rs2 31..27=23 26=0 25=0 14..12=1 6..2=0x15 1..0=3 pv.sdotsp.sc.b rd rs1 
rs2 31..27=23 26=0 25=0 14..12=5 6..2=0x15 1..0=3 pv.sdotsp.sci.b rd rs1 imm6 31..27=23 26=0 14..12=7 6..2=0x15 1..0=3 + +pv.shuffle2.h rd rs1 rs2 31..27=25 26=0 25=0 14..12=0 6..2=0x15 1..0=3 +pv.shuffle2.b rd rs1 rs2 31..27=25 26=0 25=0 14..12=1 6..2=0x15 1..0=3 From f0688cf14a69b8020859ea2af22e82be545d4716 Mon Sep 17 00:00:00 2001 From: Sergio Mazzola Date: Thu, 11 Feb 2021 10:41:13 +0100 Subject: [PATCH 33/65] [riscv-isa-sim] Add Xpulpv2 pv.shuffle2.{h,b} instruction --- toolchain/riscv-isa-sim/disasm/disasm.cc | 3 +++ toolchain/riscv-isa-sim/riscv/decode.h | 2 ++ .../riscv-isa-sim/riscv/insns/pv_shuffle2_b.h | 14 ++++++++++++++ .../riscv-isa-sim/riscv/insns/pv_shuffle2_h.h | 14 ++++++++++++++ toolchain/riscv-isa-sim/riscv/riscv.mk.in | 2 ++ 5 files changed, 35 insertions(+) create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_shuffle2_b.h create mode 100644 toolchain/riscv-isa-sim/riscv/insns/pv_shuffle2_h.h diff --git a/toolchain/riscv-isa-sim/disasm/disasm.cc b/toolchain/riscv-isa-sim/disasm/disasm.cc index 0e4e41806..3a73ddfe3 100644 --- a/toolchain/riscv-isa-sim/disasm/disasm.cc +++ b/toolchain/riscv-isa-sim/disasm/disasm.cc @@ -1508,6 +1508,9 @@ disassembler_t::disassembler_t(int xlen) DEFINE_RTYPE(pv_sdotsp_sc_b); DEFINE_PI1ZTYPE(pv_sdotsp_sci_b); + DEFINE_RTYPE(pv_shuffle2_h); + DEFINE_RTYPE(pv_shuffle2_b); + // provide a default disassembly for all instructions as a fallback #define DECLARE_INSN(code, match, mask) \ add_insn(new disasm_insn_t(#code " (args unknown)", match, mask, {})); diff --git a/toolchain/riscv-isa-sim/riscv/decode.h b/toolchain/riscv-isa-sim/riscv/decode.h index 9f0739995..d6d270af8 100644 --- a/toolchain/riscv-isa-sim/riscv/decode.h +++ b/toolchain/riscv-isa-sim/riscv/decode.h @@ -295,6 +295,8 @@ class regfile_t #define RS1_B(i) ((RS1 >> ((xlen >> 2) * (i & 0x3))) & 0xFF) /* select rs1 byte: i should only be from 0 to 3 */ #define RS2_H(i) ((RS2 >> ((xlen >> 1) * (i & 0x1))) & 0xFFFF) /* select rs2 half: i should only be 0 or 
1 */ #define RS2_B(i) ((RS2 >> ((xlen >> 2) * (i & 0x3))) & 0xFF) /* select rs2 byte: i should only be from 0 to 3 */ +#define RD_H(i) ((RD >> ((xlen >> 1) * (i & 0x1))) & 0xFFFF) /* select rd half: i should only be 0 or 1 */ +#define RD_B(i) ((RD >> ((xlen >> 2) * (i & 0x3))) & 0xFF) /* select rd byte: i should only be from 0 to 3 */ #define sext32(x) ((sreg_t)(int32_t)(x)) diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_shuffle2_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_shuffle2_b.h new file mode 100644 index 000000000..8dd4e9994 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_shuffle2_b.h @@ -0,0 +1,14 @@ +uint8_t src_sel; // select rd or rs1 as source (bit [2] of second operand) +uint8_t byte_sel; // select which byte from source (bits [1:0] of second operand) +uint8_t source; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + byte_sel = RS2_B(i) & 0x03; // bits [1:0] of RS2_B(i) + src_sel = (RS2_B(i) >> 2) & 0x01; // bit [2] of RS2_B(i) + source = src_sel ? RS1_B(byte_sel) : RD_B(byte_sel); + simd_rd <<= 8; + simd_rd += (uint32_t)source & 0x000000FF; +} + +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_shuffle2_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_shuffle2_h.h new file mode 100644 index 000000000..362a4bdc7 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_shuffle2_h.h @@ -0,0 +1,14 @@ +uint8_t src_sel; // select rd or rs1 as source (bit [1] of second operand) +uint8_t half_sel; // select which half from source (bit [0] of second operand) +uint16_t source; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + half_sel = RS2_H(i) & 0x01; // bit [0] of RS2_H(i) + src_sel = (RS2_H(i) >> 1) & 0x01; // bit [1] of RS2_H(i) + source = src_sel ? 
RS1_H(half_sel) : RD_H(half_sel); + simd_rd <<= 16; + simd_rd += (uint32_t)source & 0x0000FFFF; +} + +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/riscv.mk.in b/toolchain/riscv-isa-sim/riscv/riscv.mk.in index ec3468723..20c11ab2a 100644 --- a/toolchain/riscv-isa-sim/riscv/riscv.mk.in +++ b/toolchain/riscv-isa-sim/riscv/riscv.mk.in @@ -930,6 +930,8 @@ riscv_insn_ext_xpulpimg = \ pv_sdotsp_b \ pv_sdotsp_sc_b \ pv_sdotsp_sci_b \ + pv_shuffle2_h \ + pv_shuffle2_b \ riscv_insn_ext_h = \ hfence_gvma \ From 5066aa4e87a2331ca626265152236f3368ef6aa3 Mon Sep 17 00:00:00 2001 From: Sergio Mazzola Date: Thu, 11 Feb 2021 11:15:30 +0100 Subject: [PATCH 34/65] [riscv-tests] Add unit tests for Xpulpv2 pv.shuffle2.{h,b} instruction --- apps/riscv-tests/isa/rv32uxpulpimg/Makefrag | 1 + .../isa/rv32uxpulpimg/pv_shuffle2.S | 70 +++++++++++++++++++ apps/riscv-tests/isa/snitch_isa.mk | 1 + 3 files changed, 72 insertions(+) create mode 100644 apps/riscv-tests/isa/rv32uxpulpimg/pv_shuffle2.S diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/Makefrag b/apps/riscv-tests/isa/rv32uxpulpimg/Makefrag index 0e97997c2..8bf2c6741 100644 --- a/apps/riscv-tests/isa/rv32uxpulpimg/Makefrag +++ b/apps/riscv-tests/isa/rv32uxpulpimg/Makefrag @@ -39,6 +39,7 @@ rv32uxpulpimg_sc_tests = \ pv_sdotup \ pv_sdotusp \ pv_sdotsp \ + pv_shuffle2 \ rv32uxpulpimg_p_tests = $(addprefix rv32uxpulpimg-p-, $(rv32uxpulpimg_sc_tests)) rv32uxpulpimg_v_tests = $(addprefix rv32uxpulpimg-v-, $(rv32uxpulpimg_sc_tests)) diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/pv_shuffle2.S b/apps/riscv-tests/isa/rv32uxpulpimg/pv_shuffle2.S new file mode 100644 index 000000000..fd3f2bf09 --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/pv_shuffle2.S @@ -0,0 +1,70 @@ +# See LICENSE for license details. 
+ +#***************************************************************************** +# pv_shuffle2.S +#----------------------------------------------------------------------------- +# +# Test pv.shuffle2 instructions. +# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Arithmetic tests + #------------------------------------------------------------- + + # pv.shuffle2.h + TEST_RRR_OP( 2, pv.shuffle2.h, 0xABD0A85B, 0xABD03F6E, 0x1D4B26D0, 0xF0C3A85B ); + TEST_RRR_OP( 3, pv.shuffle2.h, 0x93A60706, 0x511B0706, 0xEB397322, 0x93A613C6 ); + TEST_RRR_OP( 4, pv.shuffle2.h, 0x2BCE15F2, 0x9D2D15F2, 0x5C71278E, 0x2BCEDA18 ); + TEST_RRR_OP( 5, pv.shuffle2.h, 0x2C48AA34, 0x2C48AA34, 0x4887D28E, 0x55247E80 ); + TEST_RRR_OP( 6, pv.shuffle2.h, 0xE999ADE8, 0xADE8E999, 0xD26AD68F, 0x23A14961 ); + TEST_RRR_OP( 7, pv.shuffle2.h, 0x0059517C, 0x6BF30059, 0xEFB6AF79, 0x517C1495 ); + TEST_RRR_OP( 8, pv.shuffle2.h, 0xB7FEA035, 0xB7FED864, 0x5BBB1058, 0x4583A035 ); + TEST_RRR_OP( 9, pv.shuffle2.h, 0xEA55FDC2, 0xFDC2EA55, 0x7292CF23, 0x4F82A53E ); + TEST_RRR_OP( 10, pv.shuffle2.h, 0xBE7232CB, 0x32CBBE72, 0x6DB6060F, 0x22C33B63 ); + TEST_RRR_OP( 11, pv.shuffle2.h, 0x4389A2A3, 0xCB19A2A3, 0x00BCDD22, 0xFB744389 ); + # pv.shuffle2.b + TEST_RRR_OP( 12, pv.shuffle2.b, 0xDAD9ECA3, 0x35A309D9, 0x8AE410B6, 0x22DA0BEC ); + TEST_RRR_OP( 13, pv.shuffle2.b, 0x0EF485F4, 0xCA850EB8, 0x256B969B, 0xF438D1D7 ); + TEST_RRR_OP( 14, pv.shuffle2.b, 0x1414E4C0, 0xE433C0A1, 0xC8381F65, 0xAC7DBC14 ); + TEST_RRR_OP( 15, pv.shuffle2.b, 0x81676762, 0x36DE6217, 0xC98AEA7D, 0x9D6781F4 ); + TEST_RRR_OP( 16, pv.shuffle2.b, 0xD80DD8B4, 0xD8CE132C, 0x67D8BF89, 0x166FB40D ); + TEST_RRR_OP( 17, pv.shuffle2.b, 0x7B9E0404, 0x657BF4D6, 0x06DB0232, 0x9E049D7E ); + TEST_RRR_OP( 18, pv.shuffle2.b, 0xD15526EE, 0x617EEED1, 0xE4D33275, 0x55264DEE ); + TEST_RRR_OP( 19, pv.shuffle2.b, 0x73AB4CAB, 0x43AB21CB, 0x4B2EC0BE, 0x7306984C 
); + TEST_RRR_OP( 20, pv.shuffle2.b, 0x5235C41D, 0x052B5263, 0x85BB52D0, 0x35C4A31D ); + TEST_RRR_OP( 21, pv.shuffle2.b, 0xF1E0F194, 0xFFABF194, 0x35CBE594, 0xE0A7A1D1 ); + + #------------------------------------------------------------- + # Source/Destination tests + #------------------------------------------------------------- + + # TODO(smazzola): + # for reg-reg-reg instructions TEST_RRR_SRC1_EQ_DEST, + # TEST_RRR_SRC2_EQ_DEST, TEST_RRR_SRC12_EQ_DEST + + #------------------------------------------------------------- + # Bypassing tests + #------------------------------------------------------------- + + # TODO(smazzola): + # for reg-reg-reg instructions TEST_RRR_DEST_BYPASS, + # TEST_RRR_SRC12_BYPASS, TEST_RRR_SRC21_BYPASS, TEST_RRR_SRC3_BYPASS, + # TEST_RRR_ZEROSRC1, TEST_RRR_ZEROSRC2, TEST_RRR_ZEROSRC3, + # TEST_RRR_ZEROSRC12, TEST_RRR_ZEROSRC123, TEST_RRR_ZERODEST + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/snitch_isa.mk b/apps/riscv-tests/isa/snitch_isa.mk index 0cf3c91ed..c914a0a8b 100644 --- a/apps/riscv-tests/isa/snitch_isa.mk +++ b/apps/riscv-tests/isa/snitch_isa.mk @@ -69,6 +69,7 @@ ifeq ($(xpulpimg),1) pv_sdotup \ pv_sdotusp \ pv_sdotsp \ + pv_shuffle2 \ endif From f73b915f4a0635a153cc05ed04c139bf0852bd27 Mon Sep 17 00:00:00 2001 From: Sergio Mazzola Date: Thu, 11 Feb 2021 11:27:58 +0100 Subject: [PATCH 35/65] [riscv-gnu-toolchain] Add Xpulpv2 pv.shuffle2.{h,b} to Xpulpimg --- toolchain/riscv-gnu-toolchain | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/toolchain/riscv-gnu-toolchain b/toolchain/riscv-gnu-toolchain index 37d6e53d9..0c46580ac 160000 --- a/toolchain/riscv-gnu-toolchain +++ b/toolchain/riscv-gnu-toolchain @@ -1 +1 @@ -Subproject commit 37d6e53d91cc8ccd038474e5904d59a0aa496285 +Subproject commit 0c46580ac5e0cb6eca97e469d61751dda3bdcabb From 4ae2c24cf9ba55edf7600c565897e7e86a9d1ecb Mon Sep 17 00:00:00 2001 From: Sergio Mazzola Date: 
Thu, 11 Feb 2021 14:04:39 +0100 Subject: [PATCH 36/65] [snitch] Implement Xpulpv2 pv.shuffle2.{h,b} --- hardware/deps/snitch/src/snitch.sv | 9 ++++++-- hardware/deps/snitch/src/snitch_ipu.sv | 31 +++++++++++++++++++++----- 2 files changed, 33 insertions(+), 7 deletions(-) diff --git a/hardware/deps/snitch/src/snitch.sv b/hardware/deps/snitch/src/snitch.sv index 3b6a5cf27..07a56c6a6 100644 --- a/hardware/deps/snitch/src/snitch.sv +++ b/hardware/deps/snitch/src/snitch.sv @@ -1278,7 +1278,9 @@ module snitch #( riscv_instr::PV_SDOTSP_H, // Xpulpimg: pv.sdotsp.h riscv_instr::PV_SDOTSP_SC_H, // Xpulpimg: pv.sdotsp.sc.h riscv_instr::PV_SDOTSP_B, // Xpulpimg: pv.sdotsp.b - riscv_instr::PV_SDOTSP_SC_B: begin // Xpulpimg: pv.sdotsp.sc.b + riscv_instr::PV_SDOTSP_SC_B, // Xpulpimg: pv.sdotsp.sc.b + riscv_instr::PV_SHUFFLE2_H, // Xpulpimg: pv.shuffle2.h + riscv_instr::PV_SHUFFLE2_B: begin // Xpulpimg: pv.shuffle2.b if (snitch_pkg::XPULPIMG) begin write_rd = 1'b0; uses_rd = 1'b1; @@ -1536,7 +1538,10 @@ module snitch #( assign lsu_qvalid = valid_instr & (is_load | is_store) & ~(ld_addr_misaligned | st_addr_misaligned); // NOTE(smazzola): write-backs "on rd from non-load or non-acc instructions" and "on rs1 from - // post-increment instructions" in the same cycle should be mutually exclusive + // post-increment instructions" in the same cycle should be mutually exclusive (currently valid + // assumption since write-back to rs1 happens on the cycle in which the post-increment load/store + // is issued, if that cycle is not a stall, and it is not postponed like offloaded instructions, + // so no other instructions writing back on rd can be issued in the same cycle) // retire post-incremented address on rs1 if valid postincr instruction and LSU not stalling assign retire_p = write_rs1 & ~stall & (rs1 != 0); // we can retire if we are not stalling and if the instruction is writing a register diff --git a/hardware/deps/snitch/src/snitch_ipu.sv b/hardware/deps/snitch/src/snitch_ipu.sv 
index ea52da070..8ea406219 100644 --- a/hardware/deps/snitch/src/snitch_ipu.sv +++ b/hardware/deps/snitch/src/snitch_ipu.sv @@ -210,7 +210,9 @@ module snitch_ipu #( riscv_instr::PV_SDOTSP_SCI_H, // Xpulpimg: pv.sdotsp.sci.h riscv_instr::PV_SDOTSP_B, // Xpulpimg: pv.sdotsp.b riscv_instr::PV_SDOTSP_SC_B, // Xpulpimg: pv.sdotsp.sc.b - riscv_instr::PV_SDOTSP_SCI_B: begin // Xpulpimg: pv.sdotsp.sci.b + riscv_instr::PV_SDOTSP_SCI_B, // Xpulpimg: pv.sdotsp.sci.b + riscv_instr::PV_SHUFFLE2_H, // Xpulpimg: pv.shuffle2.h + riscv_instr::PV_SHUFFLE2_B: begin // Xpulpimg: pv.shuffle2.b if (snitch_pkg::XPULPIMG) begin dsp_valid_op = acc_qvalid_i; acc_qready_o = dsp_ready_op; @@ -362,9 +364,10 @@ module dspu #( Nop, Abs, Sle, Min, Max, Exths, Exthz, Extbs, Extbz, Clip, Mac, Simd } res_sel; // result selection - enum logic [0:3] { - SimdNop, SimdAdd, SimdSub, SimdAvg, SimdMin, SimdMax, SimdSrl, SimdSra, SimdSll, SimdOr, SimdXor, SimdAnd, SimdAbs, SimdExt, SimdIns, SimdDotp - } simd_op; + enum logic [0:4] { + SimdNop, SimdAdd, SimdSub, SimdAvg, SimdMin, SimdMax, SimdSrl, SimdSra, SimdSll, SimdOr, + SimdXor, SimdAnd, SimdAbs, SimdExt, SimdIns, SimdDotp, SimdShuffle + } simd_op; // SIMD operation enum logic { HalfWord, Byte } simd_size; // SIMD granularity @@ -1243,6 +1246,15 @@ module dspu #( simd_dotp_acc = 1; res_sel = Simd; end + riscv_instr::PV_SHUFFLE2_H: begin + simd_op = SimdShuffle; + res_sel = Simd; + end + riscv_instr::PV_SHUFFLE2_B: begin + simd_op = SimdShuffle; + simd_size = Byte; + res_sel = Simd; + end default: ; endcase end @@ -1323,7 +1335,7 @@ module dspu #( // SIMD operations // -------------------- - logic [3:0][7:0] simd_op_a, simd_op_b; + logic [3:0][7:0] simd_op_a, simd_op_b, simd_op_c; logic [1:0][7:0] simd_imm; logic [3:0][7:0] simd_result; @@ -1336,6 +1348,7 @@ module dspu #( always_comb begin simd_op_a = 'b0; simd_op_b = 'b0; + simd_op_c = 'b0; unique case (simd_size) // half-word granularity HalfWord: @@ -1343,6 +1356,7 @@ module dspu #( simd_op_a[2*i +: 
2] = op_a_i[16*i +: 16]; // operands A are the half-words of op_a_i // operands B are the half-words of op_b_i, replicated lowest half-word of op_b_i or replicated 6-bit immediate simd_op_b[2*i +: 2] = (simd_mode == Vect) ? op_b_i[16*i +: 16] : ((simd_mode == Sc) ? op_b_i[15:0] : simd_imm); + simd_op_c[2*i +: 2] = op_c_i[16*i +: 16]; // operands C are the half-words of op_c_i end // byte granularity Byte: @@ -1350,6 +1364,7 @@ module dspu #( simd_op_a[i] = op_a_i[8*i +: 8]; // operands A are the bytes of op_a_i // operands B are the bytes of op_b_i, replicated lowest byte of op_b_i or replicated 6-bit immediate simd_op_b[i] = (simd_mode == Vect) ? op_b_i[8*i +: 8] : ((simd_mode == Sc) ? op_b_i[7:0] : simd_imm[0]); + simd_op_c[i] = op_c_i[8*i +: 8]; // operands C are the bytes of op_c_i end default: ; endcase @@ -1414,6 +1429,9 @@ module dspu #( $signed({simd_op_b[2*i+1][7] & simd_dotp_op_b_signed, simd_op_b[2*i +: 2]}); end end + SimdShuffle: + for (int i = 0; i < Width/16; i++) + simd_result[2*i +: 2] = simd_op_b[2*i][1] ? simd_op_a[2*simd_op_b[2*i][0] +: 2] : simd_op_c[2*simd_op_b[2*i][0] +: 2]; default: ; endcase end @@ -1471,6 +1489,9 @@ module dspu #( simd_result = $signed(simd_result) + $signed({simd_op_a[i][7] & simd_dotp_op_a_signed, simd_op_a[i]}) * $signed({simd_op_b[i][7] & simd_dotp_op_b_signed, simd_op_b[i]}); end + SimdShuffle: + for (int i = 0; i < Width/8; i++) + simd_result[i] = simd_op_b[i][2] ? 
simd_op_a[simd_op_b[i][1:0]] : simd_op_c[simd_op_b[i][1:0]]; default: ; endcase end From 7d9245b015fcf54ea80cb12126fe565817e0a4fd Mon Sep 17 00:00:00 2001 From: Sergio Mazzola Date: Tue, 23 Feb 2021 12:52:23 +0100 Subject: [PATCH 37/65] [apps] Add custom matmul benchmark, optimized for Xpulp --- apps/common/xpulp/builtins_v2.h | 307 +++++++++++++++++++++++++++ apps/common/xpulp/mat_mul.h | 363 ++++++++++++++++++++++++++++++++ apps/matmul_b/main.c | 151 +++++++++++++ apps/matmul_w/main.c | 148 +++++++++++++ 4 files changed, 969 insertions(+) create mode 100644 apps/common/xpulp/builtins_v2.h create mode 100644 apps/common/xpulp/mat_mul.h create mode 100644 apps/matmul_b/main.c create mode 100644 apps/matmul_w/main.c diff --git a/apps/common/xpulp/builtins_v2.h b/apps/common/xpulp/builtins_v2.h new file mode 100644 index 000000000..ffbcfe1c9 --- /dev/null +++ b/apps/common/xpulp/builtins_v2.h @@ -0,0 +1,307 @@ +/* + * Copyright (C) 2019 ETH Zurich, University of Bologna + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __HAL_RISCV_BUILTINS_V2_H__ +#define __HAL_RISCV_BUILTINS_V2_H__ + +/* ARITHMETIC SECTION */ +typedef signed short v2s __attribute__((vector_size (4))); +typedef unsigned short v2u __attribute__((vector_size (4))); + +typedef signed char v4s __attribute__((vector_size (4))); +typedef unsigned char v4u __attribute__((vector_size (4))); + +/* Packing of scalars into vectors */ +#define __PACK2(x, y) __builtin_pulp_pack2((signed short) (x), (signed short) (y)) +#define __PACKU2(x, y) __builtin_pulp_pack2((unsigned short) (x), (unsigned short) (y)) + +#define __PACK4(x, y, z, t) __builtin_pulp_pack4((signed char) (x), (signed char) (y), (signed char) (z), (signed char) (t)) +#define __PACKU4(x, y, z, t) __builtin_pulp_pack4((unsigned char) (x), (unsigned char) (y), (unsigned char) (z), (unsigned char) (t)) + +/* Max */ +#define __MAX(a, b) __builtin_pulp_maxsi((a), (b)) + +#define __MAX2(x, y) __builtin_pulp_max2((x), (y)) +#define __MAX4(x, y) __builtin_pulp_max4((x), (y)) + +#define __MAXU2(x, y) __builtin_pulp_maxu2((x), (y)) +#define __MAXU4(x, y) __builtin_pulp_maxu4((x), (y)) + +/* Min */ +#define __MIN2(x, y) __builtin_pulp_min2((x), (y)) +#define __MIN4(x, y) __builtin_pulp_min4((x), (y)) + +#define __MINU2(x, y) __builtin_pulp_minu2((x), (y)) +#define __MINU4(x, y) __builtin_pulp_minu4((x), (y)) + +/* Clip */ +#define __CLIP(x, precision) __builtin_pulp_clip((x), -(1<<(precision)), (1< +#include + +#include "encoding.h" +#include "printf.h" +#include "runtime.h" +#include "synchronization.h" +#include "xpulp/mat_mul.h" + +// Define which kernel to use +#define XPULPIMG + +// Define Matrix dimensions: +// C = AB with A=[MxN], B=[NxP], C=[MxP] +#define matrix_M 64 +#define matrix_N 64 +#define matrix_P 64 + +int8_t matrix_a[matrix_M * matrix_N] __attribute__((section(".l1_prio"))); +int8_t matrix_b[matrix_N * matrix_P] __attribute__((section(".l1_prio"))); +int32_t matrix_c[matrix_M * matrix_P] __attribute__((section(".l1_prio"))); + +int 
volatile error __attribute__((section(".l1"))); + +void init_matrix(int8_t *matrix, uint32_t num_rows, uint32_t num_columns, + int8_t a, int8_t b, int8_t c, uint32_t core_id, + uint32_t num_cores) { + uint32_t const split = 8; // How many rows/columns to split the matrix into + if (num_columns > num_rows) { + // Parallelize over columns + uint32_t const c_start = (num_rows / split) * (core_id % split); + uint32_t const c_end = (num_rows / split) * ((core_id % split) + 1); + for (uint32_t j = (core_id / split); j < num_columns; + j += (num_cores / split)) { + for (uint32_t i = c_start; i < c_end; ++i) { + matrix[i * num_columns + j] = a * (int8_t)i + b * (int8_t)j + c; + } + } + } else { + // Parallelize over rows + uint32_t const c_start = (num_columns / split) * (core_id % split); + uint32_t const c_end = (num_columns / split) * ((core_id % split) + 1); + for (uint32_t i = (core_id / split); i < num_rows; + i += (num_cores / split)) { + for (uint32_t j = c_start; j < c_end; ++j) { + matrix[i * num_columns + j] = a * (int8_t)i + b * (int8_t)j + c; + } + } + } +} + +// Initialize the matrices in parallel +int verify_matrix(int32_t *matrix, uint32_t num_rows, uint32_t num_columns, + uint32_t inner_dim, int8_t aa, int8_t ab, int8_t ac, + int8_t ba, int8_t bb, int8_t bc, uint32_t core_id, + uint32_t num_cores) { + // Convert to signed + int32_t n = (int32_t)inner_dim; + // Parallelize over rows + for (uint32_t i = core_id; i < num_rows; i += num_cores) { + for (uint32_t j = 0; j < num_columns; ++j) { + int32_t ii = (int32_t)i; + int32_t jj = (int32_t)j; + int32_t lin = + ((int32_t)aa * bb * ii * jj + aa * bc * ii + ac * bb * jj + (int32_t)ac * bc) * n; + int32_t qua = + (((int32_t)aa * ba * ii + ab * bb * jj + ab * bc + (int32_t)ba * ac) * (n * (n - 1))) / + 2; + int32_t cub = (((int32_t)ab * ba) * (n * (n - 1) * (2 * n - 1))) / 6; + int32_t golden = lin + qua + cub; + if (matrix[i * num_columns + j] != golden) { + return (i + j) == 0 ? 
-1 : (int)(i * num_columns + j); + } + matrix[i * num_columns + j] = 0; + } + } + return 0; +} + +int test_matrix_multiplication(int8_t *__restrict__ A, int8_t *__restrict__ B, + int32_t *__restrict__ C, uint32_t M, uint32_t N, + uint32_t P, uint32_t core_id, + uint32_t num_cores) { + int8_t const A_a = 0; + int8_t const A_b = 1; + int8_t const A_c = -32; + int8_t const B_a = 0; + int8_t const B_b = 1; + int8_t const B_c = 16; + + // Initialize Matrices + init_matrix(A, M, N, A_a, A_b, A_c, core_id, num_cores); + init_matrix(B, N, P, B_a, B_b, B_c, core_id, num_cores); + // Wait at barrier until everyone is ready + mempool_barrier(num_cores, num_cores / 2); + // Execute function to test. + mempool_start_benchmark(); + + #ifdef XPULPIMG + matmul_unrolled_2x4_pincr_asm_parallel_i8_xpulpv2(A, B, M, N, P, C, core_id, num_cores); + #else + matmul_unrolled_2x2_parallel_i8_rv32im(A, B, C, M, N, P, core_id, num_cores); + #endif + + mempool_stop_benchmark(); + // Wait at barrier befor checking + mempool_barrier(num_cores, num_cores * 4); + if (verify_matrix(C, M, P, N, A_a, A_b, A_c, B_a, B_b, B_c, core_id, + num_cores)) { + error = 1; + return -1; + } + return 0; +} + +int main() { + uint32_t core_id = mempool_get_core_id(); + uint32_t num_cores = mempool_get_core_count(); + // Initialize barrier and synchronize + mempool_barrier_init(core_id, num_cores); + + if (core_id == 0) { + error = 0; + } + + // Test the Matrix multiplication + test_matrix_multiplication(matrix_a, matrix_b, matrix_c, matrix_M, matrix_N, + matrix_P, core_id, num_cores); + // wait until all cores have finished + mempool_barrier(num_cores, num_cores * 4); + + return error; +} diff --git a/apps/matmul_w/main.c b/apps/matmul_w/main.c new file mode 100644 index 000000000..5d2dd46d1 --- /dev/null +++ b/apps/matmul_w/main.c @@ -0,0 +1,148 @@ +// Copyright 2020 ETH Zurich and University of Bologna. 
+// +// SPDX-License-Identifier: Apache-2.0 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Author: Samuel Riedel, ETH Zurich + +#include +#include + +#include "encoding.h" +#include "kernel/mat_mul.h" +#include "printf.h" +#include "runtime.h" +#include "synchronization.h" + +// Define Matrix dimensions: +// C = AB with A=[MxN], B=[NxP], C=[MxP] +#if NUM_CORES > 32 +#define matrix_M 32 +#define matrix_N 32 +#define matrix_P 32 +#else +#define matrix_M (NUM_CORES) +#define matrix_N (NUM_CORES) +#define matrix_P (NUM_CORES) +#endif + +int32_t matrix_a[matrix_M * matrix_N] __attribute__((section(".l1_prio"))); +int32_t matrix_b[matrix_N * matrix_P] __attribute__((section(".l1_prio"))); +int32_t matrix_c[matrix_M * matrix_P] __attribute__((section(".l1_prio"))); + +int volatile error __attribute__((section(".l1"))); + +void init_matrix(int32_t *matrix, uint32_t num_rows, uint32_t num_columns, + int32_t a, int32_t b, int32_t c, uint32_t core_id, + uint32_t num_cores) { + uint32_t const split = 8; // How many rows/columns to split the matrix into + if (num_columns > num_rows) { + // Parallelize over columns + uint32_t const c_start = (num_rows / split) * (core_id % split); + uint32_t const c_end = (num_rows / split) * ((core_id % split) + 1); + for (uint32_t j = (core_id / split); j < num_columns; + j += (num_cores / split)) { + for (uint32_t i = c_start; i < c_end; ++i) { + matrix[i * num_columns + j] = a * (int32_t)i + b * (int32_t)j + c; + } + } + } 
else { + // Parallelize over rows + uint32_t const c_start = (num_columns / split) * (core_id % split); + uint32_t const c_end = (num_columns / split) * ((core_id % split) + 1); + for (uint32_t i = (core_id / split); i < num_rows; + i += (num_cores / split)) { + for (uint32_t j = c_start; j < c_end; ++j) { + matrix[i * num_columns + j] = a * (int32_t)i + b * (int32_t)j + c; + } + } + } +} + +// Initialize the matrices in parallel +int verify_matrix(int32_t *matrix, uint32_t num_rows, uint32_t num_columns, + uint32_t inner_dim, int32_t aa, int32_t ab, int32_t ac, + int32_t ba, int32_t bb, int32_t bc, uint32_t core_id, + uint32_t num_cores) { + // Convert to signed + int32_t n = (int32_t)inner_dim; + // Parallelize over rows + for (uint32_t i = core_id; i < num_rows; i += num_cores) { + for (uint32_t j = 0; j < num_columns; ++j) { + int32_t ii = (int32_t)i; + int32_t jj = (int32_t)j; + int32_t lin = + (aa * bb * ii * jj + aa * bc * ii + ac * bb * jj + ac * bc) * n; + int32_t qua = + ((aa * ba * ii + ab * bb * jj + ab * bc + ba * ac) * (n * (n - 1))) / + 2; + int32_t cub = ((ab * ba) * (n * (n - 1) * (2 * n - 1))) / 6; + int32_t golden = lin + qua + cub; + if (matrix[i * num_columns + j] != golden) { + return (i + j) == 0 ? -1 : (int)(i * num_columns + j); + } + matrix[i * num_columns + j] = 0; + } + } + return 0; +} + +int test_matrix_multiplication(int32_t *__restrict__ A, int32_t *__restrict__ B, + int32_t *__restrict__ C, uint32_t M, uint32_t N, + uint32_t P, uint32_t core_id, + uint32_t num_cores) { + int32_t const A_a = 1; + int32_t const A_b = 1; + int32_t const A_c = -32; + int32_t const B_a = 2; + int32_t const B_b = 1; + int32_t const B_c = 16; + + // Initialize Matrices + init_matrix(A, M, N, A_a, A_b, A_c, core_id, num_cores); + init_matrix(B, N, P, B_a, B_b, B_c, core_id, num_cores); + // Wait at barrier until everyone is ready + mempool_barrier(num_cores, num_cores / 2); + // Execute function to test. 
+ mempool_start_benchmark(); + mat_mul_unrolled_2x2_parallel(A, B, C, M, N, P, core_id, num_cores); + mempool_stop_benchmark(); + // Wait at barrier befor checking + mempool_barrier(num_cores, num_cores * 4); + if (verify_matrix(C, M, P, N, A_a, A_b, A_c, B_a, B_b, B_c, core_id, + num_cores)) { + error = 1; + return -1; + } + return 0; +} + +int main() { + uint32_t core_id = mempool_get_core_id(); + uint32_t num_cores = mempool_get_core_count(); + // Initialize barrier and synchronize + mempool_barrier_init(core_id, num_cores); + + if (core_id == 0) { + error = 0; + } + + // Test the Matrix multiplication + test_matrix_multiplication(matrix_a, matrix_b, matrix_c, matrix_M, matrix_N, + matrix_P, core_id, num_cores); + // wait until all cores have finished + mempool_barrier(num_cores, num_cores * 4); + + return error; +} From acce2f019692260c30b5318bb29d756d31794555 Mon Sep 17 00:00:00 2001 From: Sergio Mazzola Date: Tue, 23 Feb 2021 13:56:33 +0100 Subject: [PATCH 38/65] [snitch] :bug: Add declaration of retire_p signal for post-incr write-back --- hardware/deps/snitch/src/snitch.sv | 1 + 1 file changed, 1 insertion(+) diff --git a/hardware/deps/snitch/src/snitch.sv b/hardware/deps/snitch/src/snitch.sv index 07a56c6a6..563a5924a 100644 --- a/hardware/deps/snitch/src/snitch.sv +++ b/hardware/deps/snitch/src/snitch.sv @@ -154,6 +154,7 @@ module snitch #( logic [31:0] lsu_qaddr; logic retire_load; // retire a load instruction + logic retire_p; // retire from post-increment instructions logic retire_i; // retire the rest of the base instruction set logic retire_acc; // retire an instruction we offloaded From e6006daf44d7a4e11e0bc02d12e7fd246257a3d3 Mon Sep 17 00:00:00 2001 From: Sergio Mazzola Date: Tue, 23 Feb 2021 14:05:28 +0100 Subject: [PATCH 39/65] [snitch] Remove FP extension from Verilator trace --- hardware/deps/snitch/src/mempool_cc.sv | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hardware/deps/snitch/src/mempool_cc.sv 
b/hardware/deps/snitch/src/mempool_cc.sv index f0d393d3e..14f3efab7 100644 --- a/hardware/deps/snitch/src/mempool_cc.sv +++ b/hardware/deps/snitch/src/mempool_cc.sv @@ -321,8 +321,8 @@ module mempool_cc #( extras_str = $sformatf("%s'%s': 0x%8x, ", extras_str, "acc_pid", i_snitch.acc_pid_i,); extras_str = $sformatf("%s'%s': 0x%8x, ", extras_str, "acc_pdata_32",i_snitch.acc_pdata_i[31:0],); // FPU offload - extras_str = $sformatf("%s'%s': 0x%8x, ", extras_str, "fpu_offload", (i_snitch.acc_qready_i && i_snitch.acc_qvalid_o && !snitch_pkg::shared_offload(i_snitch.acc_qdata_op_o)),); - extras_str = $sformatf("%s'%s': 0x%8x, ", extras_str, "is_seq_insn", (i_snitch.inst_data_i ==? riscv_instr::FREP)); + extras_str = $sformatf("%s'%s': 0x%8x, ", extras_str, "fpu_offload", 1'b0); + extras_str = $sformatf("%s'%s': 0x%8x, ", extras_str, "is_seq_insn", 1'b0); extras_str = $sformatf("%s}", extras_str); `else fmt_extras(extras_snitch, extras_str); From 88cfc65d4f952dd8ded57172a63d545155ff9a7a Mon Sep 17 00:00:00 2001 From: Sergio Mazzola Date: Tue, 23 Feb 2021 14:10:07 +0100 Subject: [PATCH 40/65] [snitch] :art: Fix little endian arrays --- hardware/deps/snitch/src/snitch_ipu.sv | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hardware/deps/snitch/src/snitch_ipu.sv b/hardware/deps/snitch/src/snitch_ipu.sv index 8ea406219..eee90b482 100644 --- a/hardware/deps/snitch/src/snitch_ipu.sv +++ b/hardware/deps/snitch/src/snitch_ipu.sv @@ -364,14 +364,14 @@ module dspu #( Nop, Abs, Sle, Min, Max, Exths, Exthz, Extbs, Extbz, Clip, Mac, Simd } res_sel; // result selection - enum logic [0:4] { + enum logic [4:0] { SimdNop, SimdAdd, SimdSub, SimdAvg, SimdMin, SimdMax, SimdSrl, SimdSra, SimdSll, SimdOr, SimdXor, SimdAnd, SimdAbs, SimdExt, SimdIns, SimdDotp, SimdShuffle } simd_op; // SIMD operation enum logic { HalfWord, Byte } simd_size; // SIMD granularity - enum logic [0:1] { + enum logic [1:0] { Vect, Sc, Sci } simd_mode; // SIMD mode logic simd_signed; // SIMD 
operation is signed and uses sign-extended imm6 From b54e1d00276ce39573e4cbc0f5411fec27d2c5e1 Mon Sep 17 00:00:00 2001 From: Sergio Mazzola Date: Tue, 23 Feb 2021 14:13:18 +0100 Subject: [PATCH 41/65] [snitch] Add lint undriven for Verilator in Snitch IPU --- hardware/deps/snitch/src/snitch_ipu.sv | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hardware/deps/snitch/src/snitch_ipu.sv b/hardware/deps/snitch/src/snitch_ipu.sv index eee90b482..c7c72e52f 100644 --- a/hardware/deps/snitch/src/snitch_ipu.sv +++ b/hardware/deps/snitch/src/snitch_ipu.sv @@ -30,8 +30,10 @@ module snitch_ipu #( } result_t; // input handshake logic div_valid_op, div_ready_op; + /* verilator lint_off UNDRIVEN */ logic mul_valid_op, mul_ready_op; logic dsp_valid_op, dsp_ready_op; + /* verilator lint_on UNDRIVEN */ // output handshake logic mul_valid, mul_ready; logic div_valid, div_ready; From 79daedfd793d9040ed74b5edde101a2564a3c58f Mon Sep 17 00:00:00 2001 From: Sergio Mazzola Date: Tue, 23 Feb 2021 17:42:05 +0100 Subject: [PATCH 42/65] [readme] Add Verilator instructions --- README.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/README.md b/README.md index ca4a8416b..120aa91d0 100644 --- a/README.md +++ b/README.md @@ -111,6 +111,18 @@ app=hello_world make benchmark You can set up the configuration of the system in the file `config/config.mk`, controlling the total number of cores, the number of cores per tile and whether the Xpulpimg extension is enabled or not in the Snitch core; the `xpulpimg` parameter also control the default core architecture considered when compiling applications for MemPool. +To simulate the MemPool system with Verilator use the same format, but with the target +```bash +make verilate +``` +If, during the Verilator model compilation, you run out of space on your disk, use +```bash +export OBJCACHE='' +``` +to disable the use of `ccache`. This will make the following compilations slower, but avoids using storage.
+ +If the tracer is enabled, its output traces are found under `hardware/build`, for both ModelSim and Verilator simulations. + ## Common Problems - If building the GCC toolchain fails because *makeinfo/texinfo* is missing, try the following command: From 1709c164b154e79ce6da20606bcc6f0eeca3bfa9 Mon Sep 17 00:00:00 2001 From: Sergio Mazzola Date: Tue, 23 Feb 2021 18:46:54 +0100 Subject: [PATCH 43/65] [apps] Align loops to wider instruction cache lines --- apps/common/runtime.mk | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/apps/common/runtime.mk b/apps/common/runtime.mk index a6d6767d2..06e4e9877 100644 --- a/apps/common/runtime.mk +++ b/apps/common/runtime.mk @@ -75,8 +75,8 @@ RISCV_LLVM_TARGET ?= --target=$(RISCV_TARGET) --sysroot=$(GCC_INSTALL_DIR)/$(RI RISCV_WARNINGS += -Wunused-variable -Wconversion -Wall -Wextra # -Werror RISCV_FLAGS_COMMON_TESTS ?= -march=$(RISCV_ARCH) -mabi=$(RISCV_ABI) -I$(CURDIR)/common -static RISCV_FLAGS_COMMON ?= $(RISCV_FLAGS_COMMON_TESTS) -std=gnu99 -O3 -ffast-math -fno-common -fno-builtin-printf $(DEFINES) $(RISCV_WARNINGS) -RISCV_FLAGS_GCC ?= -mcmodel=medany -Wa,-march=$(RISCV_ARCH_AS) -falign-loops=16 -falign-jumps=16 -#RISCV_FLAGS_GCC ?= -mcmodel=medany -Wa,-march=$(RISCV_ARCH_AS) -falign-loops=16 -falign-jumps=16 -funroll-loops +RISCV_FLAGS_GCC ?= -mcmodel=medany -Wa,-march=$(RISCV_ARCH_AS) -falign-loops=32 -falign-jumps=32 +#RISCV_FLAGS_GCC ?= -mcmodel=medany -Wa,-march=$(RISCV_ARCH_AS) -falign-loops=32 -falign-jumps=32 -funroll-loops RISCV_FLAGS_LLVM ?= -mcmodel=small -mllvm -enable-misched ifeq ($(COMPILER),gcc) From 70944c4c38654ee89e6a8f58cddd3bbca1d90e2e Mon Sep 17 00:00:00 2001 From: Sergio Mazzola Date: Tue, 23 Feb 2021 18:49:19 +0100 Subject: [PATCH 44/65] [Makefile] Fix trace target for when transcript not available The file hardware/build/transcript is generated by ModelSim simulations only; when the target `make trace` is run after a Verilator simulation, transcript is not there and the recipe
failed prior to this fix --- hardware/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hardware/Makefile b/hardware/Makefile index 836d7b457..941ae8df8 100644 --- a/hardware/Makefile +++ b/hardware/Makefile @@ -215,7 +215,7 @@ pre_trace: post_trace: mkdir -p "$(result_dir)" - cp $(buildpath)/transcript "$(result_dir)/" + cp $(buildpath)/transcript "$(result_dir)/" | true cp $(traceresult) "$(result_dir)" cp $(trace) "$(result_dir)" From 93b3288e22bde90b14ed6f481a6d05b75ce3d220 Mon Sep 17 00:00:00 2001 From: Sergio Mazzola Date: Mon, 1 Mar 2021 16:45:15 +0100 Subject: [PATCH 45/65] [snitch] :bug: Fix register-register stores scoreboarding bug Register-register stores instructions introduced from Xpulpv2 are of the kind: p.s{b,h,w} rs2, rs3(rs1[!]) They have a different encoding from all other instructions and rs3 is used as source (rs3 is the same as rd); the bug was caused by the fact that when a reg-reg store was issued, before executing it, RAW dependencies on rd were not checked before using it as source register, thus RAW hazards happened even if rd register was set as busy in the scoreboard.
--- hardware/deps/snitch/src/snitch.sv | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/hardware/deps/snitch/src/snitch.sv b/hardware/deps/snitch/src/snitch.sv index 563a5924a..6e9ad329d 100644 --- a/hardware/deps/snitch/src/snitch.sv +++ b/hardware/deps/snitch/src/snitch.sv @@ -178,7 +178,7 @@ module snitch #( } alu_op; enum logic [3:0] { - None, Reg, IImmediate, UImmediate, JImmediate, SImmediate, SFImmediate, PC, CSR, CSRImmediate, PBImmediate, RegRd + None, Reg, IImmediate, UImmediate, JImmediate, SImmediate, SFImmediate, PC, CSR, CSRImmediate, PBImmediate, RegRd, RegRs2 } opa_select, opb_select, opc_select; logic write_rd; // write rd desitnation this cycle @@ -249,7 +249,7 @@ module snitch #( // TODO(zarubaf): This can probably be described a bit more efficient assign opa_ready = (opa_select != Reg) | ~sb_q[rs1]; assign opb_ready = ((opb_select != Reg & opb_select != SImmediate) | ~sb_q[rs2]) & ((opb_select != RegRd) | ~sb_q[rd]); - assign opc_ready = (opc_select != Reg) | ~sb_q[rd]; + assign opc_ready = ((opc_select != Reg) | ~sb_q[rd]) & ((opc_select != RegRs2) | ~sb_q[rs2]); assign operands_ready = opa_ready & opb_ready & opc_ready; // either we are not using the destination register or we need to make // sure that its destination operand is not marked busy in the scoreboard. @@ -1005,14 +1005,19 @@ module snitch #( illegal_inst = 1'b1; end end + // opb is usually assigned with the content of rs2; in stores with reg-reg + // addressing mode, however, the offset is stored in rd, so rd content is + // instead assigned to opb: if we cross such signals now (rd -> opb, + // rs2 -> opc) we don't have to do that in the ALU, with bigger muxes riscv_instr::P_SB_RRPOST: begin // Xpulpimg: p.sb rs2,rs3(rs1!) if (snitch_pkg::XPULPIMG) begin write_rd = 1'b0; write_rs1 = 1'b1; is_store = 1'b1; is_postincr = 1'b1; - opa_select = Reg; - opb_select = RegRd; + opa_select = Reg; // rs1 base address + opb_select = RegRd; // rs3 (i.e. 
rd) offset + opc_select = RegRs2; // rs2 source data end else begin illegal_inst = 1'b1; end @@ -1026,6 +1031,7 @@ module snitch #( ls_size = HalfWord; opa_select = Reg; opb_select = RegRd; + opc_select = RegRs2; end else begin illegal_inst = 1'b1; end @@ -1039,6 +1045,7 @@ module snitch #( ls_size = Word; opa_select = Reg; opb_select = RegRd; + opc_select = RegRs2; end else begin illegal_inst = 1'b1; end @@ -1049,6 +1056,7 @@ module snitch #( is_store = 1'b1; opa_select = Reg; opb_select = RegRd; + opc_select = RegRs2; end else begin illegal_inst = 1'b1; end @@ -1060,6 +1068,7 @@ module snitch #( ls_size = HalfWord; opa_select = Reg; opb_select = RegRd; + opc_select = RegRs2; end else begin illegal_inst = 1'b1; end @@ -1071,6 +1080,7 @@ module snitch #( ls_size = Word; opa_select = Reg; opb_select = RegRd; + opc_select = RegRs2; end else begin illegal_inst = 1'b1; end From e9d027d2b54a5cd5b7afb069034408764ffcd3be Mon Sep 17 00:00:00 2001 From: Sergio Mazzola Date: Mon, 1 Mar 2021 16:46:27 +0100 Subject: [PATCH 46/65] [apps] Add matmul kernel for benchmarking --- apps/common/xpulp/mat_mul.h | 566 ++++++++++++++++++++++----- apps/matmul_i16/main.c | 150 +++++++ apps/{matmul_w => matmul_i32}/main.c | 20 +- apps/{matmul_b => matmul_i8}/main.c | 26 +- 4 files changed, 649 insertions(+), 113 deletions(-) create mode 100644 apps/matmul_i16/main.c rename apps/{matmul_w => matmul_i32}/main.c (95%) rename apps/{matmul_b => matmul_i8}/main.c (91%) diff --git a/apps/common/xpulp/mat_mul.h b/apps/common/xpulp/mat_mul.h index 262d0baf5..c46f72baf 100644 --- a/apps/common/xpulp/mat_mul.h +++ b/apps/common/xpulp/mat_mul.h @@ -19,8 +19,8 @@ #include "xpulp/builtins_v2.h" -/* This library implements the matrix multiplication on 8-bit elements in - * multiple different ways. The functions all follow the following format: +/* This library implements the matrix multiplication for several data widths + * in Zmultiple different ways. 
The functions all follow the following format: * * A is an M x N matrix, B is a N x P matrix, and C is a M x P matrix * C = AB @@ -30,41 +30,17 @@ * considered, leading to wrong results */ +// Define which kernel to use +#define __XPULPIMG -// matmul rv32im, unrolled (4 dest elements per iteration, 1x4 chunks), parallelized -void matmul_unrolled_1x4_parallel_i8_rv32im(int8_t const *__restrict__ A, - int8_t const *__restrict__ B, - int32_t *__restrict__ C, uint32_t M, - uint32_t N, uint32_t P, uint32_t id, - uint32_t numThreads) { - // Parallelize by assigning each core one row - for (uint32_t i = id; i < M; i += numThreads) { - for (uint32_t j = 0; j < P; j += 4) { - int32_t c0 = 0; - int32_t c1 = 0; - int32_t c2 = 0; - int32_t c3 = 0; - for (uint32_t k = 0; k < N; ++k) { - // Explicitly load the values first to help with scheduling - int8_t val_a = A[i * N + k]; - int8_t val_b0 = B[k * P + j + 0]; - int8_t val_b1 = B[k * P + j + 1]; - int8_t val_b2 = B[k * P + j + 2]; - int8_t val_b3 = B[k * P + j + 3]; - c0 += val_a * val_b0; - c1 += val_a * val_b1; - c2 += val_a * val_b2; - c3 += val_a * val_b3; - } - C[i * P + j + 0] = c0; - C[i * P + j + 1] = c1; - C[i * P + j + 2] = c2; - C[i * P + j + 3] = c3; - } - } -} - -// matmul rv32im, unrolled (4 dest elements per iteration, 2x2 chunks), parallelized +/* + * Matrix multiplication ---------------------------------- + * kernel = matmul_unrolled_2x2_parallel_i8_rv32im + * data type = 8-bit integer + * multi-core = yes + * unrolling = 4 elements of C per iteration (2x2 chunks) + * simd = no + */ void matmul_unrolled_2x2_parallel_i8_rv32im(int8_t const *__restrict__ A, int8_t const *__restrict__ B, int32_t *__restrict__ C, uint32_t M, @@ -107,12 +83,71 @@ void matmul_unrolled_2x2_parallel_i8_rv32im(int8_t const *__restrict__ A, } } -// matmul with xpulpv2 SIMD builtins, unrolled (8 elements per iteration, 2x4 chunks), single-core -// original plp_mat_mult_i8s_xpulpv2, from pulp-dsp library +/* + * Matrix multiplication 
---------------------------------- + * kernel = matmul_unrolled_2x2_parallel_i16_rv32im + * data type = 16-bit integer + * multi-core = yes + * unrolling = 4 elements of C per iteration (2x2 chunks) + * simd = no + */ +void matmul_unrolled_2x2_parallel_i16_rv32im(int16_t const *__restrict__ A, + int16_t const *__restrict__ B, + int32_t *__restrict__ C, + uint32_t M, uint32_t N, uint32_t P, + uint32_t id, uint32_t numThreads) { + // Parallelize by assigning each core one row + uint32_t const c = 8; // How many columns to split the matrix into + uint32_t const c_start = (P / c) * (id % c); + uint32_t const c_end = (P / c) * ((id % c) + 1); + for (uint32_t i = 2 * (id / c); i < M; i += 2 * (numThreads / c)) { + for (uint32_t j = c_start; j < c_end; j += 2) { + int32_t c00 = 0; + int32_t c01 = 0; + int32_t c10 = 0; + int32_t c11 = 0; + for (uint32_t k = 0; k < N; k += 2) { + // Explicitly load the values first to help with scheduling + int16_t val_a00 = A[(i + 0) * N + k + 0]; + int16_t val_a01 = A[(i + 0) * N + k + 1]; + int16_t val_a10 = A[(i + 1) * N + k + 0]; + int16_t val_a11 = A[(i + 1) * N + k + 1]; + int16_t val_b00 = B[(k + 0) * P + j + 0]; + int16_t val_b01 = B[(k + 0) * P + j + 1]; + int16_t val_b10 = B[(k + 1) * P + j + 0]; + int16_t val_b11 = B[(k + 1) * P + j + 1]; + c00 += val_a00 * val_b00; + c00 += val_a01 * val_b10; + c01 += val_a00 * val_b01; + c01 += val_a01 * val_b11; + c10 += val_a10 * val_b00; + c10 += val_a11 * val_b10; + c11 += val_a10 * val_b01; + c11 += val_a11 * val_b11; + } + C[(i + 0) * P + j + 0] = c00; + C[(i + 0) * P + j + 1] = c01; + C[(i + 1) * P + j + 0] = c10; + C[(i + 1) * P + j + 1] = c11; + } + } +} + +/* + * Matrix multiplication ---------------------------------- + * kernel = matmul_unrolled_2x4_i8_xpulpv2 + * data type = 8-bit integer + * multi-core = no + * unrolling = 8 elements of C per iteration (2x4 chunks) + * simd = yes, Xpulpv2 intrinsics + * + * Original plp_mat_mult_i8s_xpulpv2 from pulp-dsp + */ void 
matmul_unrolled_2x4_i8_xpulpv2(const int8_t *__restrict__ pSrcA, - const int8_t *__restrict__ pSrcB, uint32_t M, - uint32_t N, uint32_t P, - int32_t *__restrict__ pDstC) { + const int8_t *__restrict__ pSrcB, + int32_t *__restrict__ pDstC, uint32_t M, + uint32_t N, uint32_t P) { +#ifdef __XPULPIMG static v4s mask0 = {0, 1, 4, 5}; static v4s mask1 = {2, 3, 6, 7}; static v4s mask2 = {0, 2, 4, 6}; @@ -174,15 +209,26 @@ void matmul_unrolled_2x4_i8_xpulpv2(const int8_t *__restrict__ pSrcA, pDstC[(i * 2 + 1) * P + (k * 4 + 3)] = sum13; } } +#endif } -// matmul with xpulpv2 SIMD builtins, unrolled (8 elements per iteration, 2x4 chunks), parallelized -// original plp_mat_mult_i8p_xpulpv2, from pulp-dsp library +/* + * Matrix multiplication ---------------------------------- + * kernel = matmul_unrolled_2x4_parallel_i8_xpulpv2 + * data type = 8-bit integer + * multi-core = yes + * unrolling = 8 elements of C per iteration (2x4 chunks) + * simd = yes, Xpulpv2 intrinsics + * + * Original plp_mat_mult_i8p_xpulpv2 from pulp-dsp + */ void matmul_unrolled_2x4_parallel_i8_xpulpv2(const int8_t *__restrict__ pSrcA, - const int8_t *__restrict__ pSrcB, uint32_t M, - uint32_t N, uint32_t P, - int32_t *__restrict__ pDstC, uint32_t core_id, + const int8_t *__restrict__ pSrcB, + int32_t *__restrict__ pDstC, + uint32_t M, uint32_t N, uint32_t P, + uint32_t core_id, uint32_t numThreads) { +#ifdef __XPULPIMG static v4s mask0 = {0, 1, 4, 5}; static v4s mask1 = {2, 3, 6, 7}; static v4s mask2 = {0, 2, 4, 6}; @@ -244,17 +290,27 @@ void matmul_unrolled_2x4_parallel_i8_xpulpv2(const int8_t *__restrict__ pSrcA, pDstC[(i * 2 + 1) * P + (k * 4 + 3)] = sum13; } } +#endif } -// matmul with xpulpv2 SIMD builtins, unrolled (8 elements per iteration, 2x4 chunks), parallelized, -// loops use pointer incrementing instead of array indexing, loads/stores explicitly written in asm -// pro: better register utilization and smarter load/store pattern -// inspired from plp_mat_mult_i8p_xpulpv2, from pulp-dsp 
library -void matmul_unrolled_2x4_pincr_asm_parallel_i8_xpulpv2(const int8_t *__restrict__ pSrcA, - const int8_t *__restrict__ pSrcB, uint32_t M, - uint32_t N, uint32_t P, - int32_t *__restrict__ pDstC, uint32_t core_id, - uint32_t numThreads) { +/* + * Matrix multiplication ---------------------------------- + * kernel = matmul_unrolled_2x4_pincr_asm_parallel_i8_xpulpv2 + * data type = 8-bit integer + * multi-core = yes + * unrolling = 8 elements of C per iteration (2x4 chunks) + * simd = yes, Xpulpv2 intrinsics + * other = using pointer incrementing insteady of array + * indexing and loads/stores explicitly written + * in asm, for optimal register utilization + * + * Inspired from plp_mat_mult_i8p_xpulpv2 from pulp-dsp + */ +void matmul_unrolled_2x4_pincr_asm_parallel_i8_xpulpv2( + const int8_t *__restrict__ pSrcA, const int8_t *__restrict__ pSrcB, + int32_t *__restrict__ pDstC, uint32_t M, uint32_t N, uint32_t P, + uint32_t core_id, uint32_t numThreads) { +#ifdef __XPULPIMG // Masks for shuffles static v4s mask0 = {0, 1, 4, 5}; static v4s mask1 = {2, 3, 6, 7}; @@ -264,14 +320,14 @@ void matmul_unrolled_2x4_pincr_asm_parallel_i8_xpulpv2(const int8_t *__restrict_ // Loop counter for P uint32_t k = 0; // Row decrement for A matrix - int32_t const N_decr = - N + 4; + int32_t const N_decr = -N + 4; // Row increment for C matrix uint32_t const P_incr = (P * 4) - 12; for (k = core_id; k < P / 4; k += numThreads) { - int8_t *idx_a = &pSrcA[0]; // start_a - int32_t *idx_c = &pDstC[k * 4]; // start_c - int32_t const *end_c = &pDstC[(P * M) + (k * 4)]; + int8_t *idx_a = &pSrcA[0]; // start_a + int32_t *idx_c = &pDstC[k * 4]; // start_c + int32_t const *end_c = &pDstC[P * M]; // actually (P * M) + (k * 4) while (idx_c < end_c) { int32_t sum00 = 0; @@ -291,29 +347,31 @@ void matmul_unrolled_2x4_pincr_asm_parallel_i8_xpulpv2(const int8_t *__restrict_ v4s temp0, temp1, temp2, temp3; __asm__ volatile( - "p.lw %[a0], %[a_incr](%[addr_a]!) 
\n\t" - "p.lw %[a1], %[a_decr](%[addr_a]!) \n\t" - "p.lw %[t0], %[b_incr](%[addr_b]!) \n\t" - "p.lw %[t1], %[b_incr](%[addr_b]!) \n\t" - "p.lw %[t2], %[b_incr](%[addr_b]!) \n\t" - "p.lw %[t3], %[b_incr](%[addr_b]!) \n\t" - : [ a0 ] "=&r"(aVec0), [ a1 ] "=&r"(aVec1), - [ t0 ] "=&r"(temp0), [ t1 ] "=&r"(temp1), [ t2 ] "=&r"(temp2), [ t3 ] "=&r"(temp3), - [ addr_a ] "+&r"(idx_a), [ addr_b ] "+&r"(idx_b) - : [ a_incr ] "r"(N), [ a_decr ] "r"(N_decr), [ b_incr ] "r"(P) - : "memory" - ); + "p.lw %[a0], %[a_incr](%[addr_a]!) \n\t" + "p.lw %[a1], %[a_decr](%[addr_a]!) \n\t" + "p.lw %[t0], %[b_incr](%[addr_b]!) \n\t" + "p.lw %[t1], %[b_incr](%[addr_b]!) \n\t" + "p.lw %[t2], %[b_incr](%[addr_b]!) \n\t" + "p.lw %[t3], %[b_incr](%[addr_b]!) \n\t" + : [ a0 ] "=&r"(aVec0), [ a1 ] "=&r"(aVec1), [ t0 ] "=&r"(temp0), + [ t1 ] "=&r"(temp1), [ t2 ] "=&r"(temp2), [ t3 ] "=&r"(temp3), + [ addr_a ] "+&r"(idx_a), [ addr_b ] "+&r"(idx_b) + : [ a_incr ] "r"(N), [ a_decr ] "r"(N_decr), [ b_incr ] "r"(P) + : "memory"); /* The asm code above implements the following commented C code */ - // v4s aVec0 = *((v4s *)idx_a); idx_a += N; // go to next row, same column - // v4s aVec1 = *((v4s *)idx_a); idx_a -= N - 4; // go to previous row, one column forward + // go to next row, same column + // v4s aVec0 = *((v4s *)idx_a); idx_a += N; + // go to previous row, one column forward + // v4s aVec1 = *((v4s *)idx_a); idx_a -= N - 4; // v4s temp0 = *((v4s *)idx_b); idx_b += P; // v4s temp1 = *((v4s *)idx_b); idx_b += P; // v4s temp2 = *((v4s *)idx_b); idx_b += P; // v4s temp3 = *((v4s *)idx_b); idx_b += P; - // Shuffles to transpose at runtime the chunk extracted from B before multiplying with A chunk - // temp0-3 variables needed because shuffles use rD as source, but also modify it, - // thus we need a copy of their content to use it twice in their original form + // Shuffles to transpose at runtime the chunk extracted from B before + // multiplying with A chunk temp0-3 variables needed because shuffles 
+ // use rD as source, but also modify it, thus we need a copy of their + // content to use it twice in their original form v4s temp4 = __builtin_shuffle(temp0, temp1, mask0); // 0,1,4,5 v4s temp5 = __builtin_shuffle(temp2, temp3, mask0); // 8,9,12,13 v4s temp6 = __builtin_shuffle(temp0, temp1, mask1); // 2,3,6,7 @@ -335,20 +393,19 @@ void matmul_unrolled_2x4_pincr_asm_parallel_i8_xpulpv2(const int8_t *__restrict_ } __asm__ volatile( - "p.sw %[s00], 4(%[addr_c]!) \n\t" - "p.sw %[s01], 4(%[addr_c]!) \n\t" - "p.sw %[s02], 4(%[addr_c]!) \n\t" - "p.sw %[s03], %[c_incr](%[addr_c]!) \n\t" - "p.sw %[s10], 4(%[addr_c]!) \n\t" - "p.sw %[s11], 4(%[addr_c]!) \n\t" - "p.sw %[s12], 4(%[addr_c]!) \n\t" - "p.sw %[s13], %[c_incr](%[addr_c]!) \n\t" - : [ addr_c ] "+&r"(idx_c) - : [ s00 ] "r"(sum00), [ s01 ] "r"(sum01), [ s02 ] "r"(sum02), [ s03 ] "r"(sum03), - [ s10 ] "r"(sum10), [ s11 ] "r"(sum11), [ s12 ] "r"(sum12), [ s13 ] "r"(sum13), - [ c_incr ] "r"(P_incr) - : "memory" - ); + "p.sw %[s00], 4(%[addr_c]!) \n\t" + "p.sw %[s01], 4(%[addr_c]!) \n\t" + "p.sw %[s02], 4(%[addr_c]!) \n\t" + "p.sw %[s03], %[c_incr](%[addr_c]!) \n\t" + "p.sw %[s10], 4(%[addr_c]!) \n\t" + "p.sw %[s11], 4(%[addr_c]!) \n\t" + "p.sw %[s12], 4(%[addr_c]!) \n\t" + "p.sw %[s13], %[c_incr](%[addr_c]!) 
\n\t" + : [ addr_c ] "+&r"(idx_c) + : [ s00 ] "r"(sum00), [ s01 ] "r"(sum01), [ s02 ] "r"(sum02), + [ s03 ] "r"(sum03), [ s10 ] "r"(sum10), [ s11 ] "r"(sum11), + [ s12 ] "r"(sum12), [ s13 ] "r"(sum13), [ c_incr ] "r"(P_incr) + : "memory"); /* The asm code above implements the following commented C code */ // *(idx_c++) = sum00; // *(idx_c++) = sum01; @@ -358,6 +415,335 @@ void matmul_unrolled_2x4_pincr_asm_parallel_i8_xpulpv2(const int8_t *__restrict_ // *(idx_c++) = sum11; // *(idx_c++) = sum12; // *(idx_c) = sum13; idx_c += P - 3; + + idx_a += N; // adjust A matrix pointer + } + } +#endif +} + +/* + * Matrix multiplication ---------------------------------- + * kernel = matmul_unrolled_4x2_parallel_i16_xpulpv2 + * data type = 16-bit integer + * multi-core = yes + * unrolling = 8 elements of C per iteration (4x2 chunks) + * simd = yes, Xpulpv2 intrinsics + * + * Original plp_mat_mult_i16p_xpulpv2 from pulp-dsp + */ +void matmul_unrolled_4x2_parallel_i16_xpulpv2(const int16_t *__restrict__ pSrcA, + const int16_t *__restrict__ pSrcB, + int32_t *__restrict__ pDstC, + uint32_t M, uint32_t N, + uint32_t P, uint32_t core_id, + uint32_t numThreads) { +#ifdef __XPULPIMG + uint32_t i = 0; // loop counter for M + uint32_t j = 0; // loop counter for N + uint32_t k = 0; // loop counter for P + + for (k = core_id; k < P / 2; k += numThreads) { + for (i = 0; i < M / 4; i++) { + + int32_t sum00 = 0; + int32_t sum01 = 0; + int32_t sum10 = 0; + int32_t sum11 = 0; + int32_t sum20 = 0; + int32_t sum21 = 0; + int32_t sum30 = 0; + int32_t sum31 = 0; + + for (j = 0; j < N / 2; j++) { + + v2s aVec0 = *((v2s *)&(pSrcA[(i * 4) * N + (j * 2)])); + v2s aVec1 = *((v2s *)&(pSrcA[(i * 4 + 1) * N + (j * 2)])); + v2s aVec2 = *((v2s *)&(pSrcA[(i * 4 + 2) * N + (j * 2)])); + v2s aVec3 = *((v2s *)&(pSrcA[(i * 4 + 3) * N + (j * 2)])); + + v2s bTemp0 = *((v2s *)&(pSrcB[(j * 2) * P + (k * 2)])); + v2s bTemp1 = *((v2s *)&(pSrcB[(j * 2 + 1) * P + (k * 2)])); + + v2s bVec0 = __builtin_shuffle(bTemp0, 
bTemp1, (v2s){0, 2}); + v2s bVec1 = __builtin_shuffle(bTemp0, bTemp1, (v2s){1, 3}); + + sum00 = __SUMDOTP2(aVec0, bVec0, sum00); + sum01 = __SUMDOTP2(aVec0, bVec1, sum01); + sum10 = __SUMDOTP2(aVec1, bVec0, sum10); + sum11 = __SUMDOTP2(aVec1, bVec1, sum11); + sum20 = __SUMDOTP2(aVec2, bVec0, sum20); + sum21 = __SUMDOTP2(aVec2, bVec1, sum21); + sum30 = __SUMDOTP2(aVec3, bVec0, sum30); + sum31 = __SUMDOTP2(aVec3, bVec1, sum31); + } + + pDstC[(i * 4) * P + (k * 2)] = sum00; + pDstC[(i * 4) * P + (k * 2 + 1)] = sum01; + pDstC[(i * 4 + 1) * P + (k * 2)] = sum10; + pDstC[(i * 4 + 1) * P + (k * 2 + 1)] = sum11; + pDstC[(i * 4 + 2) * P + (k * 2)] = sum20; + pDstC[(i * 4 + 2) * P + (k * 2 + 1)] = sum21; + pDstC[(i * 4 + 3) * P + (k * 2)] = sum30; + pDstC[(i * 4 + 3) * P + (k * 2 + 1)] = sum31; + } + } +#endif +} + +/* + * Matrix multiplication ---------------------------------- + * kernel = matmul_unrolled_4x2_pincr_asm_parallel_i16_xpulpv2 + * data type = 16-bit integer + * multi-core = yes + * unrolling = 8 elements of C per iteration (4x2 chunks) + * simd = yes, Xpulpv2 intrinsics + * other = using pointer incrementing insteady of array + * indexing and loads/stores explicitly written + * in asm, for optimal register utilization + * + * Inspired from plp_mat_mult_i16p_xpulpv2 from pulp-dsp + */ +void matmul_unrolled_4x2_pincr_asm_parallel_i16_xpulpv2( + const int16_t *__restrict__ pSrcA, const int16_t *__restrict__ pSrcB, + int32_t *__restrict__ pDstC, uint32_t M, uint32_t N, uint32_t P, + uint32_t core_id, uint32_t numThreads) { +#ifdef __XPULPIMG + // Loop counter for P + uint32_t k = 0; + // Increment for A matrix = 1 row forward + uint32_t const A_incr = N * sizeof(int16_t); + // Decrement for A matrix = 3 rows backward and 2 words forward + int32_t const A_decr = -(N * 3 * sizeof(int16_t)) + 2 * sizeof(int16_t); + // Increment for B matrix = 1 row forward + uint32_t const B_incr = P * sizeof(int16_t); // bytes in 1 row + // Increment for C matrix = 1 row forward and 
1 word backward + uint32_t const C_incr = (P * sizeof(int32_t)) - sizeof(int32_t); + + for (k = core_id; k < P / 2; k += numThreads) { + int16_t *idx_a = &pSrcA[0]; // start_a + int32_t *idx_c = &pDstC[k * 2]; // start_c + int32_t const *end_c = &pDstC[P * M]; // actually (P * M) + (k * 2) + + while (idx_c < end_c) { + + int32_t sum00 = 0; + int32_t sum01 = 0; + int32_t sum10 = 0; + int32_t sum11 = 0; + int32_t sum20 = 0; + int32_t sum21 = 0; + int32_t sum30 = 0; + int32_t sum31 = 0; + + int16_t const *end_a = idx_a + N; + int16_t *idx_b = &pSrcB[k * 2]; // start_b + + while (idx_a < end_a) { + + v2s aVec0, aVec1, aVec2, aVec3; + v2s bTemp0, bTemp1; + + __asm__ volatile("p.lw %[a0], %[a_incr](%[addr_a]!) \n\t" + "p.lw %[a1], %[a_incr](%[addr_a]!) \n\t" + "p.lw %[a2], %[a_incr](%[addr_a]!) \n\t" + "p.lw %[a3], %[a_decr](%[addr_a]!) \n\t" + "p.lw %[t0], %[b_incr](%[addr_b]!) \n\t" + "p.lw %[t1], %[b_incr](%[addr_b]!) \n\t" + : [ a0 ] "=&r"(aVec0), [ a1 ] "=&r"(aVec1), + [ a2 ] "=&r"(aVec2), [ a3 ] "=&r"(aVec3), + [ t0 ] "=&r"(bTemp0), [ t1 ] "=&r"(bTemp1), + [ addr_a ] "+&r"(idx_a), [ addr_b ] "+&r"(idx_b) + : [ a_incr ] "r"(A_incr), [ a_decr ] "r"(A_decr), + [ b_incr ] "r"(B_incr) + : "memory"); + /* The asm code above implements the following commented C code */ + // v2s aVec0 = *((v2s *)&(pSrcA[(i * 4) * N + (j * 2)])); + // v2s aVec1 = *((v2s *)&(pSrcA[(i * 4 + 1) * N + (j * 2)])); + // v2s aVec2 = *((v2s *)&(pSrcA[(i * 4 + 2) * N + (j * 2)])); + // v2s aVec3 = *((v2s *)&(pSrcA[(i * 4 + 3) * N + (j * 2)])); + // v2s bTemp0 = *((v2s *)&(pSrcB[(j * 2) * P + (k * 2)])); + // v2s bTemp1 = *((v2s *)&(pSrcB[(j * 2 + 1) * P + (k * 2)])); + + v2s bVec0 = __builtin_shuffle(bTemp0, bTemp1, (v2s){0, 2}); + v2s bVec1 = __builtin_shuffle(bTemp0, bTemp1, (v2s){1, 3}); + + sum00 = __SUMDOTP2(aVec0, bVec0, sum00); + sum01 = __SUMDOTP2(aVec0, bVec1, sum01); + sum10 = __SUMDOTP2(aVec1, bVec0, sum10); + sum11 = __SUMDOTP2(aVec1, bVec1, sum11); + sum20 = __SUMDOTP2(aVec2, bVec0, 
sum20); + sum21 = __SUMDOTP2(aVec2, bVec1, sum21); + sum30 = __SUMDOTP2(aVec3, bVec0, sum30); + sum31 = __SUMDOTP2(aVec3, bVec1, sum31); + } + + __asm__ volatile( + "p.sw %[s00], 4(%[addr_c]!) \n\t" + "p.sw %[s01], %[c_incr](%[addr_c]!) \n\t" + "p.sw %[s10], 4(%[addr_c]!) \n\t" + "p.sw %[s11], %[c_incr](%[addr_c]!) \n\t" + "p.sw %[s20], 4(%[addr_c]!) \n\t" + "p.sw %[s21], %[c_incr](%[addr_c]!) \n\t" + "p.sw %[s30], 4(%[addr_c]!) \n\t" + "p.sw %[s31], %[c_incr](%[addr_c]!) \n\t" + : [ addr_c ] "+&r"(idx_c) + : [ s00 ] "r"(sum00), [ s01 ] "r"(sum01), [ s10 ] "r"(sum10), + [ s11 ] "r"(sum11), [ s20 ] "r"(sum20), [ s21 ] "r"(sum21), + [ s30 ] "r"(sum30), [ s31 ] "r"(sum31), [ c_incr ] "r"(C_incr) + : "memory"); + /* The asm code above implements the following commented C code */ + // pDstC[(i * 4) * P + (k * 2)] = sum00; + // pDstC[(i * 4) * P + (k * 2 + 1)] = sum01; + // pDstC[(i * 4 + 1) * P + (k * 2)] = sum10; + // pDstC[(i * 4 + 1) * P + (k * 2 + 1)] = sum11; + // pDstC[(i * 4 + 2) * P + (k * 2)] = sum20; + // pDstC[(i * 4 + 2) * P + (k * 2 + 1)] = sum21; + // pDstC[(i * 4 + 3) * P + (k * 2)] = sum30; + // pDstC[(i * 4 + 3) * P + (k * 2 + 1)] = sum31; + + idx_a += N * 3; + } + } +#endif +} + +/* + * Matrix multiplication ---------------------------------- + * kernel = matmul_unrolled_2x2_parallel_i32_rv32im + * data type = 32-bit integer + * multi-core = yes + * unrolling = 4 elements of C per iteration (2x2 chunks) + * simd = no + */ +void matmul_unrolled_2x2_parallel_i32_rv32im(int32_t const *__restrict__ A, + int32_t const *__restrict__ B, + int32_t *__restrict__ C, uint32_t M, + uint32_t N, uint32_t P, uint32_t id, + uint32_t numThreads) { + // Parallelize by assigning each core one row + uint32_t const c = 8; // How many columns to split the matrix into + uint32_t const c_start = (P / c) * (id % c); + uint32_t const c_end = (P / c) * ((id % c) + 1); + for (uint32_t i = 2 * (id / c); i < M; i += 2 * (numThreads / c)) { + for (uint32_t j = c_start; j < c_end; j 
+= 2) { + int32_t c00 = 0; + int32_t c01 = 0; + int32_t c10 = 0; + int32_t c11 = 0; + for (uint32_t k = 0; k < N; k += 2) { + // Explicitly load the values first to help with scheduling + int32_t val_a00 = A[(i + 0) * N + k + 0]; + int32_t val_a01 = A[(i + 0) * N + k + 1]; + int32_t val_a10 = A[(i + 1) * N + k + 0]; + int32_t val_a11 = A[(i + 1) * N + k + 1]; + int32_t val_b00 = B[(k + 0) * P + j + 0]; + int32_t val_b01 = B[(k + 0) * P + j + 1]; + int32_t val_b10 = B[(k + 1) * P + j + 0]; + int32_t val_b11 = B[(k + 1) * P + j + 1]; + c00 += val_a00 * val_b00; + c00 += val_a01 * val_b10; + c01 += val_a00 * val_b01; + c01 += val_a01 * val_b11; + c10 += val_a10 * val_b00; + c10 += val_a11 * val_b10; + c11 += val_a10 * val_b01; + c11 += val_a11 * val_b11; + } + C[(i + 0) * P + j + 0] = c00; + C[(i + 0) * P + j + 1] = c01; + C[(i + 1) * P + j + 0] = c10; + C[(i + 1) * P + j + 1] = c11; + } + } +} + +/* + * Matrix multiplication ---------------------------------- + * kernel = matmul_unrolled_2x2_parallel_i32_xpulpv2 + * data type = 32-bit integer + * multi-core = yes + * unrolling = 4 elements of C per iteration (2x2 chunks) + * simd = no + * other = loads/stores explicitly written in asm + * for optimal register utilization + */ +void matmul_unrolled_2x2_parallel_i32_xpulpv2(int32_t const *__restrict__ A, + int32_t const *__restrict__ B, + int32_t *__restrict__ C, uint32_t M, + uint32_t N, uint32_t P, uint32_t id, + uint32_t numThreads) { +#ifdef __XPULPIMG + // Parallelize by assigning each core one row + uint32_t const c = 8; // How many columns to split the matrix into + uint32_t const c_start = (P / c) * (id % c); + uint32_t const c_end = (P / c) * ((id % c) + 1); + + uint32_t const A_incr = (N * sizeof(int32_t)) - sizeof(int32_t); + uint32_t const B_incr = (P * sizeof(int32_t)) - sizeof(int32_t); + + for (uint32_t i = 2 * (id / c); i < M; i += 2 * (numThreads / c)) { + for (uint32_t j = c_start; j < c_end; j += 2) { + int32_t c00 = 0; + int32_t c01 = 0; + int32_t 
c10 = 0; + int32_t c11 = 0; + + for (uint32_t k = 0; k < N; k += 2) { + int32_t *idx_a = &A[i * N + k]; + int32_t *idx_b = &B[k * P + j]; + int32_t val_a00, val_a01, val_a10, val_a11, val_b00, val_b01, val_b10, val_b11; + __asm__ volatile( + "p.lw %[a00], 4(%[addr_a]!) \n\t" + "p.lw %[a01], %[a_incr](%[addr_a]!) \n\t" + "p.lw %[a10], 4(%[addr_a]!) \n\t" + "p.lw %[a11], 0(%[addr_a]) \n\t" + "p.lw %[b00], 4(%[addr_b]!) \n\t" + "p.lw %[b01], %[b_incr](%[addr_b]!) \n\t" + "p.lw %[b10], 4(%[addr_b]!) \n\t" + "p.lw %[b11], 0(%[addr_b]) \n\t" + : [ a00 ] "=&r"(val_a00), [ a01 ] "=&r"(val_a01), + [ a10 ] "=&r"(val_a10), [ a11 ] "=&r"(val_a11), + [ b00 ] "=&r"(val_b00), [ b01 ] "=&r"(val_b01), + [ b10 ] "=&r"(val_b10), [ b11 ] "=&r"(val_b11), + [ addr_a ] "+&r"(idx_a), [ addr_b ] "+&r"(idx_b) + : [ a_incr ] "r"(A_incr), [ b_incr ] "r"(B_incr) + : "memory"); + /* The asm code above implements the following commented C code */ + // int32_t val_a00 = A[(i + 0) * N + k + 0]; + // int32_t val_a01 = A[(i + 0) * N + k + 1]; + // int32_t val_a10 = A[(i + 1) * N + k + 0]; + // int32_t val_a11 = A[(i + 1) * N + k + 1]; + // int32_t val_b00 = B[(k + 0) * P + j + 0]; + // int32_t val_b01 = B[(k + 0) * P + j + 1]; + // int32_t val_b10 = B[(k + 1) * P + j + 0]; + // int32_t val_b11 = B[(k + 1) * P + j + 1]; + c00 += val_a00 * val_b00; + c00 += val_a01 * val_b10; + c01 += val_a00 * val_b01; + c01 += val_a01 * val_b11; + c10 += val_a10 * val_b00; + c10 += val_a11 * val_b10; + c11 += val_a10 * val_b01; + c11 += val_a11 * val_b11; + } + int32_t *idx_c = &C[i * P + j]; + __asm__ volatile( + "p.sw %[s00], 4(%[addr_c]!) \n\t" + "p.sw %[s01], %[c_incr](%[addr_c]!) \n\t" + "p.sw %[s10], 4(%[addr_c]!) 
\n\t" + "p.sw %[s11], 0(%[addr_c]) \n\t" + : [ addr_c ] "+&r"(idx_c) + : [ s00 ] "r"(c00), [ s01 ] "r"(c01), + [ s10 ] "r"(c10), [ s11 ] "r"(c11), + [ c_incr ] "r"(B_incr) + : "memory"); + /* The asm code above implements the following commented C code */ + // C[(i + 0) * P + j + 0] = c00; + // C[(i + 0) * P + j + 1] = c01; + // C[(i + 1) * P + j + 0] = c10; + // C[(i + 1) * P + j + 1] = c11; } } +#endif } diff --git a/apps/matmul_i16/main.c b/apps/matmul_i16/main.c new file mode 100644 index 000000000..ec4707327 --- /dev/null +++ b/apps/matmul_i16/main.c @@ -0,0 +1,150 @@ +// Copyright 2020 ETH Zurich and University of Bologna. +// +// SPDX-License-Identifier: Apache-2.0 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Author: Samuel Riedel, ETH Zurich + +#include +#include + +#include "encoding.h" +#include "printf.h" +#include "runtime.h" +#include "synchronization.h" +#include "xpulp/mat_mul.h" + +// Define Matrix dimensions: +// C = AB with A=[MxN], B=[NxP], C=[MxP] +#define matrix_M 64 +#define matrix_N 64 +#define matrix_P 64 + +int16_t matrix_a[matrix_M * matrix_N] __attribute__((section(".l1_prio"))); +int16_t matrix_b[matrix_N * matrix_P] __attribute__((section(".l1_prio"))); +int32_t matrix_c[matrix_M * matrix_P] __attribute__((section(".l1_prio"))); + +int volatile error __attribute__((section(".l1"))); + +void init_matrix(int16_t *matrix, uint32_t num_rows, uint32_t num_columns, + int16_t a, int16_t b, int16_t c, uint32_t core_id, + uint32_t num_cores) { + uint32_t const split = 8; // How many rows/columns to split the matrix into + if (num_columns > num_rows) { + // Parallelize over columns + uint32_t const c_start = (num_rows / split) * (core_id % split); + uint32_t const c_end = (num_rows / split) * ((core_id % split) + 1); + for (uint32_t j = (core_id / split); j < num_columns; + j += (num_cores / split)) { + for (uint32_t i = c_start; i < c_end; ++i) { + matrix[i * num_columns + j] = a * (int16_t)i + b * (int16_t)j + c; + } + } + } else { + // Parallelize over rows + uint32_t const c_start = (num_columns / split) * (core_id % split); + uint32_t const c_end = (num_columns / split) * ((core_id % split) + 1); + for (uint32_t i = (core_id / split); i < num_rows; + i += (num_cores / split)) { + for (uint32_t j = c_start; j < c_end; ++j) { + matrix[i * num_columns + j] = a * (int16_t)i + b * (int16_t)j + c; + } + } + } +} + +// Initialize the matrices in parallel +int verify_matrix(int32_t *matrix, uint32_t num_rows, uint32_t num_columns, + uint32_t inner_dim, int16_t aa, int16_t ab, int16_t ac, + int16_t ba, int16_t bb, int16_t bc, uint32_t core_id, + uint32_t num_cores) { + // Convert to signed + int32_t n = (int32_t)inner_dim; + // Parallelize over rows + for 
(uint32_t i = core_id; i < num_rows; i += num_cores) { + for (uint32_t j = 0; j < num_columns; ++j) { + int32_t ii = (int32_t)i; + int32_t jj = (int32_t)j; + int32_t lin = ((int32_t)aa * bb * ii * jj + aa * bc * ii + ac * bb * jj + + (int32_t)ac * bc) * + n; + int32_t qua = + (((int32_t)aa * ba * ii + ab * bb * jj + ab * bc + (int32_t)ba * ac) * + (n * (n - 1))) / + 2; + int32_t cub = (((int32_t)ab * ba) * (n * (n - 1) * (2 * n - 1))) / 6; + int32_t golden = lin + qua + cub; + if (matrix[i * num_columns + j] != golden) { + return (i + j) == 0 ? -1 : (int)(i * num_columns + j); + } + matrix[i * num_columns + j] = 0; + } + } + return 0; +} + +int test_matrix_multiplication(int16_t *__restrict__ A, int16_t *__restrict__ B, + int32_t *__restrict__ C, uint32_t M, uint32_t N, + uint32_t P, uint32_t core_id, + uint32_t num_cores) { + int16_t const A_a = 1; + int16_t const A_b = 1; + int16_t const A_c = -40; + int16_t const B_a = 0; + int16_t const B_b = 1; + int16_t const B_c = 19; + + // Initialize Matrices + init_matrix(A, M, N, A_a, A_b, A_c, core_id, num_cores); + init_matrix(B, N, P, B_a, B_b, B_c, core_id, num_cores); + // Wait at barrier until everyone is ready + mempool_barrier(num_cores, num_cores / 2); + // Execute function to test. 
+ mempool_start_benchmark(); + +#ifdef __XPULPIMG + matmul_unrolled_4x2_pincr_asm_parallel_i16_xpulpv2(A, B, C, M, N, P, core_id, num_cores); +#else + matmul_unrolled_2x2_parallel_i16_rv32im(A, B, C, M, N, P, core_id, num_cores); +#endif + + mempool_stop_benchmark(); + // Wait at barrier befor checking + mempool_barrier(num_cores, num_cores * 4); + if (verify_matrix(C, M, P, N, A_a, A_b, A_c, B_a, B_b, B_c, core_id, + num_cores)) { + error = 1; + return -1; + } + return 0; +} + +int main() { + uint32_t core_id = mempool_get_core_id(); + uint32_t num_cores = mempool_get_core_count(); + // Initialize barrier and synchronize + mempool_barrier_init(core_id, num_cores); + + if (core_id == 0) { + error = 0; + } + + // Test the Matrix multiplication + test_matrix_multiplication(matrix_a, matrix_b, matrix_c, matrix_M, matrix_N, + matrix_P, core_id, num_cores); + // wait until all cores have finished + mempool_barrier(num_cores, num_cores * 4); + + return error; +} diff --git a/apps/matmul_w/main.c b/apps/matmul_i32/main.c similarity index 95% rename from apps/matmul_w/main.c rename to apps/matmul_i32/main.c index 5d2dd46d1..73ba928a1 100644 --- a/apps/matmul_w/main.c +++ b/apps/matmul_i32/main.c @@ -20,22 +20,16 @@ #include #include "encoding.h" -#include "kernel/mat_mul.h" #include "printf.h" #include "runtime.h" #include "synchronization.h" +#include "xpulp/mat_mul.h" // Define Matrix dimensions: // C = AB with A=[MxN], B=[NxP], C=[MxP] -#if NUM_CORES > 32 -#define matrix_M 32 +#define matrix_M 64 #define matrix_N 32 -#define matrix_P 32 -#else -#define matrix_M (NUM_CORES) -#define matrix_N (NUM_CORES) -#define matrix_P (NUM_CORES) -#endif +#define matrix_P 64 int32_t matrix_a[matrix_M * matrix_N] __attribute__((section(".l1_prio"))); int32_t matrix_b[matrix_N * matrix_P] __attribute__((section(".l1_prio"))); @@ -116,7 +110,13 @@ int test_matrix_multiplication(int32_t *__restrict__ A, int32_t *__restrict__ B, mempool_barrier(num_cores, num_cores / 2); // Execute 
function to test. mempool_start_benchmark(); - mat_mul_unrolled_2x2_parallel(A, B, C, M, N, P, core_id, num_cores); + +#ifdef __XPULPIMG + matmul_unrolled_2x2_parallel_i32_xpulpv2(A, B, C, M, N, P, core_id, num_cores); +#else + matmul_unrolled_2x2_parallel_i32_rv32im(A, B, C, M, N, P, core_id, num_cores); +#endif + mempool_stop_benchmark(); // Wait at barrier befor checking mempool_barrier(num_cores, num_cores * 4); diff --git a/apps/matmul_b/main.c b/apps/matmul_i8/main.c similarity index 91% rename from apps/matmul_b/main.c rename to apps/matmul_i8/main.c index ca5966599..516f61af5 100644 --- a/apps/matmul_b/main.c +++ b/apps/matmul_i8/main.c @@ -25,9 +25,6 @@ #include "synchronization.h" #include "xpulp/mat_mul.h" -// Define which kernel to use -#define XPULPIMG - // Define Matrix dimensions: // C = AB with A=[MxN], B=[NxP], C=[MxP] #define matrix_M 64 @@ -79,10 +76,12 @@ int verify_matrix(int32_t *matrix, uint32_t num_rows, uint32_t num_columns, for (uint32_t j = 0; j < num_columns; ++j) { int32_t ii = (int32_t)i; int32_t jj = (int32_t)j; - int32_t lin = - ((int32_t)aa * bb * ii * jj + aa * bc * ii + ac * bb * jj + (int32_t)ac * bc) * n; + int32_t lin = ((int32_t)aa * bb * ii * jj + aa * bc * ii + ac * bb * jj + + (int32_t)ac * bc) * + n; int32_t qua = - (((int32_t)aa * ba * ii + ab * bb * jj + ab * bc + (int32_t)ba * ac) * (n * (n - 1))) / + (((int32_t)aa * ba * ii + ab * bb * jj + ab * bc + (int32_t)ba * ac) * + (n * (n - 1))) / 2; int32_t cub = (((int32_t)ab * ba) * (n * (n - 1) * (2 * n - 1))) / 6; int32_t golden = lin + qua + cub; @@ -99,12 +98,12 @@ int test_matrix_multiplication(int8_t *__restrict__ A, int8_t *__restrict__ B, int32_t *__restrict__ C, uint32_t M, uint32_t N, uint32_t P, uint32_t core_id, uint32_t num_cores) { - int8_t const A_a = 0; + int8_t const A_a = 1; int8_t const A_b = 1; - int8_t const A_c = -32; + int8_t const A_c = -40; int8_t const B_a = 0; int8_t const B_b = 1; - int8_t const B_c = 16; + int8_t const B_c = 19; // Initialize 
Matrices init_matrix(A, M, N, A_a, A_b, A_c, core_id, num_cores); @@ -114,11 +113,12 @@ int test_matrix_multiplication(int8_t *__restrict__ A, int8_t *__restrict__ B, // Execute function to test. mempool_start_benchmark(); - #ifdef XPULPIMG - matmul_unrolled_2x4_pincr_asm_parallel_i8_xpulpv2(A, B, M, N, P, C, core_id, num_cores); - #else +#ifdef __XPULPIMG + matmul_unrolled_2x4_pincr_asm_parallel_i8_xpulpv2(A, B, C, M, N, P, core_id, num_cores); + //matmul_unrolled_2x4_parallel_i8_xpulpv2(A, B, C, M, N, P, core_id, num_cores); +#else matmul_unrolled_2x2_parallel_i8_rv32im(A, B, C, M, N, P, core_id, num_cores); - #endif +#endif mempool_stop_benchmark(); // Wait at barrier befor checking From e6a1857859839bdce996ab96c68fc7cf0c3c6da7 Mon Sep 17 00:00:00 2001 From: Sergio Mazzola Date: Mon, 1 Mar 2021 16:46:53 +0100 Subject: [PATCH 47/65] [apps] Add 2d convolution kernel for benchmarking --- apps/common/xpulp/conv_2d.h | 261 ++++++++++++++++++++++++++++++++++++ apps/conv2d_i8/main.c | 111 +++++++++++++++ 2 files changed, 372 insertions(+) create mode 100644 apps/common/xpulp/conv_2d.h create mode 100644 apps/conv2d_i8/main.c diff --git a/apps/common/xpulp/conv_2d.h b/apps/common/xpulp/conv_2d.h new file mode 100644 index 000000000..6a755c8ee --- /dev/null +++ b/apps/common/xpulp/conv_2d.h @@ -0,0 +1,261 @@ +// Copyright 2020 ETH Zurich and University of Bologna. +// +// SPDX-License-Identifier: Apache-2.0 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Author: Samuel Riedel, ETH Zurich +// Davide Schiavone, ETH Zurich +// Sergio Mazzola, ETH Zurich + +#include "xpulp/builtins_v2.h" +#include +#include + +#include "encoding.h" +#include "printf.h" +#include "runtime.h" +#include "synchronization.h" + +// Define which kernel to use +//#define __XPULPIMG + +/* + * 2D Convolution 3x3 ---------------------------------- + * kernel = conv2d_3x3_unrolled_i8_rv32im + * data type = 8-bit integer + * multi-core = no + * unrolling = whole 3x3 kernel + * simd = no + */ +void conv2d_3x3_unrolled_i8_rv32im(int8_t const volatile *__restrict__ in, uint32_t in_x, + uint32_t in_y, uint8_t const volatile *__restrict__ k, + int32_t volatile *__restrict__ out) { + int32_t sum; + uint32_t weight = 0; + for (int i = 0; i < 9; ++i) { + weight += k[i]; + } + + for (uint32_t i = 1; i < in_x - 1; ++i) { + for (uint32_t j = 1; j < in_y - 1; j++) { + sum = 1; + sum += in[(j - 1) * in_x + (i - 1)] * k[0]; + sum += in[(j - 1) * in_x + (i + 0)] * k[1]; + sum += in[(j - 1) * in_x + (i + 1)] * k[2]; + sum += in[(j + 0) * in_x + (i - 1)] * k[3]; + sum += in[(j + 0) * in_x + (i + 0)] * k[4]; + sum += in[(j + 0) * in_x + (i + 1)] * k[5]; + sum += in[(j + 1) * in_x + (i - 1)] * k[6]; + sum += in[(j + 1) * in_x + (i + 0)] * k[7]; + sum += in[(j + 1) * in_x + (i + 1)] * k[8]; + out[j * in_x + i] = sum / weight; + } + } +} + +/* + * 2D Convolution 3x3 ---------------------------------- + * kernel = conv_3x3_unrolled_i8_xpulpv2 + * data type = 8-bit integer + * multi-core = no + * unrolling = whole 3x3 kernel + * simd = yes, Xpulpv2 intrinsics + * + * Insipired from Conv3x3_Vector from pulp-training + */ +void conv2d_3x3_unrolled_i8_xpulpv2(int8_t const volatile *__restrict__ In_Img, int32_t volatile *__restrict__ Out_Img, + uint32_t R, uint32_t C, uint8_t const volatile *__restrict__ Kernel){ +#ifdef __XPULPIMG + v4s coeff_0, coeff_1, coeff_2; + v4s Img_0, Img_1, Img_2; + v4s new_data; + uint32_t r, c, t; + volatile int32_t S; + static v4s and_mask 
= {0xFF, 0xFF, 0xFF, 0x00}; + + uint32_t weight = 0; + for (int i = 0; i < 9; ++i) { + weight += Kernel[i]; + } + + coeff_0 = (v4s){Kernel[0], Kernel[1], Kernel[2], 0}; + coeff_1 = (v4s){Kernel[3], Kernel[4], Kernel[5], 0}; + coeff_2 = (v4s){Kernel[6], Kernel[7], Kernel[8], 0}; + + // image board is black + for (c = 1; c < C - 1; c++) { + + Img_0 = (v4s){In_Img[c - 1], In_Img[c], In_Img[c + 1], 0}; + Img_1 = (v4s){In_Img[c - 1 + R], In_Img[c + R], In_Img[c + 1 + R], 0}; + Img_2 = (v4s){In_Img[c - 1 + R * 2], In_Img[c + R * 2], In_Img[c + 1 + R * 2], 0}; + + for (r = 1; r < R - 1; r++) { + t = r * R + c; + S = __builtin_pulp_dotsp4(Img_0, coeff_0); + S = __builtin_pulp_sdotsp4(Img_1, coeff_1, S); + S = __builtin_pulp_sdotsp4(Img_2, coeff_2, S); + + Out_Img[t] = S/weight; + + // load a new rod + new_data = (v4s){In_Img[(r + 2) * R + c - 1], In_Img[(r + 2) * R + c], In_Img[(r + 2) * R + c + 1], 0}; + // move the window: move each vector one line down + Img_0 = Img_1; + Img_1 = Img_2; + Img_2 = new_data; + } + } +#endif +} + +// Testing +// Initialize the image in parallel +void init_conv2d_image_i8(int8_t *img, uint32_t img_x, uint32_t img_y) { + if (img_y > img_x) { + for (int i = 0; i < img_y; ++i) { + for (int j = 0; j < img_x; ++j) { + img[i * img_x + j] = (i % 16) + (j % 4); + } + } + } else { + for (int j = 0; j < img_x; ++j) { + for (int i = 0; i < img_y; ++i) { + img[i * img_x + j] = (i % 16) + (j % 4); + } + } + } +} + +// Verify and reset the image +int verify_conv2d_image_i8(int32_t *img, uint32_t img_x, uint32_t img_y) { + for (int i = 1; i < img_y - 1; ++i) { + int32_t y = i % 16; + if (i % 16 == 0) + y = 4; + if (i % 16 == 15) + y = 11; + for (int32_t j = 1; j < img_x - 1; ++j) { + int32_t x = ((j % 4) / 2) + 1; + if ((int32_t)img[i * img_x + j] != (int32_t)(x + y)) { + return (i + j) == 0 ? 
-1 : i * img_x + j; + } + img[i * img_x + j] = 0; + } + } + return 0; +} + +// Verify and reset the image +int verify_conv2d_image_i8_verbose(int32_t *img, uint32_t img_x, uint32_t img_y) { + for (int i = 1; i < img_y - 1; ++i) { + int32_t y = i % 16; + if (i % 16 == 0) + y = 4; + if (i % 16 == 15) + y = 11; + printf("|"); + for (int32_t j = 1; j < img_x - 1; ++j) { + int32_t x = ((j % 4) / 2) + 1; + printf(" %2u - %2u |", img[i * img_x + j], x + y); + } + printf("\n"); + } + return 0; +} + +void conv2d_3x3_unrolled_i8_xpulpv2_verbose(int8_t const *__restrict__ In_Img, int32_t volatile *__restrict__ Out_Img, + uint32_t R, uint32_t C, uint8_t const volatile *__restrict__ Kernel){ +#ifdef __XPULPIMG + v4s coeff_0, coeff_1, coeff_2; + v4s Img_0, Img_1, Img_2; + v4s new_data; + uint32_t r, c, t; + volatile int32_t S; + + uint32_t weight = 0; + for (int i = 0; i < 9; ++i) { + weight += Kernel[i]; + } + + // __asm__ volatile( + // "lw %[c0], 0(%[addr_ker]) \n\t" + // "lw %[c1], 3(%[addr_ker]) \n\t" + // "lw %[c2], 6(%[addr_ker]) \n\t" + // : [ c0 ] "=&r"(coeff_0), [ c1 ] "=&r"(coeff_1), [ c2 ] "=&r"(coeff_2) + // : [ addr_ker ] "r"(Kernel) + // : "memory"); + // + // coeff_0 = coeff_0 & 0xFFFFFF00; + // coeff_1 = coeff_1 & 0xFFFFFF00; + // coeff_2 = coeff_2 & 0xFFFFFF00; + + coeff_0 = (v4s){Kernel[0], Kernel[1], Kernel[2], 0}; + coeff_1 = (v4s){Kernel[3], Kernel[4], Kernel[5], 0}; + coeff_2 = (v4s){Kernel[6], Kernel[7], Kernel[8], 0}; + + // image board is black + for (c = 1; c < C - 1; c++) { + + Img_0 = (v4s){In_Img[c - 1], In_Img[c], In_Img[c + 1], 0}; + Img_1 = (v4s){In_Img[c - 1 + R], In_Img[c + R], In_Img[c + 1 + R], 0}; + Img_2 = (v4s){In_Img[c - 1 + R * 2], In_Img[c + R * 2], In_Img[c + 1 + R * 2], 0}; + + for (r = 1; r < R - 1; r++) { + printf("-------------\n"); + + printf("[ %u, %u, %u]\n", Img_0[0], Img_0[1], Img_0[2]); + printf("[ %u, %u, %u]\n", Img_1[0], Img_1[1], Img_1[2]); + printf("[ %u, %u, %u]\n", Img_2[0], Img_2[1], Img_2[2]); + + t = r * R + c; + S 
= __builtin_pulp_dotsp4(Img_0, coeff_0); + S = __builtin_pulp_sdotsp4(Img_1, coeff_1, S); + S = __builtin_pulp_sdotsp4(Img_2, coeff_2, S); + + printf("S = %d\n", S); + printf("S/weight = %d\n", S/weight); + + Out_Img[t] = S/weight; + printf("Out_Img[%d] = %d\n", t, Out_Img[t]); + + new_data = (v4s){In_Img[(r + 2) * R + c - 1], In_Img[(r + 2) * R + c], In_Img[(r + 2) * R + c + 1], 0}; + + // Move the window + /* + Three vectors: + Img_0 = {A0, A1, A2, 0} + Img_1 = {B0, B1, B2, 0} + Img_2 = {C0, C1, C2, 0} + Current Windonw: + XX XX XX + A0 A1 A2 + B0 B1 B2 + C0 C1 C2 + D0 D1 D2 + XX XX XX + We want to load next line (D0, D1, D2) in vector new_data + new_data = {D0, D1, D2, 0} + Move each vector one line down + Img_0 = Img_1 + Img_1 = Img_2 + Img_2 = new_data + */ + + Img_0 = Img_1; + Img_1 = Img_2; + Img_2 = new_data; + } + } +#endif +} diff --git a/apps/conv2d_i8/main.c b/apps/conv2d_i8/main.c new file mode 100644 index 000000000..d60bcc7bf --- /dev/null +++ b/apps/conv2d_i8/main.c @@ -0,0 +1,111 @@ +// Copyright 2020 ETH Zurich and University of Bologna. +// +// SPDX-License-Identifier: Apache-2.0 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Author: Samuel Riedel, ETH Zurich + +#include +#include + +#include "encoding.h" +#include "printf.h" +#include "runtime.h" +#include "synchronization.h" +#include "xpulp/conv_2d.h" + +#define M 32 +#define N 32 +#define KERNEL_N 3 +//#define VERBOSE_IN +//#define VERBOSE_OUT + +volatile int8_t in[M * N] __attribute__((section(".l1_prio"))); +volatile int32_t out[M * N] __attribute__((section(".l1_prio"))); +volatile uint8_t kernel[KERNEL_N * KERNEL_N] __attribute__((section(".l1"))); +volatile int error __attribute__((section(".l1"))); + +int main() { + uint32_t core_id = mempool_get_core_id(); + uint32_t num_cores = mempool_get_core_count(); + mempool_barrier_init(core_id, num_cores); + + mempool_barrier(num_cores, num_cores / 2); + + if (core_id == 0) { + // Initialize error + error = 0; + // Initialize kernel + kernel[0] = 1; + kernel[1] = 2; + kernel[2] = 1; + + kernel[3] = 2; + kernel[4] = 4; + kernel[5] = 2; + + kernel[6] = 1; + kernel[7] = 2; + kernel[8] = 1; + + // Initialize img + init_conv2d_image_i8(in, N, M); + +#ifdef VERBOSE_IN + printf("A:\n"); + for (int i = 0; i < M; i++) { + for (int j = 0; j < N; j++) { + printf("%4u ", in[i * N + j]); + } + printf("\n"); + } + printf("kernel:\n"); + for (int i = 0; i < KERNEL_N; i++) { + for (int j = 0; j < KERNEL_N; j++) { + printf("%4u ", kernel[i * KERNEL_N + j]); + } + printf("\n"); + } +#endif + + mempool_start_benchmark(); +#ifdef __XPULPIMG + conv2d_3x3_unrolled_i8_xpulpv2(in, out, M, N, kernel); +#else + conv2d_3x3_unrolled_i8_rv32im(in, N, M, kernel, out); +#endif + mempool_stop_benchmark(); + +#ifdef VERBOSE_OUT + printf("out:\n"); + for (int i = 1; i < M - 1; i++) { + for (int j = 1; j < N - 1; j++) { + printf("%4u ", out[i * N + j]); + } + printf("\n"); + } +#endif + + //verify_conv2d_image_i8_verbose(out, N, M); + // Check result + if (verify_conv2d_image_i8(out, N, M)){ + error = 1; + } + } + + // wait until all cores have finished + mempool_barrier(num_cores, 4 * num_cores); + + return 
error; +} From ebe67d7017b785758674f4e4c4b6ec945fb4bb2b Mon Sep 17 00:00:00 2001 From: Sergio Mazzola Date: Sat, 6 Mar 2021 16:29:04 +0100 Subject: [PATCH 48/65] [apps] Fix xpulp convolution induced error --- apps/common/xpulp/conv_2d.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/common/xpulp/conv_2d.h b/apps/common/xpulp/conv_2d.h index 6a755c8ee..9c047e61d 100644 --- a/apps/common/xpulp/conv_2d.h +++ b/apps/common/xpulp/conv_2d.h @@ -49,7 +49,7 @@ void conv2d_3x3_unrolled_i8_rv32im(int8_t const volatile *__restrict__ in, uint3 for (uint32_t i = 1; i < in_x - 1; ++i) { for (uint32_t j = 1; j < in_y - 1; j++) { - sum = 1; + sum = 0; sum += in[(j - 1) * in_x + (i - 1)] * k[0]; sum += in[(j - 1) * in_x + (i + 0)] * k[1]; sum += in[(j - 1) * in_x + (i + 1)] * k[2]; From 7a082cf87e42502808c8a30534e100f47aa8e97b Mon Sep 17 00:00:00 2001 From: Sergio Mazzola Date: Wed, 17 Mar 2021 11:36:05 +0100 Subject: [PATCH 49/65] [apps] Improve conv2d 3x3 algorithm --- apps/common/xpulp/conv_2d.h | 135 +++++++++++++++++++++++++++++++++++- apps/conv2d_i8/main.c | 4 +- 2 files changed, 135 insertions(+), 4 deletions(-) diff --git a/apps/common/xpulp/conv_2d.h b/apps/common/xpulp/conv_2d.h index 9c047e61d..2fd0c7863 100644 --- a/apps/common/xpulp/conv_2d.h +++ b/apps/common/xpulp/conv_2d.h @@ -64,6 +64,67 @@ void conv2d_3x3_unrolled_i8_rv32im(int8_t const volatile *__restrict__ in, uint3 } } +/* + * 2D Convolution 3x3 ---------------------------------- + * kernel = conv2d_3x3_unrolled_i8_rv32im + * data type = 8-bit integer + * multi-core = no + * unrolling = whole 3x3 kernel + * simd = no + */ +void conv2d_3x3_unrolled2_i8_rv32im(int8_t const volatile *__restrict__ in, uint32_t in_x, + uint32_t in_y, uint8_t const volatile *__restrict__ k, + int32_t volatile *__restrict__ out) { + int32_t sum; + uint32_t weight = 0; + + int8_t elem_00, elem_01, elem_02; + int8_t elem_10, elem_11, elem_12; + int8_t elem_20, elem_21, elem_22; + + uint32_t j; + + for (int i = 
0; i < 9; ++i) { + weight += k[i]; + } + + for (uint32_t i = 1; i < in_x - 1; ++i) { + elem_00 = in[i - 1]; + elem_01 = in[i + 0]; + elem_02 = in[i + 1]; + elem_10 = in[in_x + (i - 1)]; + elem_11 = in[in_x + (i + 0)]; + elem_12 = in[in_x + (i + 1)]; + elem_20 = in[2 * in_x + (i - 1)]; + elem_21 = in[2 * in_x + (i + 0)]; + elem_22 = in[2 * in_x + (i + 1)]; + for (j = 1; j < in_y - 1; j++) { + sum = 0; + sum += elem_00 * k[0]; + sum += elem_01 * k[1]; + sum += elem_02 * k[2]; + sum += elem_10 * k[3]; + sum += elem_11 * k[4]; + sum += elem_12 * k[5]; + sum += elem_20 * k[6]; + sum += elem_21 * k[7]; + sum += elem_22 * k[8]; + + elem_00 = elem_10; + elem_01 = elem_11; + elem_02 = elem_12; + elem_10 = elem_20; + elem_11 = elem_21; + elem_12 = elem_22; + elem_20 = in[(j + 2) * in_x + (i - 1)]; + elem_21 = in[(j + 2) * in_x + (i + 0)]; + elem_22 = in[(j + 2) * in_x + (i + 1)]; + + out[j * in_x + i] = sum / weight; + } + } +} + /* * 2D Convolution 3x3 ---------------------------------- * kernel = conv_3x3_unrolled_i8_xpulpv2 @@ -81,8 +142,7 @@ void conv2d_3x3_unrolled_i8_xpulpv2(int8_t const volatile *__restrict__ In_Img, v4s Img_0, Img_1, Img_2; v4s new_data; uint32_t r, c, t; - volatile int32_t S; - static v4s and_mask = {0xFF, 0xFF, 0xFF, 0x00}; + int32_t S; uint32_t weight = 0; for (int i = 0; i < 9; ++i) { @@ -119,6 +179,77 @@ void conv2d_3x3_unrolled_i8_xpulpv2(int8_t const volatile *__restrict__ In_Img, #endif } +/* + * 2D Convolution 3x3 ---------------------------------- + * kernel = conv_3x3_unrolled2_i8_xpulpv2 + * data type = 8-bit integer + * multi-core = no + * unrolling = whole 3x3 kernel, 2 kernels per iteration + * simd = yes, Xpulpv2 intrinsics + * + * Insipired from Conv3x3_Vector from pulp-training + */ +void conv2d_3x3_unrolled2_i8_xpulpv2(int8_t const volatile *__restrict__ In_Img, int32_t volatile *__restrict__ Out_Img, + uint32_t R, uint32_t C, uint8_t const volatile *__restrict__ Kernel){ +#ifdef __XPULPIMG + v4s coeff_0, coeff_1, coeff_2; + v4s 
Img_00, Img_10, Img_20; + v4s Img_01, Img_11, Img_21; + v4s new_data_0, new_data_1; + uint32_t r, c; + int32_t S_0, S_1; + + uint32_t weight = 0; + for (int i = 0; i < 9; ++i) { + weight += Kernel[i]; + } + + coeff_0 = (v4s){Kernel[0], Kernel[1], Kernel[2], 0}; + coeff_1 = (v4s){Kernel[3], Kernel[4], Kernel[5], 0}; + coeff_2 = (v4s){Kernel[6], Kernel[7], Kernel[8], 0}; + + // image board is black + for (c = 1; c < C/2; c++) { + + Img_00 = (v4s){In_Img[2*c - 2], In_Img[2*c - 1], In_Img[2*c], 0}; + Img_10 = (v4s){In_Img[2*c - 2 + R], In_Img[2*c - 1 + R], In_Img[2*c + R], 0}; + Img_20 = (v4s){In_Img[2*c - 2 + R * 2], In_Img[2*c - 1 + R * 2], In_Img[2*c + R * 2], 0}; + + Img_01 = (v4s){In_Img[2*c - 1], In_Img[2*c], In_Img[2*c + 1], 0}; + Img_11 = (v4s){In_Img[2*c - 1 + R], In_Img[2*c + R], In_Img[2*c + 1 + R], 0}; + Img_21 = (v4s){In_Img[2*c - 1 + R * 2], In_Img[2*c + R * 2], In_Img[2*c + 1 + R * 2], 0}; + + for (r = 1; r < R - 1; r++) { + S_0 = __builtin_pulp_dotsp4(Img_00, coeff_0); + S_1 = __builtin_pulp_dotsp4(Img_01, coeff_0); + + S_0 = __builtin_pulp_sdotsp4(Img_10, coeff_1, S_0); + S_1 = __builtin_pulp_sdotsp4(Img_11, coeff_1, S_1); + + S_0 = __builtin_pulp_sdotsp4(Img_20, coeff_2, S_0); + S_1 = __builtin_pulp_sdotsp4(Img_21, coeff_2, S_1); + + int32_t res_0 = S_0/weight; + int32_t res_1 = S_1/weight; + + // load a new rod + new_data_0 = (v4s){In_Img[(r + 2) * R + (2*c - 1) - 1], In_Img[(r + 2) * R + (2*c - 1)], In_Img[(r + 2) * R + (2*c - 1) + 1], 0}; + new_data_1 = (v4s){In_Img[(r + 2) * R + 2*c - 1], In_Img[(r + 2) * R + 2*c], In_Img[(r + 2) * R + 2*c + 1], 0}; + // move the window: move each vector one line down + Img_00 = Img_10; + Img_10 = Img_20; + Img_20 = new_data_0; + Img_01 = Img_11; + Img_11 = Img_21; + Img_21 = new_data_1; + + Out_Img[r * R + (2*c - 1)] = res_0; + Out_Img[r * R + 2*c] = res_1; + } + } +#endif +} + // Testing // Initialize the image in parallel void init_conv2d_image_i8(int8_t *img, uint32_t img_x, uint32_t img_y) { diff --git 
a/apps/conv2d_i8/main.c b/apps/conv2d_i8/main.c index d60bcc7bf..cfca1f0bf 100644 --- a/apps/conv2d_i8/main.c +++ b/apps/conv2d_i8/main.c @@ -81,9 +81,9 @@ int main() { mempool_start_benchmark(); #ifdef __XPULPIMG - conv2d_3x3_unrolled_i8_xpulpv2(in, out, M, N, kernel); + conv2d_3x3_unrolled2_i8_xpulpv2(in, out, M, N, kernel); #else - conv2d_3x3_unrolled_i8_rv32im(in, N, M, kernel, out); + conv2d_3x3_unrolled2_i8_rv32im(in, N, M, kernel, out); #endif mempool_stop_benchmark(); From 4ed9d2028a3ae332c87be8aac830e69c04782eed Mon Sep 17 00:00:00 2001 From: Matheus Cavalcante Date: Tue, 30 Mar 2021 15:06:12 +0200 Subject: [PATCH 50/65] [ci] :bug: Fix file filtering function for clang-format styling --- .gitlab-ci.d/lint.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.gitlab-ci.d/lint.sh b/.gitlab-ci.d/lint.sh index 17b2d35a8..5202e33cd 100755 --- a/.gitlab-ci.d/lint.sh +++ b/.gitlab-ci.d/lint.sh @@ -34,11 +34,11 @@ EXIT_STATUS=0 # Only check C and C++ files for clang-format compatibility echo "Checking C/C++ files for clang-format compliance" -clang_files=$(echo $files | tr ' ' '\n' | grep -P "(? 
Date: Tue, 30 Mar 2021 15:14:49 +0200 Subject: [PATCH 51/65] [apps] :art: Align applications with the code-style --- apps/common/xpulp/builtins_v2.h | 429 ++++++++++++++++++-------------- apps/common/xpulp/conv_2d.h | 92 ++++--- apps/common/xpulp/mat_mul.h | 63 +++-- apps/conv2d_i8/main.c | 8 +- apps/matmul_i16/main.c | 3 +- apps/matmul_i32/main.c | 3 +- apps/matmul_i8/main.c | 6 +- 7 files changed, 339 insertions(+), 265 deletions(-) diff --git a/apps/common/xpulp/builtins_v2.h b/apps/common/xpulp/builtins_v2.h index ffbcfe1c9..60923b321 100644 --- a/apps/common/xpulp/builtins_v2.h +++ b/apps/common/xpulp/builtins_v2.h @@ -18,290 +18,341 @@ #define __HAL_RISCV_BUILTINS_V2_H__ /* ARITHMETIC SECTION */ -typedef signed short v2s __attribute__((vector_size (4))); -typedef unsigned short v2u __attribute__((vector_size (4))); +typedef signed short v2s __attribute__((vector_size(4))); +typedef unsigned short v2u __attribute__((vector_size(4))); -typedef signed char v4s __attribute__((vector_size (4))); -typedef unsigned char v4u __attribute__((vector_size (4))); +typedef signed char v4s __attribute__((vector_size(4))); +typedef unsigned char v4u __attribute__((vector_size(4))); /* Packing of scalars into vectors */ -#define __PACK2(x, y) __builtin_pulp_pack2((signed short) (x), (signed short) (y)) -#define __PACKU2(x, y) __builtin_pulp_pack2((unsigned short) (x), (unsigned short) (y)) +#define __PACK2(x, y) __builtin_pulp_pack2((signed short)(x), (signed short)(y)) +#define __PACKU2(x, y) \ + __builtin_pulp_pack2((unsigned short)(x), (unsigned short)(y)) -#define __PACK4(x, y, z, t) __builtin_pulp_pack4((signed char) (x), (signed char) (y), (signed char) (z), (signed char) (t)) -#define __PACKU4(x, y, z, t) __builtin_pulp_pack4((unsigned char) (x), (unsigned char) (y), (unsigned char) (z), (unsigned char) (t)) +#define __PACK4(x, y, z, t) \ + __builtin_pulp_pack4((signed char)(x), (signed char)(y), (signed char)(z), \ + (signed char)(t)) +#define __PACKU4(x, y, z, t) \ + 
__builtin_pulp_pack4((unsigned char)(x), (unsigned char)(y), \ + (unsigned char)(z), (unsigned char)(t)) /* Max */ -#define __MAX(a, b) __builtin_pulp_maxsi((a), (b)) +#define __MAX(a, b) __builtin_pulp_maxsi((a), (b)) -#define __MAX2(x, y) __builtin_pulp_max2((x), (y)) -#define __MAX4(x, y) __builtin_pulp_max4((x), (y)) +#define __MAX2(x, y) __builtin_pulp_max2((x), (y)) +#define __MAX4(x, y) __builtin_pulp_max4((x), (y)) -#define __MAXU2(x, y) __builtin_pulp_maxu2((x), (y)) -#define __MAXU4(x, y) __builtin_pulp_maxu4((x), (y)) +#define __MAXU2(x, y) __builtin_pulp_maxu2((x), (y)) +#define __MAXU4(x, y) __builtin_pulp_maxu4((x), (y)) /* Min */ -#define __MIN2(x, y) __builtin_pulp_min2((x), (y)) -#define __MIN4(x, y) __builtin_pulp_min4((x), (y)) +#define __MIN2(x, y) __builtin_pulp_min2((x), (y)) +#define __MIN4(x, y) __builtin_pulp_min4((x), (y)) -#define __MINU2(x, y) __builtin_pulp_minu2((x), (y)) -#define __MINU4(x, y) __builtin_pulp_minu4((x), (y)) +#define __MINU2(x, y) __builtin_pulp_minu2((x), (y)) +#define __MINU4(x, y) __builtin_pulp_minu4((x), (y)) /* Clip */ -#define __CLIP(x, precision) __builtin_pulp_clip((x), -(1<<(precision)), (1< Date: Tue, 30 Mar 2021 15:16:39 +0200 Subject: [PATCH 52/65] [apps] :art: Prevent clang-format from touchting the test_macros.h assembly header --- apps/riscv-tests/isa/macros/scalar/test_macros.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/apps/riscv-tests/isa/macros/scalar/test_macros.h b/apps/riscv-tests/isa/macros/scalar/test_macros.h index 26d11ee6d..10b31a5e5 100644 --- a/apps/riscv-tests/isa/macros/scalar/test_macros.h +++ b/apps/riscv-tests/isa/macros/scalar/test_macros.h @@ -3,6 +3,7 @@ #ifndef __TEST_MACROS_SCALAR_H #define __TEST_MACROS_SCALAR_H +// clang-format off #----------------------------------------------------------------------- # Helper macros @@ -1346,4 +1347,6 @@ pass: \ #define TEST_DATA +// clang-format on + #endif From 2d35589692c7f3fd56a412feca083aebbdb15d6a Mon Sep 17 00:00:00 2001 
From: Samuel Riedel Date: Tue, 30 Mar 2021 17:35:40 +0200 Subject: [PATCH 53/65] [snitch] Disable the branch prediction if there are multiple early-hits --- CHANGELOG.md | 2 + .../src/snitch_icache/snitch_icache_l0.sv | 42 ++++++++++--------- 2 files changed, 24 insertions(+), 20 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1095a1e30..fed56a31c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ## Unreleased +### Fixed +- Disable the branch prediction if there are multiple early-hits ## 0.2.0 - 2021-03-29 diff --git a/hardware/deps/snitch/src/snitch_icache/snitch_icache_l0.sv b/hardware/deps/snitch/src/snitch_icache/snitch_icache_l0.sv index 8544d3078..147dbf093 100644 --- a/hardware/deps/snitch/src/snitch_icache/snitch_icache_l0.sv +++ b/hardware/deps/snitch/src/snitch_icache/snitch_icache_l0.sv @@ -298,26 +298,28 @@ module snitch_icache_l0 import snitch_icache_pkg::*; #( always_comb begin is_branch_taken[i] = 1'b0; is_jal[i] = 1'b0; - unique casez (ins_data[i*32+:32]) - // static prediction - riscv_instr::BEQ, - riscv_instr::BNE, - riscv_instr::BLT, - riscv_instr::BGE, - riscv_instr::BLTU, - riscv_instr::BGEU: begin - // look at the sign bit of the immediate field - // backward branches (immediate negative) taken - // forward branches not taken - is_branch_taken[i] = ins_data[i*32+31]; - end - riscv_instr::JAL: begin - is_jal[i] = 1'b1; - end - // we can't do anything about the JALR case as we don't - // know the destination. 
- default:; - endcase + if (hit_early_is_onehot) begin + unique casez (ins_data[i*32+:32]) + // static prediction + riscv_instr::BEQ, + riscv_instr::BNE, + riscv_instr::BLT, + riscv_instr::BGE, + riscv_instr::BLTU, + riscv_instr::BGEU: begin + // look at the sign bit of the immediate field + // backward branches (immediate negative) taken + // forward branches not taken + is_branch_taken[i] = ins_data[i*32+31]; + end + riscv_instr::JAL: begin + is_jal[i] = 1'b1; + end + // we can't do anything about the JALR case as we don't + // know the destination. + default:; + endcase + end end end From 2a063ddb2971f986c889cd7e5eef0a2214061cff Mon Sep 17 00:00:00 2001 From: Samuel Riedel Date: Tue, 30 Mar 2021 20:30:59 +0200 Subject: [PATCH 54/65] [apps] Align end of `.text` section with the instruction cache This guarantees that the instruction cache will not be polluted by data that is mapped to the same block as instructions --- CHANGELOG.md | 1 + apps/common/link.ld | 1 + 2 files changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index fed56a31c..643f8f638 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ### Fixed - Disable the branch prediction if there are multiple early-hits +- Align end of `.text` section with the instruction cache ## 0.2.0 - 2021-03-29 diff --git a/apps/common/link.ld b/apps/common/link.ld index 169194115..b51601744 100644 --- a/apps/common/link.ld +++ b/apps/common/link.ld @@ -25,6 +25,7 @@ SECTIONS { .text : { *(.text.init) *(.text) + . 
= ALIGN(0x40); } > l2 /* Data on L2 */ From 66a231b52614d84fe7d65afb0d99fa4d3527e5ef Mon Sep 17 00:00:00 2001 From: Samuel Riedel Date: Tue, 30 Mar 2021 20:18:01 +0200 Subject: [PATCH 55/65] [hardware] Disable prefetching if the instruction interface is invalid --- .../snitch/src/snitch_icache/snitch_icache.sv | 38 +++++++++---------- hardware/src/mempool_tile.sv | 2 +- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/hardware/deps/snitch/src/snitch_icache/snitch_icache.sv b/hardware/deps/snitch/src/snitch_icache/snitch_icache.sv index 8b66d80d3..9bcc4a4d6 100644 --- a/hardware/deps/snitch/src/snitch_icache/snitch_icache.sv +++ b/hardware/deps/snitch/src/snitch_icache/snitch_icache.sv @@ -52,7 +52,7 @@ module snitch_icache #( input logic clk_d2_i, input logic rst_ni, - input logic enable_prefetching_i, + input logic [NR_FETCH_PORTS-1:0] enable_prefetching_i, output snitch_icache_pkg::icache_events_t [NR_FETCH_PORTS-1:0] icache_events_o, input logic flush_valid_i, @@ -220,24 +220,24 @@ module snitch_icache #( .clk_i ( clk_d2_i ), .rst_ni, .flush_valid_i, - .enable_prefetching_i, - .icache_events_o ( icache_events_o [i] ), - .in_addr_i ( inst_addr_i [i] ), - .in_data_o ( in_cache_data [i] ), - .in_error_o ( in_cache_error [i] ), - .in_valid_i ( in_cache_valid [i] ), - .in_ready_o ( in_cache_ready [i] ), - - .out_req_addr_o ( local_prefetch_req.addr ), - .out_req_id_o ( local_prefetch_req.id ), - .out_req_valid_o ( local_prefetch_req_valid ), - .out_req_ready_i ( local_prefetch_req_ready ), - - .out_rsp_data_i ( local_prefetch_rsp.data ), - .out_rsp_error_i ( local_prefetch_rsp.error ), - .out_rsp_id_i ( local_prefetch_rsp.id ), - .out_rsp_valid_i ( local_prefetch_rsp_valid ), - .out_rsp_ready_o ( local_prefetch_rsp_ready ) + .enable_prefetching_i ( enable_prefetching_i [i] ), + .icache_events_o ( icache_events_o [i] ), + .in_addr_i ( inst_addr_i [i] ), + .in_data_o ( in_cache_data [i] ), + .in_error_o ( in_cache_error [i] ), + .in_valid_i ( 
in_cache_valid [i] ), + .in_ready_o ( in_cache_ready [i] ), + + .out_req_addr_o ( local_prefetch_req.addr ), + .out_req_id_o ( local_prefetch_req.id ), + .out_req_valid_o ( local_prefetch_req_valid ), + .out_req_ready_i ( local_prefetch_req_ready ), + + .out_rsp_data_i ( local_prefetch_rsp.data ), + .out_rsp_error_i ( local_prefetch_rsp.error ), + .out_rsp_id_i ( local_prefetch_rsp.id ), + .out_rsp_valid_i ( local_prefetch_rsp_valid ), + .out_rsp_ready_o ( local_prefetch_rsp_ready ) ); isochronous_spill_register #( diff --git a/hardware/src/mempool_tile.sv b/hardware/src/mempool_tile.sv index 79e5546a2..5a20ccf51 100644 --- a/hardware/src/mempool_tile.sv +++ b/hardware/src/mempool_tile.sv @@ -173,7 +173,7 @@ module mempool_tile .clk_i (clk_i ), .clk_d2_i (clk_i ), .rst_ni (rst_ni ), - .enable_prefetching_i (1'b1 ), + .enable_prefetching_i (snitch_inst_valid[c] ), .icache_events_o (/* Unused */ ), .flush_valid_i (1'b0 ), .flush_ready_o (/* Unused */ ), From 0f8ee0055ebb5a0687758c09fccfdfdd896c1f25 Mon Sep 17 00:00:00 2001 From: Matheus Cavalcante Date: Wed, 31 Mar 2021 13:07:27 +0200 Subject: [PATCH 56/65] [apps] :recycle: Clean-up sign conversion warnings from the convolution kernels --- apps/common/kernel/convolution.h | 127 ++++++++++++++++--------------- apps/common/xpulp/conv_2d.h | 86 +++++++++------------ 2 files changed, 103 insertions(+), 110 deletions(-) diff --git a/apps/common/kernel/convolution.h b/apps/common/kernel/convolution.h index d8ec60969..743ff3387 100644 --- a/apps/common/kernel/convolution.h +++ b/apps/common/kernel/convolution.h @@ -27,29 +27,30 @@ void conv2d_parallel(int32_t const *__restrict__ in, uint32_t in_x, uint32_t k_x, uint32_t k_y, int32_t volatile *__restrict__ out, uint32_t id, uint32_t numThreads) { - int boundary_x = k_x / 2; - int boundary_y = k_y / 2; + int boundary_x = (int)(k_x / 2); + int boundary_y = (int)(k_y / 2); // Now we only care about valid entries - while (id < boundary_x) { + while (id < (unsigned 
int)boundary_x) { id += numThreads; } int32_t sum; uint32_t weight = 0; - for (int i = 0; i < k_x * k_y; ++i) { + for (unsigned int i = 0; i < k_x * k_y; ++i) { weight += k[i]; } // TODO implement boundary halo // Start at the boundary_x - for (int i = id; i < in_x - boundary_x; i += numThreads) { - for (int j = boundary_y; j < in_y - boundary_y; j++) { + for (int i = (int)id; i < (int)in_x - boundary_x; i += (int)numThreads) { + for (int j = boundary_y; j < (int)in_y - boundary_y; j++) { sum = 0; - for (int m = -boundary_y; m < (int)(k_y - boundary_y); m++) { - for (int n = -boundary_x; n < (int)(k_x - boundary_x); n++) { - sum += in[(j + m) * in_x + (i + n)] * - k[(m + boundary_y) * k_x + (n + boundary_x)]; + for (int m = -boundary_y; m < (int)k_y - boundary_y; m++) { + for (int n = -boundary_x; n < (int)k_x - boundary_x; n++) { + sum += in[(unsigned int)(j + m) * in_x + (unsigned int)(i + n)] * + (int)k[(unsigned int)(m + boundary_y) * k_x + + (unsigned int)(n + boundary_x)]; } } - out[j * in_x + i] = sum / weight; + out[(unsigned int)j * in_x + (unsigned int)i] = sum / (int)weight; } } } @@ -59,24 +60,26 @@ void conv2d_shifted_parallel(int32_t const *__restrict__ in, uint32_t in_x, uint32_t k_x, uint32_t k_y, int32_t volatile *__restrict__ out, uint32_t id, uint32_t numThreads) { - uint32_t boundary_x = k_x / 2; - uint32_t boundary_y = k_y / 2; + int boundary_x = (int)(k_x / 2); + int boundary_y = (int)(k_y / 2); int32_t sum; uint32_t weight = 0; - for (int i = 0; i < k_x * k_y; ++i) { + for (unsigned int i = 0; i < k_x * k_y; ++i) { weight += k[i]; } // TODO implement boundary halo // Now we only care about valid entries - for (uint32_t i = id; i < in_x - (2 * boundary_x); i += numThreads) { - for (uint32_t j = 0; j < in_y - (2 * boundary_y); j++) { + for (unsigned int i = id; i < in_x - (unsigned int)(2 * boundary_x); + i += numThreads) { + for (unsigned int j = 0; j < in_y - (unsigned int)(2 * boundary_y); j++) { sum = 0; - for (uint32_t m = 0; m < k_y; m++) 
{ - for (uint32_t n = 0; n < k_x; n++) { - sum += in[(j + m) * in_x + (i + n)] * k[m * k_x + n]; + for (unsigned int m = 0; m < k_y; m++) { + for (unsigned int n = 0; n < k_x; n++) { + sum += in[(j + m) * in_x + (i + n)] * (int)k[m * k_x + n]; } } - out[(j + boundary_y) * in_x + (i + boundary_x)] = sum / weight; + out[(j + (unsigned int)boundary_y) * in_x + + (i + (unsigned int)boundary_x)] = sum / (int)weight; } } } @@ -87,7 +90,7 @@ void conv2d_3x3_unrolled_parallel(int32_t const *__restrict__ in, uint32_t in_x, uint32_t id, uint32_t numThreads) { int32_t sum; uint32_t weight = 0; - for (int i = 0; i < 9; ++i) { + for (unsigned int i = 0; i < 9; ++i) { weight += k[i]; } // TODO implement boundary halo @@ -109,16 +112,16 @@ void conv2d_3x3_unrolled_parallel(int32_t const *__restrict__ in, uint32_t in_x, for (uint32_t i = start; i < end; ++i) { for (uint32_t j = 1; j < in_y - 1; j++) { sum = 0; - sum += in[(j - 1) * in_x + (i - 1)] * k[0]; - sum += in[(j - 1) * in_x + (i + 0)] * k[1]; - sum += in[(j - 1) * in_x + (i + 1)] * k[2]; - sum += in[(j + 0) * in_x + (i - 1)] * k[3]; - sum += in[(j + 0) * in_x + (i + 0)] * k[4]; - sum += in[(j + 0) * in_x + (i + 1)] * k[5]; - sum += in[(j + 1) * in_x + (i - 1)] * k[6]; - sum += in[(j + 1) * in_x + (i + 0)] * k[7]; - sum += in[(j + 1) * in_x + (i + 1)] * k[8]; - out[j * in_x + i] = sum / weight; + sum += in[(j - 1) * in_x + (i - 1)] * (int)k[0]; + sum += in[(j - 1) * in_x + (i + 0)] * (int)k[1]; + sum += in[(j - 1) * in_x + (i + 1)] * (int)k[2]; + sum += in[(j + 0) * in_x + (i - 1)] * (int)k[3]; + sum += in[(j + 0) * in_x + (i + 0)] * (int)k[4]; + sum += in[(j + 0) * in_x + (i + 1)] * (int)k[5]; + sum += in[(j + 1) * in_x + (i - 1)] * (int)k[6]; + sum += in[(j + 1) * in_x + (i + 0)] * (int)k[7]; + sum += in[(j + 1) * in_x + (i + 1)] * (int)k[8]; + out[j * in_x + i] = sum / (int)weight; } } } @@ -135,19 +138,19 @@ void conv2d_3x3_shifted_unrolled_parallel(int32_t const *__restrict__ in, } // TODO implement boundary halo // 
Now we only care about valid entries - for (int i = id; i < in_x - 2; i += numThreads) { - for (int j = 0; j < in_y - 2; j++) { + for (unsigned int i = id; i < in_x - 2; i += numThreads) { + for (unsigned int j = 0; j < in_y - 2; j++) { sum = 0; - sum += in[(j + 0) * in_x + (i + 0)] * k[0]; - sum += in[(j + 0) * in_x + (i + 1)] * k[1]; - sum += in[(j + 0) * in_x + (i + 2)] * k[2]; - sum += in[(j + 1) * in_x + (i + 0)] * k[3]; - sum += in[(j + 1) * in_x + (i + 1)] * k[4]; - sum += in[(j + 1) * in_x + (i + 2)] * k[5]; - sum += in[(j + 2) * in_x + (i + 0)] * k[6]; - sum += in[(j + 2) * in_x + (i + 1)] * k[7]; - sum += in[(j + 2) * in_x + (i + 2)] * k[8]; - out[(j + 1) * in_x + (i + 1)] = sum / weight; + sum += in[(j + 0) * in_x + (i + 0)] * (int)k[0]; + sum += in[(j + 0) * in_x + (i + 1)] * (int)k[1]; + sum += in[(j + 0) * in_x + (i + 2)] * (int)k[2]; + sum += in[(j + 1) * in_x + (i + 0)] * (int)k[3]; + sum += in[(j + 1) * in_x + (i + 1)] * (int)k[4]; + sum += in[(j + 1) * in_x + (i + 2)] * (int)k[5]; + sum += in[(j + 2) * in_x + (i + 0)] * (int)k[6]; + sum += in[(j + 2) * in_x + (i + 1)] * (int)k[7]; + sum += in[(j + 2) * in_x + (i + 2)] * (int)k[8]; + out[(j + 1) * in_x + (i + 1)] = sum / (int)weight; } } } @@ -158,15 +161,15 @@ void init_conv2d_image(volatile int32_t *img, uint32_t img_x, uint32_t img_y, uint32_t id, uint32_t numThreads) { // Parallelize over rows if (img_y > img_x) { - for (int i = id; i < img_y; i += numThreads) { - for (int j = 0; j < img_x; ++j) { - img[i * img_x + j] = (i % 16) + (j % 4); + for (int i = (int)id; i < (int)img_y; i += (int)numThreads) { + for (int j = 0; j < (int)img_x; ++j) { + img[(unsigned int)i * img_x + (unsigned int)j] = (i % 16) + (j % 4); } } } else { - for (int j = id; j < img_x; j += numThreads) { - for (int i = 0; i < img_y; ++i) { - img[i * img_x + j] = (i % 16) + (j % 4); + for (int j = (int)id; j < (int)img_x; j += (int)numThreads) { + for (int i = 0; i < (int)img_y; ++i) { + img[(unsigned int)i * img_x + (unsigned 
int)j] = (i % 16) + (j % 4); } } } @@ -177,15 +180,15 @@ void zero_conv2d_image(volatile int32_t *img, uint32_t img_x, uint32_t img_y, uint32_t id, uint32_t numThreads) { // Parallelize over rows if (img_y > img_x) { - for (int i = id; i < img_y; i += numThreads) { - for (int j = 0; j < img_x; ++j) { - img[i * img_x + j] = 0; + for (int i = (int)id; i < (int)img_y; i += (int)numThreads) { + for (int j = 0; j < (int)img_x; ++j) { + img[(unsigned int)i * img_x + (unsigned int)j] = 0; } } } else { - for (int j = id; j < img_x; j += numThreads) { - for (int i = 0; i < img_y; ++i) { - img[i * img_x + j] = 0; + for (int j = (int)id; j < (int)img_x; j += (int)numThreads) { + for (int i = 0; i < (int)img_y; ++i) { + img[(unsigned int)i * img_x + (unsigned int)j] = 0; } } } @@ -197,18 +200,18 @@ extern uint32_t barrier_init; int verify_conv2d_image(volatile int32_t *img, uint32_t img_x, uint32_t img_y, uint32_t id, uint32_t numThreads) { // Parallelize over rows - for (int i = id + 1; i < img_y - 1; i += numThreads) { - int32_t y = i % 16; + for (int i = (int)id + 1; i < (int)img_y - 1; i += (int)numThreads) { + int y = i % 16; if (i % 16 == 0) y = 4; if (i % 16 == 15) y = 11; - for (int32_t j = 1; j < img_x - 1; ++j) { - int32_t x = ((j % 4) / 2) + 1; - if (img[i * img_x + j] != x + y) { - return (i + j) == 0 ? -1 : i * img_x + j; + for (int j = 1; j < (int)img_x - 1; ++j) { + int x = ((j % 4) / 2) + 1; + if ((int)img[i * (int)img_x + j] != x + y) { + return (i + j) == 0 ? 
-1 : i * (int)img_x + j; } - img[i * img_x + j] = 0; + img[i * (int)img_x + j] = 0; } } return 0; diff --git a/apps/common/xpulp/conv_2d.h b/apps/common/xpulp/conv_2d.h index d0ddc0802..02a3007af 100644 --- a/apps/common/xpulp/conv_2d.h +++ b/apps/common/xpulp/conv_2d.h @@ -28,7 +28,7 @@ #include "synchronization.h" // Define which kernel to use -//#define __XPULPIMG +#define __XPULPIMG /* * 2D Convolution 3x3 ---------------------------------- @@ -60,7 +60,7 @@ void conv2d_3x3_unrolled_i8_rv32im(int8_t const volatile *__restrict__ in, sum += in[(j + 1) * in_x + (i - 1)] * k[6]; sum += in[(j + 1) * in_x + (i + 0)] * k[7]; sum += in[(j + 1) * in_x + (i + 1)] * k[8]; - out[j * in_x + i] = sum / weight; + out[j * in_x + i] = sum / (int)weight; } } } @@ -122,7 +122,7 @@ void conv2d_3x3_unrolled2_i8_rv32im(int8_t const volatile *__restrict__ in, elem_21 = in[(j + 2) * in_x + (i + 0)]; elem_22 = in[(j + 2) * in_x + (i + 1)]; - out[j * in_x + i] = sum / weight; + out[j * in_x + i] = sum / (int)weight; } } } @@ -142,7 +142,7 @@ void conv2d_3x3_unrolled_i8_xpulpv2( int32_t volatile *__restrict__ Out_Img, uint32_t R, uint32_t C, uint8_t const volatile *__restrict__ Kernel) { #ifdef __XPULPIMG - v4s coeff_0, coeff_1, coeff_2; + v4u coeff_0, coeff_1, coeff_2; v4s Img_0, Img_1, Img_2; v4s new_data; uint32_t r, c, t; @@ -153,9 +153,9 @@ void conv2d_3x3_unrolled_i8_xpulpv2( weight += Kernel[i]; } - coeff_0 = (v4s){Kernel[0], Kernel[1], Kernel[2], 0}; - coeff_1 = (v4s){Kernel[3], Kernel[4], Kernel[5], 0}; - coeff_2 = (v4s){Kernel[6], Kernel[7], Kernel[8], 0}; + coeff_0 = (v4u){Kernel[0], Kernel[1], Kernel[2], 0}; + coeff_1 = (v4u){Kernel[3], Kernel[4], Kernel[5], 0}; + coeff_2 = (v4u){Kernel[6], Kernel[7], Kernel[8], 0}; // image board is black for (c = 1; c < C - 1; c++) { @@ -171,7 +171,7 @@ void conv2d_3x3_unrolled_i8_xpulpv2( S = __builtin_pulp_sdotsp4(Img_1, coeff_1, S); S = __builtin_pulp_sdotsp4(Img_2, coeff_2, S); - Out_Img[t] = S / weight; + Out_Img[t] = S / (int)weight; 
// load a new rod new_data = (v4s){In_Img[(r + 2) * R + c - 1], In_Img[(r + 2) * R + c], @@ -200,7 +200,7 @@ void conv2d_3x3_unrolled2_i8_xpulpv2( int32_t volatile *__restrict__ Out_Img, uint32_t R, uint32_t C, uint8_t const volatile *__restrict__ Kernel) { #ifdef __XPULPIMG - v4s coeff_0, coeff_1, coeff_2; + v4u coeff_0, coeff_1, coeff_2; v4s Img_00, Img_10, Img_20; v4s Img_01, Img_11, Img_21; v4s new_data_0, new_data_1; @@ -212,9 +212,9 @@ void conv2d_3x3_unrolled2_i8_xpulpv2( weight += Kernel[i]; } - coeff_0 = (v4s){Kernel[0], Kernel[1], Kernel[2], 0}; - coeff_1 = (v4s){Kernel[3], Kernel[4], Kernel[5], 0}; - coeff_2 = (v4s){Kernel[6], Kernel[7], Kernel[8], 0}; + coeff_0 = (v4u){Kernel[0], Kernel[1], Kernel[2], 0}; + coeff_1 = (v4u){Kernel[3], Kernel[4], Kernel[5], 0}; + coeff_2 = (v4u){Kernel[6], Kernel[7], Kernel[8], 0}; // image board is black for (c = 1; c < C / 2; c++) { @@ -241,8 +241,8 @@ void conv2d_3x3_unrolled2_i8_xpulpv2( S_0 = __builtin_pulp_sdotsp4(Img_20, coeff_2, S_0); S_1 = __builtin_pulp_sdotsp4(Img_21, coeff_2, S_1); - int32_t res_0 = S_0 / weight; - int32_t res_1 = S_1 / weight; + int32_t res_0 = S_0 / (int)weight; + int32_t res_1 = S_1 / (int)weight; // load a new rod new_data_0 = (v4s){In_Img[(r + 2) * R + (2 * c - 1) - 1], @@ -268,36 +268,38 @@ void conv2d_3x3_unrolled2_i8_xpulpv2( // Testing // Initialize the image in parallel -void init_conv2d_image_i8(int8_t *img, uint32_t img_x, uint32_t img_y) { +void init_conv2d_image_i8(volatile int8_t *img, uint32_t img_x, + uint32_t img_y) { if (img_y > img_x) { - for (int i = 0; i < img_y; ++i) { - for (int j = 0; j < img_x; ++j) { - img[i * img_x + j] = (i % 16) + (j % 4); + for (int i = 0; i < (int)img_y; ++i) { + for (int j = 0; j < (int)img_x; ++j) { + img[i * (int)img_x + j] = (int8_t)((i % 16) + (j % 4)); } } } else { - for (int j = 0; j < img_x; ++j) { - for (int i = 0; i < img_y; ++i) { - img[i * img_x + j] = (i % 16) + (j % 4); + for (int j = 0; j < (int)img_x; ++j) { + for (int i = 0; i < 
(int)img_y; ++i) { + img[i * (int)img_x + j] = (int8_t)((i % 16) + (j % 4)); } } } } // Verify and reset the image -int verify_conv2d_image_i8(int32_t *img, uint32_t img_x, uint32_t img_y) { - for (int i = 1; i < img_y - 1; ++i) { +int verify_conv2d_image_i8(volatile int32_t *img, uint32_t img_x, + uint32_t img_y) { + for (int i = 1; i < (int)img_y - 1; ++i) { int32_t y = i % 16; if (i % 16 == 0) y = 4; if (i % 16 == 15) y = 11; - for (int32_t j = 1; j < img_x - 1; ++j) { + for (int32_t j = 1; j < (int)img_x - 1; ++j) { int32_t x = ((j % 4) / 2) + 1; - if ((int32_t)img[i * img_x + j] != (int32_t)(x + y)) { - return (i + j) == 0 ? -1 : i * img_x + j; + if ((int32_t)img[i * (int)img_x + j] != (int32_t)(x + y)) { + return (i + j) == 0 ? -1 : i * (int)img_x + j; } - img[i * img_x + j] = 0; + img[i * (int)img_x + j] = 0; } } return 0; @@ -306,16 +308,16 @@ int verify_conv2d_image_i8(int32_t *img, uint32_t img_x, uint32_t img_y) { // Verify and reset the image int verify_conv2d_image_i8_verbose(int32_t *img, uint32_t img_x, uint32_t img_y) { - for (int i = 1; i < img_y - 1; ++i) { + for (int i = 1; i < (int)img_y - 1; ++i) { int32_t y = i % 16; if (i % 16 == 0) y = 4; if (i % 16 == 15) y = 11; printf("|"); - for (int32_t j = 1; j < img_x - 1; ++j) { + for (int32_t j = 1; j < (int)img_x - 1; ++j) { int32_t x = ((j % 4) / 2) + 1; - printf(" %2u - %2u |", img[i * img_x + j], x + y); + printf(" %2u - %2u |", img[i * (int)img_x + j], x + y); } printf("\n"); } @@ -326,7 +328,7 @@ void conv2d_3x3_unrolled_i8_xpulpv2_verbose( int8_t const *__restrict__ In_Img, int32_t volatile *__restrict__ Out_Img, uint32_t R, uint32_t C, uint8_t const volatile *__restrict__ Kernel) { #ifdef __XPULPIMG - v4s coeff_0, coeff_1, coeff_2; + v4u coeff_0, coeff_1, coeff_2; v4s Img_0, Img_1, Img_2; v4s new_data; uint32_t r, c, t; @@ -337,21 +339,9 @@ void conv2d_3x3_unrolled_i8_xpulpv2_verbose( weight += Kernel[i]; } - // __asm__ volatile( - // "lw %[c0], 0(%[addr_ker]) \n\t" - // "lw %[c1], 
3(%[addr_ker]) \n\t" - // "lw %[c2], 6(%[addr_ker]) \n\t" - // : [ c0 ] "=&r"(coeff_0), [ c1 ] "=&r"(coeff_1), [ c2 ] "=&r"(coeff_2) - // : [ addr_ker ] "r"(Kernel) - // : "memory"); - // - // coeff_0 = coeff_0 & 0xFFFFFF00; - // coeff_1 = coeff_1 & 0xFFFFFF00; - // coeff_2 = coeff_2 & 0xFFFFFF00; - - coeff_0 = (v4s){Kernel[0], Kernel[1], Kernel[2], 0}; - coeff_1 = (v4s){Kernel[3], Kernel[4], Kernel[5], 0}; - coeff_2 = (v4s){Kernel[6], Kernel[7], Kernel[8], 0}; + coeff_0 = (v4u){Kernel[0], Kernel[1], Kernel[2], 0}; + coeff_1 = (v4u){Kernel[3], Kernel[4], Kernel[5], 0}; + coeff_2 = (v4u){Kernel[6], Kernel[7], Kernel[8], 0}; // image board is black for (c = 1; c < C - 1; c++) { @@ -374,9 +364,9 @@ void conv2d_3x3_unrolled_i8_xpulpv2_verbose( S = __builtin_pulp_sdotsp4(Img_2, coeff_2, S); printf("S = %d\n", S); - printf("S/weight = %d\n", S / weight); + printf("S/weight = %d\n", S / (int)weight); - Out_Img[t] = S / weight; + Out_Img[t] = S / (int)weight; printf("Out_Img[%d] = %d\n", t, Out_Img[t]); new_data = (v4s){In_Img[(r + 2) * R + c - 1], In_Img[(r + 2) * R + c], From d4577bd677017b7a9b9e5634b872b55ea0a61e5c Mon Sep 17 00:00:00 2001 From: Matheus Cavalcante Date: Wed, 31 Mar 2021 13:08:18 +0200 Subject: [PATCH 57/65] [apps] Expose the atomic or instruction --- apps/common/synchronization.c | 19 ---------------- apps/common/synchronization.h | 41 +++++++++++++++++++++++++++++++++++ apps/convolution/main.c | 2 +- 3 files changed, 42 insertions(+), 20 deletions(-) diff --git a/apps/common/synchronization.c b/apps/common/synchronization.c index 8d03d754f..eae5189be 100644 --- a/apps/common/synchronization.c +++ b/apps/common/synchronization.c @@ -16,8 +16,6 @@ // Author: Samuel Riedel, ETH Zurich -static inline unsigned amo_add(void volatile *const address, unsigned value); - #include #include @@ -71,20 +69,3 @@ void mempool_barrier(uint32_t num_cores, uint32_t cycles) { mempool_wait(cycles); } } - -/** - - * Expose the atomic add instruction. 
- * - * @param address A pointer to an address on L2 memory to store the value. - * @param value Value to add to the specified memory location. - * - * @return Value previously stored in memory. - */ -static inline unsigned amo_add(void volatile *const address, unsigned value) { - unsigned ret; - __asm__ __volatile__("" : : : "memory"); - asm volatile("amoadd.w %0, %1, (%2)" : "=r"(ret) : "r"(value), "r"(address)); - __asm__ __volatile__("" : : : "memory"); - return ret; -} diff --git a/apps/common/synchronization.h b/apps/common/synchronization.h index b23bb834b..0df50af8b 100644 --- a/apps/common/synchronization.h +++ b/apps/common/synchronization.h @@ -16,6 +16,47 @@ // Author: Samuel Riedel, ETH Zurich +#ifndef __SYNCHRONIZATION_H__ +#define __SYNCHRONIZATION_H__ + // Barrier functions void mempool_barrier_init(uint32_t core_id, uint32_t num_cores); void mempool_barrier(uint32_t num_cores, uint32_t cycles); + +// Atomics + +/** + + * Expose the atomic add instruction. + * + * @param address A pointer to an address on L2 memory to store the value. + * @param value Value to add to the specified memory location. + * + * @return Value previously stored in memory. + */ +static inline unsigned amo_add(void volatile *const address, unsigned value) { + unsigned ret; + asm volatile("" : : : "memory"); + asm volatile("amoadd.w %0, %1, (%2)" : "=r"(ret) : "r"(value), "r"(address)); + asm volatile("" : : : "memory"); + return ret; +} + +/** + + * Expose the atomic or instruction. + * + * @param address A pointer to an address on L2 memory to store the value. + * @param value Value to bitwise-OR into the specified memory location. + * + * @return Value previously stored in memory.
+ */ +static inline unsigned amo_or(void volatile *const address, unsigned value) { + unsigned ret; + asm volatile("" : : : "memory"); + asm volatile("amoor.w %0, %1, (%2)" : "=r"(ret) : "r"(value), "r"(address)); + asm volatile("" : : : "memory"); + return ret; +} + +#endif // __SYNCHRONIZATION_H__ diff --git a/apps/convolution/main.c b/apps/convolution/main.c index 990b23c34..df47be73d 100644 --- a/apps/convolution/main.c +++ b/apps/convolution/main.c @@ -125,7 +125,7 @@ int main() { mempool_barrier(num_cores, num_cores * 4); // Check result if (verify_conv2d_image(out, N, M, core_id, num_cores)) { - __atomic_fetch_or(&error, i, __ATOMIC_SEQ_CST); + amo_or(&error, (unsigned)i); } } From ee90ed8ec6b2f66ce63767c8c8242d46ac4b7f87 Mon Sep 17 00:00:00 2001 From: Matheus Cavalcante Date: Wed, 31 Mar 2021 09:30:45 +0200 Subject: [PATCH 58/65] [apps] :recycle: Clean-up sign conversion warnings from sleep_wakeup --- apps/sleep_wakeup/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/sleep_wakeup/main.c b/apps/sleep_wakeup/main.c index f4b9bfa00..308a68689 100644 --- a/apps/sleep_wakeup/main.c +++ b/apps/sleep_wakeup/main.c @@ -6,7 +6,7 @@ #include "runtime.h" #include "synchronization.h" -volatile uint32_t atomic __attribute__((section(".l2"))) = -1; +volatile uint32_t atomic __attribute__((section(".l2"))) = (uint32_t)-1; extern volatile uint32_t tcdm_start_address_reg; extern volatile uint32_t tcdm_end_address_reg; From e1abb3c8db0f7f8c02355fc8cef3d01129aab64f Mon Sep 17 00:00:00 2001 From: Matheus Cavalcante Date: Wed, 31 Mar 2021 12:22:20 +0200 Subject: [PATCH 59/65] [apps] :recycle: Clean-up pedantic conversion warnings from the matmul kernels --- apps/common/xpulp/mat_mul.h | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/apps/common/xpulp/mat_mul.h b/apps/common/xpulp/mat_mul.h index a27c0135e..b3ee1b96f 100644 --- a/apps/common/xpulp/mat_mul.h +++ b/apps/common/xpulp/mat_mul.h @@ -320,12 +320,12 @@ 
void matmul_unrolled_2x4_pincr_asm_parallel_i8_xpulpv2( // Loop counter for P uint32_t k = 0; // Row decrement for A matrix - int32_t const N_decr = -N + 4; + int32_t const N_decr = -(int)N + 4; // Row increment for C matrix uint32_t const P_incr = (P * 4) - 12; for (k = core_id; k < P / 4; k += numThreads) { - int8_t *idx_a = &pSrcA[0]; // start_a + const int8_t *idx_a = &pSrcA[0]; // start_a int32_t *idx_c = &pDstC[k * 4]; // start_c int32_t const *end_c = &pDstC[P * M]; // actually (P * M) + (k * 4) while (idx_c < end_c) { @@ -340,7 +340,7 @@ void matmul_unrolled_2x4_pincr_asm_parallel_i8_xpulpv2( int32_t sum13 = 0; int8_t const *end_a = idx_a + N; - int8_t *idx_b = &pSrcB[k * 4]; // start_b + const int8_t *idx_b = &pSrcB[k * 4]; // start_b while (idx_a < end_a) { v4s aVec0, aVec1; @@ -514,14 +514,15 @@ void matmul_unrolled_4x2_pincr_asm_parallel_i16_xpulpv2( // Increment for A matrix = 1 row forward uint32_t const A_incr = N * sizeof(int16_t); // Decrement for A matrix = 3 rows backward and 2 words forward - int32_t const A_decr = -(N * 3 * sizeof(int16_t)) + 2 * sizeof(int16_t); + int32_t const A_decr = + -(int)(N * 3 * sizeof(int16_t)) + 2 * (int)sizeof(int16_t); // Increment for B matrix = 1 row forward uint32_t const B_incr = P * sizeof(int16_t); // bytes in 1 row // Increment for C matrix = 1 row forward and 1 word backward uint32_t const C_incr = (P * sizeof(int32_t)) - sizeof(int32_t); for (k = core_id; k < P / 2; k += numThreads) { - int16_t *idx_a = &pSrcA[0]; // start_a + const int16_t *idx_a = &pSrcA[0]; // start_a int32_t *idx_c = &pDstC[k * 2]; // start_c int32_t const *end_c = &pDstC[P * M]; // actually (P * M) + (k * 2) @@ -537,7 +538,7 @@ void matmul_unrolled_4x2_pincr_asm_parallel_i16_xpulpv2( int32_t sum31 = 0; int16_t const *end_a = idx_a + N; - int16_t *idx_b = &pSrcB[k * 2]; // start_b + const int16_t *idx_b = &pSrcB[k * 2]; // start_b while (idx_a < end_a) { @@ -691,8 +692,8 @@ void matmul_unrolled_2x2_parallel_i32_xpulpv2(int32_t const 
*__restrict__ A, int32_t c11 = 0; for (uint32_t k = 0; k < N; k += 2) { - int32_t *idx_a = &A[i * N + k]; - int32_t *idx_b = &B[k * P + j]; + const int32_t *idx_a = &A[i * N + k]; + const int32_t *idx_b = &B[k * P + j]; int32_t val_a00, val_a01, val_a10, val_a11, val_b00, val_b01, val_b10, val_b11; __asm__ volatile("p.lw %[a00], 4(%[addr_a]!) \n\t" From 6f8809991fe2f38eb4efdbb91a0c6f001817a0de Mon Sep 17 00:00:00 2001 From: Matheus Cavalcante Date: Wed, 31 Mar 2021 12:27:30 +0200 Subject: [PATCH 60/65] [apps] Compile kernels with __XPULPIMG defined, when the extension is active --- apps/common/runtime.mk | 4 ++++ apps/common/xpulp/conv_2d.h | 15 ++++++--------- apps/common/xpulp/mat_mul.h | 27 ++++++++++++--------------- 3 files changed, 22 insertions(+), 24 deletions(-) diff --git a/apps/common/runtime.mk b/apps/common/runtime.mk index 06e4e9877..3a6e399cb 100644 --- a/apps/common/runtime.mk +++ b/apps/common/runtime.mk @@ -68,6 +68,10 @@ RISCV_STRIP ?= $(RISCV_PREFIX)strip # Defines DEFINES := -DNUM_CORES=$(num_cores) -DBOOT_ADDR=0x$(boot_addr) -DL2_BASE=0x$(l2_base) -DL2_SIZE=0x$(l2_size) +# Define __XPULPIMG if the extension is active +ifeq ($(XPULPIMG),1) + DEFINES += -D__XPULPIMG +endif # Specify cross compilation target. 
This can be omitted if LLVM is built with riscv as default target RISCV_LLVM_TARGET ?= --target=$(RISCV_TARGET) --sysroot=$(GCC_INSTALL_DIR)/$(RISCV_TARGET) --gcc-toolchain=$(GCC_INSTALL_DIR) diff --git a/apps/common/xpulp/conv_2d.h b/apps/common/xpulp/conv_2d.h index 02a3007af..31b0ad167 100644 --- a/apps/common/xpulp/conv_2d.h +++ b/apps/common/xpulp/conv_2d.h @@ -27,9 +27,6 @@ #include "runtime.h" #include "synchronization.h" -// Define which kernel to use -#define __XPULPIMG - /* * 2D Convolution 3x3 ---------------------------------- * kernel = conv2d_3x3_unrolled_i8_rv32im @@ -137,11 +134,11 @@ void conv2d_3x3_unrolled2_i8_rv32im(int8_t const volatile *__restrict__ in, * * Insipired from Conv3x3_Vector from pulp-training */ +#ifdef __XPULPIMG void conv2d_3x3_unrolled_i8_xpulpv2( int8_t const volatile *__restrict__ In_Img, int32_t volatile *__restrict__ Out_Img, uint32_t R, uint32_t C, uint8_t const volatile *__restrict__ Kernel) { -#ifdef __XPULPIMG v4u coeff_0, coeff_1, coeff_2; v4s Img_0, Img_1, Img_2; v4s new_data; @@ -182,8 +179,8 @@ void conv2d_3x3_unrolled_i8_xpulpv2( Img_2 = new_data; } } -#endif } +#endif /* * 2D Convolution 3x3 ---------------------------------- @@ -195,11 +192,11 @@ void conv2d_3x3_unrolled_i8_xpulpv2( * * Insipired from Conv3x3_Vector from pulp-training */ +#ifdef __XPULPIMG void conv2d_3x3_unrolled2_i8_xpulpv2( int8_t const volatile *__restrict__ In_Img, int32_t volatile *__restrict__ Out_Img, uint32_t R, uint32_t C, uint8_t const volatile *__restrict__ Kernel) { -#ifdef __XPULPIMG v4u coeff_0, coeff_1, coeff_2; v4s Img_00, Img_10, Img_20; v4s Img_01, Img_11, Img_21; @@ -263,8 +260,8 @@ void conv2d_3x3_unrolled2_i8_xpulpv2( Out_Img[r * R + 2 * c] = res_1; } } -#endif } +#endif // Testing // Initialize the image in parallel @@ -324,10 +321,10 @@ int verify_conv2d_image_i8_verbose(int32_t *img, uint32_t img_x, return 0; } +#ifdef __XPULPIMG void conv2d_3x3_unrolled_i8_xpulpv2_verbose( int8_t const *__restrict__ In_Img, int32_t 
volatile *__restrict__ Out_Img, uint32_t R, uint32_t C, uint8_t const volatile *__restrict__ Kernel) { -#ifdef __XPULPIMG v4u coeff_0, coeff_1, coeff_2; v4s Img_0, Img_1, Img_2; v4s new_data; @@ -398,5 +395,5 @@ void conv2d_3x3_unrolled_i8_xpulpv2_verbose( Img_2 = new_data; } } -#endif } +#endif diff --git a/apps/common/xpulp/mat_mul.h b/apps/common/xpulp/mat_mul.h index b3ee1b96f..78297c999 100644 --- a/apps/common/xpulp/mat_mul.h +++ b/apps/common/xpulp/mat_mul.h @@ -30,9 +30,6 @@ * considered, leading to wrong results */ -// Define which kernel to use -#define __XPULPIMG - /* * Matrix multiplication ---------------------------------- * kernel = matmul_unrolled_2x2_parallel_i8_rv32im @@ -143,11 +140,11 @@ void matmul_unrolled_2x2_parallel_i16_rv32im(int16_t const *__restrict__ A, * * Original plp_mat_mult_i8s_xpulpv2 from pulp-dsp */ +#ifdef __XPULPIMG void matmul_unrolled_2x4_i8_xpulpv2(const int8_t *__restrict__ pSrcA, const int8_t *__restrict__ pSrcB, int32_t *__restrict__ pDstC, uint32_t M, uint32_t N, uint32_t P) { -#ifdef __XPULPIMG static v4s mask0 = {0, 1, 4, 5}; static v4s mask1 = {2, 3, 6, 7}; static v4s mask2 = {0, 2, 4, 6}; @@ -209,8 +206,8 @@ void matmul_unrolled_2x4_i8_xpulpv2(const int8_t *__restrict__ pSrcA, pDstC[(i * 2 + 1) * P + (k * 4 + 3)] = sum13; } } -#endif } +#endif /* * Matrix multiplication ---------------------------------- @@ -222,13 +219,13 @@ void matmul_unrolled_2x4_i8_xpulpv2(const int8_t *__restrict__ pSrcA, * * Original plp_mat_mult_i8p_xpulpv2 from pulp-dsp */ +#ifdef __XPULPIMG void matmul_unrolled_2x4_parallel_i8_xpulpv2(const int8_t *__restrict__ pSrcA, const int8_t *__restrict__ pSrcB, int32_t *__restrict__ pDstC, uint32_t M, uint32_t N, uint32_t P, uint32_t core_id, uint32_t numThreads) { -#ifdef __XPULPIMG static v4s mask0 = {0, 1, 4, 5}; static v4s mask1 = {2, 3, 6, 7}; static v4s mask2 = {0, 2, 4, 6}; @@ -290,8 +287,8 @@ void matmul_unrolled_2x4_parallel_i8_xpulpv2(const int8_t *__restrict__ pSrcA, pDstC[(i * 2 + 1) * P 
+ (k * 4 + 3)] = sum13; } } -#endif } +#endif /* * Matrix multiplication ---------------------------------- @@ -306,11 +303,11 @@ void matmul_unrolled_2x4_parallel_i8_xpulpv2(const int8_t *__restrict__ pSrcA, * * Inspired from plp_mat_mult_i8p_xpulpv2 from pulp-dsp */ +#ifdef __XPULPIMG void matmul_unrolled_2x4_pincr_asm_parallel_i8_xpulpv2( const int8_t *__restrict__ pSrcA, const int8_t *__restrict__ pSrcB, int32_t *__restrict__ pDstC, uint32_t M, uint32_t N, uint32_t P, uint32_t core_id, uint32_t numThreads) { -#ifdef __XPULPIMG // Masks for shuffles static v4s mask0 = {0, 1, 4, 5}; static v4s mask1 = {2, 3, 6, 7}; @@ -419,8 +416,8 @@ void matmul_unrolled_2x4_pincr_asm_parallel_i8_xpulpv2( idx_a += N; // adjust A matrix pointer } } -#endif } +#endif /* * Matrix multiplication ---------------------------------- @@ -432,13 +429,13 @@ void matmul_unrolled_2x4_pincr_asm_parallel_i8_xpulpv2( * * Original plp_mat_mult_i16p_xpulpv2 from pulp-dsp */ +#ifdef __XPULPIMG void matmul_unrolled_4x2_parallel_i16_xpulpv2(const int16_t *__restrict__ pSrcA, const int16_t *__restrict__ pSrcB, int32_t *__restrict__ pDstC, uint32_t M, uint32_t N, uint32_t P, uint32_t core_id, uint32_t numThreads) { -#ifdef __XPULPIMG uint32_t i = 0; // loop counter for M uint32_t j = 0; // loop counter for N uint32_t k = 0; // loop counter for P @@ -488,8 +485,8 @@ void matmul_unrolled_4x2_parallel_i16_xpulpv2(const int16_t *__restrict__ pSrcA, pDstC[(i * 4 + 3) * P + (k * 2 + 1)] = sum31; } } -#endif } +#endif /* * Matrix multiplication ---------------------------------- @@ -504,11 +501,11 @@ void matmul_unrolled_4x2_parallel_i16_xpulpv2(const int16_t *__restrict__ pSrcA, * * Inspired from plp_mat_mult_i16p_xpulpv2 from pulp-dsp */ +#ifdef __XPULPIMG void matmul_unrolled_4x2_pincr_asm_parallel_i16_xpulpv2( const int16_t *__restrict__ pSrcA, const int16_t *__restrict__ pSrcB, int32_t *__restrict__ pDstC, uint32_t M, uint32_t N, uint32_t P, uint32_t core_id, uint32_t numThreads) { -#ifdef __XPULPIMG 
// Loop counter for P uint32_t k = 0; // Increment for A matrix = 1 row forward @@ -606,8 +603,8 @@ void matmul_unrolled_4x2_pincr_asm_parallel_i16_xpulpv2( idx_a += N * 3; } } -#endif } +#endif /* * Matrix multiplication ---------------------------------- @@ -669,13 +666,13 @@ void matmul_unrolled_2x2_parallel_i32_rv32im(int32_t const *__restrict__ A, * other = loads/stores explicitly written in asm * for optimal register utilization */ +#ifdef __XPULPIMG void matmul_unrolled_2x2_parallel_i32_xpulpv2(int32_t const *__restrict__ A, int32_t const *__restrict__ B, int32_t *__restrict__ C, uint32_t M, uint32_t N, uint32_t P, uint32_t id, uint32_t numThreads) { -#ifdef __XPULPIMG // Parallelize by assigning each core one row uint32_t const c = 8; // How many columns to split the matrix into uint32_t const c_start = (P / c) * (id % c); @@ -745,5 +742,5 @@ void matmul_unrolled_2x2_parallel_i32_xpulpv2(int32_t const *__restrict__ A, // C[(i + 1) * P + j + 1] = c11; } } -#endif } +#endif From a53d0249ea30dfa31d2ef04139498603dd9efb56 Mon Sep 17 00:00:00 2001 From: Matheus Cavalcante Date: Wed, 31 Mar 2021 12:30:59 +0200 Subject: [PATCH 61/65] [snitch] :art: Remove commented-out RTL --- hardware/deps/snitch/src/snitch.sv | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/hardware/deps/snitch/src/snitch.sv b/hardware/deps/snitch/src/snitch.sv index 6e9ad329d..148549b0a 100644 --- a/hardware/deps/snitch/src/snitch.sv +++ b/hardware/deps/snitch/src/snitch.sv @@ -104,7 +104,6 @@ module snitch #( /* verilator lint_on WIDTH */ logic [31:0] opa, opb; -//logic [31:0] opa, opb, opc; logic [32:0] adder_result; logic [31:0] alu_result; @@ -222,7 +221,6 @@ module snitch #( assign acc_qdata_op_o = inst_data_i; assign acc_qdata_arga_o = {{32{gpr_rdata[0][31]}}, gpr_rdata[0]}; assign acc_qdata_argb_o = {{32{gpr_rdata[1][31]}}, gpr_rdata[1]}; -//assign acc_qdata_argc_o = {32'b0, alu_result}; assign acc_qdata_argc_o = {{32{gpr_rdata[2][31]}}, gpr_rdata[2]}; // instruction fetch 
interface @@ -1406,14 +1404,6 @@ module snitch #( endcase end -// always_comb begin -// unique case (opc_select) -// None: opc = '0; -// Reg: opc = gpr_rdata[2]; -// default: opc = '0; -// endcase -// end - assign gpr_raddr[0] = rs1; assign gpr_raddr[1] = rs2; // connect third read port only if present From 4605475d918beb93a5a063d18613fbb0bbd017cb Mon Sep 17 00:00:00 2001 From: Matheus Cavalcante Date: Wed, 31 Mar 2021 12:31:35 +0200 Subject: [PATCH 62/65] [apps] Compile kernels without -funroll-loops --- apps/common/runtime.mk | 1 - 1 file changed, 1 deletion(-) diff --git a/apps/common/runtime.mk b/apps/common/runtime.mk index 3a6e399cb..00697da4c 100644 --- a/apps/common/runtime.mk +++ b/apps/common/runtime.mk @@ -80,7 +80,6 @@ RISCV_WARNINGS += -Wunused-variable -Wconversion -Wall -Wextra # -Werror RISCV_FLAGS_COMMON_TESTS ?= -march=$(RISCV_ARCH) -mabi=$(RISCV_ABI) -I$(CURDIR)/common -static RISCV_FLAGS_COMMON ?= $(RISCV_FLAGS_COMMON_TESTS) -std=gnu99 -O3 -ffast-math -fno-common -fno-builtin-printf $(DEFINES) $(RISCV_WARNINGS) RISCV_FLAGS_GCC ?= -mcmodel=medany -Wa,-march=$(RISCV_ARCH_AS) -falign-loops=32 -falign-jumps=32 -#RISCV_FLAGS_GCC ?= -mcmodel=medany -Wa,-march=$(RISCV_ARCH_AS) -falign-loops=32 -falign-jumps=32 -funroll-loops RISCV_FLAGS_LLVM ?= -mcmodel=small -mllvm -enable-misched ifeq ($(COMPILER),gcc) From 2c3be5202ee384010d3f3c1d87a736700ec5505f Mon Sep 17 00:00:00 2001 From: Matheus Cavalcante Date: Wed, 31 Mar 2021 12:34:27 +0200 Subject: [PATCH 63/65] [README] :memo: Update README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 120aa91d0..fde0f29e0 100644 --- a/README.md +++ b/README.md @@ -119,7 +119,7 @@ If, during the Verilator model compilation, you run out of space on your disk, u ```bash export OBJCACHE='' ``` -to disable the use of `ccache`. This will make the following compilations slower, but avoid to use storage. +to disable the use of `ccache`. 
Keep in mind that this will make the following compilations slower, since compiled object files will no longer be cached. If the tracer is enabled, its output traces are found under `hardware/build`, for both ModelSim and Verilator simulations. From accbf1d924affcc4a88a42c0425027f2181ffe82 Mon Sep 17 00:00:00 2001 From: Matheus Cavalcante Date: Wed, 31 Mar 2021 12:47:42 +0200 Subject: [PATCH 64/65] [CHANGELOG] Update CHANGELOG --- CHANGELOG.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 643f8f638..143a3a20a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,9 +7,21 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ## Unreleased +### Added + +- Toolchain and hardware support for Xpulp instructions: + - Post-incrementing and register-register loads and stores (`pv.lb[u]`, `pv.lh[u]`, `pv.lw`) + - 32-bit multiply-accumulate instructions (`pv.mac`, `pv.msu`) + - Arithmetic SIMD instructions (`pv.{add, sub, abs, avg, avgu, min, minu, max, maxu, srl, sra, sll, or, xor, and, dotsp, dotup, dotusp, sdotsp, sdotup, sdotusp}.{h, b}`) + - Sub-word manipulation SIMD instructions (`pv.{extract, extractu, insert, shuffle2}.{h, b}`) + ### Fixed - Disable the branch prediction if there are multiple early-hits - Align end of `.text` section with the instruction cache +- Observe the code style guidelines in the matrix multiplication and convolution kernels + +### Changed +- Clean-up the pedantic compilation warnings of the matrix multiplication and convolution kernels ## 0.2.0 - 2021-03-29 From 9570e7553b107f48b4102d81127f77daa342a543 Mon Sep 17 00:00:00 2001 From: Matheus Cavalcante Date: Wed, 31 Mar 2021 13:06:11 +0200 Subject: [PATCH 65/65] [CHANGELOG] Release 0.3.0 --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 143a3a20a..adffc5144 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,8 +7,9 @@ and this project adheres to
[Semantic Versioning](http://semver.org/spec/v2.0.0. ## Unreleased -### Added +## 0.3.0 - 2021-03-31 +### Added - Toolchain and hardware support for Xpulp instructions: - Post-incrementing and register-register loads and stores (`pv.lb[u]`, `pv.lh[u]`, `pv.lw`) - 32-bit multiply-accumulate instructions (`pv.mac`, `pv.msu`)