From 27d1bbe0804efdb5c5d22cf9415d013097bdfb5a Mon Sep 17 00:00:00 2001
From: Charalampos Mitrodimas
Date: Wed, 15 Mar 2023 13:46:13 +0100
Subject: [PATCH] Zvknh[ab]: add "vsha2ms.vv" instruction

The "vsha2ms.vv" instruction performs a Vector SHA-2 message schedule.

Reserved encodings:
* Zvknha: SEW is any value other than 32.
* Zvknhb: SEW is any value other than 32 or 64.

Signed-off-by: Charalampos Mitrodimas
---
 model/riscv_insts_zvknhab.sail | 109 ++++++++++++++++++++++++++++++++++
 1 file changed, 109 insertions(+)

diff --git a/model/riscv_insts_zvknhab.sail b/model/riscv_insts_zvknhab.sail
index 449ab80d0..8da151724 100644
--- a/model/riscv_insts_zvknhab.sail
+++ b/model/riscv_insts_zvknhab.sail
@@ -16,6 +16,35 @@ function zvk_check_elements(VLEN, num_elem, LMUL, SEW) = {
 val ROTR : forall 'n 'm, 'm >= 0. (bits('n), bits('m), bits('m)) -> bits('n)
 function ROTR (x, n, SEW) = (x >> n) | (x << (SEW - n))
 
+/* SHR: shift x right by n bit positions. */
+val SHR : forall 'n 'm, 'm >= 0. (bits('n), bits('m)) -> bits('n)
+function SHR (x, n) = x >> n
+
+
+/* sig0: XOR of two rotations and one right shift of x.
+ * Rotate/shift amounts are (7, 18, 3) for SEW=32 and (1, 8, 7) for SEW=64;
+ * no other SEW value is handled by the match. */
+val sig0 : forall 'n, 'n >= 0. (bits('n), int) -> bits('n)
+function sig0 (x, SEW) = {
+  let sew_bits : bits('n) = to_bits('n, SEW);
+  match SEW {
+    32 => (ROTR(x, to_bits('n, 7), sew_bits) ^ ROTR(x, to_bits('n, 18), sew_bits) ^ SHR(x, to_bits('n, 3))),
+    64 => (ROTR(x, to_bits('n, 1), sew_bits) ^ ROTR(x, to_bits('n, 8), sew_bits) ^ SHR(x, to_bits('n, 7))),
+  }
+}
+
+/* sig1: XOR of two rotations and one right shift of x.
+ * Rotate/shift amounts are (17, 19, 10) for SEW=32 and (19, 61, 6) for SEW=64;
+ * no other SEW value is handled by the match. */
+val sig1 : forall 'n, 'n >= 0. (bits('n), int) -> bits('n)
+function sig1 (x, SEW) = {
+  let sew_bits : bits('n) = to_bits('n, SEW);
+  match SEW {
+    32 => (ROTR(x, to_bits('n, 17), sew_bits) ^ ROTR(x, to_bits('n, 19), sew_bits) ^ SHR(x, to_bits('n, 10))),
+    64 => (ROTR(x, to_bits('n, 19), sew_bits) ^ ROTR(x, to_bits('n, 61), sew_bits) ^ SHR(x, to_bits('n, 6))),
+  }
+}
+
 val ch : forall 'n, 'n >= 0. (bits('n), bits('n), bits('n)) -> bits('n)
 function ch (x, y, z) = ((x & y) ^ (not_vec(x) & z))
 
@@ -40,6 +69,86 @@ function sum1 (x, SEW) = {
   }
 }
 
+/* VSHA2MS.VV */
+
+union clause ast = RISCV_VSHA2ms : (regidx, regidx, regidx)
+
+mapping clause encdec = RISCV_VSHA2ms(vs2, vs1, vd) if (haveRVV() & (haveZvknha() | haveZvknhb()))
+  <-> 0b1011011 @ vs2 @ vs1 @ 0b010 @ vd @ 0b1110111 if (haveRVV() & (haveZvknha() | haveZvknhb()))
+
+/* Operand order in assembly is vd, vs2, vs1, each separated by sep(). */
+mapping clause assembly = RISCV_VSHA2ms(vs2, vs1, vd)
+  <-> "vsha2ms.vv" ^ spc() ^ vreg_name(vd) ^ sep() ^ vreg_name(vs2) ^ sep() ^ vreg_name(vs1)
+
+/* Computes four new message-schedule words w[16..19] per group of four
+ * elements read from vd, vs2 and vs1, and writes them back to vd. */
+function clause execute (RISCV_VSHA2ms(vs2, vs1, vd)) = {
+  let SEW = get_sew();
+  let LMUL_pow = get_lmul_pow();
+  let LMUL = if LMUL_pow < 0 then 0 else LMUL_pow;
+  let VLEN = int_power(2, get_vlen_pow());
+  let num_elem = get_num_elem(LMUL_pow, SEW);
+
+  if (zvk_check_elements(VLEN, num_elem, LMUL, SEW) == false)
+  then {
+    handle_illegal();
+    RETIRE_FAIL
+  } else {
+    let 'n = num_elem;
+    let 'm = SEW;
+
+    let vm_val : vector('n, dec, bool) = read_vmask(num_elem, 0b1, vreg_name("v0"));
+    let vs2_val : vector('n, dec, bits('m)) = read_vreg(num_elem, SEW, LMUL_pow, vs2);
+    let vs1_val : vector('n, dec, bits('m)) = read_vreg(num_elem, SEW, LMUL_pow, vs1);
+    let vd_val : vector('n, dec, bits('m)) = read_vreg(num_elem, SEW, LMUL_pow, vd);
+    result : vector('n, dec, bits('m)) = undefined;
+    mask : vector('n, dec, bool) = undefined;
+
+    (result, mask) = init_masked_result(num_elem, SEW, LMUL_pow, vd_val, vm_val);
+
+    w : vector(20, dec, bits('m)) = undefined;
+
+    /* Each element group is 4 elements wide (see the i*4+k indexing below),
+     * so the group range is vl/4 and vstart/4, not a division by num_elem. */
+    eg_len = (unsigned(vl) / 4);
+    eg_start = (unsigned(vstart) / 4);
+
+    foreach (i from eg_start to (eg_len - 1)) {
+      /* Guards every i*4+k (k in 0..3) vector access in this iteration. */
+      assert(0 <= ((i * 4) + 3) & ((i * 4) + 3) < 'n);
+      w[0] = vd_val[i*4+0];
+      w[1] = vd_val[i*4+1];
+      w[2] = vd_val[i*4+2];
+      w[3] = vd_val[i*4+3];
+
+      w[4] = vs2_val[i*4+0];
+      w[9] = vs2_val[i*4+1];
+      w[10] = vs2_val[i*4+2];
+      w[11] = vs2_val[i*4+3];
+
+      w[12] = vs1_val[i*4+0];
+      w[13] = vs1_val[i*4+1];
+      w[14] = vs1_val[i*4+2];
+      w[15] = vs1_val[i*4+3];
+
+      w[16] = sig1(w[14], SEW) + w[9] + sig0(w[1], SEW) + w[0];
+      w[17] = sig1(w[15], SEW) + w[10] + sig0(w[2], SEW) + w[1];
+      w[18] = sig1(w[16], SEW) + w[11] + sig0(w[3], SEW) + w[2];
+      w[19] = sig1(w[17], SEW) + w[12] + sig0(w[4], SEW) + w[3];
+
+      assert(0 <= 3 & 3 < 'n);
+      result[i*4+0] = w[16];
+      result[i*4+1] = w[17];
+      result[i*4+2] = w[18];
+      result[i*4+3] = w[19];
+    };
+
+    write_single_vreg(num_elem, 'm, vd, result);
+    vstart = EXTZ(0b0);
+    RETIRE_SUCCESS
+  }
+}
+
 /* VSHA2C[HL].VV */
 
 mapping ch_or_cl : string <-> bits(7) = {