diff --git a/.gitlab-ci.d/lint.sh b/.gitlab-ci.d/lint.sh index 17b2d35a8..5202e33cd 100755 --- a/.gitlab-ci.d/lint.sh +++ b/.gitlab-ci.d/lint.sh @@ -34,11 +34,11 @@ EXIT_STATUS=0 # Only check C and C++ files for clang-format compatibility echo "Checking C/C++ files for clang-format compliance" -clang_files=$(echo $files | tr ' ' '\n' | grep -P "(? img_x) { - for (int i = id; i < img_y; i += numThreads) { - for (int j = 0; j < img_x; ++j) { - img[i * img_x + j] = (i % 16) + (j % 4); + for (int i = (int)id; i < (int)img_y; i += (int)numThreads) { + for (int j = 0; j < (int)img_x; ++j) { + img[(unsigned int)i * img_x + (unsigned int)j] = (i % 16) + (j % 4); } } } else { - for (int j = id; j < img_x; j += numThreads) { - for (int i = 0; i < img_y; ++i) { - img[i * img_x + j] = (i % 16) + (j % 4); + for (int j = (int)id; j < (int)img_x; j += (int)numThreads) { + for (int i = 0; i < (int)img_y; ++i) { + img[(unsigned int)i * img_x + (unsigned int)j] = (i % 16) + (j % 4); } } } @@ -177,15 +180,15 @@ void zero_conv2d_image(volatile int32_t *img, uint32_t img_x, uint32_t img_y, uint32_t id, uint32_t numThreads) { // Parallelize over rows if (img_y > img_x) { - for (int i = id; i < img_y; i += numThreads) { - for (int j = 0; j < img_x; ++j) { - img[i * img_x + j] = 0; + for (int i = (int)id; i < (int)img_y; i += (int)numThreads) { + for (int j = 0; j < (int)img_x; ++j) { + img[(unsigned int)i * img_x + (unsigned int)j] = 0; } } } else { - for (int j = id; j < img_x; j += numThreads) { - for (int i = 0; i < img_y; ++i) { - img[i * img_x + j] = 0; + for (int j = (int)id; j < (int)img_x; j += (int)numThreads) { + for (int i = 0; i < (int)img_y; ++i) { + img[(unsigned int)i * img_x + (unsigned int)j] = 0; } } } @@ -197,18 +200,18 @@ extern uint32_t barrier_init; int verify_conv2d_image(volatile int32_t *img, uint32_t img_x, uint32_t img_y, uint32_t id, uint32_t numThreads) { // Parallelize over rows - for (int i = id + 1; i < img_y - 1; i += numThreads) { - int32_t y = i % 16; + 
for (int i = (int)id + 1; i < (int)img_y - 1; i += (int)numThreads) { + int y = i % 16; if (i % 16 == 0) y = 4; if (i % 16 == 15) y = 11; - for (int32_t j = 1; j < img_x - 1; ++j) { - int32_t x = ((j % 4) / 2) + 1; - if (img[i * img_x + j] != x + y) { - return (i + j) == 0 ? -1 : i * img_x + j; + for (int j = 1; j < (int)img_x - 1; ++j) { + int x = ((j % 4) / 2) + 1; + if ((int)img[i * (int)img_x + j] != x + y) { + return (i + j) == 0 ? -1 : i * (int)img_x + j; } - img[i * img_x + j] = 0; + img[i * (int)img_x + j] = 0; } } return 0; diff --git a/apps/common/link.ld b/apps/common/link.ld index 169194115..b51601744 100644 --- a/apps/common/link.ld +++ b/apps/common/link.ld @@ -25,6 +25,7 @@ SECTIONS { .text : { *(.text.init) *(.text) + . = ALIGN(0x40); } > l2 /* Data on L2 */ diff --git a/apps/common/runtime.mk b/apps/common/runtime.mk index 07e8b96a5..00697da4c 100644 --- a/apps/common/runtime.mk +++ b/apps/common/runtime.mk @@ -28,7 +28,7 @@ GCC_INSTALL_DIR ?= $(INSTALL_DIR)/riscv-gcc LLVM_INSTALL_DIR ?= $(INSTALL_DIR)/llvm HALIDE_INSTALL_DIR ?= $(INSTALL_DIR)/halide -COMPILER ?= llvm +COMPILER ?= gcc XPULPIMG ?= $(xpulpimg) RISCV_XLEN ?= 32 @@ -68,6 +68,10 @@ RISCV_STRIP ?= $(RISCV_PREFIX)strip # Defines DEFINES := -DNUM_CORES=$(num_cores) -DBOOT_ADDR=0x$(boot_addr) -DL2_BASE=0x$(l2_base) -DL2_SIZE=0x$(l2_size) +# Define __XPULPIMG if the extension is active +ifeq ($(XPULPIMG),1) + DEFINES += -D__XPULPIMG +endif # Specify cross compilation target. 
This can be omitted if LLVM is built with riscv as default target RISCV_LLVM_TARGET ?= --target=$(RISCV_TARGET) --sysroot=$(GCC_INSTALL_DIR)/$(RISCV_TARGET) --gcc-toolchain=$(GCC_INSTALL_DIR) @@ -75,7 +79,7 @@ RISCV_LLVM_TARGET ?= --target=$(RISCV_TARGET) --sysroot=$(GCC_INSTALL_DIR)/$(RI RISCV_WARNINGS += -Wunused-variable -Wconversion -Wall -Wextra # -Werror RISCV_FLAGS_COMMON_TESTS ?= -march=$(RISCV_ARCH) -mabi=$(RISCV_ABI) -I$(CURDIR)/common -static RISCV_FLAGS_COMMON ?= $(RISCV_FLAGS_COMMON_TESTS) -std=gnu99 -O3 -ffast-math -fno-common -fno-builtin-printf $(DEFINES) $(RISCV_WARNINGS) -RISCV_FLAGS_GCC ?= -mcmodel=medany -Wa,-march=$(RISCV_ARCH_AS) +RISCV_FLAGS_GCC ?= -mcmodel=medany -Wa,-march=$(RISCV_ARCH_AS) -falign-loops=32 -falign-jumps=32 RISCV_FLAGS_LLVM ?= -mcmodel=small -mllvm -enable-misched ifeq ($(COMPILER),gcc) diff --git a/apps/common/synchronization.c b/apps/common/synchronization.c index 8d03d754f..eae5189be 100644 --- a/apps/common/synchronization.c +++ b/apps/common/synchronization.c @@ -16,8 +16,6 @@ // Author: Samuel Riedel, ETH Zurich -static inline unsigned amo_add(void volatile *const address, unsigned value); - #include #include @@ -71,20 +69,3 @@ void mempool_barrier(uint32_t num_cores, uint32_t cycles) { mempool_wait(cycles); } } - -/** - - * Expose the atomic add instruction. - * - * @param address A pointer to an address on L2 memory to store the value. - * @param value Value to add to the specified memory location. - * - * @return Value previously stored in memory. 
- */ -static inline unsigned amo_add(void volatile *const address, unsigned value) { - unsigned ret; - __asm__ __volatile__("" : : : "memory"); - asm volatile("amoadd.w %0, %1, (%2)" : "=r"(ret) : "r"(value), "r"(address)); - __asm__ __volatile__("" : : : "memory"); - return ret; -} diff --git a/apps/common/synchronization.h b/apps/common/synchronization.h index b23bb834b..0df50af8b 100644 --- a/apps/common/synchronization.h +++ b/apps/common/synchronization.h @@ -16,6 +16,47 @@ // Author: Samuel Riedel, ETH Zurich +#ifndef __SYNCHRONIZATION_H__ +#define __SYNCHRONIZATION_H__ + // Barrier functions void mempool_barrier_init(uint32_t core_id, uint32_t num_cores); void mempool_barrier(uint32_t num_cores, uint32_t cycles); + +// Atomics + +/** + + * Expose the atomic add instruction. + * + * @param address A pointer to an address on L2 memory to store the value. + * @param value Value to add to the specified memory location. + * + * @return Value previously stored in memory. + */ +static inline unsigned amo_add(void volatile *const address, unsigned value) { + unsigned ret; + asm volatile("" : : : "memory"); + asm volatile("amoadd.w %0, %1, (%2)" : "=r"(ret) : "r"(value), "r"(address)); + asm volatile("" : : : "memory"); + return ret; +} + +/** + + * Expose the atomic or instruction. + * + * @param address A pointer to an address on L2 memory to store the value. + * @param value Value to add to the specified memory location. + * + * @return Value previously stored in memory. 
+ */ +static inline unsigned amo_or(void volatile *const address, unsigned value) { + unsigned ret; + asm volatile("" : : : "memory"); + asm volatile("amoor.w %0, %1, (%2)" : "=r"(ret) : "r"(value), "r"(address)); + asm volatile("" : : : "memory"); + return ret; +} + +#endif // __SYNCHRONIZATION_H__ diff --git a/apps/common/xpulp/builtins_v2.h b/apps/common/xpulp/builtins_v2.h new file mode 100644 index 000000000..60923b321 --- /dev/null +++ b/apps/common/xpulp/builtins_v2.h @@ -0,0 +1,358 @@ +/* + * Copyright (C) 2019 ETH Zurich, University of Bologna + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __HAL_RISCV_BUILTINS_V2_H__ +#define __HAL_RISCV_BUILTINS_V2_H__ + +/* ARITHMETIC SECTION */ +typedef signed short v2s __attribute__((vector_size(4))); +typedef unsigned short v2u __attribute__((vector_size(4))); + +typedef signed char v4s __attribute__((vector_size(4))); +typedef unsigned char v4u __attribute__((vector_size(4))); + +/* Packing of scalars into vectors */ +#define __PACK2(x, y) __builtin_pulp_pack2((signed short)(x), (signed short)(y)) +#define __PACKU2(x, y) \ + __builtin_pulp_pack2((unsigned short)(x), (unsigned short)(y)) + +#define __PACK4(x, y, z, t) \ + __builtin_pulp_pack4((signed char)(x), (signed char)(y), (signed char)(z), \ + (signed char)(t)) +#define __PACKU4(x, y, z, t) \ + __builtin_pulp_pack4((unsigned char)(x), (unsigned char)(y), \ + (unsigned char)(z), (unsigned char)(t)) + +/* Max */ +#define __MAX(a, b) __builtin_pulp_maxsi((a), (b)) + +#define __MAX2(x, y) __builtin_pulp_max2((x), (y)) +#define __MAX4(x, y) __builtin_pulp_max4((x), (y)) + +#define __MAXU2(x, y) __builtin_pulp_maxu2((x), (y)) +#define __MAXU4(x, y) __builtin_pulp_maxu4((x), (y)) + +/* Min */ +#define __MIN2(x, y) __builtin_pulp_min2((x), (y)) +#define __MIN4(x, y) __builtin_pulp_min4((x), (y)) + +#define __MINU2(x, y) __builtin_pulp_minu2((x), (y)) +#define __MINU4(x, y) __builtin_pulp_minu4((x), (y)) + +/* Clip */ +#define __CLIP(x, precision) \ + __builtin_pulp_clip((x), -(1 << (precision)), (1 << precision) - 1) +#define __CLIP_R(x, bound) __builtin_pulp_clip_r((x), (bound)) + +#define __CLIPU(x, precision) __builtin_pulp_clipu((x), 0, (1 << precision) - 1) +#define __CLIPU_R(x, bound) __builtin_pulp_clipu_r((x), (bound)) + +/* Abs */ +#define __ABS2(x) __builtin_pulp_abs2((x)) +#define __ABS4(x) __builtin_pulp_abs4((x)) + +/* Unary minus */ +#define __NEG2(x) __builtin_pulp_neg2((x)) +#define __NEG4(x) __builtin_pulp_neg4((x)) + +/* Addition */ +#define __ADD2(x, y) __builtin_pulp_add2((x), (y)) +#define __ADD4(x, y) __builtin_pulp_add4((x), 
(y)) + +/* Substraction */ +#define __SUB2(x, y) __builtin_pulp_sub2((x), (y)) +#define __SUB4(x, y) __builtin_pulp_sub4((x), (y)) + +/* Average */ +#define __AVG2(x, y) __builtin_pulp_avg2((x), (y)) +#define __AVG4(x, y) __builtin_pulp_avg4((x), (y)) + +/* Average unsigned */ +#define __AVGU2(x, y) __builtin_pulp_avgu2((x), (y)) +#define __AVGU4(x, y) __builtin_pulp_avgu4((x), (y)) + +/* Bitwise and */ +#define __AND2(x, y) __builtin_pulp_and2((x), (y)) +#define __AND4(x, y) __builtin_pulp_and4((x), (y)) + +/* Bitwise or */ +#define __OR2(x, y) __builtin_pulp_or2((x), (y)) +#define __OR4(x, y) __builtin_pulp_or4((x), (y)) + +/* Bitwise exor */ +#define __EXOR2(x, y) __builtin_pulp_exor2(x, y) +#define __EXOR4(x, y) __builtin_pulp_exor4(x, y) + +/* Logical shift right */ +#define __SRL2(x, y) __builtin_pulp_srl2(x, y) +#define __SRL4(x, y) __builtin_pulp_srl4(x, y) + +/* Arithmetic shift right */ +#define __SRA2(x, y) __builtin_pulp_sra2(x, y) +#define __SRA4(x, y) __builtin_pulp_sra4(x, y) + +/* Logical shift left */ +#define __SLL2(x, y) __builtin_pulp_sll2(x, y) +#define __SLL4(x, y) __builtin_pulp_sll4(x, y) + +/* Mac */ +#define __MAC(Acc, x, y) __builtin_pulp_mac((x), (y), (Acc)) +#define __MSU(Acc, x, y) __builtin_pulp_msu((x), (y), (Acc)) + +#define __MACS(Acc, x, y) __builtin_pulp_macs((x), (y), (Acc)) +#define __MACHHS(Acc, x, y) __builtin_pulp_machhs((x), (y), (Acc)) +#define __MACU(Acc, x, y) __builtin_pulp_macu((x), (y), (Acc)) +#define __MACHHU(Acc, x, y) __builtin_pulp_machhu((x), (y), (Acc)) + +#define __MACSN(Acc, x, y, n) __builtin_pulp_macsN((x), (y), (Acc), (n)) +#define __MACUN(Acc, x, y, n) __builtin_pulp_macuN((x), (y), (Acc), (n)) +#define __MACSRN(Acc, x, y, n) \ + __builtin_pulp_macsRN((x), (y), (Acc), (n), (1 << ((n)-1))) +#define __MACURN(Acc, x, y, n) \ + __builtin_pulp_macuRN((x), (y), (Acc), (n), (1 << ((n)-1))) + +#define __MACHHSN(Acc, x, y, n) __builtin_pulp_machhsN((x), (y), (Acc), (n)) +#define __MACHHUN(Acc, x, y, n) 
__builtin_pulp_machhuN((x), (y), (Acc), (n)) +#define __MACHHSRN(Acc, x, y, n) \ + __builtin_pulp_machhsN((x), (y), (Acc), (n), (1 << ((n)-1))) +#define __MACHHURN(Acc, x, y, n) \ + __builtin_pulp_machhuN((x), (y), (Acc), (n), (1 << ((n)-1))) + +/* Multiplications */ +#define __MULS(x, y) __builtin_pulp_muls((x), (y)) +#define __MULU(x, y) __builtin_pulp_mulu((x), (y)) +#define __MULHHS(x, y) __builtin_pulp_mulhhs((x), (y)) +#define __MULHHU(x, y) __builtin_pulp_mulhhu((x), (y)) + +#define __MULSN(x, y, n) __builtin_pulp_mulsN((x), (y), (n)) +#define __MULSRN(x, y, n) __builtin_pulp_mulsRN((x), (y), (n), (1 << ((n)-1))) +#define __MULUN(x, y, n) __builtin_pulp_muluN((x), (y), (n)) +#define __MULURN(x, y, n) __builtin_pulp_muluRN((x), (y), (n), (1 << ((n)-1))) + +#define __MULHHSN(x, y, n) __builtin_pulp_mulhhsN((x), (y), (n)) +#define __MULHHSRN(x, y, n) \ + __builtin_pulp_mulhhsRN((x), (y), (n), (1 << ((n)-1))) +#define __MULHHUN(x, y, n) __builtin_pulp_mulhhuN((x), (y), (n)) +#define __MULHHURN(x, y, n) \ + __builtin_pulp_mulhhuRN((x), (y), (n), (1 << ((n)-1))) + +/* Vectorial product and sum of products */ +#define __DOTP2(x, y) __builtin_pulp_dotsp2((x), (y)) +#define __DOTPU2(x, y) __builtin_pulp_dotup2((x), (y)) +#define __DOTPUS2(x, y) __builtin_pulp_dotusp2((x), (y)) + +#define __DOTPSC2(x, y) __builtin_pulp_dotspsc2((x), (y)) +#define __DOTPUSC2(x, y) __builtin_pulp_dotupsc2((x), (y)) +#define __DOTPUSSC2(x, y) __builtin_pulp_dotuspsc2((x), (y)) + +#define __SUMDOTP2(x, y, z) __builtin_pulp_sdotsp2((x), (y), (z)) +#define __SUMDOTPU2(x, y, z) __builtin_pulp_sdotup2((x), (y), (z)) +#define __SUMDOTPUS2(x, y, z) __builtin_pulp_sdotusp2((x), (y), (z)) + +#define __SUMDOTPSC2(x, y, z) __builtin_pulp_sdotspsc2((x), (y), (z)) +#define __SUMDOTPUSC2(x, y, z) __builtin_pulp_sdotupsc2((x), (y), (z)) +#define __SUMDOTPUSSC2(x, y, z) __builtin_pulp_sdotuspsc2((x), (y), (z)) + +#define __DOTP4(x, y) __builtin_pulp_dotsp4((x), (y)) +#define __DOTPU4(x, y) 
__builtin_pulp_dotup4((x), (y)) +#define __DOTPUS4(x, y) __builtin_pulp_dotusp4((x), (y)) + +#define __DOTPSC4(x, y) __builtin_pulp_dotspsc4((x), (y)) +#define __DOTPUSC4(x, y) __builtin_pulp_dotupsc4((x), (y)) +#define __DOTPUSSC4(x, y) __builtin_pulp_dotuspsc4((x), (y)) + +#define __SUMDOTP4(x, y, z) __builtin_pulp_sdotsp4((x), (y), (z)) +#define __SUMDOTPU4(x, y, z) __builtin_pulp_sdotup4((x), (y), (z)) +#define __SUMDOTPUS4(x, y, z) __builtin_pulp_sdotusp4((x), (y), (z)) + +#define __SUMDOTPSC4(x, y, z) __builtin_pulp_sdotspsc4((x), (y), (z)) +#define __SUMDOTPUSC4(x, y, z) __builtin_pulp_sdotupsc4((x), (y), (z)) +#define __SUMDOTPUSSC4(x, y, z) __builtin_pulp_sdotuspsc4((x), (y), (z)) + +#ifdef ARCHI_CORE_HAS_CPLX + +/* Complex Multiplication, Q15x15 into Q15, with optional post scaling by 1 or 2 + */ +#define __CPLXMULS(x, y) __builtin_pulp_cplxmuls((x), (y)) +#define __CPLXMULSDIV2(x, y) __builtin_pulp_cplxmulsdiv2((x), (y)) +#define __CPLXMULSDIV4(x, y) __builtin_pulp_cplxmulsdiv4((x), (y)) + +/* Complex conjugate */ +#define __CPLXCONJ(x) __builtin_pulp_cplx_conj((x)) + +/* Complex substration, result rotated by -pi/2 */ +#define __SUB2ROTMJ(x, y) __builtin_pulp_sub2rotmj((x), (y)) + +/* Complex addition with post scaling by 1 or 2 */ +#define __ADD2DIV2(x, y) __builtin_pulp_add2div2((x), (y)) +#define __ADD2DIV4(x, y) __builtin_pulp_add2div4((x), (y)) + +#define __ADD4DIV2(x, y) __builtin_pulp_add4div2((x), (y)) +#define __ADD4DIV4(x, y) __builtin_pulp_add4div4((x), (y)) + +/* Complex subtraction with post scaling by 1 or 2 */ +#define __SUB2DIV2(x, y) __builtin_pulp_sub2div2((x), (y)) +#define __SUB2DIV4(x, y) __builtin_pulp_sub2div4((x), (y)) + +#define __SUB4DIV2(x, y) __builtin_pulp_sub4div2((x), (y)) +#define __SUB4DIV4(x, y) __builtin_pulp_sub4div4((x), (y)) + +/* Viterbi Max and Viterbi Select, pair of Q15 */ +#define __VITMAX(x, y) __builtin_pulp_vitmax2((x), (y)) +#define __VITSEL(x, y) __builtin_pulp_vitsel2((x), (y)) + +#endif + +/* Position of 
the most significant bit of x */ +#define __FF1(x) __builtin_pulp_ff1((x)) +#define __FL1(x) __builtin_pulp_fl1((x)) + +/* Number of sign bits */ +#define __CLB(x) __builtin_pulp_clb((x)) + +static inline unsigned int __attribute__((always_inline)) +__ExtInsMaskFast(unsigned int Size, unsigned int Offset) { + return ((((Size - 1)) << 5) | (Offset)); +} +static inline unsigned int __attribute__((always_inline)) +__ExtInsMaskSafe(unsigned int Size, unsigned int Offset) { + return ((((Size - 1) & 0x1F) << 5) | (Offset & 0x1F)); +} + +/* Bit set */ +#define __BITSET(x, size, off) \ + __builtin_pulp_bset((x), (((1 << (size)) - 1) << (off))) +#define __BITSET_R(x, size, off) \ + __builtin_pulp_bset_r((x), __ExtInsMaskFast((size), (off))) +#define __BITSET_R_SAFE(x, size, off) \ + __builtin_pulp_bset_r((x), __ExtInsMaskSafe((size), (off))) + +/* Bit clr */ +#define __BITCLR(x, size, off) \ + __builtin_pulp_bclr((x), ~(((1 << (size)) - 1) << (off))) +#define __BITCLR_R(x, size, off) \ + __builtin_pulp_bclr_r((x), __ExtInsMaskFast((size), (off))) +#define __BITCLR_R_SAFE(x, size, off) \ + __builtin_pulp_bclr_r((x), __ExtInsMaskSafe((size), (off))) + +/* Bit Extraction */ +#define __BITEXTRACT(x, size, off) __builtin_pulp_bextract((x), (size), (off)) +#define __BITEXTRACTU(x, size, off) __builtin_pulp_bextractu((x), (size), (off)) + +#define __BITEXTRACT_R(x, size, off) \ + __builtin_pulp_bextract_r((x), __ExtInsMaskFast((size), (off))) +#define __BITEXTRACTU_R(x, size, off) \ + __builtin_pulp_bextractu_r((x), __ExtInsMaskFast((size), (off))) + +#define __BITEXTRACT_R_SAFE(x, size, off) \ + __builtin_pulp_bextract_r((x), __ExtInsMaskSafe((size), (off))) +#define __BITEXTRACTU_R_SAFE(x, size, off) \ + __builtin_pulp_bextractu_r((x), __ExtInsMaskSafe((size), (off))) + +/* Bit insertion */ +#define __BITINSERT(dst, src, size, off) \ + __builtin_pulp_binsert((dst), ~(((1 << (size)) - 1) << (off)), (src), \ + (((1 << (size)) - 1) << (off)), (off)) +#define __BITINSERT_R(dst, src, 
size, off) \ + __builtin_pulp_binsert_r((dst), (src), __ExtInsMaskFast((size), (off))) +#define __BITINSERT_R_SAFE(dst, src, size, off) \ + __builtin_pulp_binsert_r((dst), (src), __ExtInsMaskSafe((size), (off))) + +/* 1 bit rotation to the right, 32 bits input */ +#define __ROTR(x) __builtin_pulp_rotr((x)) + +/* Add with normalization */ +#define __ADDNORMU(x, y, scale) __builtin_pulp_adduN((x), (y), (scale)) +#define __ADDNORMU_REG(x, y, scale) __builtin_pulp_adduN_r((x), (y), (scale)) +#define __ADDNORM(x, y, scale) __builtin_pulp_addN((x), (y), (scale)) +#define __ADDNORM_REG(x, y, scale) __builtin_pulp_addN_r((x), (y), (scale)) + +/* Add with normalization and rounding */ +#define __ADDROUNDNORMU(x, y, scale) \ + __builtin_pulp_adduRN((x), (y), (scale), (1 << ((scale)-1))) +#define __ADDROUNDNORMU_REG(x, y, scale) \ + __builtin_pulp_adduRN_r((x), (y), (scale)) +#define __ADDROUNDNORM(x, y, scale) \ + __builtin_pulp_addRN((x), (y), (scale), (1 << ((scale)-1))) +#define __ADDROUNDNORM_REG(x, y, scale) \ + __builtin_pulp_addRN_r((x), (y), (scale)) + +/* Sub with normalization */ +#define __SUBNORMU(x, y, scale) __builtin_pulp_subuN((x), (y), (scale)) +#define __SUBNORMU_REG(x, y, scale) __builtin_pulp_subuN_r((x), (y), (scale)) +#define __SUBNORM(x, y, scale) __builtin_pulp_subN((x), (y), (scale)) +#define __SUBNORM_REG(x, y, scale) __builtin_pulp_subN_r((x), (y), (scale)) + +/* Sub with normalization and rounding */ +#define __SUBROUNDNORMU(x, y, scale) \ + __builtin_pulp_subuRN((x), (y), (scale), (1 << ((scale)-1))) +#define __SUBROUNDNORMU_REG(x, y, scale) \ + __builtin_pulp_subuRN_r((x), (y), (scale)) +#define __SUBROUNDNORM(x, y, scale) \ + __builtin_pulp_subRN((x), (y), (scale), (1 << ((scale)-1))) +#define __SUBROUNDNORM_REG(x, y, scale) \ + __builtin_pulp_subRN_r((x), (y), (scale)) + +/* Normalization and rounding */ +#define __ROUNDNORMU(x, scale) \ + __builtin_pulp_adduRN((x), 0, (scale), (1 << ((scale)-1))) +#define __ROUNDNORMU_REG(x, scale) 
__builtin_pulp_adduRN_r((x), 0, (scale)) +#define __ROUNDNORM(x, scale) \ + __builtin_pulp_addRN((x), 0, (scale), (1 << ((scale)-1))) +#define __ROUNDNORM_REG(x, scale) __builtin_pulp_addRN_r((x), 0, (scale)) + +#define __COREID() __builtin_pulp_CoreId() +#define __CLUSTERID() __builtin_pulp_ClusterId() +#define __NCORE() __builtin_pulp_CoreCount() +#define __ISFC() __builtin_pulp_IsFc() + +#define __SPRWRITE(x, y) __builtin_pulp_spr_write(x, y) +#define __SPRREAD(x) __builtin_pulp_spr_read(x) +#define __SPRREAD_VOL(x) __builtin_pulp_spr_read_vol(x) + +#define __SPRBITSET(spr, off) __builtin_pulp_spr_bit_set((spr), (off)) +#define __SPRBITCLR(spr, off) __builtin_pulp_spr_bit_clr((spr), (off)) + +#define __SPRREADTHENWRITE(spr, x) \ + __builtin_pulp_read_then_spr_write((spr), (x)) +#define __SPRREADTHENBITSET(spr, off) \ + __builtin_pulp_read_then_spr_bit_set((spr), (off)) +#define __SPRREADTHENBITCLR(spr, off) \ + __builtin_pulp_read_then_spr_bit_clr((spr), (off)) + +#define __READ_BASE_OFF(base, off) __builtin_pulp_read_base_off((base), (off)) +#define __WRITE_BASE_OFF(base, off, val) \ + __builtin_pulp_write_base_off((base), (off), (val)) + +#define __READ_BASE_OFF_VOL(base, off) \ + __builtin_pulp_OffsetedRead((base), (off)) +#define __READ_BASE_OFF_HALF_VOL(base, off) \ + __builtin_pulp_OffsetedReadHalf((base), (off)) +#define __READ_BASE_OFF_BYTE_VOL(base, off) \ + __builtin_pulp_OffsetedReadByte((base), (off)) + +#define __WRITE_BASE_OFF_VOL(x, base, off) \ + __builtin_pulp_OffsetedWrite((x), (base), (off)) +#define __WRITE_BASE_OFF_HALF_VOL(x, base, off) \ + __builtin_pulp_OffsetedWriteHalf((x), (base), (off)) +#define __WRITE_BASE_OFF_BYTE_VOL(x, base, off) \ + __builtin_pulp_OffsetedWriteByte((x), (base), (off)) +/* Utilities, Target independant */ +#define FIX2FP(Val, Precision) ((float)(Val) / (float)(1 << (Precision))) +#define FP2FIXR(Val, Precision) ((int)((Val) * ((1 << (Precision)) - 1) + 0.5)) +#define FP2FIX(Val, Precision) ((int)((Val) * ((1 << 
(Precision)) - 1))) + +#endif diff --git a/apps/common/xpulp/conv_2d.h b/apps/common/xpulp/conv_2d.h new file mode 100644 index 000000000..31b0ad167 --- /dev/null +++ b/apps/common/xpulp/conv_2d.h @@ -0,0 +1,399 @@ +// Copyright 2020 ETH Zurich and University of Bologna. +// +// SPDX-License-Identifier: Apache-2.0 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Author: Samuel Riedel, ETH Zurich +// Davide Schiavone, ETH Zurich +// Sergio Mazzola, ETH Zurich + +#include "xpulp/builtins_v2.h" +#include +#include + +#include "encoding.h" +#include "printf.h" +#include "runtime.h" +#include "synchronization.h" + +/* + * 2D Convolution 3x3 ---------------------------------- + * kernel = conv2d_3x3_unrolled_i8_rv32im + * data type = 8-bit integer + * multi-core = no + * unrolling = whole 3x3 kernel + * simd = no + */ +void conv2d_3x3_unrolled_i8_rv32im(int8_t const volatile *__restrict__ in, + uint32_t in_x, uint32_t in_y, + uint8_t const volatile *__restrict__ k, + int32_t volatile *__restrict__ out) { + int32_t sum; + uint32_t weight = 0; + for (int i = 0; i < 9; ++i) { + weight += k[i]; + } + + for (uint32_t i = 1; i < in_x - 1; ++i) { + for (uint32_t j = 1; j < in_y - 1; j++) { + sum = 0; + sum += in[(j - 1) * in_x + (i - 1)] * k[0]; + sum += in[(j - 1) * in_x + (i + 0)] * k[1]; + sum += in[(j - 1) * in_x + (i + 1)] * k[2]; + sum += in[(j + 0) * in_x + (i - 1)] * k[3]; + sum += in[(j + 0) * in_x + (i + 0)] * k[4]; + sum += in[(j + 0) * in_x + (i + 1)] * 
k[5]; + sum += in[(j + 1) * in_x + (i - 1)] * k[6]; + sum += in[(j + 1) * in_x + (i + 0)] * k[7]; + sum += in[(j + 1) * in_x + (i + 1)] * k[8]; + out[j * in_x + i] = sum / (int)weight; + } + } +} + +/* + * 2D Convolution 3x3 ---------------------------------- + * kernel = conv2d_3x3_unrolled_i8_rv32im + * data type = 8-bit integer + * multi-core = no + * unrolling = whole 3x3 kernel + * simd = no + */ +void conv2d_3x3_unrolled2_i8_rv32im(int8_t const volatile *__restrict__ in, + uint32_t in_x, uint32_t in_y, + uint8_t const volatile *__restrict__ k, + int32_t volatile *__restrict__ out) { + int32_t sum; + uint32_t weight = 0; + + int8_t elem_00, elem_01, elem_02; + int8_t elem_10, elem_11, elem_12; + int8_t elem_20, elem_21, elem_22; + + uint32_t j; + + for (int i = 0; i < 9; ++i) { + weight += k[i]; + } + + for (uint32_t i = 1; i < in_x - 1; ++i) { + elem_00 = in[i - 1]; + elem_01 = in[i + 0]; + elem_02 = in[i + 1]; + elem_10 = in[in_x + (i - 1)]; + elem_11 = in[in_x + (i + 0)]; + elem_12 = in[in_x + (i + 1)]; + elem_20 = in[2 * in_x + (i - 1)]; + elem_21 = in[2 * in_x + (i + 0)]; + elem_22 = in[2 * in_x + (i + 1)]; + for (j = 1; j < in_y - 1; j++) { + sum = 0; + sum += elem_00 * k[0]; + sum += elem_01 * k[1]; + sum += elem_02 * k[2]; + sum += elem_10 * k[3]; + sum += elem_11 * k[4]; + sum += elem_12 * k[5]; + sum += elem_20 * k[6]; + sum += elem_21 * k[7]; + sum += elem_22 * k[8]; + + elem_00 = elem_10; + elem_01 = elem_11; + elem_02 = elem_12; + elem_10 = elem_20; + elem_11 = elem_21; + elem_12 = elem_22; + elem_20 = in[(j + 2) * in_x + (i - 1)]; + elem_21 = in[(j + 2) * in_x + (i + 0)]; + elem_22 = in[(j + 2) * in_x + (i + 1)]; + + out[j * in_x + i] = sum / (int)weight; + } + } +} + +/* + * 2D Convolution 3x3 ---------------------------------- + * kernel = conv_3x3_unrolled_i8_xpulpv2 + * data type = 8-bit integer + * multi-core = no + * unrolling = whole 3x3 kernel + * simd = yes, Xpulpv2 intrinsics + * + * Insipired from Conv3x3_Vector from pulp-training + */ 
+#ifdef __XPULPIMG +void conv2d_3x3_unrolled_i8_xpulpv2( + int8_t const volatile *__restrict__ In_Img, + int32_t volatile *__restrict__ Out_Img, uint32_t R, uint32_t C, + uint8_t const volatile *__restrict__ Kernel) { + v4u coeff_0, coeff_1, coeff_2; + v4s Img_0, Img_1, Img_2; + v4s new_data; + uint32_t r, c, t; + int32_t S; + + uint32_t weight = 0; + for (int i = 0; i < 9; ++i) { + weight += Kernel[i]; + } + + coeff_0 = (v4u){Kernel[0], Kernel[1], Kernel[2], 0}; + coeff_1 = (v4u){Kernel[3], Kernel[4], Kernel[5], 0}; + coeff_2 = (v4u){Kernel[6], Kernel[7], Kernel[8], 0}; + + // image board is black + for (c = 1; c < C - 1; c++) { + + Img_0 = (v4s){In_Img[c - 1], In_Img[c], In_Img[c + 1], 0}; + Img_1 = (v4s){In_Img[c - 1 + R], In_Img[c + R], In_Img[c + 1 + R], 0}; + Img_2 = (v4s){In_Img[c - 1 + R * 2], In_Img[c + R * 2], + In_Img[c + 1 + R * 2], 0}; + + for (r = 1; r < R - 1; r++) { + t = r * R + c; + S = __builtin_pulp_dotsp4(Img_0, coeff_0); + S = __builtin_pulp_sdotsp4(Img_1, coeff_1, S); + S = __builtin_pulp_sdotsp4(Img_2, coeff_2, S); + + Out_Img[t] = S / (int)weight; + + // load a new rod + new_data = (v4s){In_Img[(r + 2) * R + c - 1], In_Img[(r + 2) * R + c], + In_Img[(r + 2) * R + c + 1], 0}; + // move the window: move each vector one line down + Img_0 = Img_1; + Img_1 = Img_2; + Img_2 = new_data; + } + } +} +#endif + +/* + * 2D Convolution 3x3 ---------------------------------- + * kernel = conv_3x3_unrolled2_i8_xpulpv2 + * data type = 8-bit integer + * multi-core = no + * unrolling = whole 3x3 kernel, 2 kernels per iteration + * simd = yes, Xpulpv2 intrinsics + * + * Insipired from Conv3x3_Vector from pulp-training + */ +#ifdef __XPULPIMG +void conv2d_3x3_unrolled2_i8_xpulpv2( + int8_t const volatile *__restrict__ In_Img, + int32_t volatile *__restrict__ Out_Img, uint32_t R, uint32_t C, + uint8_t const volatile *__restrict__ Kernel) { + v4u coeff_0, coeff_1, coeff_2; + v4s Img_00, Img_10, Img_20; + v4s Img_01, Img_11, Img_21; + v4s new_data_0, new_data_1; 
+ uint32_t r, c; + int32_t S_0, S_1; + + uint32_t weight = 0; + for (int i = 0; i < 9; ++i) { + weight += Kernel[i]; + } + + coeff_0 = (v4u){Kernel[0], Kernel[1], Kernel[2], 0}; + coeff_1 = (v4u){Kernel[3], Kernel[4], Kernel[5], 0}; + coeff_2 = (v4u){Kernel[6], Kernel[7], Kernel[8], 0}; + + // image board is black + for (c = 1; c < C / 2; c++) { + + Img_00 = (v4s){In_Img[2 * c - 2], In_Img[2 * c - 1], In_Img[2 * c], 0}; + Img_10 = (v4s){In_Img[2 * c - 2 + R], In_Img[2 * c - 1 + R], + In_Img[2 * c + R], 0}; + Img_20 = (v4s){In_Img[2 * c - 2 + R * 2], In_Img[2 * c - 1 + R * 2], + In_Img[2 * c + R * 2], 0}; + + Img_01 = (v4s){In_Img[2 * c - 1], In_Img[2 * c], In_Img[2 * c + 1], 0}; + Img_11 = (v4s){In_Img[2 * c - 1 + R], In_Img[2 * c + R], + In_Img[2 * c + 1 + R], 0}; + Img_21 = (v4s){In_Img[2 * c - 1 + R * 2], In_Img[2 * c + R * 2], + In_Img[2 * c + 1 + R * 2], 0}; + + for (r = 1; r < R - 1; r++) { + S_0 = __builtin_pulp_dotsp4(Img_00, coeff_0); + S_1 = __builtin_pulp_dotsp4(Img_01, coeff_0); + + S_0 = __builtin_pulp_sdotsp4(Img_10, coeff_1, S_0); + S_1 = __builtin_pulp_sdotsp4(Img_11, coeff_1, S_1); + + S_0 = __builtin_pulp_sdotsp4(Img_20, coeff_2, S_0); + S_1 = __builtin_pulp_sdotsp4(Img_21, coeff_2, S_1); + + int32_t res_0 = S_0 / (int)weight; + int32_t res_1 = S_1 / (int)weight; + + // load a new rod + new_data_0 = (v4s){In_Img[(r + 2) * R + (2 * c - 1) - 1], + In_Img[(r + 2) * R + (2 * c - 1)], + In_Img[(r + 2) * R + (2 * c - 1) + 1], 0}; + new_data_1 = + (v4s){In_Img[(r + 2) * R + 2 * c - 1], In_Img[(r + 2) * R + 2 * c], + In_Img[(r + 2) * R + 2 * c + 1], 0}; + // move the window: move each vector one line down + Img_00 = Img_10; + Img_10 = Img_20; + Img_20 = new_data_0; + Img_01 = Img_11; + Img_11 = Img_21; + Img_21 = new_data_1; + + Out_Img[r * R + (2 * c - 1)] = res_0; + Out_Img[r * R + 2 * c] = res_1; + } + } +} +#endif + +// Testing +// Initialize the image in parallel +void init_conv2d_image_i8(volatile int8_t *img, uint32_t img_x, + uint32_t img_y) { + 
if (img_y > img_x) { + for (int i = 0; i < (int)img_y; ++i) { + for (int j = 0; j < (int)img_x; ++j) { + img[i * (int)img_x + j] = (int8_t)((i % 16) + (j % 4)); + } + } + } else { + for (int j = 0; j < (int)img_x; ++j) { + for (int i = 0; i < (int)img_y; ++i) { + img[i * (int)img_x + j] = (int8_t)((i % 16) + (j % 4)); + } + } + } +} + +// Verify and reset the image +int verify_conv2d_image_i8(volatile int32_t *img, uint32_t img_x, + uint32_t img_y) { + for (int i = 1; i < (int)img_y - 1; ++i) { + int32_t y = i % 16; + if (i % 16 == 0) + y = 4; + if (i % 16 == 15) + y = 11; + for (int32_t j = 1; j < (int)img_x - 1; ++j) { + int32_t x = ((j % 4) / 2) + 1; + if ((int32_t)img[i * (int)img_x + j] != (int32_t)(x + y)) { + return (i + j) == 0 ? -1 : i * (int)img_x + j; + } + img[i * (int)img_x + j] = 0; + } + } + return 0; +} + +// Verify and reset the image +int verify_conv2d_image_i8_verbose(int32_t *img, uint32_t img_x, + uint32_t img_y) { + for (int i = 1; i < (int)img_y - 1; ++i) { + int32_t y = i % 16; + if (i % 16 == 0) + y = 4; + if (i % 16 == 15) + y = 11; + printf("|"); + for (int32_t j = 1; j < (int)img_x - 1; ++j) { + int32_t x = ((j % 4) / 2) + 1; + printf(" %2u - %2u |", img[i * (int)img_x + j], x + y); + } + printf("\n"); + } + return 0; +} + +#ifdef __XPULPIMG +void conv2d_3x3_unrolled_i8_xpulpv2_verbose( + int8_t const *__restrict__ In_Img, int32_t volatile *__restrict__ Out_Img, + uint32_t R, uint32_t C, uint8_t const volatile *__restrict__ Kernel) { + v4u coeff_0, coeff_1, coeff_2; + v4s Img_0, Img_1, Img_2; + v4s new_data; + uint32_t r, c, t; + volatile int32_t S; + + uint32_t weight = 0; + for (int i = 0; i < 9; ++i) { + weight += Kernel[i]; + } + + coeff_0 = (v4u){Kernel[0], Kernel[1], Kernel[2], 0}; + coeff_1 = (v4u){Kernel[3], Kernel[4], Kernel[5], 0}; + coeff_2 = (v4u){Kernel[6], Kernel[7], Kernel[8], 0}; + + // image board is black + for (c = 1; c < C - 1; c++) { + + Img_0 = (v4s){In_Img[c - 1], In_Img[c], In_Img[c + 1], 0}; + Img_1 = 
(v4s){In_Img[c - 1 + R], In_Img[c + R], In_Img[c + 1 + R], 0}; + Img_2 = (v4s){In_Img[c - 1 + R * 2], In_Img[c + R * 2], + In_Img[c + 1 + R * 2], 0}; + + for (r = 1; r < R - 1; r++) { + printf("-------------\n"); + + printf("[ %u, %u, %u]\n", Img_0[0], Img_0[1], Img_0[2]); + printf("[ %u, %u, %u]\n", Img_1[0], Img_1[1], Img_1[2]); + printf("[ %u, %u, %u]\n", Img_2[0], Img_2[1], Img_2[2]); + + t = r * R + c; + S = __builtin_pulp_dotsp4(Img_0, coeff_0); + S = __builtin_pulp_sdotsp4(Img_1, coeff_1, S); + S = __builtin_pulp_sdotsp4(Img_2, coeff_2, S); + + printf("S = %d\n", S); + printf("S/weight = %d\n", S / (int)weight); + + Out_Img[t] = S / (int)weight; + printf("Out_Img[%d] = %d\n", t, Out_Img[t]); + + new_data = (v4s){In_Img[(r + 2) * R + c - 1], In_Img[(r + 2) * R + c], + In_Img[(r + 2) * R + c + 1], 0}; + + // Move the window + /* + Three vectors: + Img_0 = {A0, A1, A2, 0} + Img_1 = {B0, B1, B2, 0} + Img_2 = {C0, C1, C2, 0} + Current Windonw: + XX XX XX + A0 A1 A2 + B0 B1 B2 + C0 C1 C2 + D0 D1 D2 + XX XX XX + We want to load next line (D0, D1, D2) in vector new_data + new_data = {D0, D1, D2, 0} + Move each vector one line down + Img_0 = Img_1 + Img_1 = Img_2 + Img_2 = new_data + */ + + Img_0 = Img_1; + Img_1 = Img_2; + Img_2 = new_data; + } + } +} +#endif diff --git a/apps/common/xpulp/mat_mul.h b/apps/common/xpulp/mat_mul.h new file mode 100644 index 000000000..78297c999 --- /dev/null +++ b/apps/common/xpulp/mat_mul.h @@ -0,0 +1,746 @@ +// Copyright 2020 ETH Zurich and University of Bologna. +// +// SPDX-License-Identifier: Apache-2.0 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Author: Samuel Riedel, ETH Zurich +// Sergio Mazzola, ETH Zurich + +#include "xpulp/builtins_v2.h" + +/* This library implements the matrix multiplication for several data widths + * in multiple different ways. The functions all follow the following format: + * + * A is an M x N matrix, B is a N x P matrix, and C is a M x P matrix + * C = AB + * + * Note that all the matrices dimensions must be multiples of 4; these + * kernels do not have clean-up code and remaining elements would not be + * considered, leading to wrong results + */ + +/* + * Matrix multiplication ---------------------------------- + * kernel = matmul_unrolled_2x2_parallel_i8_rv32im + * data type = 8-bit integer + * multi-core = yes + * unrolling = 4 elements of C per iteration (2x2 chunks) + * simd = no + */ +void matmul_unrolled_2x2_parallel_i8_rv32im(int8_t const *__restrict__ A, + int8_t const *__restrict__ B, + int32_t *__restrict__ C, uint32_t M, + uint32_t N, uint32_t P, uint32_t id, + uint32_t numThreads) { + // Each core handles one of c column slices and strides over pairs of rows (needs numThreads >= c, otherwise the row stride is 0) + uint32_t const c = 8; // How many columns to split the matrix into + uint32_t const c_start = (P / c) * (id % c); + uint32_t const c_end = (P / c) * ((id % c) + 1); + for (uint32_t i = 2 * (id / c); i < M; i += 2 * (numThreads / c)) { + for (uint32_t j = c_start; j < c_end; j += 2) { + int32_t c00 = 0; + int32_t c01 = 0; + int32_t c10 = 0; + int32_t c11 = 0; + for (uint32_t k = 0; k < N; k += 2) { + // Explicitly load the values first to help with scheduling + int8_t val_a00 = A[(i + 0) * N + k + 0]; + int8_t 
val_a01 = A[(i + 0) * N + k + 1]; + int8_t val_a10 = A[(i + 1) * N + k + 0]; + int8_t val_a11 = A[(i + 1) * N + k + 1]; + int8_t val_b00 = B[(k + 0) * P + j + 0]; + int8_t val_b01 = B[(k + 0) * P + j + 1]; + int8_t val_b10 = B[(k + 1) * P + j + 0]; + int8_t val_b11 = B[(k + 1) * P + j + 1]; + c00 += val_a00 * val_b00; + c00 += val_a01 * val_b10; + c01 += val_a00 * val_b01; + c01 += val_a01 * val_b11; + c10 += val_a10 * val_b00; + c10 += val_a11 * val_b10; + c11 += val_a10 * val_b01; + c11 += val_a11 * val_b11; + } + C[(i + 0) * P + j + 0] = c00; + C[(i + 0) * P + j + 1] = c01; + C[(i + 1) * P + j + 0] = c10; + C[(i + 1) * P + j + 1] = c11; + } + } +} + +/* + * Matrix multiplication ---------------------------------- + * kernel = matmul_unrolled_2x2_parallel_i16_rv32im + * data type = 16-bit integer + * multi-core = yes + * unrolling = 4 elements of C per iteration (2x2 chunks) + * simd = no + */ +void matmul_unrolled_2x2_parallel_i16_rv32im(int16_t const *__restrict__ A, + int16_t const *__restrict__ B, + int32_t *__restrict__ C, + uint32_t M, uint32_t N, uint32_t P, + uint32_t id, uint32_t numThreads) { + // Each core handles one of c column slices and strides over pairs of rows (needs numThreads >= c, otherwise the row stride is 0) + uint32_t const c = 8; // How many columns to split the matrix into + uint32_t const c_start = (P / c) * (id % c); + uint32_t const c_end = (P / c) * ((id % c) + 1); + for (uint32_t i = 2 * (id / c); i < M; i += 2 * (numThreads / c)) { + for (uint32_t j = c_start; j < c_end; j += 2) { + int32_t c00 = 0; + int32_t c01 = 0; + int32_t c10 = 0; + int32_t c11 = 0; + for (uint32_t k = 0; k < N; k += 2) { + // Explicitly load the values first to help with scheduling + int16_t val_a00 = A[(i + 0) * N + k + 0]; + int16_t val_a01 = A[(i + 0) * N + k + 1]; + int16_t val_a10 = A[(i + 1) * N + k + 0]; + int16_t val_a11 = A[(i + 1) * N + k + 1]; + int16_t val_b00 = B[(k + 0) * P + j + 0]; + int16_t val_b01 = B[(k + 0) * P + j + 1]; + int16_t val_b10 = B[(k + 1) * P + j + 0]; + int16_t val_b11 = B[(k + 1) * P + j + 1]; + c00 += 
val_a00 * val_b00; + c00 += val_a01 * val_b10; + c01 += val_a00 * val_b01; + c01 += val_a01 * val_b11; + c10 += val_a10 * val_b00; + c10 += val_a11 * val_b10; + c11 += val_a10 * val_b01; + c11 += val_a11 * val_b11; + } + C[(i + 0) * P + j + 0] = c00; + C[(i + 0) * P + j + 1] = c01; + C[(i + 1) * P + j + 0] = c10; + C[(i + 1) * P + j + 1] = c11; + } + } +} + +/* + * Matrix multiplication ---------------------------------- + * kernel = matmul_unrolled_2x4_i8_xpulpv2 + * data type = 8-bit integer + * multi-core = no + * unrolling = 8 elements of C per iteration (2x4 chunks) + * simd = yes, Xpulpv2 intrinsics + * + * Original plp_mat_mult_i8s_xpulpv2 from pulp-dsp + */ +#ifdef __XPULPIMG +void matmul_unrolled_2x4_i8_xpulpv2(const int8_t *__restrict__ pSrcA, + const int8_t *__restrict__ pSrcB, + int32_t *__restrict__ pDstC, uint32_t M, + uint32_t N, uint32_t P) { + static v4s mask0 = {0, 1, 4, 5}; + static v4s mask1 = {2, 3, 6, 7}; + static v4s mask2 = {0, 2, 4, 6}; + static v4s mask3 = {1, 3, 5, 7}; + + uint32_t i = 0; // loop counter for M + uint32_t j = 0; // loop counter for N + uint32_t k = 0; // loop counter for P + + for (i = 0; i < M / 2; i++) { + for (k = 0; k < P / 4; k++) { + + int32_t sum00 = 0; + int32_t sum01 = 0; + int32_t sum02 = 0; + int32_t sum03 = 0; + int32_t sum10 = 0; + int32_t sum11 = 0; + int32_t sum12 = 0; + int32_t sum13 = 0; + + for (j = 0; j < N / 4; j++) { + + v4s aVec0 = *((v4s *)&(pSrcA[(i * 2) * N + (j * 4)])); + v4s aVec1 = *((v4s *)&(pSrcA[(i * 2 + 1) * N + (j * 4)])); + + v4s temp0 = *((v4s *)&(pSrcB[(j * 4) * P + (k * 4)])); + v4s temp1 = *((v4s *)&(pSrcB[(j * 4 + 1) * P + (k * 4)])); + v4s temp2 = *((v4s *)&(pSrcB[(j * 4 + 2) * P + (k * 4)])); + v4s temp3 = *((v4s *)&(pSrcB[(j * 4 + 3) * P + (k * 4)])); + + v4s temp4 = __builtin_shuffle(temp0, temp1, mask0); // 0,1,4,5 + v4s temp5 = __builtin_shuffle(temp2, temp3, mask0); // 8,9,12,13 + v4s temp6 = __builtin_shuffle(temp0, temp1, mask1); // 2,3,6,7 + v4s temp7 = 
__builtin_shuffle(temp2, temp3, mask1); // 10,11,14,15 + + v4s bVec0 = __builtin_shuffle(temp4, temp5, mask2); // 0,4,8,12 + v4s bVec1 = __builtin_shuffle(temp4, temp5, mask3); // 1,5,9,13 + v4s bVec2 = __builtin_shuffle(temp6, temp7, mask2); // 2,6,10,14 + v4s bVec3 = __builtin_shuffle(temp6, temp7, mask3); // 3,7,11,15 + + sum00 = __SUMDOTP4(aVec0, bVec0, sum00); + sum01 = __SUMDOTP4(aVec0, bVec1, sum01); + sum02 = __SUMDOTP4(aVec0, bVec2, sum02); + sum03 = __SUMDOTP4(aVec0, bVec3, sum03); + sum10 = __SUMDOTP4(aVec1, bVec0, sum10); + sum11 = __SUMDOTP4(aVec1, bVec1, sum11); + sum12 = __SUMDOTP4(aVec1, bVec2, sum12); + sum13 = __SUMDOTP4(aVec1, bVec3, sum13); + } + + pDstC[(i * 2) * P + (k * 4)] = sum00; + pDstC[(i * 2) * P + (k * 4 + 1)] = sum01; + pDstC[(i * 2) * P + (k * 4 + 2)] = sum02; + pDstC[(i * 2) * P + (k * 4 + 3)] = sum03; + pDstC[(i * 2 + 1) * P + (k * 4)] = sum10; + pDstC[(i * 2 + 1) * P + (k * 4 + 1)] = sum11; + pDstC[(i * 2 + 1) * P + (k * 4 + 2)] = sum12; + pDstC[(i * 2 + 1) * P + (k * 4 + 3)] = sum13; + } + } +} +#endif + +/* + * Matrix multiplication ---------------------------------- + * kernel = matmul_unrolled_2x4_parallel_i8_xpulpv2 + * data type = 8-bit integer + * multi-core = yes + * unrolling = 8 elements of C per iteration (2x4 chunks) + * simd = yes, Xpulpv2 intrinsics + * + * Original plp_mat_mult_i8p_xpulpv2 from pulp-dsp + */ +#ifdef __XPULPIMG +void matmul_unrolled_2x4_parallel_i8_xpulpv2(const int8_t *__restrict__ pSrcA, + const int8_t *__restrict__ pSrcB, + int32_t *__restrict__ pDstC, + uint32_t M, uint32_t N, uint32_t P, + uint32_t core_id, + uint32_t numThreads) { + static v4s mask0 = {0, 1, 4, 5}; + static v4s mask1 = {2, 3, 6, 7}; + static v4s mask2 = {0, 2, 4, 6}; + static v4s mask3 = {1, 3, 5, 7}; + + uint32_t i = 0; // loop counter for M + uint32_t j = 0; // loop counter for N + uint32_t k = 0; // loop counter for P + + for (k = core_id; k < P / 4; k += numThreads) { + for (i = 0; i < M / 2; i++) { + + int32_t sum00 = 0; + 
int32_t sum01 = 0; + int32_t sum02 = 0; + int32_t sum03 = 0; + int32_t sum10 = 0; + int32_t sum11 = 0; + int32_t sum12 = 0; + int32_t sum13 = 0; + + for (j = 0; j < N / 4; j++) { + + v4s aVec0 = *((v4s *)&(pSrcA[(i * 2) * N + (j * 4)])); + v4s aVec1 = *((v4s *)&(pSrcA[(i * 2 + 1) * N + (j * 4)])); + + v4s temp0 = *((v4s *)&(pSrcB[(j * 4) * P + (k * 4)])); + v4s temp1 = *((v4s *)&(pSrcB[(j * 4 + 1) * P + (k * 4)])); + v4s temp2 = *((v4s *)&(pSrcB[(j * 4 + 2) * P + (k * 4)])); + v4s temp3 = *((v4s *)&(pSrcB[(j * 4 + 3) * P + (k * 4)])); + + v4s temp4 = __builtin_shuffle(temp0, temp1, mask0); // 0,1,4,5 + v4s temp5 = __builtin_shuffle(temp2, temp3, mask0); // 8,9,12,13 + v4s temp6 = __builtin_shuffle(temp0, temp1, mask1); // 2,3,6,7 + v4s temp7 = __builtin_shuffle(temp2, temp3, mask1); // 10,11,14,15 + + v4s bVec0 = __builtin_shuffle(temp4, temp5, mask2); // 0,4,8,12 + v4s bVec1 = __builtin_shuffle(temp4, temp5, mask3); // 1,5,9,13 + v4s bVec2 = __builtin_shuffle(temp6, temp7, mask2); // 2,6,10,14 + v4s bVec3 = __builtin_shuffle(temp6, temp7, mask3); // 3,7,11,15 + + sum00 = __SUMDOTP4(aVec0, bVec0, sum00); + sum01 = __SUMDOTP4(aVec0, bVec1, sum01); + sum02 = __SUMDOTP4(aVec0, bVec2, sum02); + sum03 = __SUMDOTP4(aVec0, bVec3, sum03); + sum10 = __SUMDOTP4(aVec1, bVec0, sum10); + sum11 = __SUMDOTP4(aVec1, bVec1, sum11); + sum12 = __SUMDOTP4(aVec1, bVec2, sum12); + sum13 = __SUMDOTP4(aVec1, bVec3, sum13); + } + + pDstC[(i * 2) * P + (k * 4)] = sum00; + pDstC[(i * 2) * P + (k * 4 + 1)] = sum01; + pDstC[(i * 2) * P + (k * 4 + 2)] = sum02; + pDstC[(i * 2) * P + (k * 4 + 3)] = sum03; + pDstC[(i * 2 + 1) * P + (k * 4)] = sum10; + pDstC[(i * 2 + 1) * P + (k * 4 + 1)] = sum11; + pDstC[(i * 2 + 1) * P + (k * 4 + 2)] = sum12; + pDstC[(i * 2 + 1) * P + (k * 4 + 3)] = sum13; + } + } +} +#endif + +/* + * Matrix multiplication ---------------------------------- + * kernel = matmul_unrolled_2x4_pincr_asm_parallel_i8_xpulpv2 + * data type = 8-bit integer + * multi-core = yes + * 
unrolling = 8 elements of C per iteration (2x4 chunks) + * simd = yes, Xpulpv2 intrinsics + * other = using pointer incrementing insteady of array + * indexing and loads/stores explicitly written + * in asm, for optimal register utilization + * + * Inspired from plp_mat_mult_i8p_xpulpv2 from pulp-dsp + */ +#ifdef __XPULPIMG +void matmul_unrolled_2x4_pincr_asm_parallel_i8_xpulpv2( + const int8_t *__restrict__ pSrcA, const int8_t *__restrict__ pSrcB, + int32_t *__restrict__ pDstC, uint32_t M, uint32_t N, uint32_t P, + uint32_t core_id, uint32_t numThreads) { + // Masks for shuffles + static v4s mask0 = {0, 1, 4, 5}; + static v4s mask1 = {2, 3, 6, 7}; + static v4s mask2 = {0, 2, 4, 6}; + static v4s mask3 = {1, 3, 5, 7}; + + // Loop counter for P + uint32_t k = 0; + // Row decrement for A matrix + int32_t const N_decr = -(int)N + 4; + // Row increment for C matrix + uint32_t const P_incr = (P * 4) - 12; + + for (k = core_id; k < P / 4; k += numThreads) { + const int8_t *idx_a = &pSrcA[0]; // start_a + int32_t *idx_c = &pDstC[k * 4]; // start_c + int32_t const *end_c = &pDstC[P * M]; // actually (P * M) + (k * 4) + while (idx_c < end_c) { + + int32_t sum00 = 0; + int32_t sum01 = 0; + int32_t sum02 = 0; + int32_t sum03 = 0; + int32_t sum10 = 0; + int32_t sum11 = 0; + int32_t sum12 = 0; + int32_t sum13 = 0; + + int8_t const *end_a = idx_a + N; + const int8_t *idx_b = &pSrcB[k * 4]; // start_b + while (idx_a < end_a) { + + v4s aVec0, aVec1; + v4s temp0, temp1, temp2, temp3; + + __asm__ volatile( + "p.lw %[a0], %[a_incr](%[addr_a]!) \n\t" + "p.lw %[a1], %[a_decr](%[addr_a]!) \n\t" + "p.lw %[t0], %[b_incr](%[addr_b]!) \n\t" + "p.lw %[t1], %[b_incr](%[addr_b]!) \n\t" + "p.lw %[t2], %[b_incr](%[addr_b]!) \n\t" + "p.lw %[t3], %[b_incr](%[addr_b]!) 
\n\t" + : [ a0 ] "=&r"(aVec0), [ a1 ] "=&r"(aVec1), [ t0 ] "=&r"(temp0), + [ t1 ] "=&r"(temp1), [ t2 ] "=&r"(temp2), [ t3 ] "=&r"(temp3), + [ addr_a ] "+&r"(idx_a), [ addr_b ] "+&r"(idx_b) + : [ a_incr ] "r"(N), [ a_decr ] "r"(N_decr), [ b_incr ] "r"(P) + : "memory"); + /* The asm code above implements the following commented C code */ + // go to next row, same column + // v4s aVec0 = *((v4s *)idx_a); idx_a += N; + // go to previous row, one column forward + // v4s aVec1 = *((v4s *)idx_a); idx_a -= N - 4; + // v4s temp0 = *((v4s *)idx_b); idx_b += P; + // v4s temp1 = *((v4s *)idx_b); idx_b += P; + // v4s temp2 = *((v4s *)idx_b); idx_b += P; + // v4s temp3 = *((v4s *)idx_b); idx_b += P; + + // Shuffles to transpose at runtime the chunk extracted from B before + // multiplying with A chunk temp0-3 variables needed because shuffles + // use rD as source, but also modify it, thus we need a copy of their + // content to use it twice in their original form + v4s temp4 = __builtin_shuffle(temp0, temp1, mask0); // 0,1,4,5 + v4s temp5 = __builtin_shuffle(temp2, temp3, mask0); // 8,9,12,13 + v4s temp6 = __builtin_shuffle(temp0, temp1, mask1); // 2,3,6,7 + v4s temp7 = __builtin_shuffle(temp2, temp3, mask1); // 3,7,11,15 + + v4s bVec0 = __builtin_shuffle(temp4, temp5, mask2); // 0,4,8,12 + v4s bVec1 = __builtin_shuffle(temp4, temp5, mask3); // 1,5,9,13 + v4s bVec2 = __builtin_shuffle(temp6, temp7, mask2); // 2,6,10,14 + v4s bVec3 = __builtin_shuffle(temp6, temp7, mask3); // 3,7,11,15 + + sum00 = __SUMDOTP4(aVec0, bVec0, sum00); + sum01 = __SUMDOTP4(aVec0, bVec1, sum01); + sum02 = __SUMDOTP4(aVec0, bVec2, sum02); + sum03 = __SUMDOTP4(aVec0, bVec3, sum03); + sum10 = __SUMDOTP4(aVec1, bVec0, sum10); + sum11 = __SUMDOTP4(aVec1, bVec1, sum11); + sum12 = __SUMDOTP4(aVec1, bVec2, sum12); + sum13 = __SUMDOTP4(aVec1, bVec3, sum13); + } + + __asm__ volatile( + "p.sw %[s00], 4(%[addr_c]!) \n\t" + "p.sw %[s01], 4(%[addr_c]!) \n\t" + "p.sw %[s02], 4(%[addr_c]!) 
\n\t" + "p.sw %[s03], %[c_incr](%[addr_c]!) \n\t" + "p.sw %[s10], 4(%[addr_c]!) \n\t" + "p.sw %[s11], 4(%[addr_c]!) \n\t" + "p.sw %[s12], 4(%[addr_c]!) \n\t" + "p.sw %[s13], %[c_incr](%[addr_c]!) \n\t" + : [ addr_c ] "+&r"(idx_c) + : [ s00 ] "r"(sum00), [ s01 ] "r"(sum01), [ s02 ] "r"(sum02), + [ s03 ] "r"(sum03), [ s10 ] "r"(sum10), [ s11 ] "r"(sum11), + [ s12 ] "r"(sum12), [ s13 ] "r"(sum13), [ c_incr ] "r"(P_incr) + : "memory"); + /* The asm code above implements the following commented C code */ + // *(idx_c++) = sum00; + // *(idx_c++) = sum01; + // *(idx_c++) = sum02; + // *(idx_c) = sum03; idx_c += P - 3; + // *(idx_c++) = sum10; + // *(idx_c++) = sum11; + // *(idx_c++) = sum12; + // *(idx_c) = sum13; idx_c += P - 3; + + idx_a += N; // adjust A matrix pointer + } + } +} +#endif + +/* + * Matrix multiplication ---------------------------------- + * kernel = matmul_unrolled_4x2_parallel_i16_xpulpv2 + * data type = 16-bit integer + * multi-core = yes + * unrolling = 8 elements of C per iteration (4x2 chunks) + * simd = yes, Xpulpv2 intrinsics + * + * Original plp_mat_mult_i16p_xpulpv2 from pulp-dsp + */ +#ifdef __XPULPIMG +void matmul_unrolled_4x2_parallel_i16_xpulpv2(const int16_t *__restrict__ pSrcA, + const int16_t *__restrict__ pSrcB, + int32_t *__restrict__ pDstC, + uint32_t M, uint32_t N, + uint32_t P, uint32_t core_id, + uint32_t numThreads) { + uint32_t i = 0; // loop counter for M + uint32_t j = 0; // loop counter for N + uint32_t k = 0; // loop counter for P + + for (k = core_id; k < P / 2; k += numThreads) { + for (i = 0; i < M / 4; i++) { + + int32_t sum00 = 0; + int32_t sum01 = 0; + int32_t sum10 = 0; + int32_t sum11 = 0; + int32_t sum20 = 0; + int32_t sum21 = 0; + int32_t sum30 = 0; + int32_t sum31 = 0; + + for (j = 0; j < N / 2; j++) { + + v2s aVec0 = *((v2s *)&(pSrcA[(i * 4) * N + (j * 2)])); + v2s aVec1 = *((v2s *)&(pSrcA[(i * 4 + 1) * N + (j * 2)])); + v2s aVec2 = *((v2s *)&(pSrcA[(i * 4 + 2) * N + (j * 2)])); + v2s aVec3 = *((v2s *)&(pSrcA[(i 
* 4 + 3) * N + (j * 2)])); + + v2s bTemp0 = *((v2s *)&(pSrcB[(j * 2) * P + (k * 2)])); + v2s bTemp1 = *((v2s *)&(pSrcB[(j * 2 + 1) * P + (k * 2)])); + + v2s bVec0 = __builtin_shuffle(bTemp0, bTemp1, (v2s){0, 2}); + v2s bVec1 = __builtin_shuffle(bTemp0, bTemp1, (v2s){1, 3}); + + sum00 = __SUMDOTP2(aVec0, bVec0, sum00); + sum01 = __SUMDOTP2(aVec0, bVec1, sum01); + sum10 = __SUMDOTP2(aVec1, bVec0, sum10); + sum11 = __SUMDOTP2(aVec1, bVec1, sum11); + sum20 = __SUMDOTP2(aVec2, bVec0, sum20); + sum21 = __SUMDOTP2(aVec2, bVec1, sum21); + sum30 = __SUMDOTP2(aVec3, bVec0, sum30); + sum31 = __SUMDOTP2(aVec3, bVec1, sum31); + } + + pDstC[(i * 4) * P + (k * 2)] = sum00; + pDstC[(i * 4) * P + (k * 2 + 1)] = sum01; + pDstC[(i * 4 + 1) * P + (k * 2)] = sum10; + pDstC[(i * 4 + 1) * P + (k * 2 + 1)] = sum11; + pDstC[(i * 4 + 2) * P + (k * 2)] = sum20; + pDstC[(i * 4 + 2) * P + (k * 2 + 1)] = sum21; + pDstC[(i * 4 + 3) * P + (k * 2)] = sum30; + pDstC[(i * 4 + 3) * P + (k * 2 + 1)] = sum31; + } + } +} +#endif + +/* + * Matrix multiplication ---------------------------------- + * kernel = matmul_unrolled_4x2_pincr_asm_parallel_i16_xpulpv2 + * data type = 16-bit integer + * multi-core = yes + * unrolling = 8 elements of C per iteration (4x2 chunks) + * simd = yes, Xpulpv2 intrinsics + * other = using pointer incrementing instead of array + * indexing and loads/stores explicitly written + * in asm, for optimal register utilization + * + * Inspired from plp_mat_mult_i16p_xpulpv2 from pulp-dsp + */ +#ifdef __XPULPIMG +void matmul_unrolled_4x2_pincr_asm_parallel_i16_xpulpv2( + const int16_t *__restrict__ pSrcA, const int16_t *__restrict__ pSrcB, + int32_t *__restrict__ pDstC, uint32_t M, uint32_t N, uint32_t P, + uint32_t core_id, uint32_t numThreads) { + // Loop counter for P + uint32_t k = 0; + // Increment for A matrix = 1 row forward + uint32_t const A_incr = N * sizeof(int16_t); + // Decrement for A matrix = 3 rows backward and 2 elements forward + int32_t const A_decr = + -(int)(N * 3 
* sizeof(int16_t)) + 2 * (int)sizeof(int16_t); + // Increment for B matrix = 1 row forward + uint32_t const B_incr = P * sizeof(int16_t); // bytes in 1 row + // Increment for C matrix = 1 row forward and 1 word backward + uint32_t const C_incr = (P * sizeof(int32_t)) - sizeof(int32_t); + + for (k = core_id; k < P / 2; k += numThreads) { + const int16_t *idx_a = &pSrcA[0]; // start_a + int32_t *idx_c = &pDstC[k * 2]; // start_c + int32_t const *end_c = &pDstC[P * M]; // actually (P * M) + (k * 2) + + while (idx_c < end_c) { + + int32_t sum00 = 0; + int32_t sum01 = 0; + int32_t sum10 = 0; + int32_t sum11 = 0; + int32_t sum20 = 0; + int32_t sum21 = 0; + int32_t sum30 = 0; + int32_t sum31 = 0; + + int16_t const *end_a = idx_a + N; + const int16_t *idx_b = &pSrcB[k * 2]; // start_b + + while (idx_a < end_a) { + + v2s aVec0, aVec1, aVec2, aVec3; + v2s bTemp0, bTemp1; + + __asm__ volatile("p.lw %[a0], %[a_incr](%[addr_a]!) \n\t" + "p.lw %[a1], %[a_incr](%[addr_a]!) \n\t" + "p.lw %[a2], %[a_incr](%[addr_a]!) \n\t" + "p.lw %[a3], %[a_decr](%[addr_a]!) \n\t" + "p.lw %[t0], %[b_incr](%[addr_b]!) \n\t" + "p.lw %[t1], %[b_incr](%[addr_b]!) 
\n\t" + : [ a0 ] "=&r"(aVec0), [ a1 ] "=&r"(aVec1), + [ a2 ] "=&r"(aVec2), [ a3 ] "=&r"(aVec3), + [ t0 ] "=&r"(bTemp0), [ t1 ] "=&r"(bTemp1), + [ addr_a ] "+&r"(idx_a), [ addr_b ] "+&r"(idx_b) + : [ a_incr ] "r"(A_incr), [ a_decr ] "r"(A_decr), + [ b_incr ] "r"(B_incr) + : "memory"); + /* The asm code above implements the following commented C code */ + // v2s aVec0 = *((v2s *)&(pSrcA[(i * 4) * N + (j * 2)])); + // v2s aVec1 = *((v2s *)&(pSrcA[(i * 4 + 1) * N + (j * 2)])); + // v2s aVec2 = *((v2s *)&(pSrcA[(i * 4 + 2) * N + (j * 2)])); + // v2s aVec3 = *((v2s *)&(pSrcA[(i * 4 + 3) * N + (j * 2)])); + // v2s bTemp0 = *((v2s *)&(pSrcB[(j * 2) * P + (k * 2)])); + // v2s bTemp1 = *((v2s *)&(pSrcB[(j * 2 + 1) * P + (k * 2)])); + + v2s bVec0 = __builtin_shuffle(bTemp0, bTemp1, (v2s){0, 2}); + v2s bVec1 = __builtin_shuffle(bTemp0, bTemp1, (v2s){1, 3}); + + sum00 = __SUMDOTP2(aVec0, bVec0, sum00); + sum01 = __SUMDOTP2(aVec0, bVec1, sum01); + sum10 = __SUMDOTP2(aVec1, bVec0, sum10); + sum11 = __SUMDOTP2(aVec1, bVec1, sum11); + sum20 = __SUMDOTP2(aVec2, bVec0, sum20); + sum21 = __SUMDOTP2(aVec2, bVec1, sum21); + sum30 = __SUMDOTP2(aVec3, bVec0, sum30); + sum31 = __SUMDOTP2(aVec3, bVec1, sum31); + } + + __asm__ volatile( + "p.sw %[s00], 4(%[addr_c]!) \n\t" + "p.sw %[s01], %[c_incr](%[addr_c]!) \n\t" + "p.sw %[s10], 4(%[addr_c]!) \n\t" + "p.sw %[s11], %[c_incr](%[addr_c]!) \n\t" + "p.sw %[s20], 4(%[addr_c]!) \n\t" + "p.sw %[s21], %[c_incr](%[addr_c]!) \n\t" + "p.sw %[s30], 4(%[addr_c]!) \n\t" + "p.sw %[s31], %[c_incr](%[addr_c]!) 
\n\t" + : [ addr_c ] "+&r"(idx_c) + : [ s00 ] "r"(sum00), [ s01 ] "r"(sum01), [ s10 ] "r"(sum10), + [ s11 ] "r"(sum11), [ s20 ] "r"(sum20), [ s21 ] "r"(sum21), + [ s30 ] "r"(sum30), [ s31 ] "r"(sum31), [ c_incr ] "r"(C_incr) + : "memory"); + /* The asm code above implements the following commented C code */ + // pDstC[(i * 4) * P + (k * 2)] = sum00; + // pDstC[(i * 4) * P + (k * 2 + 1)] = sum01; + // pDstC[(i * 4 + 1) * P + (k * 2)] = sum10; + // pDstC[(i * 4 + 1) * P + (k * 2 + 1)] = sum11; + // pDstC[(i * 4 + 2) * P + (k * 2)] = sum20; + // pDstC[(i * 4 + 2) * P + (k * 2 + 1)] = sum21; + // pDstC[(i * 4 + 3) * P + (k * 2)] = sum30; + // pDstC[(i * 4 + 3) * P + (k * 2 + 1)] = sum31; + + idx_a += N * 3; + } + } +} +#endif + +/* + * Matrix multiplication ---------------------------------- + * kernel = matmul_unrolled_2x2_parallel_i32_rv32im + * data type = 32-bit integer + * multi-core = yes + * unrolling = 4 elements of C per iteration (2x2 chunks) + * simd = no + */ +void matmul_unrolled_2x2_parallel_i32_rv32im(int32_t const *__restrict__ A, + int32_t const *__restrict__ B, + int32_t *__restrict__ C, + uint32_t M, uint32_t N, uint32_t P, + uint32_t id, uint32_t numThreads) { + // Parallelize by assigning each core one row + uint32_t const c = 8; // How many columns to split the matrix into + uint32_t const c_start = (P / c) * (id % c); + uint32_t const c_end = (P / c) * ((id % c) + 1); + for (uint32_t i = 2 * (id / c); i < M; i += 2 * (numThreads / c)) { + for (uint32_t j = c_start; j < c_end; j += 2) { + int32_t c00 = 0; + int32_t c01 = 0; + int32_t c10 = 0; + int32_t c11 = 0; + for (uint32_t k = 0; k < N; k += 2) { + // Explicitly load the values first to help with scheduling + int32_t val_a00 = A[(i + 0) * N + k + 0]; + int32_t val_a01 = A[(i + 0) * N + k + 1]; + int32_t val_a10 = A[(i + 1) * N + k + 0]; + int32_t val_a11 = A[(i + 1) * N + k + 1]; + int32_t val_b00 = B[(k + 0) * P + j + 0]; + int32_t val_b01 = B[(k + 0) * P + j + 1]; + int32_t val_b10 = B[(k + 
1) * P + j + 0]; + int32_t val_b11 = B[(k + 1) * P + j + 1]; + c00 += val_a00 * val_b00; + c00 += val_a01 * val_b10; + c01 += val_a00 * val_b01; + c01 += val_a01 * val_b11; + c10 += val_a10 * val_b00; + c10 += val_a11 * val_b10; + c11 += val_a10 * val_b01; + c11 += val_a11 * val_b11; + } + C[(i + 0) * P + j + 0] = c00; + C[(i + 0) * P + j + 1] = c01; + C[(i + 1) * P + j + 0] = c10; + C[(i + 1) * P + j + 1] = c11; + } + } +} + +/* + * Matrix multiplication ---------------------------------- + * kernel = matmul_unrolled_2x2_parallel_i32_xpulpv2 + * data type = 32-bit integer + * multi-core = yes + * unrolling = 4 elements of C per iteration (2x2 chunks) + * simd = no + * other = loads/stores explicitly written in asm + * for optimal register utilization + */ +#ifdef __XPULPIMG +void matmul_unrolled_2x2_parallel_i32_xpulpv2(int32_t const *__restrict__ A, + int32_t const *__restrict__ B, + int32_t *__restrict__ C, + uint32_t M, uint32_t N, + uint32_t P, uint32_t id, + uint32_t numThreads) { + // Each core handles one of c column slices and strides over pairs of rows (needs numThreads >= c, otherwise the row stride is 0) + uint32_t const c = 8; // How many columns to split the matrix into + uint32_t const c_start = (P / c) * (id % c); + uint32_t const c_end = (P / c) * ((id % c) + 1); + + uint32_t const A_incr = (N * sizeof(int32_t)) - sizeof(int32_t); + uint32_t const B_incr = (P * sizeof(int32_t)) - sizeof(int32_t); + + for (uint32_t i = 2 * (id / c); i < M; i += 2 * (numThreads / c)) { + for (uint32_t j = c_start; j < c_end; j += 2) { + int32_t c00 = 0; + int32_t c01 = 0; + int32_t c10 = 0; + int32_t c11 = 0; + + for (uint32_t k = 0; k < N; k += 2) { + const int32_t *idx_a = &A[i * N + k]; + const int32_t *idx_b = &B[k * P + j]; + int32_t val_a00, val_a01, val_a10, val_a11, val_b00, val_b01, val_b10, + val_b11; + __asm__ volatile("p.lw %[a00], 4(%[addr_a]!) \n\t" + "p.lw %[a01], %[a_incr](%[addr_a]!) \n\t" + "p.lw %[a10], 4(%[addr_a]!) \n\t" + "p.lw %[a11], 0(%[addr_a]) \n\t" + "p.lw %[b00], 4(%[addr_b]!) \n\t" + "p.lw %[b01], %[b_incr](%[addr_b]!) 
\n\t" + "p.lw %[b10], 4(%[addr_b]!) \n\t" + "p.lw %[b11], 0(%[addr_b]) \n\t" + : [ a00 ] "=&r"(val_a00), [ a01 ] "=&r"(val_a01), + [ a10 ] "=&r"(val_a10), [ a11 ] "=&r"(val_a11), + [ b00 ] "=&r"(val_b00), [ b01 ] "=&r"(val_b01), + [ b10 ] "=&r"(val_b10), [ b11 ] "=&r"(val_b11), + [ addr_a ] "+&r"(idx_a), [ addr_b ] "+&r"(idx_b) + : [ a_incr ] "r"(A_incr), [ b_incr ] "r"(B_incr) + : "memory"); + /* The asm code above implements the following commented C code */ + // int32_t val_a00 = A[(i + 0) * N + k + 0]; + // int32_t val_a01 = A[(i + 0) * N + k + 1]; + // int32_t val_a10 = A[(i + 1) * N + k + 0]; + // int32_t val_a11 = A[(i + 1) * N + k + 1]; + // int32_t val_b00 = B[(k + 0) * P + j + 0]; + // int32_t val_b01 = B[(k + 0) * P + j + 1]; + // int32_t val_b10 = B[(k + 1) * P + j + 0]; + // int32_t val_b11 = B[(k + 1) * P + j + 1]; + c00 += val_a00 * val_b00; + c00 += val_a01 * val_b10; + c01 += val_a00 * val_b01; + c01 += val_a01 * val_b11; + c10 += val_a10 * val_b00; + c10 += val_a11 * val_b10; + c11 += val_a10 * val_b01; + c11 += val_a11 * val_b11; + } + int32_t *idx_c = &C[i * P + j]; + __asm__ volatile("p.sw %[s00], 4(%[addr_c]!) \n\t" + "p.sw %[s01], %[c_incr](%[addr_c]!) \n\t" + "p.sw %[s10], 4(%[addr_c]!) \n\t" + "p.sw %[s11], 0(%[addr_c]) \n\t" + : [ addr_c ] "+&r"(idx_c) + : [ s00 ] "r"(c00), [ s01 ] "r"(c01), [ s10 ] "r"(c10), + [ s11 ] "r"(c11), [ c_incr ] "r"(B_incr) + : "memory"); + /* The asm code above implements the following commented C code */ + // C[(i + 0) * P + j + 0] = c00; + // C[(i + 0) * P + j + 1] = c01; + // C[(i + 1) * P + j + 0] = c10; + // C[(i + 1) * P + j + 1] = c11; + } + } +} +#endif diff --git a/apps/conv2d_i8/main.c b/apps/conv2d_i8/main.c new file mode 100644 index 000000000..81ebee8ec --- /dev/null +++ b/apps/conv2d_i8/main.c @@ -0,0 +1,111 @@ +// Copyright 2020 ETH Zurich and University of Bologna. 
+// +// SPDX-License-Identifier: Apache-2.0 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Author: Samuel Riedel, ETH Zurich + +#include +#include + +#include "encoding.h" +#include "printf.h" +#include "runtime.h" +#include "synchronization.h" +#include "xpulp/conv_2d.h" + +#define M 32 +#define N 32 +#define KERNEL_N 3 +//#define VERBOSE_IN // uncomment to print the input image and kernel +//#define VERBOSE_OUT // uncomment to print the output image + +volatile int8_t in[M * N] __attribute__((section(".l1_prio"))); +volatile int32_t out[M * N] __attribute__((section(".l1_prio"))); +volatile uint8_t kernel[KERNEL_N * KERNEL_N] __attribute__((section(".l1"))); +volatile int error __attribute__((section(".l1"))); + +int main() { + uint32_t core_id = mempool_get_core_id(); + uint32_t num_cores = mempool_get_core_count(); + mempool_barrier_init(core_id, num_cores); + + mempool_barrier(num_cores, num_cores / 2); + + if (core_id == 0) { + // Initialize error + error = 0; + // Initialize kernel + kernel[0] = 1; + kernel[1] = 2; + kernel[2] = 1; + + kernel[3] = 2; + kernel[4] = 4; + kernel[5] = 2; + + kernel[6] = 1; + kernel[7] = 2; + kernel[8] = 1; + + // Initialize img + init_conv2d_image_i8(in, N, M); + +#ifdef VERBOSE_IN + printf("A:\n"); + for (int i = 0; i < M; i++) { + for (int j = 0; j < N; j++) { + printf("%4u ", in[i * N + j]); + } + printf("\n"); + } + printf("kernel:\n"); + for (int i = 0; i < KERNEL_N; i++) { + for (int j = 0; j < KERNEL_N; j++) { + printf("%4u ", kernel[i * KERNEL_N + j]); + } + printf("\n"); + } +#endif + + 
mempool_start_benchmark(); +#ifdef __XPULPIMG + conv2d_3x3_unrolled2_i8_xpulpv2(in, out, M, N, kernel); +#else + conv2d_3x3_unrolled2_i8_rv32im(in, N, M, kernel, out); +#endif + mempool_stop_benchmark(); + +#ifdef VERBOSE_OUT + printf("out:\n"); + for (int i = 1; i < M - 1; i++) { + for (int j = 1; j < N - 1; j++) { + printf("%4u ", out[i * N + j]); + } + printf("\n"); + } +#endif + + // verify_conv2d_image_i8_verbose(out, N, M); + // Check result: any non-zero return from the verifier is recorded as an error + if (verify_conv2d_image_i8(out, N, M)) { + error = 1; + } + } + + // wait until all cores have finished + mempool_barrier(num_cores, 4 * num_cores); + + return error; +} diff --git a/apps/convolution/main.c b/apps/convolution/main.c index 990b23c34..df47be73d 100644 --- a/apps/convolution/main.c +++ b/apps/convolution/main.c @@ -125,7 +125,7 @@ int main() { mempool_barrier(num_cores, num_cores * 4); // Check result if (verify_conv2d_image(out, N, M, core_id, num_cores)) { - __atomic_fetch_or(&error, i, __ATOMIC_SEQ_CST); + amo_or(&error, (unsigned)i); } } diff --git a/apps/matmul_i16/main.c b/apps/matmul_i16/main.c new file mode 100644 index 000000000..e25b12a3e --- /dev/null +++ b/apps/matmul_i16/main.c @@ -0,0 +1,151 @@ +// Copyright 2020 ETH Zurich and University of Bologna. +// +// SPDX-License-Identifier: Apache-2.0 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Author: Samuel Riedel, ETH Zurich + +#include +#include + +#include "encoding.h" +#include "printf.h" +#include "runtime.h" +#include "synchronization.h" +#include "xpulp/mat_mul.h" + +// Define Matrix dimensions: +// C = AB with A=[MxN], B=[NxP], C=[MxP] +#define matrix_M 64 +#define matrix_N 64 +#define matrix_P 64 + +int16_t matrix_a[matrix_M * matrix_N] __attribute__((section(".l1_prio"))); +int16_t matrix_b[matrix_N * matrix_P] __attribute__((section(".l1_prio"))); +int32_t matrix_c[matrix_M * matrix_P] __attribute__((section(".l1_prio"))); + +int volatile error __attribute__((section(".l1"))); + +void init_matrix(int16_t *matrix, uint32_t num_rows, uint32_t num_columns, + int16_t a, int16_t b, int16_t c, uint32_t core_id, + uint32_t num_cores) { + uint32_t const split = 8; // How many rows/columns to split the matrix into + if (num_columns > num_rows) { + // Parallelize over columns + uint32_t const c_start = (num_rows / split) * (core_id % split); + uint32_t const c_end = (num_rows / split) * ((core_id % split) + 1); + for (uint32_t j = (core_id / split); j < num_columns; + j += (num_cores / split)) { + for (uint32_t i = c_start; i < c_end; ++i) { + matrix[i * num_columns + j] = a * (int16_t)i + b * (int16_t)j + c; + } + } + } else { + // Parallelize over rows + uint32_t const c_start = (num_columns / split) * (core_id % split); + uint32_t const c_end = (num_columns / split) * ((core_id % split) + 1); + for (uint32_t i = (core_id / split); i < num_rows; + i += (num_cores / split)) { + for (uint32_t j = c_start; j < c_end; ++j) { + matrix[i * num_columns + j] = a * (int16_t)i + b * (int16_t)j + c; + } + } + } +} + +// Initialize the matrices in parallel +int verify_matrix(int32_t *matrix, uint32_t num_rows, uint32_t num_columns, + uint32_t inner_dim, int16_t aa, int16_t ab, int16_t ac, + int16_t ba, int16_t bb, int16_t bc, uint32_t core_id, + uint32_t num_cores) { + // Convert to signed + int32_t n = (int32_t)inner_dim; + // Parallelize over rows + for 
(uint32_t i = core_id; i < num_rows; i += num_cores) { + for (uint32_t j = 0; j < num_columns; ++j) { + int32_t ii = (int32_t)i; + int32_t jj = (int32_t)j; + int32_t lin = ((int32_t)aa * bb * ii * jj + aa * bc * ii + ac * bb * jj + + (int32_t)ac * bc) * + n; + int32_t qua = + (((int32_t)aa * ba * ii + ab * bb * jj + ab * bc + (int32_t)ba * ac) * + (n * (n - 1))) / + 2; + int32_t cub = (((int32_t)ab * ba) * (n * (n - 1) * (2 * n - 1))) / 6; + int32_t golden = lin + qua + cub; + if (matrix[i * num_columns + j] != golden) { + return (i + j) == 0 ? -1 : (int)(i * num_columns + j); + } + matrix[i * num_columns + j] = 0; + } + } + return 0; +} + +int test_matrix_multiplication(int16_t *__restrict__ A, int16_t *__restrict__ B, + int32_t *__restrict__ C, uint32_t M, uint32_t N, + uint32_t P, uint32_t core_id, + uint32_t num_cores) { + int16_t const A_a = 1; + int16_t const A_b = 1; + int16_t const A_c = -40; + int16_t const B_a = 0; + int16_t const B_b = 1; + int16_t const B_c = 19; + + // Initialize Matrices + init_matrix(A, M, N, A_a, A_b, A_c, core_id, num_cores); + init_matrix(B, N, P, B_a, B_b, B_c, core_id, num_cores); + // Wait at barrier until everyone is ready + mempool_barrier(num_cores, num_cores / 2); + // Execute function to test. 
+ mempool_start_benchmark(); + +#ifdef __XPULPIMG + matmul_unrolled_4x2_pincr_asm_parallel_i16_xpulpv2(A, B, C, M, N, P, core_id, + num_cores); +#else + matmul_unrolled_2x2_parallel_i16_rv32im(A, B, C, M, N, P, core_id, num_cores); +#endif + + mempool_stop_benchmark(); + // Wait at barrier before checking + mempool_barrier(num_cores, num_cores * 4); + if (verify_matrix(C, M, P, N, A_a, A_b, A_c, B_a, B_b, B_c, core_id, + num_cores)) { + error = 1; + return -1; + } + return 0; +} + +int main() { + uint32_t core_id = mempool_get_core_id(); + uint32_t num_cores = mempool_get_core_count(); + // Initialize barrier and synchronize + mempool_barrier_init(core_id, num_cores); + + if (core_id == 0) { + error = 0; + } + + // Test the Matrix multiplication + test_matrix_multiplication(matrix_a, matrix_b, matrix_c, matrix_M, matrix_N, + matrix_P, core_id, num_cores); + // wait until all cores have finished + mempool_barrier(num_cores, num_cores * 4); + + return error; +} diff --git a/apps/matmul_i32/main.c b/apps/matmul_i32/main.c new file mode 100644 index 000000000..6fb3d422e --- /dev/null +++ b/apps/matmul_i32/main.c @@ -0,0 +1,149 @@ +// Copyright 2020 ETH Zurich and University of Bologna. +// +// SPDX-License-Identifier: Apache-2.0 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Author: Samuel Riedel, ETH Zurich + +#include +#include + +#include "encoding.h" +#include "printf.h" +#include "runtime.h" +#include "synchronization.h" +#include "xpulp/mat_mul.h" + +// Define Matrix dimensions: +// C = AB with A=[MxN], B=[NxP], C=[MxP] +#define matrix_M 64 +#define matrix_N 32 +#define matrix_P 64 + +int32_t matrix_a[matrix_M * matrix_N] __attribute__((section(".l1_prio"))); +int32_t matrix_b[matrix_N * matrix_P] __attribute__((section(".l1_prio"))); +int32_t matrix_c[matrix_M * matrix_P] __attribute__((section(".l1_prio"))); + +int volatile error __attribute__((section(".l1"))); + +void init_matrix(int32_t *matrix, uint32_t num_rows, uint32_t num_columns, + int32_t a, int32_t b, int32_t c, uint32_t core_id, + uint32_t num_cores) { + uint32_t const split = 8; // How many rows/columns to split the matrix into + if (num_columns > num_rows) { + // Parallelize over columns + uint32_t const c_start = (num_rows / split) * (core_id % split); + uint32_t const c_end = (num_rows / split) * ((core_id % split) + 1); + for (uint32_t j = (core_id / split); j < num_columns; + j += (num_cores / split)) { + for (uint32_t i = c_start; i < c_end; ++i) { + matrix[i * num_columns + j] = a * (int32_t)i + b * (int32_t)j + c; + } + } + } else { + // Parallelize over rows + uint32_t const c_start = (num_columns / split) * (core_id % split); + uint32_t const c_end = (num_columns / split) * ((core_id % split) + 1); + for (uint32_t i = (core_id / split); i < num_rows; + i += (num_cores / split)) { + for (uint32_t j = c_start; j < c_end; ++j) { + matrix[i * num_columns + j] = a * (int32_t)i + b * (int32_t)j + c; + } + } + } +} + +// Verify the result matrix in parallel +int verify_matrix(int32_t *matrix, uint32_t num_rows, uint32_t num_columns, + uint32_t inner_dim, int32_t aa, int32_t ab, int32_t ac, + int32_t ba, int32_t bb, int32_t bc, uint32_t core_id, + uint32_t num_cores) { + // Convert to signed + int32_t n = (int32_t)inner_dim; + // Parallelize over rows + for 
(uint32_t i = core_id; i < num_rows; i += num_cores) { + for (uint32_t j = 0; j < num_columns; ++j) { + int32_t ii = (int32_t)i; + int32_t jj = (int32_t)j; + int32_t lin = + (aa * bb * ii * jj + aa * bc * ii + ac * bb * jj + ac * bc) * n; + int32_t qua = + ((aa * ba * ii + ab * bb * jj + ab * bc + ba * ac) * (n * (n - 1))) / + 2; + int32_t cub = ((ab * ba) * (n * (n - 1) * (2 * n - 1))) / 6; + int32_t golden = lin + qua + cub; + if (matrix[i * num_columns + j] != golden) { + return (i + j) == 0 ? -1 : (int)(i * num_columns + j); + } + matrix[i * num_columns + j] = 0; + } + } + return 0; +} + +int test_matrix_multiplication(int32_t *__restrict__ A, int32_t *__restrict__ B, + int32_t *__restrict__ C, uint32_t M, uint32_t N, + uint32_t P, uint32_t core_id, + uint32_t num_cores) { + int32_t const A_a = 1; + int32_t const A_b = 1; + int32_t const A_c = -32; + int32_t const B_a = 2; + int32_t const B_b = 1; + int32_t const B_c = 16; + + // Initialize Matrices + init_matrix(A, M, N, A_a, A_b, A_c, core_id, num_cores); + init_matrix(B, N, P, B_a, B_b, B_c, core_id, num_cores); + // Wait at barrier until everyone is ready + mempool_barrier(num_cores, num_cores / 2); + // Execute function to test. 
+ mempool_start_benchmark(); + +#ifdef __XPULPIMG + matmul_unrolled_2x2_parallel_i32_xpulpv2(A, B, C, M, N, P, core_id, + num_cores); +#else + matmul_unrolled_2x2_parallel_i32_rv32im(A, B, C, M, N, P, core_id, num_cores); +#endif + + mempool_stop_benchmark(); + // Wait at barrier before checking + mempool_barrier(num_cores, num_cores * 4); + if (verify_matrix(C, M, P, N, A_a, A_b, A_c, B_a, B_b, B_c, core_id, + num_cores)) { + error = 1; + return -1; + } + return 0; +} + +int main() { + uint32_t core_id = mempool_get_core_id(); + uint32_t num_cores = mempool_get_core_count(); + // Initialize barrier and synchronize + mempool_barrier_init(core_id, num_cores); + + if (core_id == 0) { + error = 0; + } + + // Test the Matrix multiplication + test_matrix_multiplication(matrix_a, matrix_b, matrix_c, matrix_M, matrix_N, + matrix_P, core_id, num_cores); + // wait until all cores have finished + mempool_barrier(num_cores, num_cores * 4); + + return error; +} diff --git a/apps/matmul_i8/main.c b/apps/matmul_i8/main.c new file mode 100644 index 000000000..6182458a8 --- /dev/null +++ b/apps/matmul_i8/main.c @@ -0,0 +1,153 @@ +// Copyright 2020 ETH Zurich and University of Bologna. +// +// SPDX-License-Identifier: Apache-2.0 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Author: Samuel Riedel, ETH Zurich + +#include +#include + +#include "encoding.h" +#include "printf.h" +#include "runtime.h" +#include "synchronization.h" +#include "xpulp/mat_mul.h" + +// Define Matrix dimensions: +// C = AB with A=[MxN], B=[NxP], C=[MxP] +#define matrix_M 64 +#define matrix_N 64 +#define matrix_P 64 + +int8_t matrix_a[matrix_M * matrix_N] __attribute__((section(".l1_prio"))); +int8_t matrix_b[matrix_N * matrix_P] __attribute__((section(".l1_prio"))); +int32_t matrix_c[matrix_M * matrix_P] __attribute__((section(".l1_prio"))); + +int volatile error __attribute__((section(".l1"))); + +void init_matrix(int8_t *matrix, uint32_t num_rows, uint32_t num_columns, + int8_t a, int8_t b, int8_t c, uint32_t core_id, + uint32_t num_cores) { + uint32_t const split = 8; // How many rows/columns to split the matrix into + if (num_columns > num_rows) { + // Parallelize over columns + uint32_t const c_start = (num_rows / split) * (core_id % split); + uint32_t const c_end = (num_rows / split) * ((core_id % split) + 1); + for (uint32_t j = (core_id / split); j < num_columns; + j += (num_cores / split)) { + for (uint32_t i = c_start; i < c_end; ++i) { + matrix[i * num_columns + j] = a * (int8_t)i + b * (int8_t)j + c; + } + } + } else { + // Parallelize over rows + uint32_t const c_start = (num_columns / split) * (core_id % split); + uint32_t const c_end = (num_columns / split) * ((core_id % split) + 1); + for (uint32_t i = (core_id / split); i < num_rows; + i += (num_cores / split)) { + for (uint32_t j = c_start; j < c_end; ++j) { + matrix[i * num_columns + j] = a * (int8_t)i + b * (int8_t)j + c; + } + } + } +} + +// Verify the result matrix in parallel +int verify_matrix(int32_t *matrix, uint32_t num_rows, uint32_t num_columns, + uint32_t inner_dim, int8_t aa, int8_t ab, int8_t ac, + int8_t ba, int8_t bb, int8_t bc, uint32_t core_id, + uint32_t num_cores) { + // Convert to signed + int32_t n = (int32_t)inner_dim; + // Parallelize over rows + for (uint32_t i = 
core_id; i < num_rows; i += num_cores) { + for (uint32_t j = 0; j < num_columns; ++j) { + int32_t ii = (int32_t)i; + int32_t jj = (int32_t)j; + int32_t lin = ((int32_t)aa * bb * ii * jj + aa * bc * ii + ac * bb * jj + + (int32_t)ac * bc) * + n; + int32_t qua = + (((int32_t)aa * ba * ii + ab * bb * jj + ab * bc + (int32_t)ba * ac) * + (n * (n - 1))) / + 2; + int32_t cub = (((int32_t)ab * ba) * (n * (n - 1) * (2 * n - 1))) / 6; + int32_t golden = lin + qua + cub; + if (matrix[i * num_columns + j] != golden) { + return (i + j) == 0 ? -1 : (int)(i * num_columns + j); + } + matrix[i * num_columns + j] = 0; + } + } + return 0; +} + +int test_matrix_multiplication(int8_t *__restrict__ A, int8_t *__restrict__ B, + int32_t *__restrict__ C, uint32_t M, uint32_t N, + uint32_t P, uint32_t core_id, + uint32_t num_cores) { + int8_t const A_a = 1; + int8_t const A_b = 1; + int8_t const A_c = -40; + int8_t const B_a = 0; + int8_t const B_b = 1; + int8_t const B_c = 19; + + // Initialize Matrices + init_matrix(A, M, N, A_a, A_b, A_c, core_id, num_cores); + init_matrix(B, N, P, B_a, B_b, B_c, core_id, num_cores); + // Wait at barrier until everyone is ready + mempool_barrier(num_cores, num_cores / 2); + // Execute function to test. 
+ mempool_start_benchmark(); + +#ifdef __XPULPIMG + matmul_unrolled_2x4_pincr_asm_parallel_i8_xpulpv2(A, B, C, M, N, P, core_id, + num_cores); + // matmul_unrolled_2x4_parallel_i8_xpulpv2(A, B, C, M, N, P, core_id, + // num_cores); +#else + matmul_unrolled_2x2_parallel_i8_rv32im(A, B, C, M, N, P, core_id, num_cores); +#endif + + mempool_stop_benchmark(); + // Wait at barrier before checking + mempool_barrier(num_cores, num_cores * 4); + if (verify_matrix(C, M, P, N, A_a, A_b, A_c, B_a, B_b, B_c, core_id, + num_cores)) { + error = 1; + return -1; + } + return 0; +} + +int main() { + uint32_t core_id = mempool_get_core_id(); + uint32_t num_cores = mempool_get_core_count(); + // Initialize barrier and synchronize + mempool_barrier_init(core_id, num_cores); + + if (core_id == 0) { + error = 0; + } + + // Test the Matrix multiplication + test_matrix_multiplication(matrix_a, matrix_b, matrix_c, matrix_M, matrix_N, + matrix_P, core_id, num_cores); + // wait until all cores have finished + mempool_barrier(num_cores, num_cores * 4); + + return error; +} diff --git a/apps/riscv-tests/isa/Makefile b/apps/riscv-tests/isa/Makefile index c7f9b5607..542e05167 100644 --- a/apps/riscv-tests/isa/Makefile +++ b/apps/riscv-tests/isa/Makefile @@ -52,9 +52,11 @@ vpath %.S $(src_dir) $(RISCV_OBJDUMP) $< > $@ %.out: % + PATH="$(MEMPOOL_DIR)/install/riscv-isa-sim/bin:$$PATH"; \ $(RISCV_SIM) --isa=rv64gc $< 2> $@ %.out32: % + PATH="$(MEMPOOL_DIR)/install/riscv-isa-sim/bin:$$PATH"; \ $(RISCV_SIM) --isa=rv32gc $< 2> $@ define compile_template diff --git a/apps/riscv-tests/isa/macros/scalar/test_macros.h b/apps/riscv-tests/isa/macros/scalar/test_macros.h index 0eacde614..10b31a5e5 100644 --- a/apps/riscv-tests/isa/macros/scalar/test_macros.h +++ b/apps/riscv-tests/isa/macros/scalar/test_macros.h @@ -3,6 +3,7 @@ #ifndef __TEST_MACROS_SCALAR_H #define __TEST_MACROS_SCALAR_H +// clang-format off #----------------------------------------------------------------------- # Helper macros @@ -141,6 
+142,109 @@ test_ ## testnum: \ inst x0, x1, ZEXT_UIMM5(imm); \ ) +#----------------------------------------------------------------------- +# Tests for Xpulpimg instructions with 6-bit unsigned immediate operand +#----------------------------------------------------------------------- + +#define ZEXT_UIMM6(x) ((x) & 0x3F) + +#define TEST_UIMM6_OP( testnum, inst, result, val1, imm ) \ + TEST_CASE( testnum, x14, result, \ + li x1, MASK_XLEN(val1); \ + inst x14, x1, ZEXT_UIMM6(imm); \ + ) + +#define TEST_UIMM6_SRC1_EQ_DEST( testnum, inst, result, val1, imm ) \ + TEST_CASE( testnum, x1, result, \ + li x1, MASK_XLEN(val1); \ + inst x1, x1, ZEXT_UIMM6(imm); \ + ) + +#define TEST_UIMM6_DEST_BYPASS( testnum, nop_cycles, inst, result, val1, imm ) \ + TEST_CASE( testnum, x6, result, \ + li x4, 0; \ +1: li x1, MASK_XLEN(val1); \ + inst x14, x1, ZEXT_UIMM6(imm); \ + TEST_INSERT_NOPS_ ## nop_cycles \ + addi x6, x14, 0; \ + addi x4, x4, 1; \ + li x5, 2; \ + bne x4, x5, 1b \ + ) + +#define TEST_UIMM6_SRC1_BYPASS( testnum, nop_cycles, inst, result, val1, imm ) \ + TEST_CASE( testnum, x14, result, \ + li x4, 0; \ +1: li x1, MASK_XLEN(val1); \ + TEST_INSERT_NOPS_ ## nop_cycles \ + inst x14, x1, ZEXT_UIMM6(imm); \ + addi x4, x4, 1; \ + li x5, 2; \ + bne x4, x5, 1b \ + ) + +#define TEST_UIMM6_ZEROSRC1( testnum, inst, result, imm ) \ + TEST_CASE( testnum, x1, result, \ + inst x1, x0, ZEXT_UIMM6(imm); \ + ) + +#define TEST_UIMM6_ZERODEST( testnum, inst, val1, imm ) \ + TEST_CASE( testnum, x0, 0, \ + li x1, MASK_XLEN(val1); \ + inst x0, x1, ZEXT_UIMM6(imm); \ + ) + +#----------------------------------------------------------------------- +# Tests for Xpulpimg instructions with 6-bit signed immediate operand +#----------------------------------------------------------------------- +#define SEXT_IMM6(x) ((x) | (-(((x) >> 5) & 1) << 5)) + +#define TEST_SIMM6_OP( testnum, inst, result, val1, imm ) \ + TEST_CASE( testnum, x14, result, \ + li x1, MASK_XLEN(val1); \ + inst x14, x1, 
SEXT_IMM6(imm); \ + ) + +#define TEST_SIMM6_SRC1_EQ_DEST( testnum, inst, result, val1, imm ) \ + TEST_CASE( testnum, x1, result, \ + li x1, MASK_XLEN(val1); \ + inst x1, x1, SEXT_IMM6(imm); \ + ) + +#define TEST_SIMM6_DEST_BYPASS( testnum, nop_cycles, inst, result, val1, imm ) \ + TEST_CASE( testnum, x6, result, \ + li x4, 0; \ +1: li x1, MASK_XLEN(val1); \ + inst x14, x1, SEXT_IMM6(imm); \ + TEST_INSERT_NOPS_ ## nop_cycles \ + addi x6, x14, 0; \ + addi x4, x4, 1; \ + li x5, 2; \ + bne x4, x5, 1b \ + ) + +#define TEST_SIMM6_SRC1_BYPASS( testnum, nop_cycles, inst, result, val1, imm ) \ + TEST_CASE( testnum, x14, result, \ + li x4, 0; \ +1: li x1, MASK_XLEN(val1); \ + TEST_INSERT_NOPS_ ## nop_cycles \ + inst x14, x1, SEXT_IMM6(imm); \ + addi x4, x4, 1; \ + li x5, 2; \ + bne x4, x5, 1b \ + ) + +#define TEST_SIMM6_ZEROSRC1( testnum, inst, result, imm ) \ + TEST_CASE( testnum, x1, result, \ + inst x1, x0, SEXT_IMM6(imm); \ + ) + +#define TEST_SIMM6_ZERODEST( testnum, inst, val1, imm ) \ + TEST_CASE( testnum, x0, 0, \ + li x1, MASK_XLEN(val1); \ + inst x0, x1, SEXT_IMM6(imm); \ + ) + #----------------------------------------------------------------------- # Tests for an instruction with register operands #----------------------------------------------------------------------- @@ -264,7 +368,164 @@ test_ ## testnum: \ ) #----------------------------------------------------------------------- -# Test memory instructions +# Tests for instructions with 3 register operands +#----------------------------------------------------------------------- + +#define TEST_RRR_OP( testnum, inst, result, val1, val2, val3 ) \ + TEST_CASE( testnum, x14, result, \ + li x1, MASK_XLEN(val1); \ + li x2, MASK_XLEN(val2); \ + li x14, MASK_XLEN(val3); \ + inst x14, x1, x2; \ + ) + +#define TEST_RRR_SRC1_EQ_DEST( testnum, inst, result, val1, val2 ) \ + TEST_CASE( testnum, x1, result, \ + li x1, MASK_XLEN(val1); \ + li x2, MASK_XLEN(val2); \ + inst x1, x1, x2; \ + ) + +#define TEST_RRR_SRC2_EQ_DEST( 
testnum, inst, result, val1, val2 ) \ + TEST_CASE( testnum, x2, result, \ + li x1, MASK_XLEN(val1); \ + li x2, MASK_XLEN(val2); \ + inst x2, x1, x2; \ + ) + +#define TEST_RRR_SRC12_EQ_DEST( testnum, inst, result, val1 ) \ + TEST_CASE( testnum, x1, result, \ + li x1, MASK_XLEN(val1); \ + inst x1, x1, x1; \ + ) + +#define TEST_RRR_DEST_BYPASS( testnum, nop_cycles, inst, result, val1, val2, val3 ) \ + TEST_CASE( testnum, x6, result, \ + li x4, 0; \ +1: li x1, MASK_XLEN(val1); \ + li x2, MASK_XLEN(val2); \ + li x14, MASK_XLEN(val3); \ + inst x14, x1, x2; \ + TEST_INSERT_NOPS_ ## nop_cycles \ + addi x6, x14, 0; \ + addi x4, x4, 1; \ + li x5, 2; \ + bne x4, x5, 1b \ + ) + +#define TEST_RRR_SRC12_BYPASS( testnum, src1_nops, src2_nops, inst, result, val1, val2, val3 ) \ + TEST_CASE( testnum, x14, result, \ + li x4, 0; \ +1: li x14, MASK_XLEN(val3); \ + li x1, MASK_XLEN(val1); \ + TEST_INSERT_NOPS_ ## src1_nops \ + li x2, MASK_XLEN(val2); \ + TEST_INSERT_NOPS_ ## src2_nops \ + inst x14, x1, x2; \ + addi x4, x4, 1; \ + li x5, 2; \ + bne x4, x5, 1b \ + ) + +#define TEST_RRR_SRC21_BYPASS( testnum, src1_nops, src2_nops, inst, result, val1, val2, val3 ) \ + TEST_CASE( testnum, x14, result, \ + li x4, 0; \ +1: li x14, MASK_XLEN(val3); \ + li x2, MASK_XLEN(val2); \ + TEST_INSERT_NOPS_ ## src1_nops \ + li x1, MASK_XLEN(val1); \ + TEST_INSERT_NOPS_ ## src2_nops \ + inst x14, x1, x2; \ + addi x4, x4, 1; \ + li x5, 2; \ + bne x4, x5, 1b \ + ) + +# Actually here we have 3 sources; to avoid too many tests we test rD source bypass only on its own +#define TEST_RRR_SRC3_BYPASS( testnum, nop_cycles, inst, result, val1, val2, val3 ) \ + TEST_CASE( testnum, x14, result, \ + li x4, 0; \ +1: li x1, MASK_XLEN(val1); \ + li x2, MASK_XLEN(val2); \ + li x14, MASK_XLEN(val3); \ + TEST_INSERT_NOPS_ ## nop_cycles \ + inst x14, x1, x2; \ + addi x4, x4, 1; \ + li x5, 2; \ + bne x4, x5, 1b \ + ) + +#define TEST_RRR_ZEROSRC1( testnum, inst, result, val1, val2 ) \ + TEST_CASE( testnum, x2, result, \ + li 
x1, MASK_XLEN(val1); \ + li x2, MASK_XLEN(val2); \ + inst x2, x0, x1; \ + ) + +#define TEST_RRR_ZEROSRC2( testnum, inst, result, val1, val2 ) \ + TEST_CASE( testnum, x2, result, \ + li x1, MASK_XLEN(val1); \ + li x2, MASK_XLEN(val2); \ + inst x2, x1, x0; \ + ) + +#define TEST_RRR_ZEROSRC3( testnum, inst, result, val1, val2 ) \ + TEST_CASE( testnum, x14, result, \ + li x1, MASK_XLEN(val1); \ + li x2, MASK_XLEN(val2); \ + li x14, 0; \ + inst x14, x1, x2; \ + ) + +#define TEST_RRR_ZEROSRC12( testnum, inst, result, val1 ) \ + TEST_CASE( testnum, x1, result, \ + li x1, MASK_XLEN(val1); \ + inst x1, x0, x0; \ + ) + +#define TEST_RRR_ZEROSRC123( testnum, inst, result ) \ + TEST_CASE( testnum, x1, result, \ + li x1, 0; \ + inst x1, x0, x0; \ + ) + +#define TEST_RRR_ZERODEST( testnum, inst, val1, val2 ) \ + TEST_CASE( testnum, x0, 0, \ + li x1, MASK_XLEN(val1); \ + li x2, MASK_XLEN(val2); \ + inst x0, x1, x2; \ + ) + +#----------------------------------------------------------------------- +# Tests for Xpulpimg instructions with 2 register operands (rd and rs1) +# and a 6-bit unsigned immediate input +#----------------------------------------------------------------------- + +#define TEST_RR_UIMM6_OP( testnum, inst, result, val1, imm, val3 ) \ + TEST_CASE( testnum, x14, result, \ + li x1, MASK_XLEN(val1); \ + li x14, MASK_XLEN(val3); \ + inst x14, x1, ZEXT_UIMM6(imm); \ + ) + +# TODO(smazzola): finish to write macros to cover all tests types + +#----------------------------------------------------------------------- +# Tests for Xpulpimg instructions with 2 register operands (rd and rs1) +# and a 6-bit signed immediate input +#----------------------------------------------------------------------- + +#define TEST_RR_SIMM6_OP( testnum, inst, result, val1, imm, val3 ) \ + TEST_CASE( testnum, x14, result, \ + li x1, MASK_XLEN(val1); \ + li x14, MASK_XLEN(val3); \ + inst x14, x1, SEXT_IMM6(imm); \ + ) + +# TODO(smazzola): finish to write macros to cover all tests types + 
+#----------------------------------------------------------------------- +# Test memory instructions (immediate offset) #----------------------------------------------------------------------- #define TEST_LD_OP( testnum, inst, result, offset, base ) \ @@ -340,6 +601,356 @@ test_ ## testnum: \ li x5, 2; \ bne x4, x5, 1b \ +#----------------------------------------------------------------------- +# Test post-increment memory instructions (immediate offset) +#----------------------------------------------------------------------- + +#define TEST_LD_POST_OP( testnum, inst, load_result, offset, base ) \ + TEST_CASE( testnum, x14, load_result, \ + la x1, base; \ + addi x15, x1, offset; \ + inst x14, offset(x1!); \ + ) \ + bne x1, x15, fail; + +#define TEST_ST_POST_OP( testnum, store_inst, load_inst, store_result, offset, base ) \ + TEST_CASE( testnum, x14, store_result, \ + la x1, base; \ + la x15, base; \ + li x2, store_result; \ + store_inst x2, offset(x1!); \ + load_inst x14, 0(x15); \ + ) \ + addi x15, x15, offset; \ + bne x1, x15, fail; + +#define TEST_LD_POST_DEST1_BYPASS( testnum, nop_cycles, inst, load_result, offset, base ) \ +test_ ## testnum: \ + li TESTNUM, testnum; \ + li x4, 0; \ +1: la x1, base; \ + inst x14, offset(x1!); \ + TEST_INSERT_NOPS_ ## nop_cycles \ + addi x6, x14, 0; \ + li x7, load_result; \ + bne x6, x7, fail; \ + addi x4, x4, 1; \ + li x5, 2; \ + bne x4, x5, 1b; \ + +#define TEST_LD_POST_DEST2_BYPASS( testnum, nop_cycles, inst, offset, base ) \ +test_ ## testnum: \ + li TESTNUM, testnum; \ + li x4, 0; \ +1: la x1, base; \ + addi x7, x1, offset; \ + inst x14, offset(x1!); \ + TEST_INSERT_NOPS_ ## nop_cycles \ + addi x6, x1, 0; \ + bne x6, x7, fail; \ + addi x4, x4, 1; \ + li x5, 2; \ + bne x4, x5, 1b; \ + +#define TEST_LD_POST_SRC1_BYPASS( testnum, nop_cycles, inst, load_result, offset, base ) \ +test_ ## testnum: \ + li TESTNUM, testnum; \ + li x4, 0; \ +1: la x1, base; \ + TEST_INSERT_NOPS_ ## nop_cycles \ + inst x14, offset(x1!); \ + li 
x7, load_result; \ + bne x14, x7, fail; \ + la x15, base; \ + addi x15, x15, offset; \ + bne x1, x15, fail; \ + addi x4, x4, 1; \ + li x5, 2; \ + bne x4, x5, 1b \ + +#define TEST_ST_POST_SRC12_BYPASS( testnum, src1_nops, src2_nops, store_inst, load_inst, store_result, offset, base ) \ +test_ ## testnum: \ + li TESTNUM, testnum; \ + li x4, 0; \ +1: li x1, store_result; \ + TEST_INSERT_NOPS_ ## src1_nops \ + la x2, base; \ + TEST_INSERT_NOPS_ ## src2_nops \ + store_inst x1, offset(x2!); \ + la x15, base; \ + load_inst x14, 0(x15); \ + li x7, store_result; \ + bne x14, x7, fail; \ + addi x15, x15, offset; \ + bne x2, x15, fail; \ + addi x4, x4, 1; \ + li x5, 2; \ + bne x4, x5, 1b \ + +#define TEST_ST_POST_SRC21_BYPASS( testnum, src1_nops, src2_nops, store_inst, load_inst, store_result, offset, base ) \ +test_ ## testnum: \ + li TESTNUM, testnum; \ + li x4, 0; \ +1: la x2, base; \ + TEST_INSERT_NOPS_ ## src1_nops \ + li x1, store_result; \ + TEST_INSERT_NOPS_ ## src2_nops \ + store_inst x1, offset(x2!); \ + la x15, base; \ + load_inst x14, 0(x15); \ + li x7, store_result; \ + bne x14, x7, fail; \ + addi x15, x15, offset; \ + bne x2, x15, fail; \ + addi x4, x4, 1; \ + li x5, 2; \ + bne x4, x5, 1b \ + + +# You have to make sure Mem[base] != 
value, otherwise this test might wrongly succeed +#define TEST_LD_POST_DEST1_WAW( testnum, inst, value, base ) \ + TEST_CASE( testnum, x14, value, \ + la x1, base; \ + inst x14, 0(x1!); \ + li x14, value; \ + ) + +#----------------------------------------------------------------------- +# Test memory instructions (register offset) +#----------------------------------------------------------------------- + +#define TEST_LD_RR_OP( testnum, inst, load_result, offset, base ) \ + TEST_CASE( testnum, x14, load_result, \ + la x1, base; \ + li x16, offset; \ + inst x14, x16(x1); \ + ) \ + la x15, base; \ + bne x1, x15, fail; + +#define TEST_ST_RR_OP( testnum, store_inst, load_inst, store_result, offset, base ) \ + TEST_CASE( testnum, x14, store_result, \ + la x1, base; \ + la x15, base; \ + li x16, offset; \ + li x2, store_result; \ + store_inst x2, x16(x1); \ + load_inst x14, offset(x15); \ + ) \ + bne x1, x15, fail; + +#define TEST_LD_RR_DEST_BYPASS( testnum, nop_cycles, inst, load_result, offset, base ) \ +test_ ## testnum: \ + li TESTNUM, testnum; \ + li x4, 0; \ +1: la x1, base; \ + li x16, offset; \ + inst x14, x16(x1); \ + TEST_INSERT_NOPS_ ## nop_cycles \ + addi x6, x14, 0; \ + li x7, load_result; \ + bne x6, x7, fail; \ + addi x4, x4, 1; \ + li x5, 2; \ + bne x4, x5, 1b; \ + +#define TEST_LD_RR_SRC12_BYPASS( testnum, src1_nops, src2_nops, inst, load_result, offset, base ) \ +test_ ## testnum: \ + li TESTNUM, testnum; \ + li x4, 0; \ +1: la x1, base; \ + TEST_INSERT_NOPS_ ## src1_nops \ + li x16, offset; \ + TEST_INSERT_NOPS_ ## src2_nops \ + inst x14, x16(x1); \ + li x7, load_result; \ + bne x14, x7, fail; \ + addi x4, x4, 1; \ + li x5, 2; \ + bne x4, x5, 1b \ + +#define TEST_LD_RR_SRC21_BYPASS( testnum, src1_nops, src2_nops, inst, load_result, offset, base ) \ +test_ ## testnum: \ + li TESTNUM, testnum; \ + li x4, 0; \ +1: li x16, offset; \ + TEST_INSERT_NOPS_ ## src1_nops \ + la x1, base; \ + TEST_INSERT_NOPS_ ## src2_nops \ + inst x14, x16(x1); \ + li x7, 
load_result; \ + bne x14, x7, fail; \ + addi x4, x4, 1; \ + li x5, 2; \ + bne x4, x5, 1b \ + +# Actually reg-reg stores have 3 sources; to avoid too many tests we +# only test rs1 and rs3 bypass (rs2 bypass already tested by others) +#define TEST_ST_RR_SRC12_BYPASS( testnum, src1_nops, src2_nops, store_inst, load_inst, store_result, offset, base ) \ +test_ ## testnum: \ + li TESTNUM, testnum; \ + li x4, 0; \ +1: la x2, base; \ + li x1, store_result; \ + TEST_INSERT_NOPS_ ## src1_nops \ + li x16, offset; \ + TEST_INSERT_NOPS_ ## src2_nops \ + store_inst x1, x16(x2); \ + la x15, base; \ + load_inst x14, offset(x15); \ + li x7, store_result; \ + bne x14, x7, fail; \ + addi x4, x4, 1; \ + li x5, 2; \ + bne x4, x5, 1b \ + +#define TEST_ST_RR_SRC21_BYPASS( testnum, src1_nops, src2_nops, store_inst, load_inst, store_result, offset, base ) \ +test_ ## testnum: \ + li TESTNUM, testnum; \ + li x4, 0; \ +1: la x2, base; \ + li x16, offset; \ + TEST_INSERT_NOPS_ ## src1_nops \ + li x1, store_result; \ + TEST_INSERT_NOPS_ ## src2_nops \ + store_inst x1, x16(x2); \ + la x15, base; \ + load_inst x14, offset(x15); \ + li x7, store_result; \ + bne x14, x7, fail; \ + addi x4, x4, 1; \ + li x5, 2; \ + bne x4, x5, 1b \ + +# You have to make sure Mem[base] != 
value, otherwise this test might wrongly succeed +#define TEST_LD_RR_DEST1_WAW( testnum, inst, value, base ) \ + TEST_CASE( testnum, x14, value, \ + la x1, base; \ + inst x14, x0(x1); \ + li x14, value; \ + ) \ + +#----------------------------------------------------------------------- +# Test post-increment memory instructions (register offset) +#----------------------------------------------------------------------- + +#define TEST_LD_RR_POST_OP( testnum, inst, load_result, offset, base ) \ + TEST_CASE( testnum, x14, load_result, \ + la x1, base; \ + li x16, offset; \ + inst x14, x16(x1!); \ + ) \ + la x15, base; \ + addi x15, x15, offset; \ + bne x1, x15, fail; + +#define TEST_ST_RR_POST_OP( testnum, store_inst, load_inst, store_result, offset, base ) \ + TEST_CASE( testnum, x14, store_result, \ + la x1, base; \ + la x15, base; \ + li x16, offset; \ + li x2, store_result; \ + store_inst x2, x16(x1!); \ + load_inst x14, 0(x15); \ + ) \ + addi x15, x15, offset; \ + bne x1, x15, fail; + +#define TEST_LD_RR_POST_DEST_BYPASS( testnum, nop_cycles, inst, load_result, offset, base ) \ +test_ ## testnum: \ + li TESTNUM, testnum; \ + li x4, 0; \ +1: la x1, base; \ + li x16, offset; \ + inst x14, x16(x1!); \ + TEST_INSERT_NOPS_ ## nop_cycles \ + addi x6, x14, 0; \ + li x7, load_result; \ + bne x6, x7, fail; \ + addi x4, x4, 1; \ + li x5, 2; \ + bne x4, x5, 1b; \ + +#define TEST_LD_RR_POST_SRC12_BYPASS( testnum, src1_nops, src2_nops, inst, load_result, offset, base ) \ +test_ ## testnum: \ + li TESTNUM, testnum; \ + li x4, 0; \ +1: la x1, base; \ + TEST_INSERT_NOPS_ ## src1_nops \ + li x16, offset; \ + TEST_INSERT_NOPS_ ## src2_nops \ + inst x14, x16(x1!); \ + li x7, load_result; \ + bne x14, x7, fail; \ + addi x4, x4, 1; \ + li x5, 2; \ + bne x4, x5, 1b \ + +#define TEST_LD_RR_POST_SRC21_BYPASS( testnum, src1_nops, src2_nops, inst, load_result, offset, base ) \ +test_ ## testnum: \ + li TESTNUM, testnum; \ + li x4, 0; \ +1: li x16, offset; \ + TEST_INSERT_NOPS_ ## 
src1_nops \ + la x1, base; \ + TEST_INSERT_NOPS_ ## src2_nops \ + inst x14, x16(x1!); \ + li x7, load_result; \ + bne x14, x7, fail; \ + addi x4, x4, 1; \ + li x5, 2; \ + bne x4, x5, 1b \ + +# Actually reg-reg stores have 3 sources; to avoid too many tests we +# only test rs1 and rs3 bypass (rs2 bypass already tested by others) +#define TEST_ST_RR_POST_SRC12_BYPASS( testnum, src1_nops, src2_nops, store_inst, load_inst, store_result, offset, base ) \ +test_ ## testnum: \ + li TESTNUM, testnum; \ + li x4, 0; \ +1: la x2, base; \ + li x1, store_result; \ + TEST_INSERT_NOPS_ ## src1_nops \ + li x16, offset; \ + TEST_INSERT_NOPS_ ## src2_nops \ + store_inst x1, x16(x2!); \ + la x15, base; \ + load_inst x14, 0 (x15); \ + li x7, store_result; \ + bne x14, x7, fail; \ + addi x4, x4, 1; \ + li x5, 2; \ + bne x4, x5, 1b \ + +#define TEST_ST_RR_POST_SRC21_BYPASS( testnum, src1_nops, src2_nops, store_inst, load_inst, store_result, offset, base ) \ +test_ ## testnum: \ + li TESTNUM, testnum; \ + li x4, 0; \ +1: la x2, base; \ + li x16, offset; \ + TEST_INSERT_NOPS_ ## src1_nops \ + li x1, store_result; \ + TEST_INSERT_NOPS_ ## src2_nops \ + store_inst x1, x16(x2!); \ + la x15, base; \ + load_inst x14, 0(x15); \ + li x7, store_result; \ + bne x14, x7, fail; \ + addi x4, x4, 1; \ + li x5, 2; \ + bne x4, x5, 1b \ + +# You have to make sure Mem[base] != 
value, otherwise this test might wrongly succeed +#define TEST_LD_RR_POST_DEST1_WAW( testnum, inst, value, base ) \ + TEST_CASE( testnum, x14, value, \ + la x1, base; \ + inst x14, x0(x1!); \ + li x14, value; \ + ) \ + +#----------------------------------------------------------------------- +# Test branch instructions +#----------------------------------------------------------------------- + #define TEST_BR2_OP_TAKEN( testnum, inst, val1, val2 ) \ test_ ## testnum: \ li TESTNUM, testnum; \ @@ -736,4 +1347,6 @@ pass: \ #define TEST_DATA +// clang-format on + #endif diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/Makefrag b/apps/riscv-tests/isa/rv32uxpulpimg/Makefrag index f34ab11dd..8bf2c6741 100644 --- a/apps/riscv-tests/isa/rv32uxpulpimg/Makefrag +++ b/apps/riscv-tests/isa/rv32uxpulpimg/Makefrag @@ -3,6 +3,12 @@ #----------------------------------------------------------------------- rv32uxpulpimg_sc_tests = \ + p_lb_irpost p_lbu_irpost p_lh_irpost p_lhu_irpost p_lw_irpost \ + p_lb_rrpost p_lbu_rrpost p_lh_rrpost p_lhu_rrpost p_lw_rrpost \ + p_lb_rr p_lbu_rr p_lh_rr p_lhu_rr p_lw_rr \ + p_sb_irpost p_sh_irpost p_sw_irpost \ + p_sb_rrpost p_sh_rrpost p_sw_rrpost \ + p_sb_rr p_sh_rr p_sw_rr \ p_abs \ p_slet p_sletu \ p_min p_minu \ @@ -12,6 +18,28 @@ rv32uxpulpimg_sc_tests = \ p_clip p_clipu \ p_clipr p_clipur \ p_beqimm p_bneimm \ + p_mac p_msu \ + pv_add \ + pv_sub \ + pv_avg pv_avgu \ + pv_min pv_minu \ + pv_max pv_maxu \ + pv_srl \ + pv_sra \ + pv_sll \ + pv_or \ + pv_xor \ + pv_and \ + pv_abs \ + pv_extract pv_extractu \ + pv_insert \ + pv_dotup \ + pv_dotusp \ + pv_dotsp \ + pv_sdotup \ + pv_sdotusp \ + pv_sdotsp \ + pv_shuffle2 \ rv32uxpulpimg_p_tests = $(addprefix rv32uxpulpimg-p-, $(rv32uxpulpimg_sc_tests)) rv32uxpulpimg_v_tests = $(addprefix rv32uxpulpimg-v-, $(rv32uxpulpimg_sc_tests)) diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/p_lb_irpost.S b/apps/riscv-tests/isa/rv32uxpulpimg/p_lb_irpost.S new file mode 100644 index 000000000..2322ca2d6 --- 
/dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/p_lb_irpost.S @@ -0,0 +1,70 @@ +# See LICENSE for license details. + +#***************************************************************************** +# p_lb_irpost.S +#----------------------------------------------------------------------------- +# +# Test p.lb (immediate-register post-increment) instruction. +# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Basic tests + #------------------------------------------------------------- + + # Positive offset + TEST_LD_POST_OP( 2, p.lb, 0xffffffff, 0, tdat ) + TEST_LD_POST_OP( 3, p.lb, 0xffffffff, 1, tdat ) + TEST_LD_POST_OP( 4, p.lb, 0x0000000f, 2, tdat3 ) + TEST_LD_POST_OP( 5, p.lb, 0x0000000f, 3, tdat3 ) + + # Negative offset + TEST_LD_POST_OP( 6, p.lb, 0x00000000, -3, tdat1 ) + TEST_LD_POST_OP( 7, p.lb, 0x00000000, -2, tdat1 ) + TEST_LD_POST_OP( 8, p.lb, 0xfffffff0, -1, tdat2 ) + TEST_LD_POST_OP( 9, p.lb, 0xfffffff0, 0, tdat2 ) + + #------------------------------------------------------------- + # Bypassing tests + #------------------------------------------------------------- + + TEST_LD_POST_DEST1_BYPASS( 10, 0, p.lb, 0xffffffff, 1, tdat0 ) + TEST_LD_POST_DEST1_BYPASS( 11, 1, p.lb, 0x00000000, 1, tdat1 ) + TEST_LD_POST_DEST1_BYPASS( 12, 2, p.lb, 0xfffffff0, 1, tdat2 ) + + TEST_LD_POST_DEST2_BYPASS( 13, 0, p.lb, 2, tdat0 ) + TEST_LD_POST_DEST2_BYPASS( 14, 1, p.lb, 1, tdat1 ) + TEST_LD_POST_DEST2_BYPASS( 15, 2, p.lb, -3, tdat3 ) + + TEST_LD_POST_SRC1_BYPASS( 16, 0, p.lb, 0xffffffff, 1, tdat0 ) + TEST_LD_POST_SRC1_BYPASS( 17, 1, p.lb, 0x0000000f, -1, tdat3 ) + TEST_LD_POST_SRC1_BYPASS( 18, 2, p.lb, 0xfffffff0, 1, tdat2 ) + + #------------------------------------------------------------- + # Test write-after-write hazard + #------------------------------------------------------------- + + TEST_LD_POST_DEST1_WAW( 19, p.lb, 25, tdat ) + 
TEST_LD_POST_DEST1_WAW( 20, p.lb, -76, tdat ) + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +tdat: +tdat0: .byte 0xff +tdat1: .byte 0x00 +tdat2: .byte 0xf0 +tdat3: .byte 0x0f + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/p_lb_rr.S b/apps/riscv-tests/isa/rv32uxpulpimg/p_lb_rr.S new file mode 100644 index 000000000..6938e133d --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/p_lb_rr.S @@ -0,0 +1,76 @@ +# See LICENSE for license details. + +#***************************************************************************** +# p_lb_rr.S +#----------------------------------------------------------------------------- +# +# Test p.lb (register-register) instruction. +# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Basic tests + #------------------------------------------------------------- + + # Positive offset + TEST_LD_RR_OP( 2, p.lb, 0xffffffff, 0, tdat ) + TEST_LD_RR_OP( 3, p.lb, 0x00000000, 1, tdat ) + TEST_LD_RR_OP( 4, p.lb, 0xfffffff0, 2, tdat ) + TEST_LD_RR_OP( 5, p.lb, 0x0000000f, 3, tdat ) + + # Negative offset + TEST_LD_RR_OP( 6, p.lb, 0xffffffff, -3, tdat3 ) + TEST_LD_RR_OP( 7, p.lb, 0x00000000, -2, tdat3 ) + TEST_LD_RR_OP( 8, p.lb, 0xfffffff0, -1, tdat3 ) + TEST_LD_RR_OP( 9, p.lb, 0x0000000f, 0, tdat3 ) + + #------------------------------------------------------------- + # Bypassing tests + #------------------------------------------------------------- + + TEST_LD_RR_DEST_BYPASS( 10, 0, p.lb, 0x00000000, 1, tdat0 ) + TEST_LD_RR_DEST_BYPASS( 11, 1, p.lb, 0xfffffff0, 1, tdat1 ) + TEST_LD_RR_DEST_BYPASS( 12, 2, p.lb, 0x0000000f, 1, tdat2 ) + + TEST_LD_RR_SRC12_BYPASS( 13, 0, 0, p.lb, 0x00000000, 1, tdat0 ) + TEST_LD_RR_SRC12_BYPASS( 14, 0, 1, p.lb, 0xfffffff0, -1, tdat3 ) + TEST_LD_RR_SRC12_BYPASS( 15, 0, 2, p.lb, 0x0000000f, 1, tdat2 ) + TEST_LD_RR_SRC12_BYPASS( 16, 1, 0, p.lb, 0x00000000, 1, 
tdat0 ) + TEST_LD_RR_SRC12_BYPASS( 17, 1, 1, p.lb, 0xfffffff0, -1, tdat3 ) + TEST_LD_RR_SRC12_BYPASS( 18, 2, 0, p.lb, 0x0000000f, 1, tdat2 ) + + TEST_LD_RR_SRC21_BYPASS( 19, 0, 0, p.lb, 0x00000000, 1, tdat0 ) + TEST_LD_RR_SRC21_BYPASS( 20, 0, 1, p.lb, 0xfffffff0, -1, tdat3 ) + TEST_LD_RR_SRC21_BYPASS( 21, 0, 2, p.lb, 0x0000000f, 1, tdat2 ) + TEST_LD_RR_SRC21_BYPASS( 22, 1, 0, p.lb, 0x00000000, 1, tdat0 ) + TEST_LD_RR_SRC21_BYPASS( 23, 1, 1, p.lb, 0xfffffff0, -1, tdat3 ) + TEST_LD_RR_SRC21_BYPASS( 24, 2, 0, p.lb, 0x0000000f, 1, tdat2 ) + + #------------------------------------------------------------- + # Test write-after-write hazard + #------------------------------------------------------------- + + TEST_LD_RR_DEST1_WAW( 25, p.lb, 25, tdat ) + TEST_LD_RR_DEST1_WAW( 26, p.lb, -76, tdat ) + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +tdat: +tdat0: .byte 0xff +tdat1: .byte 0x00 +tdat2: .byte 0xf0 +tdat3: .byte 0x0f + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/p_lb_rrpost.S b/apps/riscv-tests/isa/rv32uxpulpimg/p_lb_rrpost.S new file mode 100644 index 000000000..afa33f659 --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/p_lb_rrpost.S @@ -0,0 +1,76 @@ +# See LICENSE for license details. + +#***************************************************************************** +# p_lb_rrpost.S +#----------------------------------------------------------------------------- +# +# Test p.lb (register-register post-increment) instruction. 
+# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Basic tests + #------------------------------------------------------------- + + # Positive offset + TEST_LD_RR_POST_OP( 2, p.lb, 0xffffffff, 0, tdat ) + TEST_LD_RR_POST_OP( 3, p.lb, 0xffffffff, 1, tdat ) + TEST_LD_RR_POST_OP( 4, p.lb, 0x0000000f, 2, tdat3 ) + TEST_LD_RR_POST_OP( 5, p.lb, 0x0000000f, 3, tdat3 ) + + # Negative offset + TEST_LD_RR_POST_OP( 6, p.lb, 0x00000000, -3, tdat1 ) + TEST_LD_RR_POST_OP( 7, p.lb, 0x00000000, -2, tdat1 ) + TEST_LD_RR_POST_OP( 8, p.lb, 0xfffffff0, -1, tdat2 ) + TEST_LD_RR_POST_OP( 9, p.lb, 0xfffffff0, 0, tdat2 ) + + #------------------------------------------------------------- + # Bypassing tests + #------------------------------------------------------------- + + TEST_LD_RR_POST_DEST_BYPASS( 10, 0, p.lb, 0xffffffff, 1, tdat0 ) + TEST_LD_RR_POST_DEST_BYPASS( 11, 1, p.lb, 0x00000000, 1, tdat1 ) + TEST_LD_RR_POST_DEST_BYPASS( 12, 2, p.lb, 0xfffffff0, 1, tdat2 ) + + TEST_LD_RR_POST_SRC12_BYPASS( 13, 0, 0, p.lb, 0xffffffff, 1, tdat0 ) + TEST_LD_RR_POST_SRC12_BYPASS( 14, 0, 1, p.lb, 0x0000000f, -1, tdat3 ) + TEST_LD_RR_POST_SRC12_BYPASS( 15, 0, 2, p.lb, 0xfffffff0, 1, tdat2 ) + TEST_LD_RR_POST_SRC12_BYPASS( 16, 1, 0, p.lb, 0xffffffff, 1, tdat0 ) + TEST_LD_RR_POST_SRC12_BYPASS( 17, 1, 1, p.lb, 0x0000000f, -1, tdat3 ) + TEST_LD_RR_POST_SRC12_BYPASS( 18, 2, 0, p.lb, 0xfffffff0, 1, tdat2 ) + + TEST_LD_RR_POST_SRC21_BYPASS( 19, 0, 0, p.lb, 0xffffffff, 1, tdat0 ) + TEST_LD_RR_POST_SRC21_BYPASS( 20, 0, 1, p.lb, 0x0000000f, -1, tdat3 ) + TEST_LD_RR_POST_SRC21_BYPASS( 21, 0, 2, p.lb, 0xfffffff0, 1, tdat2 ) + TEST_LD_RR_POST_SRC21_BYPASS( 22, 1, 0, p.lb, 0xffffffff, 1, tdat0 ) + TEST_LD_RR_POST_SRC21_BYPASS( 23, 1, 1, p.lb, 0x0000000f, -1, tdat3 ) + TEST_LD_RR_POST_SRC21_BYPASS( 24, 2, 0, p.lb, 0xfffffff0, 1, tdat2 ) + + #------------------------------------------------------------- + 
# Test write-after-write hazard + #------------------------------------------------------------- + + TEST_LD_RR_POST_DEST1_WAW( 25, p.lb, 25, tdat ) + TEST_LD_RR_POST_DEST1_WAW( 26, p.lb, -76, tdat ) + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +tdat: +tdat0: .byte 0xff +tdat1: .byte 0x00 +tdat2: .byte 0xf0 +tdat3: .byte 0x0f + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/p_lbu_irpost.S b/apps/riscv-tests/isa/rv32uxpulpimg/p_lbu_irpost.S new file mode 100644 index 000000000..a8c72a965 --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/p_lbu_irpost.S @@ -0,0 +1,70 @@ +# See LICENSE for license details. + +#***************************************************************************** +# p_lbu_irpost.S +#----------------------------------------------------------------------------- +# +# Test p.lbu (immediate-register post-increment) instruction. +# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Basic tests + #------------------------------------------------------------- + + # Positive offset + TEST_LD_POST_OP( 2, p.lbu, 0x000000ff, 0, tdat ) + TEST_LD_POST_OP( 3, p.lbu, 0x000000ff, 1, tdat ) + TEST_LD_POST_OP( 4, p.lbu, 0x0000000f, 2, tdat3 ) + TEST_LD_POST_OP( 5, p.lbu, 0x0000000f, 3, tdat3 ) + + # Negative offset + TEST_LD_POST_OP( 6, p.lbu, 0x00000000, -3, tdat1 ) + TEST_LD_POST_OP( 7, p.lbu, 0x00000000, -2, tdat1 ) + TEST_LD_POST_OP( 8, p.lbu, 0x000000f0, -1, tdat2 ) + TEST_LD_POST_OP( 9, p.lbu, 0x000000f0, 0, tdat2 ) + + #------------------------------------------------------------- + # Bypassing tests + #------------------------------------------------------------- + + TEST_LD_POST_DEST1_BYPASS( 10, 0, p.lbu, 0x000000ff, 1, tdat0 ) + TEST_LD_POST_DEST1_BYPASS( 11, 1, p.lbu, 0x00000000, 1, tdat1 ) + TEST_LD_POST_DEST1_BYPASS( 12, 2, p.lbu, 0x000000f0, 1, tdat2 ) + + TEST_LD_POST_DEST2_BYPASS( 
13, 0, p.lbu, 2, tdat0 ) + TEST_LD_POST_DEST2_BYPASS( 14, 1, p.lbu, 1, tdat1 ) + TEST_LD_POST_DEST2_BYPASS( 15, 2, p.lbu, -3, tdat3 ) + + TEST_LD_POST_SRC1_BYPASS( 16, 0, p.lbu, 0x000000ff, 1, tdat0 ) + TEST_LD_POST_SRC1_BYPASS( 17, 1, p.lbu, 0x0000000f, -1, tdat3 ) + TEST_LD_POST_SRC1_BYPASS( 18, 2, p.lbu, 0x000000f0, 1, tdat2 ) + + #------------------------------------------------------------- + # Test write-after-write hazard + #------------------------------------------------------------- + + TEST_LD_POST_DEST1_WAW( 19, p.lbu, 25, tdat ) + TEST_LD_POST_DEST1_WAW( 20, p.lbu, -76, tdat ) + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +tdat: +tdat0: .byte 0xff +tdat1: .byte 0x00 +tdat2: .byte 0xf0 +tdat3: .byte 0x0f + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/p_lbu_rr.S b/apps/riscv-tests/isa/rv32uxpulpimg/p_lbu_rr.S new file mode 100644 index 000000000..ba0b33235 --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/p_lbu_rr.S @@ -0,0 +1,77 @@ +# See LICENSE for license details. + +#***************************************************************************** +# p_lbu_rr.S +#----------------------------------------------------------------------------- +# +# Test p.lbu (register-register) instruction. 
+# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Basic tests + #------------------------------------------------------------- + + # Positive offset + TEST_LD_RR_OP( 2, p.lbu, 0x000000ff, 0, tdat ) + TEST_LD_RR_OP( 3, p.lbu, 0x00000000, 1, tdat ) + TEST_LD_RR_OP( 4, p.lbu, 0x000000f0, 2, tdat ) + TEST_LD_RR_OP( 5, p.lbu, 0x0000000f, 3, tdat ) + + # Negative offset + TEST_LD_RR_OP( 6, p.lbu, 0x000000ff, -3, tdat3 ) + TEST_LD_RR_OP( 7, p.lbu, 0x00000000, -2, tdat3 ) + TEST_LD_RR_OP( 8, p.lbu, 0x000000f0, -1, tdat3 ) + TEST_LD_RR_OP( 9, p.lbu, 0x0000000f, 0, tdat3 ) + + #------------------------------------------------------------- + # Bypassing tests + #------------------------------------------------------------- + + TEST_LD_RR_DEST_BYPASS( 10, 0, p.lbu, 0x00000000, 1, tdat0 ) + TEST_LD_RR_DEST_BYPASS( 11, 1, p.lbu, 0x000000f0, 1, tdat1 ) + TEST_LD_RR_DEST_BYPASS( 12, 2, p.lbu, 0x0000000f, 1, tdat2 ) + + TEST_LD_RR_SRC12_BYPASS( 13, 0, 0, p.lbu, 0x00000000, 1, tdat0 ) + TEST_LD_RR_SRC12_BYPASS( 14, 0, 1, p.lbu, 0x000000f0, -1, tdat3 ) + TEST_LD_RR_SRC12_BYPASS( 15, 0, 2, p.lbu, 0x0000000f, 1, tdat2 ) + TEST_LD_RR_SRC12_BYPASS( 16, 1, 0, p.lbu, 0x00000000, 1, tdat0 ) + TEST_LD_RR_SRC12_BYPASS( 17, 1, 1, p.lbu, 0x000000f0, -1, tdat3 ) + TEST_LD_RR_SRC12_BYPASS( 18, 2, 0, p.lbu, 0x0000000f, 1, tdat2 ) + + TEST_LD_RR_SRC21_BYPASS( 19, 0, 0, p.lbu, 0x00000000, 1, tdat0 ) + TEST_LD_RR_SRC21_BYPASS( 20, 0, 1, p.lbu, 0x000000f0, -1, tdat3 ) + TEST_LD_RR_SRC21_BYPASS( 21, 0, 2, p.lbu, 0x0000000f, 1, tdat2 ) + TEST_LD_RR_SRC21_BYPASS( 22, 1, 0, p.lbu, 0x00000000, 1, tdat0 ) + TEST_LD_RR_SRC21_BYPASS( 23, 1, 1, p.lbu, 0x000000f0, -1, tdat3 ) + TEST_LD_RR_SRC21_BYPASS( 24, 2, 0, p.lbu, 0x0000000f, 1, tdat2 ) + + #------------------------------------------------------------- + # Test write-after-write hazard + 
#------------------------------------------------------------- + + TEST_LD_RR_DEST1_WAW( 25, p.lbu, 25, tdat ) + TEST_LD_RR_DEST1_WAW( 26, p.lbu, -76, tdat ) + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +tdat: +tdat0: .byte 0xff +tdat1: .byte 0x00 +tdat2: .byte 0xf0 +tdat3: .byte 0x0f + +RVTEST_DATA_END + diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/p_lbu_rrpost.S b/apps/riscv-tests/isa/rv32uxpulpimg/p_lbu_rrpost.S new file mode 100644 index 000000000..9582ca6f7 --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/p_lbu_rrpost.S @@ -0,0 +1,76 @@ +# See LICENSE for license details. + +#***************************************************************************** +# p_lbu_rrpost.S +#----------------------------------------------------------------------------- +# +# Test p.lbu (register-register post-increment) instruction. +# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Basic tests + #------------------------------------------------------------- + + # Positive offset + TEST_LD_RR_POST_OP( 2, p.lbu, 0x000000ff, 0, tdat ) + TEST_LD_RR_POST_OP( 3, p.lbu, 0x000000ff, 1, tdat ) + TEST_LD_RR_POST_OP( 4, p.lbu, 0x0000000f, 2, tdat3 ) + TEST_LD_RR_POST_OP( 5, p.lbu, 0x0000000f, 3, tdat3 ) + + # Negative offset + TEST_LD_RR_POST_OP( 6, p.lbu, 0x00000000, -3, tdat1 ) + TEST_LD_RR_POST_OP( 7, p.lbu, 0x00000000, -2, tdat1 ) + TEST_LD_RR_POST_OP( 8, p.lbu, 0x000000f0, -1, tdat2 ) + TEST_LD_RR_POST_OP( 9, p.lbu, 0x000000f0, 0, tdat2 ) + + #------------------------------------------------------------- + # Bypassing tests + #------------------------------------------------------------- + + TEST_LD_RR_POST_DEST_BYPASS( 10, 0, p.lbu, 0x000000ff, 1, tdat0 ) + TEST_LD_RR_POST_DEST_BYPASS( 11, 1, p.lbu, 0x00000000, 1, tdat1 ) + TEST_LD_RR_POST_DEST_BYPASS( 12, 2, p.lbu, 0x000000f0, 1, tdat2 ) + + TEST_LD_RR_POST_SRC12_BYPASS( 13, 0, 
0, p.lbu, 0x000000ff, 1, tdat0 ) + TEST_LD_RR_POST_SRC12_BYPASS( 14, 0, 1, p.lbu, 0x0000000f, -1, tdat3 ) + TEST_LD_RR_POST_SRC12_BYPASS( 15, 0, 2, p.lbu, 0x000000f0, 1, tdat2 ) + TEST_LD_RR_POST_SRC12_BYPASS( 16, 1, 0, p.lbu, 0x000000ff, 1, tdat0 ) + TEST_LD_RR_POST_SRC12_BYPASS( 17, 1, 1, p.lbu, 0x0000000f, -1, tdat3 ) + TEST_LD_RR_POST_SRC12_BYPASS( 18, 2, 0, p.lbu, 0x000000f0, 1, tdat2 ) + + TEST_LD_RR_POST_SRC21_BYPASS( 19, 0, 0, p.lbu, 0x000000ff, 1, tdat0 ) + TEST_LD_RR_POST_SRC21_BYPASS( 20, 0, 1, p.lbu, 0x0000000f, -1, tdat3 ) + TEST_LD_RR_POST_SRC21_BYPASS( 21, 0, 2, p.lbu, 0x000000f0, 1, tdat2 ) + TEST_LD_RR_POST_SRC21_BYPASS( 22, 1, 0, p.lbu, 0x000000ff, 1, tdat0 ) + TEST_LD_RR_POST_SRC21_BYPASS( 23, 1, 1, p.lbu, 0x0000000f, -1, tdat3 ) + TEST_LD_RR_POST_SRC21_BYPASS( 24, 2, 0, p.lbu, 0x000000f0, 1, tdat2 ) + + #------------------------------------------------------------- + # Test write-after-write hazard + #------------------------------------------------------------- + + TEST_LD_RR_POST_DEST1_WAW( 25, p.lbu, 25, tdat ) + TEST_LD_RR_POST_DEST1_WAW( 26, p.lbu, -76, tdat ) + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +tdat: +tdat0: .byte 0xff +tdat1: .byte 0x00 +tdat2: .byte 0xf0 +tdat3: .byte 0x0f + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/p_lh_irpost.S b/apps/riscv-tests/isa/rv32uxpulpimg/p_lh_irpost.S new file mode 100644 index 000000000..ca376a2a6 --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/p_lh_irpost.S @@ -0,0 +1,70 @@ +# See LICENSE for license details. + +#***************************************************************************** +# p_lh_irpost.S +#----------------------------------------------------------------------------- +# +# Test p.lh (immediate-register post-increment) instruction. 
+# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Basic tests + #------------------------------------------------------------- + + # Positive offset + TEST_LD_POST_OP( 2, p.lh, 0x000000ff, 0, tdat ) + TEST_LD_POST_OP( 3, p.lh, 0x000000ff, 2, tdat ) + TEST_LD_POST_OP( 4, p.lh, 0xfffff00f, 4, tdat3 ) + TEST_LD_POST_OP( 5, p.lh, 0xfffff00f, 6, tdat3 ) + + # Negative offset + TEST_LD_POST_OP( 6, p.lh, 0xffffff00, -6, tdat1 ) + TEST_LD_POST_OP( 7, p.lh, 0xffffff00, -4, tdat1 ) + TEST_LD_POST_OP( 8, p.lh, 0x00000ff0, -2, tdat2 ) + TEST_LD_POST_OP( 9, p.lh, 0x00000ff0, 0, tdat2 ) + + #------------------------------------------------------------- + # Bypassing tests + #------------------------------------------------------------- + + TEST_LD_POST_DEST1_BYPASS( 10, 0, p.lh, 0x000000ff, 2, tdat0 ) + TEST_LD_POST_DEST1_BYPASS( 11, 1, p.lh, 0xffffff00, 2, tdat1 ) + TEST_LD_POST_DEST1_BYPASS( 12, 2, p.lh, 0x00000ff0, 2, tdat2 ) + + TEST_LD_POST_DEST2_BYPASS( 13, 0, p.lh, 4, tdat0 ) + TEST_LD_POST_DEST2_BYPASS( 14, 1, p.lh, 2, tdat1 ) + TEST_LD_POST_DEST2_BYPASS( 15, 2, p.lh, -6, tdat3 ) + + TEST_LD_POST_SRC1_BYPASS( 16, 0, p.lh, 0x000000ff, 2, tdat0 ) + TEST_LD_POST_SRC1_BYPASS( 17, 1, p.lh, 0xfffff00f, -2, tdat3 ) + TEST_LD_POST_SRC1_BYPASS( 18, 2, p.lh, 0x00000ff0, 2, tdat2 ) + + #------------------------------------------------------------- + # Test write-after-write hazard + #------------------------------------------------------------- + + TEST_LD_POST_DEST1_WAW( 19, p.lh, 25, tdat ) + TEST_LD_POST_DEST1_WAW( 20, p.lh, -76, tdat ) + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +tdat: +tdat0: .half 0x00ff +tdat1: .half 0xff00 +tdat2: .half 0x0ff0 +tdat3: .half 0xf00f + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/p_lh_rr.S b/apps/riscv-tests/isa/rv32uxpulpimg/p_lh_rr.S new file mode 100644 index 
000000000..ebc5aabd9 --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/p_lh_rr.S @@ -0,0 +1,76 @@ +# See LICENSE for license details. + +#***************************************************************************** +# p_lh_rr.S +#----------------------------------------------------------------------------- +# +# Test p.lh (register-register) instruction. +# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Basic tests + #------------------------------------------------------------- + + # Positive offset + TEST_LD_RR_OP( 2, p.lh, 0x000000ff, 0, tdat ) + TEST_LD_RR_OP( 3, p.lh, 0xffffff00, 2, tdat ) + TEST_LD_RR_OP( 4, p.lh, 0x00000ff0, 4, tdat ) + TEST_LD_RR_OP( 5, p.lh, 0xfffff00f, 6, tdat ) + + # Negative offset + TEST_LD_RR_OP( 6, p.lh, 0x000000ff, -6, tdat3 ) + TEST_LD_RR_OP( 7, p.lh, 0xffffff00, -4, tdat3 ) + TEST_LD_RR_OP( 8, p.lh, 0x00000ff0, -2, tdat3 ) + TEST_LD_RR_OP( 9, p.lh, 0xfffff00f, 0, tdat3 ) + + #------------------------------------------------------------- + # Bypassing tests + #------------------------------------------------------------- + + TEST_LD_RR_DEST_BYPASS( 10, 0, p.lh, 0xffffff00, 2, tdat0 ) + TEST_LD_RR_DEST_BYPASS( 11, 1, p.lh, 0x00000ff0, 2, tdat1 ) + TEST_LD_RR_DEST_BYPASS( 12, 2, p.lh, 0xfffff00f, 2, tdat2 ) + + TEST_LD_RR_SRC12_BYPASS( 13, 0, 0, p.lh, 0xffffff00, 2, tdat0 ) + TEST_LD_RR_SRC12_BYPASS( 14, 0, 1, p.lh, 0x00000ff0, -2, tdat3 ) + TEST_LD_RR_SRC12_BYPASS( 15, 0, 2, p.lh, 0xfffff00f, 2, tdat2 ) + TEST_LD_RR_SRC12_BYPASS( 16, 1, 0, p.lh, 0xffffff00, 2, tdat0 ) + TEST_LD_RR_SRC12_BYPASS( 17, 1, 1, p.lh, 0x00000ff0, -2, tdat3 ) + TEST_LD_RR_SRC12_BYPASS( 18, 2, 0, p.lh, 0xfffff00f, 2, tdat2 ) + + TEST_LD_RR_SRC21_BYPASS( 19, 0, 0, p.lh, 0xffffff00, 2, tdat0 ) + TEST_LD_RR_SRC21_BYPASS( 20, 0, 1, p.lh, 0x00000ff0, -2, tdat3 ) + TEST_LD_RR_SRC21_BYPASS( 21, 0, 2, p.lh, 0xfffff00f, 2, tdat2 ) + 
TEST_LD_RR_SRC21_BYPASS( 22, 1, 0, p.lh, 0xffffff00, 2, tdat0 ) + TEST_LD_RR_SRC21_BYPASS( 23, 1, 1, p.lh, 0x00000ff0, -2, tdat3 ) + TEST_LD_RR_SRC21_BYPASS( 24, 2, 0, p.lh, 0xfffff00f, 2, tdat2 ) + + #------------------------------------------------------------- + # Test write-after-write hazard + #------------------------------------------------------------- + + TEST_LD_RR_DEST1_WAW( 25, p.lh, 25, tdat ) + TEST_LD_RR_DEST1_WAW( 26, p.lh, -76, tdat ) + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +tdat: +tdat0: .half 0x00ff +tdat1: .half 0xff00 +tdat2: .half 0x0ff0 +tdat3: .half 0xf00f + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/p_lh_rrpost.S b/apps/riscv-tests/isa/rv32uxpulpimg/p_lh_rrpost.S new file mode 100644 index 000000000..64a6281aa --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/p_lh_rrpost.S @@ -0,0 +1,76 @@ +# See LICENSE for license details. + +#***************************************************************************** +# p_lh_rrpost.S +#----------------------------------------------------------------------------- +# +# Test p.lh (register-register post-increment) instruction. 
+# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Basic tests + #------------------------------------------------------------- + + # Positive offset + TEST_LD_RR_POST_OP( 2, p.lh, 0x000000ff, 0, tdat ) + TEST_LD_RR_POST_OP( 3, p.lh, 0x000000ff, 2, tdat ) + TEST_LD_RR_POST_OP( 4, p.lh, 0xffffff00, 4, tdat1 ) + TEST_LD_RR_POST_OP( 5, p.lh, 0xffffff00, 6, tdat1 ) + + # Negative offset + TEST_LD_RR_POST_OP( 6, p.lh, 0x00000ff0, -6, tdat2 ) + TEST_LD_RR_POST_OP( 7, p.lh, 0x00000ff0, -4, tdat2 ) + TEST_LD_RR_POST_OP( 8, p.lh, 0xfffff00f, -2, tdat3 ) + TEST_LD_RR_POST_OP( 9, p.lh, 0xfffff00f, 0, tdat3 ) + + #------------------------------------------------------------- + # Bypassing tests + #------------------------------------------------------------- + + TEST_LD_RR_POST_DEST_BYPASS( 10, 0, p.lh, 0x000000ff, 2, tdat0 ) + TEST_LD_RR_POST_DEST_BYPASS( 11, 1, p.lh, 0xffffff00, 2, tdat1 ) + TEST_LD_RR_POST_DEST_BYPASS( 12, 2, p.lh, 0x00000ff0, 2, tdat2 ) + + TEST_LD_RR_POST_SRC12_BYPASS( 13, 0, 0, p.lh, 0x000000ff, 2, tdat0 ) + TEST_LD_RR_POST_SRC12_BYPASS( 14, 0, 1, p.lh, 0xfffff00f, -2, tdat3 ) + TEST_LD_RR_POST_SRC12_BYPASS( 15, 0, 2, p.lh, 0x00000ff0, 2, tdat2 ) + TEST_LD_RR_POST_SRC12_BYPASS( 16, 1, 0, p.lh, 0x000000ff, 2, tdat0 ) + TEST_LD_RR_POST_SRC12_BYPASS( 17, 1, 1, p.lh, 0xfffff00f, -2, tdat3 ) + TEST_LD_RR_POST_SRC12_BYPASS( 18, 2, 0, p.lh, 0x00000ff0, 2, tdat2 ) + + TEST_LD_RR_POST_SRC21_BYPASS( 19, 0, 0, p.lh, 0x000000ff, 2, tdat0 ) + TEST_LD_RR_POST_SRC21_BYPASS( 20, 0, 1, p.lh, 0xfffff00f, -2, tdat3 ) + TEST_LD_RR_POST_SRC21_BYPASS( 21, 0, 2, p.lh, 0x00000ff0, 2, tdat2 ) + TEST_LD_RR_POST_SRC21_BYPASS( 22, 1, 0, p.lh, 0x000000ff, 2, tdat0 ) + TEST_LD_RR_POST_SRC21_BYPASS( 23, 1, 1, p.lh, 0xfffff00f, -2, tdat3 ) + TEST_LD_RR_POST_SRC21_BYPASS( 24, 2, 0, p.lh, 0x00000ff0, 2, tdat2 ) + + #------------------------------------------------------------- + 
# Test write-after-write hazard + #------------------------------------------------------------- + + TEST_LD_RR_POST_DEST1_WAW( 25, p.lh, 25, tdat ) + TEST_LD_RR_POST_DEST1_WAW( 26, p.lh, -76, tdat ) + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +tdat: +tdat0: .half 0x00ff +tdat1: .half 0xff00 +tdat2: .half 0x0ff0 +tdat3: .half 0xf00f + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/p_lhu_irpost.S b/apps/riscv-tests/isa/rv32uxpulpimg/p_lhu_irpost.S new file mode 100644 index 000000000..72a3d86b8 --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/p_lhu_irpost.S @@ -0,0 +1,70 @@ +# See LICENSE for license details. + +#***************************************************************************** +# p_lhu_irpost.S +#----------------------------------------------------------------------------- +# +# Test p.lhu (immediate-register post-increment) instruction. +# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Basic tests + #------------------------------------------------------------- + + # Positive offset + TEST_LD_POST_OP( 2, p.lhu, 0x000000ff, 0, tdat ) + TEST_LD_POST_OP( 3, p.lhu, 0x000000ff, 2, tdat ) + TEST_LD_POST_OP( 4, p.lhu, 0x0000f00f, 4, tdat3 ) + TEST_LD_POST_OP( 5, p.lhu, 0x0000f00f, 6, tdat3 ) + + # Negative offset + TEST_LD_POST_OP( 6, p.lhu, 0x0000ff00, -6, tdat1 ) + TEST_LD_POST_OP( 7, p.lhu, 0x0000ff00, -4, tdat1 ) + TEST_LD_POST_OP( 8, p.lhu, 0x00000ff0, -2, tdat2 ) + TEST_LD_POST_OP( 9, p.lhu, 0x00000ff0, 0, tdat2 ) + + #------------------------------------------------------------- + # Bypassing tests + #------------------------------------------------------------- + + TEST_LD_POST_DEST1_BYPASS( 10, 0, p.lhu, 0x000000ff, 2, tdat0 ) + TEST_LD_POST_DEST1_BYPASS( 11, 1, p.lhu, 0x0000ff00, 2, tdat1 ) + TEST_LD_POST_DEST1_BYPASS( 12, 2, p.lhu, 0x00000ff0, 2, tdat2 ) + + 
TEST_LD_POST_DEST2_BYPASS( 13, 0, p.lhu, 4, tdat0 ) + TEST_LD_POST_DEST2_BYPASS( 14, 1, p.lhu, 2, tdat1 ) + TEST_LD_POST_DEST2_BYPASS( 15, 2, p.lhu, -6, tdat3 ) + + TEST_LD_POST_SRC1_BYPASS( 16, 0, p.lhu, 0x000000ff, 2, tdat0 ) + TEST_LD_POST_SRC1_BYPASS( 17, 1, p.lhu, 0x0000f00f, -2, tdat3 ) + TEST_LD_POST_SRC1_BYPASS( 18, 2, p.lhu, 0x00000ff0, 2, tdat2 ) + + #------------------------------------------------------------- + # Test write-after-write hazard + #------------------------------------------------------------- + + TEST_LD_POST_DEST1_WAW( 19, p.lhu, 25, tdat ) + TEST_LD_POST_DEST1_WAW( 20, p.lhu, -76, tdat ) + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +tdat: +tdat0: .half 0x00ff +tdat1: .half 0xff00 +tdat2: .half 0x0ff0 +tdat3: .half 0xf00f + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/p_lhu_rr.S b/apps/riscv-tests/isa/rv32uxpulpimg/p_lhu_rr.S new file mode 100644 index 000000000..a8c54fff9 --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/p_lhu_rr.S @@ -0,0 +1,76 @@ +# See LICENSE for license details. + +#***************************************************************************** +# p_lhu_rr.S +#----------------------------------------------------------------------------- +# +# Test p.lhu (register-register) instruction. 
+# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Basic tests + #------------------------------------------------------------- + + # Positive offset + TEST_LD_RR_OP( 2, p.lhu, 0x000000ff, 0, tdat ) + TEST_LD_RR_OP( 3, p.lhu, 0x0000ff00, 2, tdat ) + TEST_LD_RR_OP( 4, p.lhu, 0x00000ff0, 4, tdat ) + TEST_LD_RR_OP( 5, p.lhu, 0x0000f00f, 6, tdat ) + + # Negative offset + TEST_LD_RR_OP( 6, p.lhu, 0x000000ff, -6, tdat3 ) + TEST_LD_RR_OP( 7, p.lhu, 0x0000ff00, -4, tdat3 ) + TEST_LD_RR_OP( 8, p.lhu, 0x00000ff0, -2, tdat3 ) + TEST_LD_RR_OP( 9, p.lhu, 0x0000f00f, 0, tdat3 ) + + #------------------------------------------------------------- + # Bypassing tests + #------------------------------------------------------------- + + TEST_LD_RR_DEST_BYPASS( 10, 0, p.lhu, 0x0000ff00, 2, tdat0 ) + TEST_LD_RR_DEST_BYPASS( 11, 1, p.lhu, 0x00000ff0, 2, tdat1 ) + TEST_LD_RR_DEST_BYPASS( 12, 2, p.lhu, 0x0000f00f, 2, tdat2 ) + + TEST_LD_RR_SRC12_BYPASS( 13, 0, 0, p.lhu, 0x0000ff00, 2, tdat0 ) + TEST_LD_RR_SRC12_BYPASS( 14, 0, 1, p.lhu, 0x00000ff0, -2, tdat3 ) + TEST_LD_RR_SRC12_BYPASS( 15, 0, 2, p.lhu, 0x0000f00f, 2, tdat2 ) + TEST_LD_RR_SRC12_BYPASS( 16, 1, 0, p.lhu, 0x0000ff00, 2, tdat0 ) + TEST_LD_RR_SRC12_BYPASS( 17, 1, 1, p.lhu, 0x00000ff0, -2, tdat3 ) + TEST_LD_RR_SRC12_BYPASS( 18, 2, 0, p.lhu, 0x0000f00f, 2, tdat2 ) + + TEST_LD_RR_SRC21_BYPASS( 19, 0, 0, p.lhu, 0x0000ff00, 2, tdat0 ) + TEST_LD_RR_SRC21_BYPASS( 20, 0, 1, p.lhu, 0x00000ff0, -2, tdat3 ) + TEST_LD_RR_SRC21_BYPASS( 21, 0, 2, p.lhu, 0x0000f00f, 2, tdat2 ) + TEST_LD_RR_SRC21_BYPASS( 22, 1, 0, p.lhu, 0x0000ff00, 2, tdat0 ) + TEST_LD_RR_SRC21_BYPASS( 23, 1, 1, p.lhu, 0x00000ff0, -2, tdat3 ) + TEST_LD_RR_SRC21_BYPASS( 24, 2, 0, p.lhu, 0x0000f00f, 2, tdat2 ) + + #------------------------------------------------------------- + # Test write-after-write hazard + 
#------------------------------------------------------------- + + TEST_LD_RR_DEST1_WAW( 25, p.lhu, 25, tdat ) + TEST_LD_RR_DEST1_WAW( 26, p.lhu, -76, tdat ) + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +tdat: +tdat0: .half 0x00ff +tdat1: .half 0xff00 +tdat2: .half 0x0ff0 +tdat3: .half 0xf00f + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/p_lhu_rrpost.S b/apps/riscv-tests/isa/rv32uxpulpimg/p_lhu_rrpost.S new file mode 100644 index 000000000..aee16032b --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/p_lhu_rrpost.S @@ -0,0 +1,76 @@ +# See LICENSE for license details. + +#***************************************************************************** +# p_lhu_rrpost.S +#----------------------------------------------------------------------------- +# +# Test p.lhu (register-register post-increment) instruction. +# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Basic tests + #------------------------------------------------------------- + + # Positive offset + TEST_LD_RR_POST_OP( 2, p.lhu, 0x000000ff, 0, tdat ) + TEST_LD_RR_POST_OP( 3, p.lhu, 0x000000ff, 2, tdat ) + TEST_LD_RR_POST_OP( 4, p.lhu, 0x0000ff00, 4, tdat1 ) + TEST_LD_RR_POST_OP( 5, p.lhu, 0x0000ff00, 6, tdat1 ) + + # Negative offset + TEST_LD_RR_POST_OP( 6, p.lhu, 0x00000ff0, -6, tdat2 ) + TEST_LD_RR_POST_OP( 7, p.lhu, 0x00000ff0, -4, tdat2 ) + TEST_LD_RR_POST_OP( 8, p.lhu, 0x0000f00f, -2, tdat3 ) + TEST_LD_RR_POST_OP( 9, p.lhu, 0x0000f00f, 0, tdat3 ) + + #------------------------------------------------------------- + # Bypassing tests + #------------------------------------------------------------- + + TEST_LD_RR_POST_DEST_BYPASS( 10, 0, p.lhu, 0x000000ff, 2, tdat0 ) + TEST_LD_RR_POST_DEST_BYPASS( 11, 1, p.lhu, 0x0000ff00, 2, tdat1 ) + TEST_LD_RR_POST_DEST_BYPASS( 12, 2, p.lhu, 0x00000ff0, 2, tdat2 ) + + TEST_LD_RR_POST_SRC12_BYPASS( 
13, 0, 0, p.lhu, 0x000000ff, 2, tdat0 ) + TEST_LD_RR_POST_SRC12_BYPASS( 14, 0, 1, p.lhu, 0x0000f00f, -2, tdat3 ) + TEST_LD_RR_POST_SRC12_BYPASS( 15, 0, 2, p.lhu, 0x00000ff0, 2, tdat2 ) + TEST_LD_RR_POST_SRC12_BYPASS( 16, 1, 0, p.lhu, 0x000000ff, 2, tdat0 ) + TEST_LD_RR_POST_SRC12_BYPASS( 17, 1, 1, p.lhu, 0x0000f00f, -2, tdat3 ) + TEST_LD_RR_POST_SRC12_BYPASS( 18, 2, 0, p.lhu, 0x00000ff0, 2, tdat2 ) + + TEST_LD_RR_POST_SRC21_BYPASS( 19, 0, 0, p.lhu, 0x000000ff, 2, tdat0 ) + TEST_LD_RR_POST_SRC21_BYPASS( 20, 0, 1, p.lhu, 0x0000f00f, -2, tdat3 ) + TEST_LD_RR_POST_SRC21_BYPASS( 21, 0, 2, p.lhu, 0x00000ff0, 2, tdat2 ) + TEST_LD_RR_POST_SRC21_BYPASS( 22, 1, 0, p.lhu, 0x000000ff, 2, tdat0 ) + TEST_LD_RR_POST_SRC21_BYPASS( 23, 1, 1, p.lhu, 0x0000f00f, -2, tdat3 ) + TEST_LD_RR_POST_SRC21_BYPASS( 24, 2, 0, p.lhu, 0x00000ff0, 2, tdat2 ) + + #------------------------------------------------------------- + # Test write-after-write hazard + #------------------------------------------------------------- + + TEST_LD_RR_POST_DEST1_WAW( 25, p.lhu, 25, tdat ) + TEST_LD_RR_POST_DEST1_WAW( 26, p.lhu, -76, tdat ) + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +tdat: +tdat0: .half 0x00ff +tdat1: .half 0xff00 +tdat2: .half 0x0ff0 +tdat3: .half 0xf00f + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/p_lw_irpost.S b/apps/riscv-tests/isa/rv32uxpulpimg/p_lw_irpost.S new file mode 100644 index 000000000..ec62744a2 --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/p_lw_irpost.S @@ -0,0 +1,70 @@ +# See LICENSE for license details. + +#***************************************************************************** +# p_lw_irpost.S +#----------------------------------------------------------------------------- +# +# Test p.lw (immediate-register post-increment) instruction. 
+# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Basic tests + #------------------------------------------------------------- + + # Positive offset + TEST_LD_POST_OP( 2, p.lw, 0x00ff00ff, 0, tdat ) + TEST_LD_POST_OP( 3, p.lw, 0x00ff00ff, 4, tdat ) + TEST_LD_POST_OP( 4, p.lw, 0xf00ff00f, 8, tdat3 ) + TEST_LD_POST_OP( 5, p.lw, 0xf00ff00f, 12, tdat3 ) + + # Negative offset + TEST_LD_POST_OP( 6, p.lw, 0xff00ff00, -12, tdat1 ) + TEST_LD_POST_OP( 7, p.lw, 0xff00ff00, -8, tdat1 ) + TEST_LD_POST_OP( 8, p.lw, 0x0ff00ff0, -4, tdat2 ) + TEST_LD_POST_OP( 9, p.lw, 0x0ff00ff0, 0, tdat2 ) + + #------------------------------------------------------------- + # Bypassing tests + #------------------------------------------------------------- + + TEST_LD_POST_DEST1_BYPASS( 10, 0, p.lw, 0x00ff00ff, 4, tdat0 ) + TEST_LD_POST_DEST1_BYPASS( 11, 1, p.lw, 0xff00ff00, 4, tdat1 ) + TEST_LD_POST_DEST1_BYPASS( 12, 2, p.lw, 0x0ff00ff0, 4, tdat2 ) + + TEST_LD_POST_DEST2_BYPASS( 13, 0, p.lw, 8, tdat0 ) + TEST_LD_POST_DEST2_BYPASS( 14, 1, p.lw, 4, tdat1 ) + TEST_LD_POST_DEST2_BYPASS( 15, 2, p.lw, -12, tdat3 ) + + TEST_LD_POST_SRC1_BYPASS( 16, 0, p.lw, 0x00ff00ff, 4, tdat0 ) + TEST_LD_POST_SRC1_BYPASS( 17, 1, p.lw, 0xf00ff00f, -4, tdat3 ) + TEST_LD_POST_SRC1_BYPASS( 18, 2, p.lw, 0x0ff00ff0, 4, tdat2 ) + + #------------------------------------------------------------- + # Test write-after-write hazard + #------------------------------------------------------------- + + TEST_LD_POST_DEST1_WAW( 19, p.lw, 25, tdat ) + TEST_LD_POST_DEST1_WAW( 20, p.lw, -76, tdat ) + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +tdat: +tdat0: .word 0x00ff00ff +tdat1: .word 0xff00ff00 +tdat2: .word 0x0ff00ff0 +tdat3: .word 0xf00ff00f + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/p_lw_rr.S b/apps/riscv-tests/isa/rv32uxpulpimg/p_lw_rr.S new file mode 100644 
index 000000000..f6000fd78 --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/p_lw_rr.S @@ -0,0 +1,76 @@ +# See LICENSE for license details. + +#***************************************************************************** +# p_lw_rr.S +#----------------------------------------------------------------------------- +# +# Test p.lw (register-register) instruction. +# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Basic tests + #------------------------------------------------------------- + + # Positive offset + TEST_LD_RR_OP( 2, p.lw, 0x00ff00ff, 0, tdat ) + TEST_LD_RR_OP( 3, p.lw, 0xff00ff00, 4, tdat ) + TEST_LD_RR_OP( 4, p.lw, 0x0ff00ff0, 8, tdat ) + TEST_LD_RR_OP( 5, p.lw, 0xf00ff00f, 12, tdat ) + + # Negative offset + TEST_LD_RR_OP( 6, p.lw, 0x00ff00ff, -12, tdat3 ) + TEST_LD_RR_OP( 7, p.lw, 0xff00ff00, -8, tdat3 ) + TEST_LD_RR_OP( 8, p.lw, 0x0ff00ff0, -4, tdat3 ) + TEST_LD_RR_OP( 9, p.lw, 0xf00ff00f, 0, tdat3 ) + + #------------------------------------------------------------- + # Bypassing tests + #------------------------------------------------------------- + + TEST_LD_RR_DEST_BYPASS( 10, 0, p.lw, 0xff00ff00, 4, tdat0 ) + TEST_LD_RR_DEST_BYPASS( 11, 1, p.lw, 0x0ff00ff0, 4, tdat1 ) + TEST_LD_RR_DEST_BYPASS( 12, 2, p.lw, 0xf00ff00f, 4, tdat2 ) + + TEST_LD_RR_SRC12_BYPASS( 13, 0, 0, p.lw, 0xff00ff00, 4, tdat0 ) + TEST_LD_RR_SRC12_BYPASS( 14, 0, 1, p.lw, 0x0ff00ff0, -4, tdat3 ) + TEST_LD_RR_SRC12_BYPASS( 15, 0, 2, p.lw, 0xf00ff00f, 4, tdat2 ) + TEST_LD_RR_SRC12_BYPASS( 16, 1, 0, p.lw, 0xff00ff00, 4, tdat0 ) + TEST_LD_RR_SRC12_BYPASS( 17, 1, 1, p.lw, 0x0ff00ff0, -4, tdat3 ) + TEST_LD_RR_SRC12_BYPASS( 18, 2, 0, p.lw, 0xf00ff00f, 4, tdat2 ) + + TEST_LD_RR_SRC21_BYPASS( 19, 0, 0, p.lw, 0xff00ff00, 4, tdat0 ) + TEST_LD_RR_SRC21_BYPASS( 20, 0, 1, p.lw, 0x0ff00ff0, -4, tdat3 ) + TEST_LD_RR_SRC21_BYPASS( 21, 0, 2, p.lw, 0xf00ff00f, 4, tdat2 ) + 
TEST_LD_RR_SRC21_BYPASS( 22, 1, 0, p.lw, 0xff00ff00, 4, tdat0 ) + TEST_LD_RR_SRC21_BYPASS( 23, 1, 1, p.lw, 0x0ff00ff0, -4, tdat3 ) + TEST_LD_RR_SRC21_BYPASS( 24, 2, 0, p.lw, 0xf00ff00f, 4, tdat2 ) + + #------------------------------------------------------------- + # Test write-after-write hazard + #------------------------------------------------------------- + + TEST_LD_RR_DEST1_WAW( 25, p.lw, 25, tdat ) + TEST_LD_RR_DEST1_WAW( 26, p.lw, -76, tdat ) + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +tdat: +tdat0: .word 0x00ff00ff +tdat1: .word 0xff00ff00 +tdat2: .word 0x0ff00ff0 +tdat3: .word 0xf00ff00f + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/p_lw_rrpost.S b/apps/riscv-tests/isa/rv32uxpulpimg/p_lw_rrpost.S new file mode 100644 index 000000000..bdec214d1 --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/p_lw_rrpost.S @@ -0,0 +1,76 @@ +# See LICENSE for license details. + +#***************************************************************************** +# p_lw_rrpost.S +#----------------------------------------------------------------------------- +# +# Test p.lw (register-register post-increment) instruction. 
+# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Basic tests + #------------------------------------------------------------- + + # Positive offset + TEST_LD_RR_POST_OP( 2, p.lw, 0x00ff00ff, 0, tdat ) + TEST_LD_RR_POST_OP( 3, p.lw, 0x00ff00ff, 4, tdat ) + TEST_LD_RR_POST_OP( 4, p.lw, 0xff00ff00, 8, tdat1 ) + TEST_LD_RR_POST_OP( 5, p.lw, 0xff00ff00, 12, tdat1 ) + + # Negative offset + TEST_LD_RR_POST_OP( 6, p.lw, 0x0ff00ff0, -12, tdat2 ) + TEST_LD_RR_POST_OP( 7, p.lw, 0x0ff00ff0, -8, tdat2 ) + TEST_LD_RR_POST_OP( 8, p.lw, 0xf00ff00f, -4, tdat3 ) + TEST_LD_RR_POST_OP( 9, p.lw, 0xf00ff00f, 0, tdat3 ) + + #------------------------------------------------------------- + # Bypassing tests + #------------------------------------------------------------- + + TEST_LD_RR_POST_DEST_BYPASS( 10, 0, p.lw, 0x00ff00ff, 4, tdat0 ) + TEST_LD_RR_POST_DEST_BYPASS( 11, 1, p.lw, 0xff00ff00, 4, tdat1 ) + TEST_LD_RR_POST_DEST_BYPASS( 12, 2, p.lw, 0x0ff00ff0, 4, tdat2 ) + + TEST_LD_RR_POST_SRC12_BYPASS( 13, 0, 0, p.lw, 0x00ff00ff, 4, tdat0 ) + TEST_LD_RR_POST_SRC12_BYPASS( 14, 0, 1, p.lw, 0xf00ff00f, -4, tdat3 ) + TEST_LD_RR_POST_SRC12_BYPASS( 15, 0, 2, p.lw, 0x0ff00ff0, 4, tdat2 ) + TEST_LD_RR_POST_SRC12_BYPASS( 16, 1, 0, p.lw, 0x00ff00ff, 4, tdat0 ) + TEST_LD_RR_POST_SRC12_BYPASS( 17, 1, 1, p.lw, 0xf00ff00f, -4, tdat3 ) + TEST_LD_RR_POST_SRC12_BYPASS( 18, 2, 0, p.lw, 0x0ff00ff0, 4, tdat2 ) + + TEST_LD_RR_POST_SRC21_BYPASS( 19, 0, 0, p.lw, 0x00ff00ff, 4, tdat0 ) + TEST_LD_RR_POST_SRC21_BYPASS( 20, 0, 1, p.lw, 0xf00ff00f, -4, tdat3 ) + TEST_LD_RR_POST_SRC21_BYPASS( 21, 0, 2, p.lw, 0x0ff00ff0, 4, tdat2 ) + TEST_LD_RR_POST_SRC21_BYPASS( 22, 1, 0, p.lw, 0x00ff00ff, 4, tdat0 ) + TEST_LD_RR_POST_SRC21_BYPASS( 23, 1, 1, p.lw, 0xf00ff00f, -4, tdat3 ) + TEST_LD_RR_POST_SRC21_BYPASS( 24, 2, 0, p.lw, 0x0ff00ff0, 4, tdat2 ) + + #------------------------------------------------------------- 
+ # Test write-after-write hazard + #------------------------------------------------------------- + + TEST_LD_RR_POST_DEST1_WAW( 25, p.lw, 25, tdat ) + TEST_LD_RR_POST_DEST1_WAW( 26, p.lw, -76, tdat ) + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +tdat: +tdat0: .word 0x00ff00ff +tdat1: .word 0xff00ff00 +tdat2: .word 0x0ff00ff0 +tdat3: .word 0xf00ff00f + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/p_mac.S b/apps/riscv-tests/isa/rv32uxpulpimg/p_mac.S new file mode 100644 index 000000000..3ccd7f41b --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/p_mac.S @@ -0,0 +1,88 @@ +# See LICENSE for license details. + +#***************************************************************************** +# p_mac.S +#----------------------------------------------------------------------------- +# +# Test p.mac instruction. +# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Arithmetic tests + #------------------------------------------------------------- + + TEST_RRR_OP( 2, p.mac, 0x00000000, 0x00000000, 0x00000000, 0x00000000 ) + TEST_RRR_OP( 3, p.mac, 0xFFFFFFFF, 0x00000000, 0x00000000, 0xFFFFFFFF ) + TEST_RRR_OP( 4, p.mac, 0x00000000, 0x00000000, 0xFFFFFFFF, 0x00000000 ) + TEST_RRR_OP( 5, p.mac, 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF ) + TEST_RRR_OP( 6, p.mac, 0x00000000, 0xFFFFFFFF, 0x00000000, 0x00000000 ) + TEST_RRR_OP( 7, p.mac, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF ) + TEST_RRR_OP( 8, p.mac, 0x00000001, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000 ) + TEST_RRR_OP( 9, p.mac, 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF ) + + TEST_RRR_OP( 10, p.mac, 0x4CF50B3F, 0xB463DADE, 0x1C2D45F3, 0x67287485 ) + TEST_RRR_OP( 11, p.mac, 0x01C8425D, 0x5DE547E9, 0xBE923643, 0x20B94A62 ) + TEST_RRR_OP( 12, p.mac, 0x125000F5, 0xD5042C35, 0x113E2192, 0xD265F5BB ) + TEST_RRR_OP( 13, p.mac, 0x117DE9BB, 0x0762A9A5, 
0xAB420127, 0x9B426C98 ) + + #------------------------------------------------------------- + # Source/Destination tests + #------------------------------------------------------------- + + TEST_RRR_SRC1_EQ_DEST( 14, p.mac, 0x0CEE5928, 0x113E2192, 0x1C2D45F3 ) + TEST_RRR_SRC2_EQ_DEST( 15, p.mac, 0x1F280E82, 0x7E139C55, 0xBE923643 ) + + TEST_RRR_SRC12_EQ_DEST( 16, p.mac, 0x22EE857E, 0x84BB8025 ) + + #------------------------------------------------------------- + # Bypassing tests + #------------------------------------------------------------- + + TEST_RRR_DEST_BYPASS( 17, 0, p.mac, 5, 1, 2, 3 ) + TEST_RRR_DEST_BYPASS( 18, 1, p.mac, 26, 4, 5, 6 ) + TEST_RRR_DEST_BYPASS( 19, 2, p.mac, 65, 7, 8, 9 ) + + TEST_RRR_SRC12_BYPASS( 20, 0, 0, p.mac, 5, 1, 2, 3 ) + TEST_RRR_SRC12_BYPASS( 21, 0, 1, p.mac, 26, 4, 5, 6 ) + TEST_RRR_SRC12_BYPASS( 22, 0, 2, p.mac, 65, 7, 8, 9 ) + TEST_RRR_SRC12_BYPASS( 23, 1, 0, p.mac, 5, -1, -2, 3 ) + TEST_RRR_SRC12_BYPASS( 24, 1, 1, p.mac, 14, 4, 5, -6 ) + TEST_RRR_SRC12_BYPASS( 25, 2, 0, p.mac, -47, -7, 8, 9 ) + + TEST_RRR_SRC21_BYPASS( 26, 0, 0, p.mac, 5, 1, 2, 3 ) + TEST_RRR_SRC21_BYPASS( 27, 0, 1, p.mac, 26, 4, 5, 6 ) + TEST_RRR_SRC21_BYPASS( 28, 0, 2, p.mac, 65, 7, 8, 9 ) + TEST_RRR_SRC21_BYPASS( 29, 1, 0, p.mac, 5, -1, -2, 3 ) + TEST_RRR_SRC21_BYPASS( 30, 1, 1, p.mac, 14, 4, 5, -6 ) + TEST_RRR_SRC21_BYPASS( 31, 2, 0, p.mac, -47, -7, 8, 9 ) + + TEST_RRR_SRC3_BYPASS( 32, 0, p.mac, 26, 4, 5, 6 ) + TEST_RRR_SRC3_BYPASS( 33, 1, p.mac, 65, 7, 8, 9 ) + TEST_RRR_SRC3_BYPASS( 34, 2, p.mac, 5, -1, -2, 3 ) + + TEST_RRR_ZEROSRC1( 35, p.mac, 10, -5, 10 ) + TEST_RRR_ZEROSRC2( 36, p.mac, 7, 32, 7 ) + TEST_RRR_ZEROSRC3( 37, p.mac, -8, -1, 8 ) + + TEST_RRR_ZEROSRC12( 38, p.mac, -3, -3 ) + + TEST_RRR_ZEROSRC123( 39, p.mac, 0 ) + + TEST_RRR_ZERODEST( 40, p.mac, 34, -10 ) + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/p_msu.S 
b/apps/riscv-tests/isa/rv32uxpulpimg/p_msu.S new file mode 100644 index 000000000..46f7b5866 --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/p_msu.S @@ -0,0 +1,88 @@ +# See LICENSE for license details. + +#***************************************************************************** +# p_msu.S +#----------------------------------------------------------------------------- +# +# Test p.msu instruction. +# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Arithmetic tests + #------------------------------------------------------------- + + TEST_RRR_OP( 2, p.msu, 0x00000000, 0x00000000, 0x00000000, 0x00000000 ) + TEST_RRR_OP( 3, p.msu, 0xFFFFFFFF, 0x00000000, 0x00000000, 0xFFFFFFFF ) + TEST_RRR_OP( 4, p.msu, 0x00000000, 0x00000000, 0xFFFFFFFF, 0x00000000 ) + TEST_RRR_OP( 5, p.msu, 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF ) + TEST_RRR_OP( 6, p.msu, 0x00000000, 0xFFFFFFFF, 0x00000000, 0x00000000 ) + TEST_RRR_OP( 7, p.msu, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF ) + TEST_RRR_OP( 8, p.msu, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000 ) + TEST_RRR_OP( 9, p.msu, 0xFFFFFFFE, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF ) + + TEST_RRR_OP( 10, p.msu, 0x815BDDCB, 0xB463DADE, 0x1C2D45F3, 0x67287485 ) + TEST_RRR_OP( 11, p.msu, 0x3FAA5267, 0x5DE547E9, 0xBE923643, 0x20B94A62 ) + TEST_RRR_OP( 12, p.msu, 0x927BEA81, 0xD5042C35, 0x113E2192, 0xD265F5BB ) + TEST_RRR_OP( 13, p.msu, 0x2506EF75, 0x0762A9A5, 0xAB420127, 0x9B426C98 ) + + #------------------------------------------------------------- + # Source/Destination tests + #------------------------------------------------------------- + + TEST_RRR_SRC1_EQ_DEST( 14, p.msu, 0x158DE9FC, 0x113E2192, 0x1C2D45F3 ) + TEST_RRR_SRC2_EQ_DEST( 15, p.msu, 0x5DFC5E04, 0x7E139C55, 0xBE923643 ) + + TEST_RRR_SRC12_EQ_DEST( 16, p.msu, 0xE6887ACC, 0x84BB8025 ) + + #------------------------------------------------------------- + # 
Bypassing tests + #------------------------------------------------------------- + + TEST_RRR_DEST_BYPASS( 17, 0, p.msu, 1, 1, 2, 3 ) + TEST_RRR_DEST_BYPASS( 18, 1, p.msu, -14, 4, 5, 6 ) + TEST_RRR_DEST_BYPASS( 19, 2, p.msu, -47, 7, 8, 9 ) + + TEST_RRR_SRC12_BYPASS( 20, 0, 0, p.msu, 1, 1, 2, 3 ) + TEST_RRR_SRC12_BYPASS( 21, 0, 1, p.msu, -14, 4, 5, 6 ) + TEST_RRR_SRC12_BYPASS( 22, 0, 2, p.msu, -47, 7, 8, 9 ) + TEST_RRR_SRC12_BYPASS( 23, 1, 0, p.msu, 1, -1, -2, 3 ) + TEST_RRR_SRC12_BYPASS( 24, 1, 1, p.msu, -26, 4, 5, -6 ) + TEST_RRR_SRC12_BYPASS( 25, 2, 0, p.msu, 65, -7, 8, 9 ) + + TEST_RRR_SRC21_BYPASS( 26, 0, 0, p.msu, 1, 1, 2, 3 ) + TEST_RRR_SRC21_BYPASS( 27, 0, 1, p.msu, -14, 4, 5, 6 ) + TEST_RRR_SRC21_BYPASS( 28, 0, 2, p.msu, -47, 7, 8, 9 ) + TEST_RRR_SRC21_BYPASS( 29, 1, 0, p.msu, 1, -1, -2, 3 ) + TEST_RRR_SRC21_BYPASS( 30, 1, 1, p.msu, -26, 4, 5, -6 ) + TEST_RRR_SRC21_BYPASS( 31, 2, 0, p.msu, 65, -7, 8, 9 ) + + TEST_RRR_SRC3_BYPASS( 32, 0, p.msu, -14, 4, 5, 6 ) + TEST_RRR_SRC3_BYPASS( 33, 1, p.msu, -47, 7, 8, 9 ) + TEST_RRR_SRC3_BYPASS( 34, 2, p.msu, 1, -1, -2, 3 ) + + TEST_RRR_ZEROSRC1( 35, p.msu, 10, -5, 10 ) + TEST_RRR_ZEROSRC2( 36, p.msu, 7, 32, 7 ) + TEST_RRR_ZEROSRC3( 37, p.msu, 8, -1, 8 ) + + TEST_RRR_ZEROSRC12( 38, p.msu, -3, -3 ) + + TEST_RRR_ZEROSRC123( 39, p.msu, 0 ) + + TEST_RRR_ZERODEST( 40, p.msu, 34, -10 ) + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/p_sb_irpost.S b/apps/riscv-tests/isa/rv32uxpulpimg/p_sb_irpost.S new file mode 100644 index 000000000..d5ddf3a03 --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/p_sb_irpost.S @@ -0,0 +1,71 @@ +# See LICENSE for license details. + +#***************************************************************************** +# p_sb_irpost.S +#----------------------------------------------------------------------------- +# +# Test p.sb (immediate-register post-increment) instruction. 
+# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Basic tests + #------------------------------------------------------------- + + # Positive offset + TEST_ST_POST_OP( 2, p.sb, lb, 0xffffffaa, 0, tdat ) + TEST_ST_POST_OP( 3, p.sb, lb, 0x00000000, 1, tdat ) + TEST_ST_POST_OP( 4, p.sb, lh, 0xffffefa0, 2, tdat ) + TEST_ST_POST_OP( 5, p.sb, lb, 0x0000000a, 3, tdat ) + + # Negative offset + TEST_ST_POST_OP( 6, p.sb, lb, 0xffffffaa, -6, tdat8 ) + TEST_ST_POST_OP( 7, p.sb, lb, 0x00000000, -5, tdat8 ) + TEST_ST_POST_OP( 8, p.sb, lb, 0xffffffa0, -3, tdat8 ) + TEST_ST_POST_OP( 9, p.sb, lb, 0x0000000a, -1, tdat8 ) + + #------------------------------------------------------------- + # Bypassing tests + #------------------------------------------------------------- + + TEST_ST_POST_SRC12_BYPASS( 10, 0, 0, p.sb, lb, 0xffffffdd, 0, tdat ); + TEST_ST_POST_SRC12_BYPASS( 11, 0, 1, p.sb, lb, 0xffffffcd, 1, tdat ); + TEST_ST_POST_SRC12_BYPASS( 12, 0, 2, p.sb, lb, 0xffffffcc, 2, tdat ); + TEST_ST_POST_SRC12_BYPASS( 13, 1, 0, p.sb, lb, 0xffffffbc, 3, tdat ); + TEST_ST_POST_SRC12_BYPASS( 14, 1, 1, p.sb, lb, 0xffffffbb, 4, tdat ); + TEST_ST_POST_SRC12_BYPASS( 15, 2, 0, p.sb, lb, 0xffffffab, 5, tdat ); + + TEST_ST_POST_SRC21_BYPASS( 16, 0, 0, p.sb, lb, 0x00000033, 0, tdat ); + TEST_ST_POST_SRC21_BYPASS( 17, 0, 1, p.sb, lb, 0x00000023, 1, tdat ); + TEST_ST_POST_SRC21_BYPASS( 18, 0, 2, p.sb, lb, 0x00000022, 2, tdat ); + TEST_ST_POST_SRC21_BYPASS( 19, 1, 0, p.sb, lb, 0x00000012, 3, tdat ); + TEST_ST_POST_SRC21_BYPASS( 20, 1, 1, p.sb, lb, 0x00000011, 4, tdat ); + TEST_ST_POST_SRC21_BYPASS( 21, 2, 0, p.sb, lb, 0x00000001, 5, tdat ); + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +tdat: +tdat0: .byte 0xef +tdat1: .byte 0xef +tdat2: .byte 0xef +tdat3: .byte 0xef +tdat4: .byte 0xef +tdat5: .byte 0xef +tdat6: .byte 0xef +tdat7: .byte 0xef +tdat8: .byte 
0xef +tdat9: .byte 0xef + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/p_sb_rr.S b/apps/riscv-tests/isa/rv32uxpulpimg/p_sb_rr.S new file mode 100644 index 000000000..6b501b487 --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/p_sb_rr.S @@ -0,0 +1,71 @@ +# See LICENSE for license details. + +#***************************************************************************** +# p_sb_rr.S +#----------------------------------------------------------------------------- +# +# Test p.sb (register-register) instruction. +# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Basic tests + #------------------------------------------------------------- + + # Positive offset + TEST_ST_RR_OP( 2, p.sb, lb, 0xffffffaa, 0, tdat ) + TEST_ST_RR_OP( 3, p.sb, lb, 0x00000000, 1, tdat ) + TEST_ST_RR_OP( 4, p.sb, lh, 0xffffefa0, 2, tdat ) + TEST_ST_RR_OP( 5, p.sb, lb, 0x0000000a, 3, tdat ) + + # Negative offset + TEST_ST_RR_OP( 6, p.sb, lb, 0xffffffaa, -6, tdat8 ) + TEST_ST_RR_OP( 7, p.sb, lb, 0x00000000, -5, tdat8 ) + TEST_ST_RR_OP( 8, p.sb, lb, 0xffffffa0, -3, tdat8 ) + TEST_ST_RR_OP( 9, p.sb, lb, 0x0000000a, -1, tdat8 ) + + #------------------------------------------------------------- + # Bypassing tests + #------------------------------------------------------------- + + TEST_ST_RR_SRC12_BYPASS( 10, 0, 0, p.sb, lb, 0xffffffdd, 0, tdat ); + TEST_ST_RR_SRC12_BYPASS( 11, 0, 1, p.sb, lb, 0xffffffcd, 1, tdat ); + TEST_ST_RR_SRC12_BYPASS( 12, 0, 2, p.sb, lb, 0xffffffcc, 2, tdat ); + TEST_ST_RR_SRC12_BYPASS( 13, 1, 0, p.sb, lb, 0xffffffbc, 3, tdat ); + TEST_ST_RR_SRC12_BYPASS( 14, 1, 1, p.sb, lb, 0xffffffbb, 4, tdat ); + TEST_ST_RR_SRC12_BYPASS( 15, 2, 0, p.sb, lb, 0xffffffab, 5, tdat ); + + TEST_ST_RR_SRC21_BYPASS( 16, 0, 0, p.sb, lb, 0x00000033, 0, tdat ); + TEST_ST_RR_SRC21_BYPASS( 17, 0, 1, p.sb, lb, 0x00000023, 1, tdat ); + TEST_ST_RR_SRC21_BYPASS( 18, 0, 2, 
p.sb, lb, 0x00000022, 2, tdat ); + TEST_ST_RR_SRC21_BYPASS( 19, 1, 0, p.sb, lb, 0x00000012, 3, tdat ); + TEST_ST_RR_SRC21_BYPASS( 20, 1, 1, p.sb, lb, 0x00000011, 4, tdat ); + TEST_ST_RR_SRC21_BYPASS( 21, 2, 0, p.sb, lb, 0x00000001, 5, tdat ); + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +tdat: +tdat0: .byte 0xef +tdat1: .byte 0xef +tdat2: .byte 0xef +tdat3: .byte 0xef +tdat4: .byte 0xef +tdat5: .byte 0xef +tdat6: .byte 0xef +tdat7: .byte 0xef +tdat8: .byte 0xef +tdat9: .byte 0xef + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/p_sb_rrpost.S b/apps/riscv-tests/isa/rv32uxpulpimg/p_sb_rrpost.S new file mode 100644 index 000000000..3ed706fde --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/p_sb_rrpost.S @@ -0,0 +1,71 @@ +# See LICENSE for license details. + +#***************************************************************************** +# p_sb_rrpost.S +#----------------------------------------------------------------------------- +# +# Test p.sb (register-register post-increment) instruction. 
+# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Basic tests + #------------------------------------------------------------- + + # Positive offset + TEST_ST_RR_POST_OP( 2, p.sb, lb, 0xffffffaa, 0, tdat1 ) + TEST_ST_RR_POST_OP( 3, p.sb, lb, 0x00000000, 1, tdat1 ) + TEST_ST_RR_POST_OP( 4, p.sb, lh, 0xffffefa0, 2, tdat2 ) + TEST_ST_RR_POST_OP( 5, p.sb, lb, 0x0000000a, 3, tdat2 ) + + # Negative offset + TEST_ST_RR_POST_OP( 6, p.sb, lb, 0xffffffaa, -6, tdat8 ) + TEST_ST_RR_POST_OP( 7, p.sb, lb, 0x00000000, -5, tdat8 ) + TEST_ST_RR_POST_OP( 8, p.sb, lb, 0xffffffa0, -3, tdat8 ) + TEST_ST_RR_POST_OP( 9, p.sb, lb, 0x0000000a, -1, tdat8 ) + + #------------------------------------------------------------- + # Bypassing tests + #------------------------------------------------------------- + + TEST_ST_RR_POST_SRC12_BYPASS( 10, 0, 0, p.sb, lb, 0xffffffdd, 0, tdat ); + TEST_ST_RR_POST_SRC12_BYPASS( 11, 0, 1, p.sb, lb, 0xffffffcd, 1, tdat ); + TEST_ST_RR_POST_SRC12_BYPASS( 12, 0, 2, p.sb, lb, 0xffffffcc, 2, tdat ); + TEST_ST_RR_POST_SRC12_BYPASS( 13, 1, 0, p.sb, lb, 0xffffffbc, 3, tdat ); + TEST_ST_RR_POST_SRC12_BYPASS( 14, 1, 1, p.sb, lb, 0xffffffbb, 4, tdat ); + TEST_ST_RR_POST_SRC12_BYPASS( 15, 2, 0, p.sb, lb, 0xffffffab, 5, tdat ); + + TEST_ST_RR_POST_SRC21_BYPASS( 16, 0, 0, p.sb, lb, 0x00000033, 0, tdat ); + TEST_ST_RR_POST_SRC21_BYPASS( 17, 0, 1, p.sb, lb, 0x00000023, 1, tdat ); + TEST_ST_RR_POST_SRC21_BYPASS( 18, 0, 2, p.sb, lb, 0x00000022, 2, tdat ); + TEST_ST_RR_POST_SRC21_BYPASS( 19, 1, 0, p.sb, lb, 0x00000012, 3, tdat ); + TEST_ST_RR_POST_SRC21_BYPASS( 20, 1, 1, p.sb, lb, 0x00000011, 4, tdat ); + TEST_ST_RR_POST_SRC21_BYPASS( 21, 2, 0, p.sb, lb, 0x00000001, 5, tdat ); + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +tdat: +tdat0: .byte 0xef +tdat1: .byte 0xef +tdat2: .byte 0xef +tdat3: .byte 0xef +tdat4: .byte 0xef +tdat5: 
.byte 0xef +tdat6: .byte 0xef +tdat7: .byte 0xef +tdat8: .byte 0xef +tdat9: .byte 0xef + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/p_sh_irpost.S b/apps/riscv-tests/isa/rv32uxpulpimg/p_sh_irpost.S new file mode 100644 index 000000000..32c0b376b --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/p_sh_irpost.S @@ -0,0 +1,71 @@ +# See LICENSE for license details. + +#***************************************************************************** +# p_sh_irpost.S +#----------------------------------------------------------------------------- +# +# Test p.sh (immediate-register post-increment) instruction. +# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Basic tests + #------------------------------------------------------------- + + # Positive offset + TEST_ST_POST_OP( 2, p.sh, lh, 0x000000aa, 0, tdat ); + TEST_ST_POST_OP( 3, p.sh, lh, 0xffffaa00, 2, tdat ); + TEST_ST_POST_OP( 4, p.sh, lw, 0xbeef0aa0, 4, tdat ); + TEST_ST_POST_OP( 5, p.sh, lh, 0xffffa00a, 6, tdat ); + + # Negative offset + TEST_ST_POST_OP( 6, p.sh, lh, 0x000000aa, -6, tdat8 ); + TEST_ST_POST_OP( 7, p.sh, lh, 0xffffaa00, -4, tdat8 ); + TEST_ST_POST_OP( 8, p.sh, lh, 0x00000aa0, -2, tdat8 ); + TEST_ST_POST_OP( 9, p.sh, lh, 0xffffa00a, 0, tdat8 ); + + #------------------------------------------------------------- + # Bypassing tests + #------------------------------------------------------------- + + TEST_ST_POST_SRC12_BYPASS( 10, 0, 0, p.sh, lh, 0xffffccdd, 0, tdat ); + TEST_ST_POST_SRC12_BYPASS( 11, 0, 1, p.sh, lh, 0xffffbccd, 2, tdat ); + TEST_ST_POST_SRC12_BYPASS( 12, 0, 2, p.sh, lh, 0xffffbbcc, 4, tdat ); + TEST_ST_POST_SRC12_BYPASS( 13, 1, 0, p.sh, lh, 0xffffabbc, 6, tdat ); + TEST_ST_POST_SRC12_BYPASS( 14, 1, 1, p.sh, lh, 0xffffaabb, 8, tdat ); + TEST_ST_POST_SRC12_BYPASS( 15, 2, 0, p.sh, lh, 0xffffdaab, 10, tdat ); + + TEST_ST_POST_SRC21_BYPASS( 16, 0, 0, p.sh, 
lh, 0x00002233, 0, tdat ); + TEST_ST_POST_SRC21_BYPASS( 17, 0, 1, p.sh, lh, 0x00001223, 2, tdat ); + TEST_ST_POST_SRC21_BYPASS( 18, 0, 2, p.sh, lh, 0x00001122, 4, tdat ); + TEST_ST_POST_SRC21_BYPASS( 19, 1, 0, p.sh, lh, 0x00000112, 6, tdat ); + TEST_ST_POST_SRC21_BYPASS( 20, 1, 1, p.sh, lh, 0x00000011, 8, tdat ); + TEST_ST_POST_SRC21_BYPASS( 21, 2, 0, p.sh, lh, 0x00003001, 10, tdat ); + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +tdat: +tdat0: .half 0xbeef +tdat1: .half 0xbeef +tdat2: .half 0xbeef +tdat3: .half 0xbeef +tdat4: .half 0xbeef +tdat5: .half 0xbeef +tdat6: .half 0xbeef +tdat7: .half 0xbeef +tdat8: .half 0xbeef +tdat9: .half 0xbeef + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/p_sh_rr.S b/apps/riscv-tests/isa/rv32uxpulpimg/p_sh_rr.S new file mode 100644 index 000000000..0c5f4cbcb --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/p_sh_rr.S @@ -0,0 +1,71 @@ +# See LICENSE for license details. + +#***************************************************************************** +# p_sh_rr.S +#----------------------------------------------------------------------------- +# +# Test p.sh (register-register) instruction. 
+# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Basic tests + #------------------------------------------------------------- + + # Positive offset + TEST_ST_RR_OP( 2, p.sh, lh, 0x000000aa, 0, tdat ); + TEST_ST_RR_OP( 3, p.sh, lh, 0xffffaa00, 2, tdat ); + TEST_ST_RR_OP( 4, p.sh, lw, 0xbeef0aa0, 4, tdat ); + TEST_ST_RR_OP( 5, p.sh, lh, 0xffffa00a, 6, tdat ); + + # Negative offset + TEST_ST_RR_OP( 6, p.sh, lh, 0x000000aa, -6, tdat8 ); + TEST_ST_RR_OP( 7, p.sh, lh, 0xffffaa00, -4, tdat8 ); + TEST_ST_RR_OP( 8, p.sh, lh, 0x00000aa0, -2, tdat8 ); + TEST_ST_RR_OP( 9, p.sh, lh, 0xffffa00a, 0, tdat8 ); + + #------------------------------------------------------------- + # Bypassing tests + #------------------------------------------------------------- + + TEST_ST_RR_SRC12_BYPASS( 10, 0, 0, p.sh, lh, 0xffffccdd, 0, tdat ); + TEST_ST_RR_SRC12_BYPASS( 11, 0, 1, p.sh, lh, 0xffffbccd, 2, tdat ); + TEST_ST_RR_SRC12_BYPASS( 12, 0, 2, p.sh, lh, 0xffffbbcc, 4, tdat ); + TEST_ST_RR_SRC12_BYPASS( 13, 1, 0, p.sh, lh, 0xffffabbc, 6, tdat ); + TEST_ST_RR_SRC12_BYPASS( 14, 1, 1, p.sh, lh, 0xffffaabb, 8, tdat ); + TEST_ST_RR_SRC12_BYPASS( 15, 2, 0, p.sh, lh, 0xffffdaab, 10, tdat ); + + TEST_ST_RR_SRC21_BYPASS( 16, 0, 0, p.sh, lh, 0x00002233, 0, tdat ); + TEST_ST_RR_SRC21_BYPASS( 17, 0, 1, p.sh, lh, 0x00001223, 2, tdat ); + TEST_ST_RR_SRC21_BYPASS( 18, 0, 2, p.sh, lh, 0x00001122, 4, tdat ); + TEST_ST_RR_SRC21_BYPASS( 19, 1, 0, p.sh, lh, 0x00000112, 6, tdat ); + TEST_ST_RR_SRC21_BYPASS( 20, 1, 1, p.sh, lh, 0x00000011, 8, tdat ); + TEST_ST_RR_SRC21_BYPASS( 21, 2, 0, p.sh, lh, 0x00003001, 10, tdat ); + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +tdat: +tdat0: .half 0xbeef +tdat1: .half 0xbeef +tdat2: .half 0xbeef +tdat3: .half 0xbeef +tdat4: .half 0xbeef +tdat5: .half 0xbeef +tdat6: .half 0xbeef +tdat7: .half 0xbeef +tdat8: .half 0xbeef +tdat9: 
.half 0xbeef + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/p_sh_rrpost.S b/apps/riscv-tests/isa/rv32uxpulpimg/p_sh_rrpost.S new file mode 100644 index 000000000..5dafda6d9 --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/p_sh_rrpost.S @@ -0,0 +1,71 @@ +# See LICENSE for license details. + +#***************************************************************************** +# p_sh_rrpost.S +#----------------------------------------------------------------------------- +# +# Test p.sh (register-register post-increment) instruction. +# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Basic tests + #------------------------------------------------------------- + + # Positive offset + TEST_ST_RR_POST_OP( 2, p.sh, lh, 0x000000aa, 0, tdat ); + TEST_ST_RR_POST_OP( 3, p.sh, lh, 0xffffaa00, 2, tdat ); + TEST_ST_RR_POST_OP( 4, p.sh, lw, 0xbeef0aa0, 4, tdat ); + TEST_ST_RR_POST_OP( 5, p.sh, lh, 0xffffa00a, 6, tdat ); + + # Negative offset + TEST_ST_RR_POST_OP( 6, p.sh, lh, 0x000000aa, -6, tdat8 ); + TEST_ST_RR_POST_OP( 7, p.sh, lh, 0xffffaa00, -4, tdat8 ); + TEST_ST_RR_POST_OP( 8, p.sh, lh, 0x00000aa0, -2, tdat8 ); + TEST_ST_RR_POST_OP( 9, p.sh, lh, 0xffffa00a, 0, tdat8 ); + + #------------------------------------------------------------- + # Bypassing tests + #------------------------------------------------------------- + + TEST_ST_RR_POST_SRC12_BYPASS( 10, 0, 0, p.sh, lh, 0xffffccdd, 0, tdat ); + TEST_ST_RR_POST_SRC12_BYPASS( 11, 0, 1, p.sh, lh, 0xffffbccd, 2, tdat ); + TEST_ST_RR_POST_SRC12_BYPASS( 12, 0, 2, p.sh, lh, 0xffffbbcc, 4, tdat ); + TEST_ST_RR_POST_SRC12_BYPASS( 13, 1, 0, p.sh, lh, 0xffffabbc, 6, tdat ); + TEST_ST_RR_POST_SRC12_BYPASS( 14, 1, 1, p.sh, lh, 0xffffaabb, 8, tdat ); + TEST_ST_RR_POST_SRC12_BYPASS( 15, 2, 0, p.sh, lh, 0xffffdaab, 10, tdat ); + + TEST_ST_RR_POST_SRC21_BYPASS( 16, 0, 0, p.sh, lh, 0x00002233, 0, tdat ); + 
TEST_ST_RR_POST_SRC21_BYPASS( 17, 0, 1, p.sh, lh, 0x00001223, 2, tdat ); + TEST_ST_RR_POST_SRC21_BYPASS( 18, 0, 2, p.sh, lh, 0x00001122, 4, tdat ); + TEST_ST_RR_POST_SRC21_BYPASS( 19, 1, 0, p.sh, lh, 0x00000112, 6, tdat ); + TEST_ST_RR_POST_SRC21_BYPASS( 20, 1, 1, p.sh, lh, 0x00000011, 8, tdat ); + TEST_ST_RR_POST_SRC21_BYPASS( 21, 2, 0, p.sh, lh, 0x00003001, 10, tdat ); + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +tdat: +tdat0: .half 0xbeef +tdat1: .half 0xbeef +tdat2: .half 0xbeef +tdat3: .half 0xbeef +tdat4: .half 0xbeef +tdat5: .half 0xbeef +tdat6: .half 0xbeef +tdat7: .half 0xbeef +tdat8: .half 0xbeef +tdat9: .half 0xbeef + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/p_sw_irpost.S b/apps/riscv-tests/isa/rv32uxpulpimg/p_sw_irpost.S new file mode 100644 index 000000000..bd8c174d4 --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/p_sw_irpost.S @@ -0,0 +1,71 @@ +# See LICENSE for license details. + +#***************************************************************************** +# p_sw_irpost.S +#----------------------------------------------------------------------------- +# +# Test p.sw (immediate-register post-increment) instruction. 
+# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Basic tests + #------------------------------------------------------------- + + # Positive offset + TEST_ST_POST_OP( 2, p.sw, lw, 0x00aa00aa, 0, tdat ); + TEST_ST_POST_OP( 3, p.sw, lw, 0xaa00aa00, 4, tdat ); + TEST_ST_POST_OP( 4, p.sw, lw, 0x0aa00aa0, 8, tdat ); + TEST_ST_POST_OP( 5, p.sw, lw, 0xa00aa00a, 12, tdat ); + + # Negative offset + TEST_ST_POST_OP( 6, p.sw, lw, 0x00aa00aa, -12, tdat8 ); + TEST_ST_POST_OP( 7, p.sw, lw, 0xaa00aa00, -8, tdat8 ); + TEST_ST_POST_OP( 8, p.sw, lw, 0x0aa00aa0, -4, tdat8 ); + TEST_ST_POST_OP( 9, p.sw, lw, 0xa00aa00a, 0, tdat8 ); + + #------------------------------------------------------------- + # Bypassing tests + #------------------------------------------------------------- + + TEST_ST_POST_SRC12_BYPASS( 10, 0, 0, p.sw, lw, 0xaabbccdd, 0, tdat ); + TEST_ST_POST_SRC12_BYPASS( 11, 0, 1, p.sw, lw, 0xdaabbccd, 4, tdat ); + TEST_ST_POST_SRC12_BYPASS( 12, 0, 2, p.sw, lw, 0xddaabbcc, 8, tdat ); + TEST_ST_POST_SRC12_BYPASS( 13, 1, 0, p.sw, lw, 0xcddaabbc, 12, tdat ); + TEST_ST_POST_SRC12_BYPASS( 14, 1, 1, p.sw, lw, 0xccddaabb, 16, tdat ); + TEST_ST_POST_SRC12_BYPASS( 15, 2, 0, p.sw, lw, 0xbccddaab, 20, tdat ); + + TEST_ST_POST_SRC21_BYPASS( 16, 0, 0, p.sw, lw, 0x00112233, 0, tdat ); + TEST_ST_POST_SRC21_BYPASS( 17, 0, 1, p.sw, lw, 0x30011223, 4, tdat ); + TEST_ST_POST_SRC21_BYPASS( 18, 0, 2, p.sw, lw, 0x33001122, 8, tdat ); + TEST_ST_POST_SRC21_BYPASS( 19, 1, 0, p.sw, lw, 0x23300112, 12, tdat ); + TEST_ST_POST_SRC21_BYPASS( 20, 1, 1, p.sw, lw, 0x22330011, 16, tdat ); + TEST_ST_POST_SRC21_BYPASS( 21, 2, 0, p.sw, lw, 0x12233001, 20, tdat ); + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +tdat: +tdat0: .word 0xdeadbeef +tdat1: .word 0xdeadbeef +tdat2: .word 0xdeadbeef +tdat3: .word 0xdeadbeef +tdat4: .word 0xdeadbeef +tdat5: .word 0xdeadbeef 
+tdat6: .word 0xdeadbeef +tdat7: .word 0xdeadbeef +tdat8: .word 0xdeadbeef +tdat9: .word 0xdeadbeef + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/p_sw_rr.S b/apps/riscv-tests/isa/rv32uxpulpimg/p_sw_rr.S new file mode 100644 index 000000000..6a6a53e38 --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/p_sw_rr.S @@ -0,0 +1,72 @@ +# See LICENSE for license details. + +#***************************************************************************** +# p_sw_rr.S +#----------------------------------------------------------------------------- +# +# Test p.sw (register-register) instruction. +# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Basic tests + #------------------------------------------------------------- + + # Positive offset + TEST_ST_RR_OP( 2, p.sw, lw, 0x00aa00aa, 0, tdat ); + TEST_ST_RR_OP( 3, p.sw, lw, 0xaa00aa00, 4, tdat ); + TEST_ST_RR_OP( 4, p.sw, lw, 0x0aa00aa0, 8, tdat ); + TEST_ST_RR_OP( 5, p.sw, lw, 0xa00aa00a, 12, tdat ); + + # Negative offset + TEST_ST_RR_OP( 6, p.sw, lw, 0x00aa00aa, -12, tdat8 ); + TEST_ST_RR_OP( 7, p.sw, lw, 0xaa00aa00, -8, tdat8 ); + TEST_ST_RR_OP( 8, p.sw, lw, 0x0aa00aa0, -4, tdat8 ); + TEST_ST_RR_OP( 9, p.sw, lw, 0xa00aa00a, 0, tdat8 ); + + #------------------------------------------------------------- + # Bypassing tests + #------------------------------------------------------------- + + TEST_ST_RR_SRC12_BYPASS( 10, 0, 0, p.sw, lw, 0xaabbccdd, 0, tdat ); + TEST_ST_RR_SRC12_BYPASS( 11, 0, 1, p.sw, lw, 0xdaabbccd, 4, tdat ); + TEST_ST_RR_SRC12_BYPASS( 12, 0, 2, p.sw, lw, 0xddaabbcc, 8, tdat ); + TEST_ST_RR_SRC12_BYPASS( 13, 1, 0, p.sw, lw, 0xcddaabbc, 12, tdat ); + TEST_ST_RR_SRC12_BYPASS( 14, 1, 1, p.sw, lw, 0xccddaabb, 16, tdat ); + TEST_ST_RR_SRC12_BYPASS( 15, 2, 0, p.sw, lw, 0xbccddaab, 20, tdat ); + + TEST_ST_RR_SRC21_BYPASS( 16, 0, 0, p.sw, lw, 0x00112233, 0, tdat ); + 
TEST_ST_RR_SRC21_BYPASS( 17, 0, 1, p.sw, lw, 0x30011223, 4, tdat ); + TEST_ST_RR_SRC21_BYPASS( 18, 0, 2, p.sw, lw, 0x33001122, 8, tdat ); + TEST_ST_RR_SRC21_BYPASS( 19, 1, 0, p.sw, lw, 0x23300112, 12, tdat ); + TEST_ST_RR_SRC21_BYPASS( 20, 1, 1, p.sw, lw, 0x22330011, 16, tdat ); + TEST_ST_RR_SRC21_BYPASS( 21, 2, 0, p.sw, lw, 0x12233001, 20, tdat ); + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +tdat: +tdat0: .word 0xdeadbeef +tdat1: .word 0xdeadbeef +tdat2: .word 0xdeadbeef +tdat3: .word 0xdeadbeef +tdat4: .word 0xdeadbeef +tdat5: .word 0xdeadbeef +tdat6: .word 0xdeadbeef +tdat7: .word 0xdeadbeef +tdat8: .word 0xdeadbeef +tdat9: .word 0xdeadbeef + +RVTEST_DATA_END + diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/p_sw_rrpost.S b/apps/riscv-tests/isa/rv32uxpulpimg/p_sw_rrpost.S new file mode 100644 index 000000000..ce9c58d1a --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/p_sw_rrpost.S @@ -0,0 +1,71 @@ +# See LICENSE for license details. + +#***************************************************************************** +# p_sw_rrpost.S +#----------------------------------------------------------------------------- +# +# Test p.sw (register-register post-increment) instruction. 
+# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Basic tests + #------------------------------------------------------------- + + # Positive offset + TEST_ST_RR_POST_OP( 2, p.sw, lw, 0x00aa00aa, 0, tdat ); + TEST_ST_RR_POST_OP( 3, p.sw, lw, 0xaa00aa00, 4, tdat ); + TEST_ST_RR_POST_OP( 4, p.sw, lw, 0x0aa00aa0, 8, tdat ); + TEST_ST_RR_POST_OP( 5, p.sw, lw, 0xa00aa00a, 12, tdat ); + + # Negative offset + TEST_ST_RR_POST_OP( 6, p.sw, lw, 0x00aa00aa, -12, tdat8 ); + TEST_ST_RR_POST_OP( 7, p.sw, lw, 0xaa00aa00, -8, tdat8 ); + TEST_ST_RR_POST_OP( 8, p.sw, lw, 0x0aa00aa0, -4, tdat8 ); + TEST_ST_RR_POST_OP( 9, p.sw, lw, 0xa00aa00a, 0, tdat8 ); + + #------------------------------------------------------------- + # Bypassing tests + #------------------------------------------------------------- + + TEST_ST_RR_POST_SRC12_BYPASS( 10, 0, 0, p.sw, lw, 0xaabbccdd, 0, tdat ); + TEST_ST_RR_POST_SRC12_BYPASS( 11, 0, 1, p.sw, lw, 0xdaabbccd, 4, tdat ); + TEST_ST_RR_POST_SRC12_BYPASS( 12, 0, 2, p.sw, lw, 0xddaabbcc, 8, tdat ); + TEST_ST_RR_POST_SRC12_BYPASS( 13, 1, 0, p.sw, lw, 0xcddaabbc, 12, tdat ); + TEST_ST_RR_POST_SRC12_BYPASS( 14, 1, 1, p.sw, lw, 0xccddaabb, 16, tdat ); + TEST_ST_RR_POST_SRC12_BYPASS( 15, 2, 0, p.sw, lw, 0xbccddaab, 20, tdat ); + + TEST_ST_RR_POST_SRC21_BYPASS( 16, 0, 0, p.sw, lw, 0x00112233, 0, tdat ); + TEST_ST_RR_POST_SRC21_BYPASS( 17, 0, 1, p.sw, lw, 0x30011223, 4, tdat ); + TEST_ST_RR_POST_SRC21_BYPASS( 18, 0, 2, p.sw, lw, 0x33001122, 8, tdat ); + TEST_ST_RR_POST_SRC21_BYPASS( 19, 1, 0, p.sw, lw, 0x23300112, 12, tdat ); + TEST_ST_RR_POST_SRC21_BYPASS( 20, 1, 1, p.sw, lw, 0x22330011, 16, tdat ); + TEST_ST_RR_POST_SRC21_BYPASS( 21, 2, 0, p.sw, lw, 0x12233001, 20, tdat ); + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +tdat: +tdat0: .word 0xdeadbeef +tdat1: .word 0xdeadbeef +tdat2: .word 0xdeadbeef +tdat3: .word 
0xdeadbeef +tdat4: .word 0xdeadbeef +tdat5: .word 0xdeadbeef +tdat6: .word 0xdeadbeef +tdat7: .word 0xdeadbeef +tdat8: .word 0xdeadbeef +tdat9: .word 0xdeadbeef + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/pv_abs.S b/apps/riscv-tests/isa/rv32uxpulpimg/pv_abs.S new file mode 100644 index 000000000..79b4eeecd --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/pv_abs.S @@ -0,0 +1,52 @@ +# See LICENSE for license details. + +#***************************************************************************** +# pv_abs.S +#----------------------------------------------------------------------------- +# +# Test pv.abs instructions. +# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Arithmetic tests + #------------------------------------------------------------- + + # pv.abs.h + TEST_R_OP( 2, pv.abs.h, 0x10081554, 0xEFF8EAAC ); + TEST_R_OP( 3, pv.abs.h, 0x369800DA, 0x3698FF26 ); + TEST_R_OP( 4, pv.abs.h, 0x7C127B74, 0x7C12848C ); + # pv.abs.b + TEST_R_OP( 5, pv.abs.b, 0x3A444335, 0x3ABC4335 ); + TEST_R_OP( 6, pv.abs.b, 0x2B743B7C, 0x2B8C3B7C ); + TEST_R_OP( 7, pv.abs.b, 0x70362066, 0x70362066 ); + + #------------------------------------------------------------- + # Source/Destination tests + #------------------------------------------------------------- + + # TODO(smazzola): + # TEST_R_SRC1_EQ_DEST + + #------------------------------------------------------------- + # Bypassing tests + #------------------------------------------------------------- + + # TODO(smazzola): + # TEST_R_DEST_BYPASS + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/pv_add.S b/apps/riscv-tests/isa/rv32uxpulpimg/pv_add.S new file mode 100644 index 000000000..0287cc57c --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/pv_add.S @@ -0,0 +1,74 @@ +# See LICENSE for license 
details. + +#***************************************************************************** +# pv_add.S +#----------------------------------------------------------------------------- +# +# Test pv.add instructions. +# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Arithmetic tests + #------------------------------------------------------------- + + # pv.add.h + TEST_RR_OP( 2, pv.add.h, 0xC1ACF68C, 0xF014169D, 0xD198DFEF ); + TEST_RR_OP( 3, pv.add.h, 0x795F026B, 0x7ABB8DD7, 0xFEA47494 ); + TEST_RR_OP( 4, pv.add.h, 0x8ABE2A6C, 0xAA4F3E71, 0xE06FEBFB ); + # pv.add.sc.h + TEST_RR_OP( 5, pv.add.sc.h, 0x603D0BE0, 0xFC7EA821, 0x94BF63BF ); + TEST_RR_OP( 6, pv.add.sc.h, 0x4A3F89DF, 0x8E28CDC8, 0x3230BC17 ); + TEST_RR_OP( 7, pv.add.sc.h, 0x2034B556, 0x506CE58E, 0x4436CFC8 ); + # pv.add.sci.h + TEST_SIMM6_OP( 8, pv.add.sci.h, 0x77371C0E, 0x772C1C03, 11 ); + TEST_SIMM6_OP( 9, pv.add.sci.h, 0xD1BA3380, 0xD1AF3375, 11 ); + TEST_SIMM6_OP( 10, pv.add.sci.h, 0x6E73CC2D, 0x6E68CC22, 11 ); + # pv.add.b + TEST_RR_OP( 11, pv.add.b, 0x8A1518C0, 0x3E50B3BE, 0x4CC56502 ); + TEST_RR_OP( 12, pv.add.b, 0xE8E21596, 0x7ECB21CB, 0x6A17F4CB ); + TEST_RR_OP( 13, pv.add.b, 0xD48653D2, 0x2E741840, 0xA6123B92 ); + # pv.add.sc.b + TEST_RR_OP( 14, pv.add.sc.b, 0xC96CF4FF, 0x52F57D88, 0x86A5D077 ); + TEST_RR_OP( 15, pv.add.sc.b, 0x877D91A1, 0x2F253949, 0x694FD558 ); + TEST_RR_OP( 16, pv.add.sc.b, 0xC6646B7D, 0x28C6CDDF, 0x1E09659E ); + # pv.add.sci.b + TEST_SIMM6_OP( 17, pv.add.sci.b, 0x3820508C, 0x2D154581, 11 ); + TEST_SIMM6_OP( 18, pv.add.sci.b, 0xBF98380C, 0xB48D2D01, 11 ); + TEST_SIMM6_OP( 19, pv.add.sci.b, 0x90AAEB98, 0x859FE08D, 11 ); + + #------------------------------------------------------------- + # Source/Destination tests + #------------------------------------------------------------- + + # TODO(smazzola): + # for register-register instructions TEST_RR_SRC1_EQ_DEST, + # 
TEST_RR_SRC2_EQ_DEST, TEST_RR_SRC12_EQ_DEST + # for register-simm6 instructions TEST_SIMM6_SRC1_EQ_DEST + + #------------------------------------------------------------- + # Bypassing tests + #------------------------------------------------------------- + + # TODO(smazzola): + # for register-register instructions TEST_RR_DEST_BYPASS, + # TEST_RR_SRC12_BYPASS, TEST_RR_SRC21_BYPASS, TEST_RR_ZEROSRC1, + # TEST_RR_ZEROSRC2, TEST_RR_ZEROSRC12, TEST_RR_ZERODEST + # for register-simm6 instructions TEST_SIMM6_DEST_BYPASS, + # TEST_SIMM6_SRC1_BYPASS, TEST_SIMM6_ZEROSRC1, TEST_SIMM6_ZERODEST + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/pv_and.S b/apps/riscv-tests/isa/rv32uxpulpimg/pv_and.S new file mode 100644 index 000000000..328104676 --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/pv_and.S @@ -0,0 +1,74 @@ +# See LICENSE for license details. + +#***************************************************************************** +# pv_and.S +#----------------------------------------------------------------------------- +# +# Test pv.and instructions. 
+# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Arithmetic tests + #------------------------------------------------------------- + + # pv.and.h + TEST_RR_OP( 2, pv.and.h, 0xE1C028D0, 0xE1D16DD8, 0xE7E4A8F0 ); + TEST_RR_OP( 3, pv.and.h, 0x30111070, 0xB0111070, 0x3715D975 ); + TEST_RR_OP( 4, pv.and.h, 0x04000084, 0x04040AA4, 0xBD7314C7 ); + # pv.and.sc.h + TEST_RR_OP( 5, pv.and.sc.h, 0x18008480, 0x5818A5AB, 0x7C269E80 ); + TEST_RR_OP( 6, pv.and.sc.h, 0x0E0B0683, 0xCF2B6697, 0x4E211ECB ); + TEST_RR_OP( 7, pv.and.sc.h, 0x08070806, 0x28376966, 0xD7848E0F ); + # pv.and.sci.h + TEST_UIMM6_OP( 8, pv.and.sci.h, 0x00010003, 0xBFE568E7, 11 ); + TEST_UIMM6_OP( 9, pv.and.sci.h, 0x000A0001, 0xC08A6275, 11 ); + TEST_UIMM6_OP( 10, pv.and.sci.h, 0x000A0008, 0xDFEE3E6C, 11 ); + # pv.and.b + TEST_RR_OP( 11, pv.and.b, 0xA106671C, 0xE317675C, 0xADC6E7BF ); + TEST_RR_OP( 12, pv.and.b, 0x036080A8, 0xB360A0A8, 0x077A84AC ); + TEST_RR_OP( 13, pv.and.b, 0x0B430011, 0x1FDB225B, 0xCB431CB5 ); + # pv.and.sc.b + TEST_RR_OP( 14, pv.and.sc.b, 0xE4892568, 0xE48B3778, 0xCCC46AED ); + TEST_RR_OP( 15, pv.and.sc.b, 0x00000000, 0x0D96B284, 0x8B596F00 ); + TEST_RR_OP( 16, pv.and.sc.b, 0x49672C2F, 0xC9672CBF, 0x6AC7706F ); + # pv.and.sci.b + TEST_UIMM6_OP( 17, pv.and.sci.b, 0x0B090808, 0x8F29C848, 11 ); + TEST_UIMM6_OP( 18, pv.and.sci.b, 0x000A0908, 0x30EA9D78, 11 ); + TEST_UIMM6_OP( 19, pv.and.sci.b, 0x03000801, 0x83743C41, 11 ); + + #------------------------------------------------------------- + # Source/Destination tests + #------------------------------------------------------------- + + # TODO(smazzola): + # for register-register instructions TEST_RR_SRC1_EQ_DEST, + # TEST_RR_SRC2_EQ_DEST, TEST_RR_SRC12_EQ_DEST + # for register-simm6 instructions TEST_UIMM6_SRC1_EQ_DEST + + #------------------------------------------------------------- + # Bypassing tests + 
#------------------------------------------------------------- + + # TODO(smazzola): + # for register-register instructions TEST_RR_DEST_BYPASS, + # TEST_RR_SRC12_BYPASS, TEST_RR_SRC21_BYPASS, TEST_RR_ZEROSRC1, + # TEST_RR_ZEROSRC2, TEST_RR_ZEROSRC12, TEST_RR_ZERODEST + # for register-simm6 instructions TEST_UIMM6_DEST_BYPASS, + # TEST_UIMM6_SRC1_BYPASS, TEST_UIMM6_ZEROSRC1, TEST_UIMM6_ZERODEST + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/pv_avg.S b/apps/riscv-tests/isa/rv32uxpulpimg/pv_avg.S new file mode 100644 index 000000000..0a7e1ede3 --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/pv_avg.S @@ -0,0 +1,74 @@ +# See LICENSE for license details. + +#***************************************************************************** +# pv_avg.S +#----------------------------------------------------------------------------- +# +# Test pv.avg instructions. +# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Arithmetic tests + #------------------------------------------------------------- + + # pv.avg.h + TEST_RR_OP( 2, pv.avg.h, 0xDFA53D57, 0x2C5F4D25, 0x92EC2D89 ); + TEST_RR_OP( 3, pv.avg.h, 0x18A2C49C, 0xD09FBFB6, 0x60A5C983 ); + TEST_RR_OP( 4, pv.avg.h, 0xD2902560, 0xE37F8F8F, 0xC1A2BB32 ); + # pv.avg.sc.h + TEST_RR_OP( 5, pv.avg.sc.h, 0xF8B0DF51, 0x6A263768, 0xD18D873A ); + TEST_RR_OP( 6, pv.avg.sc.h, 0x29B50628, 0xDA3A9320, 0xDB667930 ); + TEST_RR_OP( 7, pv.avg.sc.h, 0x1CBDF112, 0x4376EC20, 0x13B2F605 ); + # pv.avg.sci.h + TEST_SIMM6_OP( 8, pv.avg.sci.h, 0x2F8BD535, 0x5F0CAA60, 11 ); + TEST_SIMM6_OP( 9, pv.avg.sci.h, 0x1F1B0A1B, 0x3E2B142C, 11 ); + TEST_SIMM6_OP( 10, pv.avg.sci.h, 0x1E533C46, 0x3C9C7881, 11 ); + # pv.avg.b + TEST_RR_OP( 11, pv.avg.b, 0xEF09DD01, 0x242B76A4, 0xBBE7445F ); + TEST_RR_OP( 12, pv.avg.b, 0x2C31DBEE, 0x7B0B5CD3, 0xDE575B0A ); 
+ TEST_RR_OP( 13, pv.avg.b, 0xF11E19E0, 0x0278F0DE, 0xE0C543E3 ); + # pv.avg.sc.b + TEST_RR_OP( 14, pv.avg.sc.b, 0x12E71EFC, 0x40E95813, 0xDE5394E5 ); + TEST_RR_OP( 15, pv.avg.sc.b, 0x102204DA, 0xE005C975, 0xE6677040 ); + TEST_RR_OP( 16, pv.avg.sc.b, 0x1E2ADA29, 0x2840A03D, 0xCF897515 ); + # pv.avg.sci.b + TEST_SIMM6_OP( 17, pv.avg.sci.b, 0x04D5DEFE, 0xFDA0B1F1, 11 ); + TEST_SIMM6_OP( 18, pv.avg.sci.b, 0xD4192A1E, 0x9E274932, 11 ); + TEST_SIMM6_OP( 19, pv.avg.sci.b, 0x11E3CFE6, 0x17BC93C1, 11 ); + + #------------------------------------------------------------- + # Source/Destination tests + #------------------------------------------------------------- + + # TODO(smazzola): + # for register-register instructions TEST_RR_SRC1_EQ_DEST, + # TEST_RR_SRC2_EQ_DEST, TEST_RR_SRC12_EQ_DEST + # for register-simm6 instructions TEST_SIMM6_SRC1_EQ_DEST + + #------------------------------------------------------------- + # Bypassing tests + #------------------------------------------------------------- + + # TODO(smazzola): + # for register-register instructions TEST_RR_DEST_BYPASS, + # TEST_RR_SRC12_BYPASS, TEST_RR_SRC21_BYPASS, TEST_RR_ZEROSRC1, + # TEST_RR_ZEROSRC2, TEST_RR_ZEROSRC12, TEST_RR_ZERODEST + # for register-simm6 instructions TEST_SIMM6_DEST_BYPASS, + # TEST_SIMM6_SRC1_BYPASS, TEST_SIMM6_ZEROSRC1, TEST_SIMM6_ZERODEST + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/pv_avgu.S b/apps/riscv-tests/isa/rv32uxpulpimg/pv_avgu.S new file mode 100644 index 000000000..1d4c7de8d --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/pv_avgu.S @@ -0,0 +1,74 @@ +# See LICENSE for license details. + +#***************************************************************************** +# pv_avgu.S +#----------------------------------------------------------------------------- +# +# Test pv.avgu instructions. 
+# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Arithmetic tests + #------------------------------------------------------------- + + # pv.avgu.h + TEST_RR_OP( 2, pv.avgu.h, 0x627F5574, 0xA12DA561, 0x23D10588 ); + TEST_RR_OP( 3, pv.avgu.h, 0x5F5E7CE3, 0x979062E4, 0x272C96E3 ); + TEST_RR_OP( 4, pv.avgu.h, 0x6D64331C, 0xF472E6FA, 0xE6567F3F ); + # pv.avgu.sc.h + TEST_RR_OP( 5, pv.avgu.sc.h, 0x0CED14D1, 0xD924E8ED, 0xFFB240B6 ); + TEST_RR_OP( 6, pv.avgu.sc.h, 0x127F3F7B, 0x7447CE40, 0x64E4B0B7 ); + TEST_RR_OP( 7, pv.avgu.sc.h, 0x737C50C4, 0x7D7C380C, 0xB749697C ); + # pv.avgu.sci.h + TEST_UIMM6_OP( 8, pv.avgu.sci.h, 0x76BB744A, 0xED6BE88A, 11 ); + TEST_UIMM6_OP( 9, pv.avgu.sci.h, 0x3BD96A9F, 0x77A8D534, 11 ); + TEST_UIMM6_OP( 10, pv.avgu.sci.h, 0x551A6EC8, 0xAA29DD86, 11 ); + # pv.avgu.b + TEST_RR_OP( 11, pv.avgu.b, 0x366D332C, 0x8F75F8E9, 0xDD666F70 ); + TEST_RR_OP( 12, pv.avgu.b, 0x166D3707, 0x5F0C48DF, 0xCECE2730 ); + TEST_RR_OP( 13, pv.avgu.b, 0x13390E74, 0x2D0C048B, 0xFA67185E ); + # pv.avgu.sc.b + TEST_RR_OP( 14, pv.avgu.sc.b, 0x20102F22, 0xFDDD1B00, 0x65EACB44 ); + TEST_RR_OP( 15, pv.avgu.sc.b, 0x79130A10, 0x2156444F, 0xAF0796D1 ); + TEST_RR_OP( 16, pv.avgu.sc.b, 0x44260042, 0x591DD256, 0xFBAE832F ); + # pv.avgu.sci.b + TEST_UIMM6_OP( 17, pv.avgu.sci.b, 0x016B6549, 0xF7CBBF88, 11 ); + TEST_UIMM6_OP( 18, pv.avgu.sci.b, 0x742F1E50, 0xDE543195, 11 ); + TEST_UIMM6_OP( 19, pv.avgu.sci.b, 0x34686166, 0x5EC5B7C1, 11 ); + + #------------------------------------------------------------- + # Source/Destination tests + #------------------------------------------------------------- + + # TODO(smazzola): + # for register-register instructions TEST_RR_SRC1_EQ_DEST, + # TEST_RR_SRC2_EQ_DEST, TEST_RR_SRC12_EQ_DEST + # for register-uimm6 instructions TEST_UIMM6_SRC1_EQ_DEST + + #------------------------------------------------------------- + # Bypassing tests + 
#------------------------------------------------------------- + + # TODO(smazzola): + # for register-register instructions TEST_RR_DEST_BYPASS, + # TEST_RR_SRC12_BYPASS, TEST_RR_SRC21_BYPASS, TEST_RR_ZEROSRC1, + # TEST_RR_ZEROSRC2, TEST_RR_ZEROSRC12, TEST_RR_ZERODEST + # for register-uimm6 instructions TEST_UIMM6_DEST_BYPASS, + # TEST_UIMM6_SRC1_BYPASS, TEST_UIMM6_ZEROSRC1, TEST_UIMM6_ZERODEST + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/pv_dotsp.S b/apps/riscv-tests/isa/rv32uxpulpimg/pv_dotsp.S new file mode 100644 index 000000000..9242c8bb9 --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/pv_dotsp.S @@ -0,0 +1,74 @@ +# See LICENSE for license details. + +#***************************************************************************** +# pv_dotsp.S +#----------------------------------------------------------------------------- +# +# Test pv.dotsp instructions. +# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Arithmetic tests + #------------------------------------------------------------- + + # pv.dotsp.h + TEST_RR_OP( 2, pv.dotsp.h, 0x0023A592, 0xFE67FB1A, 0x50E4DE57 ); + TEST_RR_OP( 3, pv.dotsp.h, 0x1EC63DAA, 0xADBC1E09, 0xA2C806FA ); + TEST_RR_OP( 4, pv.dotsp.h, 0x222B210B, 0x3FDAEFE7, 0x7BA5CB0F ); + # pv.dotsp.sc.h + TEST_RR_OP( 5, pv.dotsp.sc.h, 0xDE3EBCF3, 0x5E5C31BF, 0xCB24C409 ); + TEST_RR_OP( 6, pv.dotsp.sc.h, 0x03F34EE4, 0xEC042250, 0x230A4695 ); + TEST_RR_OP( 7, pv.dotsp.sc.h, 0x047909E6, 0x6BF5D085, 0x9AB012EF ); + # pv.dotsp.sci.h + TEST_SIMM6_OP( 8, pv.dotsp.sci.h, 0xFFFD1338, 0x36D2FEAA, -14 ); + TEST_SIMM6_OP( 9, pv.dotsp.sci.h, 0xFFFC68FB, 0x6752FECB, -9 ); + TEST_SIMM6_OP( 10, pv.dotsp.sci.h, 0x000098C4, 0x9747CFF5, -1 ); + # pv.dotsp.b + TEST_RR_OP( 11, pv.dotsp.b, 0x000003DA, 0xEB8A58F5, 0xCAECEE54 ); + TEST_RR_OP( 12, 
pv.dotsp.b, 0xFFFFAD05, 0x47665939, 0x9E989665 ); + TEST_RR_OP( 13, pv.dotsp.b, 0x00005335, 0x79D072B4, 0x5B8B4327 ); + # pv.dotsp.sc.b + TEST_RR_OP( 14, pv.dotsp.sc.b, 0x000059EF, 0x6F622436, 0x1E1E694D ); + TEST_RR_OP( 15, pv.dotsp.sc.b, 0x00001BDA, 0x77B8759A, 0xC1056E73 ); + TEST_RR_OP( 16, pv.dotsp.sc.b, 0x00002238, 0x74740933, 0xF898DF1E ); + # pv.dotsp.sci.b + TEST_SIMM6_OP( 17, pv.dotsp.sci.b, 0x0000006E, 0x4CD92920, 1 ); + TEST_SIMM6_OP( 18, pv.dotsp.sci.b, 0xFFFFFE20, 0xAFCE7172, -5 ); + TEST_SIMM6_OP( 19, pv.dotsp.sci.b, 0xFFFFF9FD, 0xDB25ABAA, 9 ); + + #------------------------------------------------------------- + # Source/Destination tests + #------------------------------------------------------------- + + # TODO(smazzola): + # for register-register instructions TEST_RR_SRC1_EQ_DEST, + # TEST_RR_SRC2_EQ_DEST, TEST_RR_SRC12_EQ_DEST + # for register-simm6 instructions TEST_SIMM6_SRC1_EQ_DEST + + #------------------------------------------------------------- + # Bypassing tests + #------------------------------------------------------------- + + # TODO(smazzola): + # for register-register instructions TEST_RR_DEST_BYPASS, + # TEST_RR_SRC12_BYPASS, TEST_RR_SRC21_BYPASS, TEST_RR_ZEROSRC1, + # TEST_RR_ZEROSRC2, TEST_RR_ZEROSRC12, TEST_RR_ZERODEST + # for register-simm6 instructions TEST_SIMM6_DEST_BYPASS, + # TEST_SIMM6_SRC1_BYPASS, TEST_SIMM6_ZEROSRC1, TEST_SIMM6_ZERODEST + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/pv_dotup.S b/apps/riscv-tests/isa/rv32uxpulpimg/pv_dotup.S new file mode 100644 index 000000000..71d3e470d --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/pv_dotup.S @@ -0,0 +1,74 @@ +# See LICENSE for license details. + +#***************************************************************************** +# pv_dotup.S +#----------------------------------------------------------------------------- +# +# Test pv.dotup instructions. 
+# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Arithmetic tests + #------------------------------------------------------------- + + # pv.dotup.h + TEST_RR_OP( 2, pv.dotup.h, 0x2A78A592, 0xFE67FB1A, 0x50E4DE57 ); + TEST_RR_OP( 3, pv.dotup.h, 0x6F4A3DAA, 0xADBC1E09, 0xA2C806FA ); + TEST_RR_OP( 4, pv.dotup.h, 0xDD21210B, 0x3FDAEFE7, 0x7BA5CB0F ); + # pv.dotup.sc.h + TEST_RR_OP( 5, pv.dotup.sc.h, 0x6E59BCF3, 0x5E5C31BF, 0xCB24C409 ); + TEST_RR_OP( 6, pv.dotup.sc.h, 0x4A884EE4, 0xEC042250, 0x230A4695 ); + TEST_RR_OP( 7, pv.dotup.sc.h, 0x176809E6, 0x6BF5D085, 0x9AB012EF ); + # pv.dotup.sci.h + TEST_UIMM6_OP( 8, pv.dotup.sci.h, 0x00148D3C, 0x36D2FEAA, 17 ); + TEST_UIMM6_OP( 9, pv.dotup.sci.h, 0x0002CC3A, 0x6752FECB, 2 ); + TEST_UIMM6_OP( 10, pv.dotup.sci.h, 0x000F6F94, 0x9747CFF5, 11 ); + # pv.dotup.b + TEST_RR_OP( 11, pv.dotup.b, 0x0001DADA, 0xEB8A58F5, 0xCAECEE54 ); + TEST_RR_OP( 12, pv.dotup.b, 0x0000B305, 0x47665939, 0x9E989665 ); + TEST_RR_OP( 13, pv.dotup.b, 0x0000D535, 0x79D072B4, 0x5B8B4327 ); + # pv.dotup.sc.b + TEST_RR_OP( 14, pv.dotup.sc.b, 0x000059EF, 0x6F622436, 0x1E1E694D ); + TEST_RR_OP( 15, pv.dotup.sc.b, 0x000101DA, 0x77B8759A, 0xC1056E73 ); + TEST_RR_OP( 16, pv.dotup.sc.b, 0x00002238, 0x74740933, 0xF898DF1E ); + # pv.dotup.sci.b + TEST_UIMM6_OP( 17, pv.dotup.sci.b, 0x00000FBA, 0x4CD92920, 11 ); + TEST_UIMM6_OP( 18, pv.dotup.sci.b, 0x00002140, 0xAFCE7172, 14 ); + TEST_UIMM6_OP( 19, pv.dotup.sci.b, 0x00001053, 0xDB25ABAA, 7 ); + + #------------------------------------------------------------- + # Source/Destination tests + #------------------------------------------------------------- + + # TODO(smazzola): + # for register-register instructions TEST_RR_SRC1_EQ_DEST, + # TEST_RR_SRC2_EQ_DEST, TEST_RR_SRC12_EQ_DEST + # for register-uimm6 instructions TEST_UIMM6_SRC1_EQ_DEST + + #------------------------------------------------------------- + # 
Bypassing tests + #------------------------------------------------------------- + + # TODO(smazzola): + # for register-register instructions TEST_RR_DEST_BYPASS, + # TEST_RR_SRC12_BYPASS, TEST_RR_SRC21_BYPASS, TEST_RR_ZEROSRC1, + # TEST_RR_ZEROSRC2, TEST_RR_ZEROSRC12, TEST_RR_ZERODEST + # for register-uimm6 instructions TEST_UIMM6_DEST_BYPASS, + # TEST_UIMM6_SRC1_BYPASS, TEST_UIMM6_ZEROSRC1, TEST_UIMM6_ZERODEST + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/pv_dotusp.S b/apps/riscv-tests/isa/rv32uxpulpimg/pv_dotusp.S new file mode 100644 index 000000000..17a92e165 --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/pv_dotusp.S @@ -0,0 +1,74 @@ +# See LICENSE for license details. + +#***************************************************************************** +# pv_dotusp.S +#----------------------------------------------------------------------------- +# +# Test pv.dotusp instructions. 
+# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Arithmetic tests + #------------------------------------------------------------- + + # pv.dotusp.h + TEST_RR_OP( 2, pv.dotusp.h, 0x2F5EA592, 0xFE67FB1A, 0x50E4DE57 ); + TEST_RR_OP( 3, pv.dotusp.h, 0xC18E3DAA, 0xADBC1E09, 0xA2C806FA ); + TEST_RR_OP( 4, pv.dotusp.h, 0xED3A210B, 0x3FDAEFE7, 0x7BA5CB0F ); + # pv.dotusp.sc.h + TEST_RR_OP( 5, pv.dotusp.sc.h, 0xDE3EBCF3, 0x5E5C31BF, 0xCB24C409 ); + TEST_RR_OP( 6, pv.dotusp.sc.h, 0x4A884EE4, 0xEC042250, 0x230A4695 ); + TEST_RR_OP( 7, pv.dotusp.sc.h, 0x176809E6, 0x6BF5D085, 0x9AB012EF ); + # pv.dotusp.sci.h + TEST_SIMM6_OP( 8, pv.dotusp.sci.h, 0xFFEF1338, 0x36D2FEAA, -14 ); + TEST_SIMM6_OP( 9, pv.dotusp.sci.h, 0xFFF368FB, 0x6752FECB, -9 ); + TEST_SIMM6_OP( 10, pv.dotusp.sci.h, 0xFFFE98C4, 0x9747CFF5, -1 ); + # pv.dotusp.b + TEST_RR_OP( 11, pv.dotusp.b, 0x00000DDA, 0xEB8A58F5, 0xCAECEE54 ); + TEST_RR_OP( 12, pv.dotusp.b, 0xFFFFAD05, 0x47665939, 0x9E989665 ); + TEST_RR_OP( 13, pv.dotusp.b, 0x00000535, 0x79D072B4, 0x5B8B4327 ); + # pv.dotusp.sc.b + TEST_RR_OP( 14, pv.dotusp.sc.b, 0x000059EF, 0x6F622436, 0x1E1E694D ); + TEST_RR_OP( 15, pv.dotusp.sc.b, 0x000101DA, 0x77B8759A, 0xC1056E73 ); + TEST_RR_OP( 16, pv.dotusp.sc.b, 0x00002238, 0x74740933, 0xF898DF1E ); + # pv.dotusp.sci.b + TEST_SIMM6_OP( 17, pv.dotusp.sci.b, 0x0000016E, 0x4CD92920, 1 ); + TEST_SIMM6_OP( 18, pv.dotusp.sci.b, 0xFFFFF420, 0xAFCE7172, -5 ); + TEST_SIMM6_OP( 19, pv.dotusp.sci.b, 0x000014FD, 0xDB25ABAA, 9 ); + + #------------------------------------------------------------- + # Source/Destination tests + #------------------------------------------------------------- + + # TODO(smazzola): + # for register-register instructions TEST_RR_SRC1_EQ_DEST, + # TEST_RR_SRC2_EQ_DEST, TEST_RR_SRC12_EQ_DEST + # for register-simm6 instructions TEST_SIMM6_SRC1_EQ_DEST + + 
#------------------------------------------------------------- + # Bypassing tests + #------------------------------------------------------------- + + # TODO(smazzola): + # for register-register instructions TEST_RR_DEST_BYPASS, + # TEST_RR_SRC12_BYPASS, TEST_RR_SRC21_BYPASS, TEST_RR_ZEROSRC1, + # TEST_RR_ZEROSRC2, TEST_RR_ZEROSRC12, TEST_RR_ZERODEST + # for register-simm6 instructions TEST_SIMM6_DEST_BYPASS, + # TEST_SIMM6_SRC1_BYPASS, TEST_SIMM6_ZEROSRC1, TEST_SIMM6_ZERODEST + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/pv_extract.S b/apps/riscv-tests/isa/rv32uxpulpimg/pv_extract.S new file mode 100644 index 000000000..5d0a0b70a --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/pv_extract.S @@ -0,0 +1,65 @@ +# See LICENSE for license details. + +#***************************************************************************** +# pv_extract.S +#----------------------------------------------------------------------------- +# +# Test pv.extract instructions. 
+# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Functional tests + #------------------------------------------------------------- + + # pv.extract.h + TEST_SIMM6_OP( 2, pv.extract.h, 0x00000DEA, 0x53F90DEA, 0 ); + TEST_SIMM6_OP( 3, pv.extract.h, 0x00000315, 0xC6990315, 0 ); + TEST_SIMM6_OP( 4, pv.extract.h, 0x00005B08, 0xE1415B08, 0 ); + TEST_SIMM6_OP( 5, pv.extract.h, 0x00003654, 0x3654249D, 1 ); + TEST_SIMM6_OP( 6, pv.extract.h, 0x00002EE3, 0x2EE3D9FE, 1 ); + TEST_SIMM6_OP( 7, pv.extract.h, 0xFFFF93B1, 0x93B1AA99, 1 ); + # pv.extract.b + TEST_SIMM6_OP( 8, pv.extract.b, 0xFFFFFFD9, 0x53C073D9, 0 ); + TEST_SIMM6_OP( 9, pv.extract.b, 0x0000001F, 0x269EFC1F, 0 ); + TEST_SIMM6_OP( 10, pv.extract.b, 0xFFFFFFAB, 0x0E8CD3AB, 0 ); + TEST_SIMM6_OP( 11, pv.extract.b, 0x0000004A, 0xF7964A55, 1 ); + TEST_SIMM6_OP( 12, pv.extract.b, 0x0000006C, 0x1F366C84, 1 ); + TEST_SIMM6_OP( 13, pv.extract.b, 0x0000005B, 0x11205B09, 1 ); + TEST_SIMM6_OP( 14, pv.extract.b, 0x00000036, 0x2C36C818, 2 ); + TEST_SIMM6_OP( 15, pv.extract.b, 0x00000003, 0x4C039923, 2 ); + TEST_SIMM6_OP( 16, pv.extract.b, 0x0000007E, 0x057ED2EE, 2 ); + TEST_SIMM6_OP( 17, pv.extract.b, 0x00000056, 0x56B005BB, 3 ); + TEST_SIMM6_OP( 18, pv.extract.b, 0xFFFFFFE7, 0xE7798BAA, 3 ); + TEST_SIMM6_OP( 19, pv.extract.b, 0xFFFFFFF3, 0xF3F956A2, 3 ); + + #------------------------------------------------------------- + # Source/Destination tests + #------------------------------------------------------------- + + # TODO(smazzola): + # TEST_SIMM6_SRC1_EQ_DEST + + #------------------------------------------------------------- + # Bypassing tests + #------------------------------------------------------------- + + # TODO(smazzola): + # TEST_SIMM6_DEST_BYPASS, TEST_SIMM6_SRC1_BYPASS, + # TEST_SIMM6_ZEROSRC1, TEST_SIMM6_ZERODEST + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + 
+RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/pv_extractu.S b/apps/riscv-tests/isa/rv32uxpulpimg/pv_extractu.S new file mode 100644 index 000000000..ccd6e37c0 --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/pv_extractu.S @@ -0,0 +1,65 @@ +# See LICENSE for license details. + +#***************************************************************************** +# pv_extractu.S +#----------------------------------------------------------------------------- +# +# Test pv.extractu instructions. +# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Functional tests + #------------------------------------------------------------- + + # pv.extractu.h + TEST_UIMM6_OP( 2, pv.extractu.h, 0x0000A18D, 0xED2CA18D, 0 ); + TEST_UIMM6_OP( 3, pv.extractu.h, 0x00006A18, 0x3C576A18, 0 ); + TEST_UIMM6_OP( 4, pv.extractu.h, 0x000040A2, 0x1DAB40A2, 0 ); + TEST_UIMM6_OP( 5, pv.extractu.h, 0x0000BC96, 0xBC969BEC, 1 ); + TEST_UIMM6_OP( 6, pv.extractu.h, 0x0000DF7E, 0xDF7E4D2B, 1 ); + TEST_UIMM6_OP( 7, pv.extractu.h, 0x000099AE, 0x99AEE13C, 1 ); + # pv.extractu.b + TEST_UIMM6_OP( 8, pv.extractu.b, 0x00000046, 0x8FA19B46, 0 ); + TEST_UIMM6_OP( 9, pv.extractu.b, 0x0000009A, 0xE19C009A, 0 ); + TEST_UIMM6_OP( 10, pv.extractu.b, 0x0000002A, 0x408D722A, 0 ); + TEST_UIMM6_OP( 11, pv.extractu.b, 0x0000006C, 0xA2AF6C67, 1 ); + TEST_UIMM6_OP( 12, pv.extractu.b, 0x0000001F, 0xDE671F25, 1 ); + TEST_UIMM6_OP( 13, pv.extractu.b, 0x00000046, 0x325D46CE, 1 ); + TEST_UIMM6_OP( 14, pv.extractu.b, 0x00000003, 0x4603F967, 2 ); + TEST_UIMM6_OP( 15, pv.extractu.b, 0x000000C1, 0xDBC1292F, 2 ); + TEST_UIMM6_OP( 16, pv.extractu.b, 0x000000D6, 0xE7D631CF, 2 ); + TEST_UIMM6_OP( 17, pv.extractu.b, 0x00000020, 0x20B64275, 3 ); + TEST_UIMM6_OP( 18, pv.extractu.b, 0x000000D6, 0xD64B2CC0, 3 ); + TEST_UIMM6_OP( 19, pv.extractu.b, 0x00000084, 0x845485BD, 3 ); + + 
#------------------------------------------------------------- + # Source/Destination tests + #------------------------------------------------------------- + + # TODO(smazzola): + # TEST_UIMM6_SRC1_EQ_DEST + + #------------------------------------------------------------- + # Bypassing tests + #------------------------------------------------------------- + + # TODO(smazzola): + # TEST_UIMM6_DEST_BYPASS, TEST_UIMM6_SRC1_BYPASS, + # TEST_UIMM6_ZEROSRC1, TEST_UIMM6_ZERODEST + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/pv_insert.S b/apps/riscv-tests/isa/rv32uxpulpimg/pv_insert.S new file mode 100644 index 000000000..aa427d6ac --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/pv_insert.S @@ -0,0 +1,87 @@ +# See LICENSE for license details. + +#***************************************************************************** +# pv_insert.S +#----------------------------------------------------------------------------- +# +# Test pv.insert instructions. 
+# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Functional tests + #------------------------------------------------------------- + + # load immediate in test register necessary before execution + # of each test case because the output is dependent on the + # previous state of rD + + # pv.insert.h + li x14, 0x6ACB7454; + TEST_UIMM6_OP( 2, pv.insert.h, 0x6ACBF419, 0x3A12F419, 0 ); + li x14, 0x2BCBE5BA; + TEST_UIMM6_OP( 3, pv.insert.h, 0x2BCB3FEE, 0x86013FEE, 0 ); + li x14, 0x8E18DBE7; + TEST_UIMM6_OP( 4, pv.insert.h, 0x8E18C59F, 0x7153C59F, 0 ); + li x14, 0x57DF0195; + TEST_UIMM6_OP( 5, pv.insert.h, 0x00F60195, 0x267700F6, 1 ); + li x14, 0x7825C668; + TEST_UIMM6_OP( 6, pv.insert.h, 0x17F7C668, 0x04A017F7, 1 ); + li x14, 0xDBC05DC7; + TEST_UIMM6_OP( 7, pv.insert.h, 0xF7455DC7, 0x3569F745, 1 ); + # pv.insert.b + li x14, 0x5C93979B; + TEST_UIMM6_OP( 8, pv.insert.b, 0x5C93979C, 0x955C289C, 0 ); + li x14, 0x4696DE77; + TEST_UIMM6_OP( 9, pv.insert.b, 0x4696DEB0, 0x00E6ADB0, 0 ); + li x14, 0x48024613; + TEST_UIMM6_OP( 10, pv.insert.b, 0x4802465E, 0xE580375E, 0 ); + li x14, 0x55963E26; + TEST_UIMM6_OP( 11, pv.insert.b, 0x5596FC26, 0x215C2AFC, 1 ); + li x14, 0xAA2930B8; + TEST_UIMM6_OP( 12, pv.insert.b, 0xAA29DCB8, 0xE0318DDC, 1 ); + li x14, 0x844521DE; + TEST_UIMM6_OP( 13, pv.insert.b, 0x84459DDE, 0x12ED4F9D, 1 ); + li x14, 0xFE27DE9A; + TEST_UIMM6_OP( 14, pv.insert.b, 0xFED1DE9A, 0xC72B60D1, 2 ); + li x14, 0x41075730; + TEST_UIMM6_OP( 15, pv.insert.b, 0x41065730, 0x63ED6A06, 2 ); + li x14, 0xFD9C6336; + TEST_UIMM6_OP( 16, pv.insert.b, 0xFD426336, 0xA924A142, 2 ); + li x14, 0x2A3A8341; + TEST_UIMM6_OP( 17, pv.insert.b, 0x513A8341, 0x6B50F251, 3 ); + li x14, 0x59FBF2A7; + TEST_UIMM6_OP( 18, pv.insert.b, 0x31FBF2A7, 0x41767331, 3 ); + li x14, 0xE056E2B2; + TEST_UIMM6_OP( 19, pv.insert.b, 0x8C56E2B2, 0x2B08038C, 3 ); + + 
#------------------------------------------------------------- + # Source/Destination tests + #------------------------------------------------------------- + + # TODO(smazzola): + # TEST_UIMM6_SRC1_EQ_DEST + + #------------------------------------------------------------- + # Bypassing tests + #------------------------------------------------------------- + + # TODO(smazzola): + # TEST_UIMM6_DEST_BYPASS, TEST_UIMM6_SRC1_BYPASS, + # TEST_UIMM6_ZEROSRC1, TEST_UIMM6_ZERODEST + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/pv_max.S b/apps/riscv-tests/isa/rv32uxpulpimg/pv_max.S new file mode 100644 index 000000000..20f4c69f8 --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/pv_max.S @@ -0,0 +1,74 @@ +# See LICENSE for license details. + +#***************************************************************************** +# pv_max.S +#----------------------------------------------------------------------------- +# +# Test pv.max instructions. 
+# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Arithmetic tests + #------------------------------------------------------------- + + # pv.max.h + TEST_RR_OP( 2, pv.max.h, 0x731E1846, 0xF4D3B4D4, 0x731E1846 ); + TEST_RR_OP( 3, pv.max.h, 0x0E5963C7, 0x0E5963C7, 0xC078A04B ); + TEST_RR_OP( 4, pv.max.h, 0x10AF37F3, 0x10AF37F3, 0xA4DA964F ); + # pv.max.sc.h + TEST_RR_OP( 5, pv.max.sc.h, 0xFDCCFDCC, 0xC86DA7A4, 0x5AC6FDCC ); + TEST_RR_OP( 6, pv.max.sc.h, 0x6F096F09, 0x1EBE021F, 0xAEAF6F09 ); + TEST_RR_OP( 7, pv.max.sc.h, 0x72AF72AF, 0xCD6ACE5B, 0xA0D172AF ); + # pv.max.sci.h + TEST_SIMM6_OP( 8, pv.max.sci.h, 0x000B1FDD, 0xAD6D1FDD, 11 ); + TEST_SIMM6_OP( 9, pv.max.sci.h, 0x000B000B, 0xAAF6EBB0, 11 ); + TEST_SIMM6_OP( 10, pv.max.sci.h, 0x252B000B, 0x252BC255, 11 ); + # pv.max.b + TEST_RR_OP( 11, pv.max.b, 0xF2402D09, 0xB040FD9D, 0xF2842D09 ); + TEST_RR_OP( 12, pv.max.b, 0x78652008, 0xD749FDBE, 0x78652008 ); + TEST_RR_OP( 13, pv.max.b, 0xC5755F6A, 0xC5755F6A, 0xAD1CD088 ); + # pv.max.sc.b + TEST_RR_OP( 14, pv.max.sc.b, 0x75757575, 0x01B6C06B, 0xC1698275 ); + TEST_RR_OP( 15, pv.max.sc.b, 0x7B7B7B7B, 0x4A547B78, 0xCD4D377B ); + TEST_RR_OP( 16, pv.max.sc.b, 0x5D7B5D5F, 0x027B0E5F, 0x595E995D ); + # pv.max.sci.b + TEST_SIMM6_OP( 17, pv.max.sci.b, 0x0B0B0B0B, 0xEB06FBAB, 11 ); + TEST_SIMM6_OP( 18, pv.max.sci.b, 0x56240B26, 0x56249726, 11 ); + TEST_SIMM6_OP( 19, pv.max.sci.b, 0x5F32211E, 0x5F32211E, 11 ); + + #------------------------------------------------------------- + # Source/Destination tests + #------------------------------------------------------------- + + # TODO(smazzola): + # for register-register instructions TEST_RR_SRC1_EQ_DEST, + # TEST_RR_SRC2_EQ_DEST, TEST_RR_SRC12_EQ_DEST + # for register-simm6 instructions TEST_SIMM6_SRC1_EQ_DEST + + #------------------------------------------------------------- + # Bypassing tests + 
#------------------------------------------------------------- + + # TODO(smazzola): + # for register-register instructions TEST_RR_DEST_BYPASS, + # TEST_RR_SRC12_BYPASS, TEST_RR_SRC21_BYPASS, TEST_RR_ZEROSRC1, + # TEST_RR_ZEROSRC2, TEST_RR_ZEROSRC12, TEST_RR_ZERODEST + # for register-simm6 instructions TEST_SIMM6_DEST_BYPASS, + # TEST_SIMM6_SRC1_BYPASS, TEST_SIMM6_ZEROSRC1, TEST_SIMM6_ZERODEST + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/pv_maxu.S b/apps/riscv-tests/isa/rv32uxpulpimg/pv_maxu.S new file mode 100644 index 000000000..e5451559f --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/pv_maxu.S @@ -0,0 +1,74 @@ +# See LICENSE for license details. + +#***************************************************************************** +# pv_maxu.S +#----------------------------------------------------------------------------- +# +# Test pv.maxu instructions. +# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Arithmetic tests + #------------------------------------------------------------- + + # pv.maxu.h + TEST_RR_OP( 2, pv.maxu.h, 0xBA529136, 0x3C369136, 0xBA524CAE ); + TEST_RR_OP( 3, pv.maxu.h, 0xC9E65AD2, 0xC9E60677, 0x00145AD2 ); + TEST_RR_OP( 4, pv.maxu.h, 0x42D67990, 0x42D67990, 0x244A0E31 ); + # pv.maxu.sc.h + TEST_RR_OP( 5, pv.maxu.sc.h, 0x36D0CB1F, 0x36D0CB1F, 0x426D0434 ); + TEST_RR_OP( 6, pv.maxu.sc.h, 0xAE6DE3C7, 0xAE6DE3C7, 0x6ACB58AD ); + TEST_RR_OP( 7, pv.maxu.sc.h, 0xB6CDD3B0, 0xB6CDD3B0, 0x1CF29759 ); + # pv.maxu.sci.h + TEST_UIMM6_OP( 8, pv.maxu.sci.h, 0xF503CA6A, 0xF503CA6A, 11 ); + TEST_UIMM6_OP( 9, pv.maxu.sci.h, 0x6781179C, 0x6781179C, 11 ); + TEST_UIMM6_OP( 10, pv.maxu.sci.h, 0xB778D8A3, 0xB778D8A3, 11 ); + # pv.maxu.b + TEST_RR_OP( 11, pv.maxu.b, 0x17F9C1D2, 0x0DF91FD2, 0x1703C18D ); + TEST_RR_OP( 12, pv.maxu.b, 0xD04FEFA4, 
0x434FEFA4, 0xD032B42E ); + TEST_RR_OP( 13, pv.maxu.b, 0x8A95BFF2, 0x56955708, 0x8A08BFF2 ); + # pv.maxu.sc.b + TEST_RR_OP( 14, pv.maxu.sc.b, 0xE8E8E8E8, 0x318C6A64, 0x82B8BEE8 ); + TEST_RR_OP( 15, pv.maxu.sc.b, 0xDFE73926, 0xDFE73909, 0xBFC58126 ); + TEST_RR_OP( 16, pv.maxu.sc.b, 0x9DF09D9D, 0x6FF07641, 0x5689109D ); + # pv.maxu.sci.b + TEST_UIMM6_OP( 17, pv.maxu.sci.b, 0x0B7062D8, 0x0A7062D8, 11 ); + TEST_UIMM6_OP( 18, pv.maxu.sci.b, 0x0B469D5D, 0x07469D5D, 11 ); + TEST_UIMM6_OP( 19, pv.maxu.sci.b, 0x4E875E27, 0x4E875E27, 11 ); + + #------------------------------------------------------------- + # Source/Destination tests + #------------------------------------------------------------- + + # TODO(smazzola): + # for register-register instructions TEST_RR_SRC1_EQ_DEST, + # TEST_RR_SRC2_EQ_DEST, TEST_RR_SRC12_EQ_DEST + # for register-uimm6 instructions TEST_UIMM6_SRC1_EQ_DEST + + #------------------------------------------------------------- + # Bypassing tests + #------------------------------------------------------------- + + # TODO(smazzola): + # for register-register instructions TEST_RR_DEST_BYPASS, + # TEST_RR_SRC12_BYPASS, TEST_RR_SRC21_BYPASS, TEST_RR_ZEROSRC1, + # TEST_RR_ZEROSRC2, TEST_RR_ZEROSRC12, TEST_RR_ZERODEST + # for register-uimm6 instructions TEST_UIMM6_DEST_BYPASS, + # TEST_UIMM6_SRC1_BYPASS, TEST_UIMM6_ZEROSRC1, TEST_UIMM6_ZERODEST + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/pv_min.S b/apps/riscv-tests/isa/rv32uxpulpimg/pv_min.S new file mode 100644 index 000000000..c402ffad2 --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/pv_min.S @@ -0,0 +1,74 @@ +# See LICENSE for license details. + +#***************************************************************************** +# pv_min.S +#----------------------------------------------------------------------------- +# +# Test pv.min instructions. 
+# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Arithmetic tests + #------------------------------------------------------------- + + # pv.min.h + TEST_RR_OP( 2, pv.min.h, 0x82E7A6AB, 0x8A92A6AB, 0x82E77B73 ); + TEST_RR_OP( 3, pv.min.h, 0xE3770915, 0xE37773E8, 0x44330915 ); + TEST_RR_OP( 4, pv.min.h, 0x85B0BA95, 0x85B0E12E, 0x6CA1BA95 ); + # pv.min.sc.h + TEST_RR_OP( 5, pv.min.sc.h, 0x33A388EB, 0x33A388EB, 0xA73B6225 ); + TEST_RR_OP( 6, pv.min.sc.h, 0xB2D1B2D1, 0x6C255634, 0xC06DB2D1 ); + TEST_RR_OP( 7, pv.min.sc.h, 0xED24CC32, 0xED24CC32, 0x20DD20AE ); + # pv.min.sci.h + TEST_SIMM6_OP( 8, pv.min.sci.h, 0x000BABA8, 0x3116ABA8, 11 ); + TEST_SIMM6_OP( 9, pv.min.sci.h, 0xF270FB23, 0xF270FB23, 11 ); + TEST_SIMM6_OP( 10, pv.min.sci.h, 0xF45DE902, 0xF45DE902, 11 ); + # pv.min.b + TEST_RR_OP( 11, pv.min.b, 0x3BD1A58C, 0x3BF5A5CD, 0x59D1618C ); + TEST_RR_OP( 12, pv.min.b, 0x99C52CBA, 0xF4D42C6F, 0x99C57ABA ); + TEST_RR_OP( 13, pv.min.b, 0x13CB8AE9, 0x13CB8AE9, 0x47F8D538 ); + # pv.min.sc.b + TEST_RR_OP( 14, pv.min.sc.b, 0xDCC161BB, 0xDCC161BB, 0x41A0EA7B ); + TEST_RR_OP( 15, pv.min.sc.b, 0x3059A553, 0x3059A553, 0xB80EA978 ); + TEST_RR_OP( 16, pv.min.sc.b, 0x97ECEFEF, 0x97EC4211, 0x8059FEEF ); + # pv.min.sci.b + TEST_SIMM6_OP( 17, pv.min.sci.b, 0x0B0BB986, 0x732DB986, 11 ); + TEST_SIMM6_OP( 18, pv.min.sci.b, 0xF40B0BE7, 0xF47567E7, 11 ); + TEST_SIMM6_OP( 19, pv.min.sci.b, 0x0BF70B0B, 0x7DF77268, 11 ); + + #------------------------------------------------------------- + # Source/Destination tests + #------------------------------------------------------------- + + # TODO(smazzola): + # for register-register instructions TEST_RR_SRC1_EQ_DEST, + # TEST_RR_SRC2_EQ_DEST, TEST_RR_SRC12_EQ_DEST + # for register-simm6 instructions TEST_SIMM6_SRC1_EQ_DEST + + #------------------------------------------------------------- + # Bypassing tests + 
#------------------------------------------------------------- + + # TODO(smazzola): + # for register-register instructions TEST_RR_DEST_BYPASS, + # TEST_RR_SRC12_BYPASS, TEST_RR_SRC21_BYPASS, TEST_RR_ZEROSRC1, + # TEST_RR_ZEROSRC2, TEST_RR_ZEROSRC12, TEST_RR_ZERODEST + # for register-simm6 instructions TEST_SIMM6_DEST_BYPASS, + # TEST_SIMM6_SRC1_BYPASS, TEST_SIMM6_ZEROSRC1, TEST_SIMM6_ZERODEST + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/pv_minu.S b/apps/riscv-tests/isa/rv32uxpulpimg/pv_minu.S new file mode 100644 index 000000000..4c875e427 --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/pv_minu.S @@ -0,0 +1,74 @@ +# See LICENSE for license details. + +#***************************************************************************** +# pv_minu.S +#----------------------------------------------------------------------------- +# +# Test pv.minu instructions. +# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Arithmetic tests + #------------------------------------------------------------- + + # pv.minu.h + TEST_RR_OP( 2, pv.minu.h, 0x6A212A68, 0xE2B42A68, 0x6A212B4A ); + TEST_RR_OP( 3, pv.minu.h, 0x50AA637D, 0xC219637D, 0x50AA84CC ); + TEST_RR_OP( 4, pv.minu.h, 0x579039EE, 0x92C439EE, 0x5790DCDC ); + # pv.minu.sc.h + TEST_RR_OP( 5, pv.minu.sc.h, 0x7EF4A8D0, 0x7EF4D67D, 0x268CA8D0 ); + TEST_RR_OP( 6, pv.minu.sc.h, 0x8C9F3E8A, 0x9A9F3E8A, 0x2E6A8C9F ); + TEST_RR_OP( 7, pv.minu.sc.h, 0x844C6178, 0x844C6178, 0x7A819ECF ); + # pv.minu.sci.h + TEST_UIMM6_OP( 8, pv.minu.sci.h, 0x000B000B, 0x2E8024BF, 11 ); + TEST_UIMM6_OP( 9, pv.minu.sci.h, 0x000B000B, 0x7070C7D7, 11 ); + TEST_UIMM6_OP( 10, pv.minu.sci.h, 0x000B000B, 0x6955494F, 11 ); + # pv.minu.b + TEST_RR_OP( 11, pv.minu.b, 0x46A4170C, 0x46F51795, 0xAAA4C60C ); + TEST_RR_OP( 12, pv.minu.b, 0xA33FAB2C, 
0xA33FAB2C, 0xE140C044 ); + TEST_RR_OP( 13, pv.minu.b, 0x43015111, 0xF49B5111, 0x43018736 ); + # pv.minu.sc.b + TEST_RR_OP( 14, pv.minu.sc.b, 0x3EB0291F, 0x3EBF291F, 0x909B9AB0 ); + TEST_RR_OP( 15, pv.minu.sc.b, 0x000C0C0C, 0x00555837, 0x7F61610C ); + TEST_RR_OP( 16, pv.minu.sc.b, 0x01010101, 0x47AB06B4, 0x0E0F9001 ); + # pv.minu.sci.b + TEST_UIMM6_OP( 17, pv.minu.sci.b, 0x0B0B0B0B, 0xBEAF5AAB, 11 ); + TEST_UIMM6_OP( 18, pv.minu.sci.b, 0x0B0B0B0B, 0xDC152410, 11 ); + TEST_UIMM6_OP( 19, pv.minu.sci.b, 0x0B0B0B0B, 0x1DAD56C8, 11 ); + + #------------------------------------------------------------- + # Source/Destination tests + #------------------------------------------------------------- + + # TODO(smazzola): + # for register-register instructions TEST_RR_SRC1_EQ_DEST, + # TEST_RR_SRC2_EQ_DEST, TEST_RR_SRC12_EQ_DEST + # for register-uimm6 instructions TEST_UIMM6_SRC1_EQ_DEST + + #------------------------------------------------------------- + # Bypassing tests + #------------------------------------------------------------- + + # TODO(smazzola): + # for register-register instructions TEST_RR_DEST_BYPASS, + # TEST_RR_SRC12_BYPASS, TEST_RR_SRC21_BYPASS, TEST_RR_ZEROSRC1, + # TEST_RR_ZEROSRC2, TEST_RR_ZEROSRC12, TEST_RR_ZERODEST + # for register-uimm6 instructions TEST_UIMM6_DEST_BYPASS, + # TEST_UIMM6_SRC1_BYPASS, TEST_UIMM6_ZEROSRC1, TEST_UIMM6_ZERODEST + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/pv_or.S b/apps/riscv-tests/isa/rv32uxpulpimg/pv_or.S new file mode 100644 index 000000000..821377c14 --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/pv_or.S @@ -0,0 +1,74 @@ +# See LICENSE for license details. + +#***************************************************************************** +# pv_or.S +#----------------------------------------------------------------------------- +# +# Test pv.or instructions. 
+# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Arithmetic tests + #------------------------------------------------------------- + + # pv.or.h + TEST_RR_OP( 2, pv.or.h, 0xDFEFB3F3, 0x9E678370, 0x418EB1F3 ); + TEST_RR_OP( 3, pv.or.h, 0x7EFFDD7D, 0x727D5079, 0x7C968D05 ); + TEST_RR_OP( 4, pv.or.h, 0x5FAEEFD9, 0x4BAEA991, 0x1604EFD9 ); + # pv.or.sc.h + TEST_RR_OP( 5, pv.or.sc.h, 0x7FB2EFF1, 0x5782C951, 0x886D2FB0 ); + TEST_RR_OP( 6, pv.or.sc.h, 0x7F9E6FF9, 0x5E0E04F9, 0x248B6F98 ); + TEST_RR_OP( 7, pv.or.sc.h, 0xBD9BBD4B, 0x85998C42, 0xD1F2B90B ); + # pv.or.sci.h + TEST_UIMM6_OP( 8, pv.or.sci.h, 0x4F6F5CBF, 0x4F645CB5, 11 ); + TEST_UIMM6_OP( 9, pv.or.sci.h, 0x34DF2B7B, 0x34DD2B73, 11 ); + TEST_UIMM6_OP( 10, pv.or.sci.h, 0xE73F5DEF, 0xE73D5DE5, 11 ); + # pv.or.b + TEST_RR_OP( 11, pv.or.b, 0xFDFAFC34, 0x25AA9830, 0xD8706434 ); + TEST_RR_OP( 12, pv.or.b, 0x9C7BF5EF, 0x9C41746A, 0x003BC1ED ); + TEST_RR_OP( 13, pv.or.b, 0x7BEBAEFF, 0x7B4BA8E2, 0x1BA3263F ); + # pv.or.sc.b + TEST_RR_OP( 14, pv.or.sc.b, 0xFFF7B7B7, 0xFDE72320, 0x26977A97 ); + TEST_RR_OP( 15, pv.or.sc.b, 0xFFD5F5FD, 0xEA55E02C, 0x0D23AFD5 ); + TEST_RR_OP( 16, pv.or.sc.b, 0xDBDFFBD3, 0x9B8E6BC3, 0xCF31CDD3 ); + # pv.or.sci.b + TEST_UIMM6_OP( 17, pv.or.sci.b, 0xCBCF2F9B, 0xC2CC2499, 11 ); + TEST_UIMM6_OP( 18, pv.or.sci.b, 0x3BEF1BEB, 0x33EE13E0, 11 ); + TEST_UIMM6_OP( 19, pv.or.sci.b, 0xFB5B5BEF, 0xF05252EE, 11 ); + + #------------------------------------------------------------- + # Source/Destination tests + #------------------------------------------------------------- + + # TODO(smazzola): + # for register-register instructions TEST_RR_SRC1_EQ_DEST, + # TEST_RR_SRC2_EQ_DEST, TEST_RR_SRC12_EQ_DEST + # for register-uimm6 instructions TEST_UIMM6_SRC1_EQ_DEST + + #------------------------------------------------------------- + # Bypassing tests + 
#------------------------------------------------------------- + + # TODO(smazzola): + # for register-register instructions TEST_RR_DEST_BYPASS, + # TEST_RR_SRC12_BYPASS, TEST_RR_SRC21_BYPASS, TEST_RR_ZEROSRC1, + # TEST_RR_ZEROSRC2, TEST_RR_ZEROSRC12, TEST_RR_ZERODEST + # for register-uimm6 instructions TEST_UIMM6_DEST_BYPASS, + # TEST_UIMM6_SRC1_BYPASS, TEST_UIMM6_ZEROSRC1, TEST_UIMM6_ZERODEST + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/pv_sdotsp.S b/apps/riscv-tests/isa/rv32uxpulpimg/pv_sdotsp.S new file mode 100644 index 000000000..c41784263 --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/pv_sdotsp.S @@ -0,0 +1,74 @@ +# See LICENSE for license details. + +#***************************************************************************** +# pv_sdotsp.S +#----------------------------------------------------------------------------- +# +# Test pv.sdotsp instructions. +# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Arithmetic tests + #------------------------------------------------------------- + + # pv.sdotsp.h + TEST_RRR_OP( 2, pv.sdotsp.h, 0x8588AF48, 0xFE67FB1A, 0x50E4DE57, 0x856509B6 ); + TEST_RRR_OP( 3, pv.sdotsp.h, 0xA5102DA6, 0xADBC1E09, 0xA2C806FA, 0x8649EFFC ); + TEST_RRR_OP( 4, pv.sdotsp.h, 0xB6C05945, 0x3FDAEFE7, 0x7BA5CB0F, 0x9495383A ); + # pv.sdotsp.sc.h + TEST_RRR_OP( 5, pv.sdotsp.sc.h, 0x76464853, 0x5E5C31BF, 0xCB24C409, 0x98078B60 ); + TEST_RRR_OP( 6, pv.sdotsp.sc.h, 0xBAB1856D, 0xEC042250, 0x230A4695, 0xB6BE3689 ); + TEST_RRR_OP( 7, pv.sdotsp.sc.h, 0xA318DEC3, 0x6BF5D085, 0x9AB012EF, 0x9E9FD4DD ); + # pv.sdotsp.sci.h + TEST_RR_SIMM6_OP( 8, pv.sdotsp.sci.h, 0x6AA9C4BB, 0x36D2FEAA, -14, 0x6AACB183 ); + TEST_RR_SIMM6_OP( 9, pv.sdotsp.sci.h, 0xA61C8356, 0x6752FECB, -9, 0xA6201A5B ); + TEST_RR_SIMM6_OP( 10, pv.sdotsp.sci.h, 0x968EF09B, 
0x9747CFF5, -1, 0x968E57D7 ); + # pv.sdotsp.b + TEST_RRR_OP( 11, pv.sdotsp.b, 0x6BF81516, 0xEB8A58F5, 0xCAECEE54, 0x6BF8113C ); + TEST_RRR_OP( 12, pv.sdotsp.b, 0x5D238DA6, 0x47665939, 0x9E989665, 0x5D23E0A1 ); + TEST_RRR_OP( 13, pv.sdotsp.b, 0xC511714F, 0x79D072B4, 0x5B8B4327, 0xC5111E1A ); + # pv.sdotsp.sc.b + TEST_RRR_OP( 14, pv.sdotsp.sc.b, 0x7C691AEB, 0x6F622436, 0x1E1E694D, 0x7C68C0FC ); + TEST_RRR_OP( 15, pv.sdotsp.sc.b, 0xAC521CE2, 0x77B8759A, 0xC1056E73, 0xAC520108 ); + TEST_RRR_OP( 16, pv.sdotsp.sc.b, 0xAEA211C3, 0x74740933, 0xF898DF1E, 0xAEA1EF8B ); + # pv.sdotsp.sci.b + TEST_RR_SIMM6_OP( 17, pv.sdotsp.sci.b, 0x86CD84EE, 0x4CD92920, 1, 0x86CD8480 ); + TEST_RR_SIMM6_OP( 18, pv.sdotsp.sci.b, 0x82399E03, 0xAFCE7172, -5, 0x82399FE3 ); + TEST_RR_SIMM6_OP( 19, pv.sdotsp.sci.b, 0x3F752492, 0xDB25ABAA, 9, 0x3F752A95 ); + + #------------------------------------------------------------- + # Source/Destination tests + #------------------------------------------------------------- + + # TODO(smazzola): + # for reg-reg-reg instructions TEST_RRR_SRC1_EQ_DEST, + # TEST_RRR_SRC2_EQ_DEST, TEST_RRR_SRC12_EQ_DEST + # for reg-simm6-reg instructions *macros still to be written* + + #------------------------------------------------------------- + # Bypassing tests + #------------------------------------------------------------- + + # TODO(smazzola): + # for reg-reg-reg instructions TEST_RRR_DEST_BYPASS, + # TEST_RRR_SRC12_BYPASS, TEST_RRR_SRC21_BYPASS, TEST_RRR_SRC3_BYPASS, + # TEST_RRR_ZEROSRC1, TEST_RRR_ZEROSRC2, TEST_RRR_ZEROSRC3, + # TEST_RRR_ZEROSRC12, TEST_RRR_ZEROSRC123, TEST_RRR_ZERODEST + # for reg-simm6-reg instructions *macros still to be written* + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/pv_sdotup.S b/apps/riscv-tests/isa/rv32uxpulpimg/pv_sdotup.S new file mode 100644 index 000000000..7e99c6415 --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/pv_sdotup.S 
@@ -0,0 +1,74 @@ +# See LICENSE for license details. + +#***************************************************************************** +# pv_sdotup.S +#----------------------------------------------------------------------------- +# +# Test pv.sdotup instructions. +# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Arithmetic tests + #------------------------------------------------------------- + + # pv.sdotup.h + TEST_RRR_OP( 2, pv.sdotup.h, 0xAFDDAF48, 0xFE67FB1A, 0x50E4DE57, 0x856509B6 ); + TEST_RRR_OP( 3, pv.sdotup.h, 0xF5942DA6, 0xADBC1E09, 0xA2C806FA, 0x8649EFFC ); + TEST_RRR_OP( 4, pv.sdotup.h, 0x71B65945, 0x3FDAEFE7, 0x7BA5CB0F, 0x9495383A ); + # pv.sdotup.sc.h + TEST_RRR_OP( 5, pv.sdotup.sc.h, 0x06614853, 0x5E5C31BF, 0xCB24C409, 0x98078B60 ); + TEST_RRR_OP( 6, pv.sdotup.sc.h, 0x0146856D, 0xEC042250, 0x230A4695, 0xB6BE3689 ); + TEST_RRR_OP( 7, pv.sdotup.sc.h, 0xB607DEC3, 0x6BF5D085, 0x9AB012EF, 0x9E9FD4DD ); + # pv.sdotup.sci.h + TEST_RR_UIMM6_OP( 8, pv.sdotup.sci.h, 0x6AC13EBF, 0x36D2FEAA, 17, 0x6AACB183 ); + TEST_RR_UIMM6_OP( 9, pv.sdotup.sci.h, 0xA622E695, 0x6752FECB, 2, 0xA6201A5B ); + TEST_RR_UIMM6_OP( 10, pv.sdotup.sci.h, 0x969DC76B, 0x9747CFF5, 11, 0x968E57D7 ); + # pv.sdotup.b + TEST_RRR_OP( 11, pv.sdotup.b, 0x6BF9EC16, 0xEB8A58F5, 0xCAECEE54, 0x6BF8113C ); + TEST_RRR_OP( 12, pv.sdotup.b, 0x5D2493A6, 0x47665939, 0x9E989665, 0x5D23E0A1 ); + TEST_RRR_OP( 13, pv.sdotup.b, 0xC511F34F, 0x79D072B4, 0x5B8B4327, 0xC5111E1A ); + # pv.sdotup.sc.b + TEST_RRR_OP( 14, pv.sdotup.sc.b, 0x7C691AEB, 0x6F622436, 0x1E1E694D, 0x7C68C0FC ); + TEST_RRR_OP( 15, pv.sdotup.sc.b, 0xAC5302E2, 0x77B8759A, 0xC1056E73, 0xAC520108 ); + TEST_RRR_OP( 16, pv.sdotup.sc.b, 0xAEA211C3, 0x74740933, 0xF898DF1E, 0xAEA1EF8B ); + # pv.sdotup.sci.b + TEST_RR_UIMM6_OP( 17, pv.sdotup.sci.b, 0x86CD943A, 0x4CD92920, 11, 0x86CD8480 ); + TEST_RR_UIMM6_OP( 18, pv.sdotup.sci.b, 0x8239C123, 
0xAFCE7172, 14, 0x82399FE3 ); + TEST_RR_UIMM6_OP( 19, pv.sdotup.sci.b, 0x3F753AE8, 0xDB25ABAA, 7, 0x3F752A95 ); + + #------------------------------------------------------------- + # Source/Destination tests + #------------------------------------------------------------- + + # TODO(smazzola): + # for reg-reg-reg instructions TEST_RRR_SRC1_EQ_DEST, + # TEST_RRR_SRC2_EQ_DEST, TEST_RRR_SRC12_EQ_DEST + # for reg-uimm6-reg instructions *macros still to be written* + + #------------------------------------------------------------- + # Bypassing tests + #------------------------------------------------------------- + + # TODO(smazzola): + # for reg-reg-reg instructions TEST_RRR_DEST_BYPASS, + # TEST_RRR_SRC12_BYPASS, TEST_RRR_SRC21_BYPASS, TEST_RRR_SRC3_BYPASS, + # TEST_RRR_ZEROSRC1, TEST_RRR_ZEROSRC2, TEST_RRR_ZEROSRC3, + # TEST_RRR_ZEROSRC12, TEST_RRR_ZEROSRC123, TEST_RRR_ZERODEST + # for reg-uimm6-reg instructions *macros still to be written* + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/pv_sdotusp.S b/apps/riscv-tests/isa/rv32uxpulpimg/pv_sdotusp.S new file mode 100644 index 000000000..30f30ba24 --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/pv_sdotusp.S @@ -0,0 +1,74 @@ +# See LICENSE for license details. + +#***************************************************************************** +# pv_sdotusp.S +#----------------------------------------------------------------------------- +# +# Test pv.sdotusp instructions. 
+# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Arithmetic tests + #------------------------------------------------------------- + + # pv.sdotusp.h + TEST_RRR_OP( 2, pv.sdotusp.h, 0xB4C3AF48, 0xFE67FB1A, 0x50E4DE57, 0x856509B6 ); + TEST_RRR_OP( 3, pv.sdotusp.h, 0x47D82DA6, 0xADBC1E09, 0xA2C806FA, 0x8649EFFC ); + TEST_RRR_OP( 4, pv.sdotusp.h, 0x81CF5945, 0x3FDAEFE7, 0x7BA5CB0F, 0x9495383A ); + # pv.sdotusp.sc.h + TEST_RRR_OP( 5, pv.sdotusp.sc.h, 0x76464853, 0x5E5C31BF, 0xCB24C409, 0x98078B60 ); + TEST_RRR_OP( 6, pv.sdotusp.sc.h, 0x0146856D, 0xEC042250, 0x230A4695, 0xB6BE3689 ); + TEST_RRR_OP( 7, pv.sdotusp.sc.h, 0xB607DEC3, 0x6BF5D085, 0x9AB012EF, 0x9E9FD4DD ); + # pv.sdotusp.sci.h + TEST_RR_SIMM6_OP( 8, pv.sdotusp.sci.h, 0x6A9BC4BB, 0x36D2FEAA, -14, 0x6AACB183 ); + TEST_RR_SIMM6_OP( 9, pv.sdotusp.sci.h, 0xA6138356, 0x6752FECB, -9, 0xA6201A5B ); + TEST_RR_SIMM6_OP( 10, pv.sdotusp.sci.h, 0x968CF09B, 0x9747CFF5, -1, 0x968E57D7 ); + # pv.sdotusp.b + TEST_RRR_OP( 11, pv.sdotusp.b, 0x6BF81F16, 0xEB8A58F5, 0xCAECEE54, 0x6BF8113C ); + TEST_RRR_OP( 12, pv.sdotusp.b, 0x5D238DA6, 0x47665939, 0x9E989665, 0x5D23E0A1 ); + TEST_RRR_OP( 13, pv.sdotusp.b, 0xC511234F, 0x79D072B4, 0x5B8B4327, 0xC5111E1A ); + # pv.sdotusp.sc.b + TEST_RRR_OP( 14, pv.sdotusp.sc.b, 0x7C691AEB, 0x6F622436, 0x1E1E694D, 0x7C68C0FC ); + TEST_RRR_OP( 15, pv.sdotusp.sc.b, 0xAC5302E2, 0x77B8759A, 0xC1056E73, 0xAC520108 ); + TEST_RRR_OP( 16, pv.sdotusp.sc.b, 0xAEA211C3, 0x74740933, 0xF898DF1E, 0xAEA1EF8B ); + # pv.sdotusp.sci.b + TEST_RR_SIMM6_OP( 17, pv.sdotusp.sci.b, 0x86CD85EE, 0x4CD92920, 1, 0x86CD8480 ); + TEST_RR_SIMM6_OP( 18, pv.sdotusp.sci.b, 0x82399403, 0xAFCE7172, -5, 0x82399FE3 ); + TEST_RR_SIMM6_OP( 19, pv.sdotusp.sci.b, 0x3F753F92, 0xDB25ABAA, 9, 0x3F752A95 ); + + #------------------------------------------------------------- + # Source/Destination tests + 
#------------------------------------------------------------- + + # TODO(smazzola): + # for reg-reg-reg instructions TEST_RRR_SRC1_EQ_DEST, + # TEST_RRR_SRC2_EQ_DEST, TEST_RRR_SRC12_EQ_DEST + # for reg-simm6-reg instructions *macros still to be written* + + #------------------------------------------------------------- + # Bypassing tests + #------------------------------------------------------------- + + # TODO(smazzola): + # for reg-reg-reg instructions TEST_RRR_DEST_BYPASS, + # TEST_RRR_SRC12_BYPASS, TEST_RRR_SRC21_BYPASS, TEST_RRR_SRC3_BYPASS, + # TEST_RRR_ZEROSRC1, TEST_RRR_ZEROSRC2, TEST_RRR_ZEROSRC3, + # TEST_RRR_ZEROSRC12, TEST_RRR_ZEROSRC123, TEST_RRR_ZERODEST + # for reg-simm6-reg instructions *macros still to be written* + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/pv_shuffle2.S b/apps/riscv-tests/isa/rv32uxpulpimg/pv_shuffle2.S new file mode 100644 index 000000000..fd3f2bf09 --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/pv_shuffle2.S @@ -0,0 +1,70 @@ +# See LICENSE for license details. + +#***************************************************************************** +# pv_shuffle2.S +#----------------------------------------------------------------------------- +# +# Test pv.shuffle2 instructions. 
+# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Arithmetic tests + #------------------------------------------------------------- + + # pv.shuffle2.h + TEST_RRR_OP( 2, pv.shuffle2.h, 0xABD0A85B, 0xABD03F6E, 0x1D4B26D0, 0xF0C3A85B ); + TEST_RRR_OP( 3, pv.shuffle2.h, 0x93A60706, 0x511B0706, 0xEB397322, 0x93A613C6 ); + TEST_RRR_OP( 4, pv.shuffle2.h, 0x2BCE15F2, 0x9D2D15F2, 0x5C71278E, 0x2BCEDA18 ); + TEST_RRR_OP( 5, pv.shuffle2.h, 0x2C48AA34, 0x2C48AA34, 0x4887D28E, 0x55247E80 ); + TEST_RRR_OP( 6, pv.shuffle2.h, 0xE999ADE8, 0xADE8E999, 0xD26AD68F, 0x23A14961 ); + TEST_RRR_OP( 7, pv.shuffle2.h, 0x0059517C, 0x6BF30059, 0xEFB6AF79, 0x517C1495 ); + TEST_RRR_OP( 8, pv.shuffle2.h, 0xB7FEA035, 0xB7FED864, 0x5BBB1058, 0x4583A035 ); + TEST_RRR_OP( 9, pv.shuffle2.h, 0xEA55FDC2, 0xFDC2EA55, 0x7292CF23, 0x4F82A53E ); + TEST_RRR_OP( 10, pv.shuffle2.h, 0xBE7232CB, 0x32CBBE72, 0x6DB6060F, 0x22C33B63 ); + TEST_RRR_OP( 11, pv.shuffle2.h, 0x4389A2A3, 0xCB19A2A3, 0x00BCDD22, 0xFB744389 ); + # pv.shuffle2.b + TEST_RRR_OP( 12, pv.shuffle2.b, 0xDAD9ECA3, 0x35A309D9, 0x8AE410B6, 0x22DA0BEC ); + TEST_RRR_OP( 13, pv.shuffle2.b, 0x0EF485F4, 0xCA850EB8, 0x256B969B, 0xF438D1D7 ); + TEST_RRR_OP( 14, pv.shuffle2.b, 0x1414E4C0, 0xE433C0A1, 0xC8381F65, 0xAC7DBC14 ); + TEST_RRR_OP( 15, pv.shuffle2.b, 0x81676762, 0x36DE6217, 0xC98AEA7D, 0x9D6781F4 ); + TEST_RRR_OP( 16, pv.shuffle2.b, 0xD80DD8B4, 0xD8CE132C, 0x67D8BF89, 0x166FB40D ); + TEST_RRR_OP( 17, pv.shuffle2.b, 0x7B9E0404, 0x657BF4D6, 0x06DB0232, 0x9E049D7E ); + TEST_RRR_OP( 18, pv.shuffle2.b, 0xD15526EE, 0x617EEED1, 0xE4D33275, 0x55264DEE ); + TEST_RRR_OP( 19, pv.shuffle2.b, 0x73AB4CAB, 0x43AB21CB, 0x4B2EC0BE, 0x7306984C ); + TEST_RRR_OP( 20, pv.shuffle2.b, 0x5235C41D, 0x052B5263, 0x85BB52D0, 0x35C4A31D ); + TEST_RRR_OP( 21, pv.shuffle2.b, 0xF1E0F194, 0xFFABF194, 0x35CBE594, 0xE0A7A1D1 ); + + 
#------------------------------------------------------------- + # Source/Destination tests + #------------------------------------------------------------- + + # TODO(smazzola): + # for reg-reg-reg instructions TEST_RRR_SRC1_EQ_DEST, + # TEST_RRR_SRC2_EQ_DEST, TEST_RRR_SRC12_EQ_DEST + + #------------------------------------------------------------- + # Bypassing tests + #------------------------------------------------------------- + + # TODO(smazzola): + # for reg-reg-reg instructions TEST_RRR_DEST_BYPASS, + # TEST_RRR_SRC12_BYPASS, TEST_RRR_SRC21_BYPASS, TEST_RRR_SRC3_BYPASS, + # TEST_RRR_ZEROSRC1, TEST_RRR_ZEROSRC2, TEST_RRR_ZEROSRC3, + # TEST_RRR_ZEROSRC12, TEST_RRR_ZEROSRC123, TEST_RRR_ZERODEST + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/pv_sll.S b/apps/riscv-tests/isa/rv32uxpulpimg/pv_sll.S new file mode 100644 index 000000000..3e44223bd --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/pv_sll.S @@ -0,0 +1,74 @@ +# See LICENSE for license details. + +#***************************************************************************** +# pv_sll.S +#----------------------------------------------------------------------------- +# +# Test pv.sll instructions. 
+# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Arithmetic tests + #------------------------------------------------------------- + + # pv.sll.h + TEST_RR_OP( 2, pv.sll.h, 0x40000E80, 0xCC85D0E8, 0x000E0004 ); + TEST_RR_OP( 3, pv.sll.h, 0xF0802C00, 0x83E1502C, 0x00070008 ); + TEST_RR_OP( 4, pv.sll.h, 0x20005470, 0x8AA1551C, 0x000D0002 ); + # pv.sll.sc.h + TEST_RR_OP( 5, pv.sll.sc.h, 0x81F03608, 0x103E26C1, 0x000A0003 ); + TEST_RR_OP( 6, pv.sll.sc.h, 0x1B800F00, 0x0437CE1E, 0x00080007 ); + TEST_RR_OP( 7, pv.sll.sc.h, 0xC7002900, 0xE5C75029, 0x000D0008 ); + # pv.sll.sci.h + TEST_UIMM6_OP( 8, pv.sll.sci.h, 0x46002600, 0x48233B93, 9 ); + TEST_UIMM6_OP( 9, pv.sll.sci.h, 0x9600AC00, 0x344B9356, 9 ); + TEST_UIMM6_OP( 10, pv.sll.sci.h, 0x40002E00, 0xB2A0E417, 9 ); + # pv.sll.b + TEST_RR_OP( 11, pv.sll.b, 0x7EE05CA8, 0x3FCE5C2A, 0x01040002 ); + TEST_RR_OP( 12, pv.sll.b, 0xA45E8034, 0xE95E5934, 0x02000700 ); + TEST_RR_OP( 13, pv.sll.b, 0xB0780068, 0xFB8FA8B4, 0x04030501 ); + # pv.sll.sc.b + TEST_RR_OP( 14, pv.sll.sc.b, 0xF05000C0, 0x5FF510FC, 0x02020004 ); + TEST_RR_OP( 15, pv.sll.sc.b, 0xA0C0B0A0, 0x2A6CFB1A, 0x01000604 ); + TEST_RR_OP( 16, pv.sll.sc.b, 0x047E94F8, 0x823FCAFC, 0x07010601 ); + # pv.sll.sci.b + TEST_UIMM6_OP( 17, pv.sll.sci.b, 0x60002020, 0x8BA0A901, 5 ); + TEST_UIMM6_OP( 18, pv.sll.sci.b, 0x604020E0, 0x83CAE947, 5 ); + TEST_UIMM6_OP( 19, pv.sll.sci.b, 0x0020A060, 0x98F185C3, 5 ); + + #------------------------------------------------------------- + # Source/Destination tests + #------------------------------------------------------------- + + # TODO(smazzola): + # for register-register instructions TEST_RR_SRC1_EQ_DEST, + # TEST_RR_SRC2_EQ_DEST, TEST_RR_SRC12_EQ_DEST + # for register-uimm6 instructions TEST_UIMM6_SRC1_EQ_DEST + + #------------------------------------------------------------- + # Bypassing tests + 
#------------------------------------------------------------- + + # TODO(smazzola): + # for register-register instructions TEST_RR_DEST_BYPASS, + # TEST_RR_SRC12_BYPASS, TEST_RR_SRC21_BYPASS, TEST_RR_ZEROSRC1, + # TEST_RR_ZEROSRC2, TEST_RR_ZEROSRC12, TEST_RR_ZERODEST + # for register-simm6 instructions TEST_UIMM6_DEST_BYPASS, + # TEST_UIMM6_SRC1_BYPASS, TEST_UIMM6_ZEROSRC1, TEST_UIMM6_ZERODEST + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/pv_sra.S b/apps/riscv-tests/isa/rv32uxpulpimg/pv_sra.S new file mode 100644 index 000000000..d0bfe0d89 --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/pv_sra.S @@ -0,0 +1,74 @@ +# See LICENSE for license details. + +#***************************************************************************** +# pv_sra.S +#----------------------------------------------------------------------------- +# +# Test pv.sra instructions. +# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Arithmetic tests + #------------------------------------------------------------- + + # pv.sra.h + TEST_RR_OP( 2, pv.sra.h, 0x2C4BFFF8, 0x5896C0A3, 0x0001000B ); + TEST_RR_OP( 3, pv.sra.h, 0x005E760F, 0x5E43760F, 0x00080000 ); + TEST_RR_OP( 4, pv.sra.h, 0x0059F46B, 0x166BA35F, 0x00060003 ); + # pv.sra.sc.h + TEST_RR_OP( 5, pv.sra.sc.h, 0x00000005, 0x080A5F54, 0x000D000C ); + TEST_RR_OP( 6, pv.sra.sc.h, 0xFF0400FB, 0xE0871F6D, 0x00020005 ); + TEST_RR_OP( 7, pv.sra.sc.h, 0x00010000, 0x40FF2C98, 0x000B000E ); + # pv.sra.sci.h + TEST_UIMM6_OP( 8, pv.sra.sci.h, 0xFFE70029, 0xCEB053F9, 9 ); + TEST_UIMM6_OP( 9, pv.sra.sci.h, 0xFFD5FFF8, 0xAA4AF03F, 9 ); + TEST_UIMM6_OP( 10, pv.sra.sci.h, 0x00340008, 0x68E511A2, 9 ); + # pv.sra.b + TEST_RR_OP( 11, pv.sra.b, 0x08FF00F9, 0x11F61B9D, 0x01050704 ); + TEST_RR_OP( 12, pv.sra.b, 0xFCFCFFF3, 0x8FE3F89C, 0x05030603 ); + 
TEST_RR_OP( 13, pv.sra.b, 0x0096FFA3, 0x0296E1A3, 0x02000600 ); + # pv.sra.sc.b + TEST_RR_OP( 14, pv.sra.sc.b, 0x66F6E2A1, 0x66F6E2A1, 0x01050600 ); + TEST_RR_OP( 15, pv.sra.sc.b, 0x0DFD1A13, 0x36F56B4D, 0x03040302 ); + TEST_RR_OP( 16, pv.sra.sc.b, 0x00000201, 0x16135625, 0x01040205 ); + # pv.sra.sci.b + TEST_UIMM6_OP( 17, pv.sra.sci.b, 0xFCFC02FD, 0x848B57AD, 5 ); + TEST_UIMM6_OP( 18, pv.sra.sci.b, 0x02FE02FC, 0x40CD5290, 5 ); + TEST_UIMM6_OP( 19, pv.sra.sci.b, 0x02FCFF01, 0x549FFD20, 5 ); + + #------------------------------------------------------------- + # Source/Destination tests + #------------------------------------------------------------- + + # TODO(smazzola): + # for register-register instructions TEST_RR_SRC1_EQ_DEST, + # TEST_RR_SRC2_EQ_DEST, TEST_RR_SRC12_EQ_DEST + # for register-simm6 instructions TEST_UIMM6_SRC1_EQ_DEST + + #------------------------------------------------------------- + # Bypassing tests + #------------------------------------------------------------- + + # TODO(smazzola): + # for register-register instructions TEST_RR_DEST_BYPASS, + # TEST_RR_SRC12_BYPASS, TEST_RR_SRC21_BYPASS, TEST_RR_ZEROSRC1, + # TEST_RR_ZEROSRC2, TEST_RR_ZEROSRC12, TEST_RR_ZERODEST + # for register-simm6 instructions TEST_UIMM6_DEST_BYPASS, + # TEST_UIMM6_SRC1_BYPASS, TEST_UIMM6_ZEROSRC1, TEST_UIMM6_ZERODEST + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/pv_srl.S b/apps/riscv-tests/isa/rv32uxpulpimg/pv_srl.S new file mode 100644 index 000000000..47ebe0e49 --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/pv_srl.S @@ -0,0 +1,74 @@ +# See LICENSE for license details. + +#***************************************************************************** +# pv_srl.S +#----------------------------------------------------------------------------- +# +# Test pv.srl instructions. 
+# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Arithmetic tests + #------------------------------------------------------------- + + # pv.srl.h + TEST_RR_OP( 2, pv.srl.h, 0x06030067, 0xC076CE34, 0x00050009 ); + TEST_RR_OP( 3, pv.srl.h, 0x00A40047, 0xA41723DF, 0x00080007 ); + TEST_RR_OP( 4, pv.srl.h, 0x00142A49, 0x52ADA926, 0x000A0002 ); + # pv.srl.sc.h + TEST_RR_OP( 5, pv.srl.sc.h, 0x1EE01053, 0xF706829F, 0x00080003 ); + TEST_RR_OP( 6, pv.srl.sc.h, 0x00000001, 0x3BC79528, 0x000E000F ); + TEST_RR_OP( 7, pv.srl.sc.h, 0x001001BE, 0x04236F94, 0x000D0006 ); + # pv.srl.sci.h + TEST_UIMM6_OP( 8, pv.srl.sci.h, 0x00450077, 0x8AA9EF3A, 9 ); + TEST_UIMM6_OP( 9, pv.srl.sci.h, 0x0049006B, 0x93A9D63A, 9 ); + TEST_UIMM6_OP( 10, pv.srl.sci.h, 0x003F0040, 0x7E0D81AF, 9 ); + # pv.srl.b + TEST_RR_OP( 11, pv.srl.b, 0x030C6A01, 0xFAC8D4F6, 0x06040107 ); + TEST_RR_OP( 12, pv.srl.b, 0x07000105, 0x3F0B94B5, 0x03050705 ); + TEST_RR_OP( 13, pv.srl.b, 0x00311065, 0x29C54065, 0x07020200 ); + # pv.srl.sc.b + TEST_RR_OP( 14, pv.srl.sc.b, 0x3A37353E, 0x746E6A7C, 0x02010701 ); + TEST_RR_OP( 15, pv.srl.sc.b, 0x0A080A0D, 0xAA82A5D6, 0x01030204 ); + TEST_RR_OP( 16, pv.srl.sc.b, 0x6F5D6D75, 0xDEBBDAEB, 0x03040701 ); + # pv.srl.sci.b + TEST_UIMM6_OP( 17, pv.srl.sci.b, 0x06010704, 0xCD2DE193, 5 ); + TEST_UIMM6_OP( 18, pv.srl.sci.b, 0x00030507, 0x0B64B9E8, 5 ); + TEST_UIMM6_OP( 19, pv.srl.sci.b, 0x02070306, 0x50E572CB, 5 ); + + #------------------------------------------------------------- + # Source/Destination tests + #------------------------------------------------------------- + + # TODO(smazzola): + # for register-register instructions TEST_RR_SRC1_EQ_DEST, + # TEST_RR_SRC2_EQ_DEST, TEST_RR_SRC12_EQ_DEST + # for register-simm6 instructions TEST_UIMM6_SRC1_EQ_DEST + + #------------------------------------------------------------- + # Bypassing tests + 
#------------------------------------------------------------- + + # TODO(smazzola): + # for register-register instructions TEST_RR_DEST_BYPASS, + # TEST_RR_SRC12_BYPASS, TEST_RR_SRC21_BYPASS, TEST_RR_ZEROSRC1, + # TEST_RR_ZEROSRC2, TEST_RR_ZEROSRC12, TEST_RR_ZERODEST + # for register-simm6 instructions TEST_UIMM6_DEST_BYPASS, + # TEST_UIMM6_SRC1_BYPASS, TEST_UIMM6_ZEROSRC1, TEST_UIMM6_ZERODEST + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/pv_sub.S b/apps/riscv-tests/isa/rv32uxpulpimg/pv_sub.S new file mode 100644 index 000000000..72336e0d9 --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/pv_sub.S @@ -0,0 +1,74 @@ +# See LICENSE for license details. + +#***************************************************************************** +# pv_sub.S +#----------------------------------------------------------------------------- +# +# Test pv.sub instructions. +# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Arithmetic tests + #------------------------------------------------------------- + + # pv.sub.h + TEST_RR_OP( 2, pv.sub.h, 0x21549541, 0xC037A04F, 0x9EE30B0E ); + TEST_RR_OP( 3, pv.sub.h, 0x0A8F87AF, 0xA6011B6C, 0x9B7293BD ); + TEST_RR_OP( 4, pv.sub.h, 0xE3DFCB44, 0x2BD6C73C, 0x47F7FBF8 ); + # pv.sub.sc.h + TEST_RR_OP( 5, pv.sub.sc.h, 0x005D10C7, 0xA7DFB849, 0xA0DAA782 ); + TEST_RR_OP( 6, pv.sub.sc.h, 0x915182F6, 0xEB54DCF9, 0xE0AB5A03 ); + TEST_RR_OP( 7, pv.sub.sc.h, 0x8ADB0963, 0xC72645AE, 0x44033C4B ); + # pv.sub.sci.h + TEST_SIMM6_OP( 8, pv.sub.sci.h, 0x5F6A01D4, 0x5F7501DF, 11 ); + TEST_SIMM6_OP( 9, pv.sub.sci.h, 0xEDEBEE05, 0xEDF6EE10, 11 ); + TEST_SIMM6_OP( 10, pv.sub.sci.h, 0x5254F633, 0x525FF63E, 11 ); + # pv.sub.b + TEST_RR_OP( 11, pv.sub.b, 0xCFA312C4, 0xD6B51AA7, 0x071208E3 ); + TEST_RR_OP( 12, pv.sub.b, 0x399B9FC4, 0x273CF552, 0xEEA1568E ); 
+ TEST_RR_OP( 13, pv.sub.b, 0x75B1BB23, 0x3E6DD37D, 0xC9BC185A ); + # pv.sub.sc.b + TEST_RR_OP( 14, pv.sub.sc.b, 0x85D1880A, 0xCE1AD153, 0x11D9D249 ); + TEST_RR_OP( 15, pv.sub.sc.b, 0xF53BE607, 0xB0F6A1C2, 0xA4990EBB ); + TEST_RR_OP( 16, pv.sub.sc.b, 0xB890FAF0, 0x4A228C82, 0x3DEA1692 ); + # pv.sub.sci.b + TEST_SIMM6_OP( 17, pv.sub.sci.b, 0x5282B987, 0x5D8DC492, 11 ); + TEST_SIMM6_OP( 18, pv.sub.sci.b, 0x12D59C9F, 0x1DE0A7AA, 11 ); + TEST_SIMM6_OP( 19, pv.sub.sci.b, 0x6C6D5D05, 0x77786810, 11 ); + + #------------------------------------------------------------- + # Source/Destination tests + #------------------------------------------------------------- + + # TODO(smazzola): + # for register-register instructions TEST_RR_SRC1_EQ_DEST, + # TEST_RR_SRC2_EQ_DEST, TEST_RR_SRC12_EQ_DEST + # for register-simm6 instructions TEST_SIMM6_SRC1_EQ_DEST + + #------------------------------------------------------------- + # Bypassing tests + #------------------------------------------------------------- + + # TODO(smazzola): + # for register-register instructions TEST_RR_DEST_BYPASS, + # TEST_RR_SRC12_BYPASS, TEST_RR_SRC21_BYPASS, TEST_RR_ZEROSRC1, + # TEST_RR_ZEROSRC2, TEST_RR_ZEROSRC12, TEST_RR_ZERODEST + # for register-simm6 instructions TEST_SIMM6_DEST_BYPASS, + # TEST_SIMM6_SRC1_BYPASS, TEST_SIMM6_ZEROSRC1, TEST_SIMM6_ZERODEST + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/rv32uxpulpimg/pv_xor.S b/apps/riscv-tests/isa/rv32uxpulpimg/pv_xor.S new file mode 100644 index 000000000..75fee565c --- /dev/null +++ b/apps/riscv-tests/isa/rv32uxpulpimg/pv_xor.S @@ -0,0 +1,74 @@ +# See LICENSE for license details. + +#***************************************************************************** +# pv_xor.S +#----------------------------------------------------------------------------- +# +# Test pv.xor instructions. 
+# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Arithmetic tests + #------------------------------------------------------------- + + # pv.xor.h + TEST_RR_OP( 2, pv.xor.h, 0x66F696DC, 0x5FCE4AD5, 0x3938DC09 ); + TEST_RR_OP( 3, pv.xor.h, 0x58A5BD3D, 0x672A5F61, 0x3F8FE25C ); + TEST_RR_OP( 4, pv.xor.h, 0x339E302C, 0xE468E8F4, 0xD7F6D8D8 ); + # pv.xor.sc.h + TEST_RR_OP( 5, pv.xor.sc.h, 0x5FB150BC, 0xC4A5CBA8, 0x43CC9B14 ); + TEST_RR_OP( 6, pv.xor.sc.h, 0x48030479, 0xD7F09B8A, 0xCB019FF3 ); + TEST_RR_OP( 7, pv.xor.sc.h, 0x0465D51A, 0x40CF91B0, 0x55DB44AA ); + # pv.xor.sci.h + TEST_UIMM6_OP( 8, pv.xor.sci.h, 0x0F43E04C, 0x0F48E047, 11 ); + TEST_UIMM6_OP( 9, pv.xor.sci.h, 0xEC22101C, 0xEC291017, 11 ); + TEST_UIMM6_OP( 10, pv.xor.sci.h, 0x137F208C, 0x13742087, 11 ); + # pv.xor.b + TEST_RR_OP( 11, pv.xor.b, 0x6A9EC5B4, 0x13518603, 0x79CF43B7 ); + TEST_RR_OP( 12, pv.xor.b, 0xEE0CDAEA, 0x59CAB02D, 0xB7C66AC7 ); + TEST_RR_OP( 13, pv.xor.b, 0x5B6E4CC1, 0x8B61A064, 0xD00FECA5 ); + # pv.xor.sc.b + TEST_RR_OP( 14, pv.xor.sc.b, 0x40CF2054, 0x0F806F1B, 0x7CD0414F ); + TEST_RR_OP( 15, pv.xor.sc.b, 0x89E5AA00, 0x127E319B, 0xC919409B ); + TEST_RR_OP( 16, pv.xor.sc.b, 0xFC7E17F9, 0xAE2C45AB, 0xB9254252 ); + # pv.xor.sci.b + TEST_UIMM6_OP( 17, pv.xor.sci.b, 0x2D2D131C, 0x26261817, 11 ); + TEST_UIMM6_OP( 18, pv.xor.sci.b, 0x23EC42D8, 0x28E749D3, 11 ); + TEST_UIMM6_OP( 19, pv.xor.sci.b, 0xCAA811C9, 0xC1A31AC2, 11 ); + + #------------------------------------------------------------- + # Source/Destination tests + #------------------------------------------------------------- + + # TODO(smazzola): + # for register-register instructions TEST_RR_SRC1_EQ_DEST, + # TEST_RR_SRC2_EQ_DEST, TEST_RR_SRC12_EQ_DEST + # for register-simm6 instructions TEST_UIMM6_SRC1_EQ_DEST + + #------------------------------------------------------------- + # Bypassing tests + 
#------------------------------------------------------------- + + # TODO(smazzola): + # for register-register instructions TEST_RR_DEST_BYPASS, + # TEST_RR_SRC12_BYPASS, TEST_RR_SRC21_BYPASS, TEST_RR_ZEROSRC1, + # TEST_RR_ZEROSRC2, TEST_RR_ZEROSRC12, TEST_RR_ZERODEST + # for register-simm6 instructions TEST_UIMM6_DEST_BYPASS, + # TEST_UIMM6_SRC1_BYPASS, TEST_UIMM6_ZEROSRC1, TEST_UIMM6_ZERODEST + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +RVTEST_DATA_END diff --git a/apps/riscv-tests/isa/snitch_isa.mk b/apps/riscv-tests/isa/snitch_isa.mk index 3a571c2e1..c914a0a8b 100644 --- a/apps/riscv-tests/isa/snitch_isa.mk +++ b/apps/riscv-tests/isa/snitch_isa.mk @@ -33,6 +33,12 @@ rv32um_snitch_sc_tests = \ ifeq ($(xpulpimg),1) rv32uxpulpimg_snitch_sc_tests = \ + p_lb_irpost p_lbu_irpost p_lh_irpost p_lhu_irpost p_lw_irpost \ + p_lb_rrpost p_lbu_rrpost p_lh_rrpost p_lhu_rrpost p_lw_rrpost \ + p_lb_rr p_lbu_rr p_lh_rr p_lhu_rr p_lw_rr \ + p_sb_irpost p_sh_irpost p_sw_irpost \ + p_sb_rrpost p_sh_rrpost p_sw_rrpost \ + p_sb_rr p_sh_rr p_sw_rr \ p_abs \ p_slet p_sletu \ p_min p_minu \ @@ -42,6 +48,28 @@ ifeq ($(xpulpimg),1) p_clip p_clipu \ p_clipr p_clipur \ p_beqimm p_bneimm \ + p_mac p_msu \ + pv_add \ + pv_sub \ + pv_avg pv_avgu \ + pv_min pv_minu \ + pv_max pv_maxu \ + pv_srl \ + pv_sra \ + pv_sll \ + pv_or \ + pv_xor \ + pv_and \ + pv_abs \ + pv_extract pv_extractu \ + pv_insert \ + pv_dotup \ + pv_dotusp \ + pv_dotsp \ + pv_sdotup \ + pv_sdotusp \ + pv_sdotsp \ + pv_shuffle2 \ endif diff --git a/apps/sleep_wakeup/main.c b/apps/sleep_wakeup/main.c index f4b9bfa00..308a68689 100644 --- a/apps/sleep_wakeup/main.c +++ b/apps/sleep_wakeup/main.c @@ -6,7 +6,7 @@ #include "runtime.h" #include "synchronization.h" -volatile uint32_t atomic __attribute__((section(".l2"))) = -1; +volatile uint32_t atomic __attribute__((section(".l2"))) = (uint32_t)-1; extern volatile uint32_t tcdm_start_address_reg; extern volatile uint32_t tcdm_end_address_reg; 
diff --git a/hardware/Makefile b/hardware/Makefile index 836d7b457..941ae8df8 100644 --- a/hardware/Makefile +++ b/hardware/Makefile @@ -215,7 +215,7 @@ pre_trace: post_trace: mkdir -p "$(result_dir)" - cp $(buildpath)/transcript "$(result_dir)/" + cp $(buildpath)/transcript "$(result_dir)/" | true cp $(traceresult) "$(result_dir)" cp $(trace) "$(result_dir)" diff --git a/hardware/deps/snitch/src/mempool_cc.sv b/hardware/deps/snitch/src/mempool_cc.sv index b6244db67..14f3efab7 100644 --- a/hardware/deps/snitch/src/mempool_cc.sv +++ b/hardware/deps/snitch/src/mempool_cc.sv @@ -255,8 +255,8 @@ module mempool_cc #( "acc_pid": i_snitch.acc_pid_i, "acc_pdata_32": i_snitch.acc_pdata_i[31:0], // FPU offload - "fpu_offload": (i_snitch.acc_qready_i && i_snitch.acc_qvalid_o && !snitch_pkg::shared_offload(i_snitch.acc_qdata_op_o)), - "is_seq_insn": (i_snitch.inst_data_i ==? riscv_instr::FREP) + "fpu_offload": 1'b0, + "is_seq_insn": 1'b0 }; task fmt_extras ( @@ -321,8 +321,8 @@ module mempool_cc #( extras_str = $sformatf("%s'%s': 0x%8x, ", extras_str, "acc_pid", i_snitch.acc_pid_i,); extras_str = $sformatf("%s'%s': 0x%8x, ", extras_str, "acc_pdata_32",i_snitch.acc_pdata_i[31:0],); // FPU offload - extras_str = $sformatf("%s'%s': 0x%8x, ", extras_str, "fpu_offload", (i_snitch.acc_qready_i && i_snitch.acc_qvalid_o && !snitch_pkg::shared_offload(i_snitch.acc_qdata_op_o)),); - extras_str = $sformatf("%s'%s': 0x%8x, ", extras_str, "is_seq_insn", (i_snitch.inst_data_i ==? 
riscv_instr::FREP)); + extras_str = $sformatf("%s'%s': 0x%8x, ", extras_str, "fpu_offload", 1'b0); + extras_str = $sformatf("%s'%s': 0x%8x, ", extras_str, "is_seq_insn", 1'b0); extras_str = $sformatf("%s}", extras_str); `else fmt_extras(extras_snitch, extras_str); diff --git a/hardware/deps/snitch/src/snitch.sv b/hardware/deps/snitch/src/snitch.sv index f71786d1f..148549b0a 100644 --- a/hardware/deps/snitch/src/snitch.sv +++ b/hardware/deps/snitch/src/snitch.sv @@ -81,7 +81,7 @@ module snitch #( ); localparam int RegWidth = RVE ? 4 : 5; - localparam int RegNrReadPorts = 2; + localparam int RegNrReadPorts = snitch_pkg::XPULPIMG ? 3 : 2; logic illegal_inst; logic zero_lsb; @@ -118,7 +118,7 @@ module snitch #( logic [2**RegWidth-1:0] sb_d, sb_q; // Load/Store Defines - logic is_load, is_store, is_signed; + logic is_load, is_store, is_signed, is_postincr; logic is_fp_load, is_fp_store; logic ls_misaligned; logic ld_addr_misaligned; @@ -150,8 +150,10 @@ module snitch #( logic lsu_qready, lsu_qvalid; logic lsu_pvalid, lsu_pready; logic [RegWidth-1:0] lsu_rd; + logic [31:0] lsu_qaddr; logic retire_load; // retire a load instruction + logic retire_p; // retire from post-increment instructions logic retire_i; // retire the rest of the base instruction set logic retire_acc; // retire an instruction we offloaded @@ -175,11 +177,13 @@ module snitch #( } alu_op; enum logic [3:0] { - None, Reg, IImmediate, UImmediate, JImmediate, SImmediate, SFImmediate, PC, CSR, CSRImmediate, PBImmediate - } opa_select, opb_select; + None, Reg, IImmediate, UImmediate, JImmediate, SImmediate, SFImmediate, PC, CSR, CSRImmediate, PBImmediate, RegRd, RegRs2 + } opa_select, opb_select, opc_select; - logic write_rd; // write desitnation this cycle + logic write_rd; // write rd destination this cycle logic uses_rd; + logic write_rs1; // write rs1 destination this cycle + logic uses_rs1; enum logic [1:0] {Consec, Alu, Exception} next_pc; enum logic [1:0] {RdAlu, RdConsecPC, RdBypass} rd_select; @@
-217,7 +221,7 @@ module snitch #( assign acc_qdata_op_o = inst_data_i; assign acc_qdata_arga_o = {{32{gpr_rdata[0][31]}}, gpr_rdata[0]}; assign acc_qdata_argb_o = {{32{gpr_rdata[1][31]}}, gpr_rdata[1]}; - assign acc_qdata_argc_o = {32'b0, alu_result}; + assign acc_qdata_argc_o = {{32{gpr_rdata[2][31]}}, gpr_rdata[2]}; // instruction fetch interface assign inst_addr_o = pc_q; @@ -229,7 +233,8 @@ module snitch #( // Scoreboard: Keep track of rd dependencies (only loads at the moment) logic operands_ready; logic dst_ready; - logic opa_ready, opb_ready; + logic opa_ready, opb_ready, opc_ready; + logic dstrd_ready, dstrs1_ready; always_comb begin sb_d = sb_q; @@ -241,11 +246,14 @@ module snitch #( end // TODO(zarubaf): This can probably be described a bit more efficient assign opa_ready = (opa_select != Reg) | ~sb_q[rs1]; - assign opb_ready = (opb_select != Reg & opb_select != SImmediate) | ~sb_q[rs2]; - assign operands_ready = opa_ready & opb_ready; + assign opb_ready = ((opb_select != Reg & opb_select != SImmediate) | ~sb_q[rs2]) & ((opb_select != RegRd) | ~sb_q[rd]); + assign opc_ready = ((opc_select != Reg) | ~sb_q[rd]) & ((opc_select != RegRs2) | ~sb_q[rs2]); + assign operands_ready = opa_ready & opb_ready & opc_ready; // either we are not using the destination register or we need to make // sure that its destination operand is not marked busy in the scoreboard. 
- assign dst_ready = ~uses_rd | (uses_rd & ~sb_q[rd]); + assign dstrd_ready = ~uses_rd | (uses_rd & ~sb_q[rd]); + assign dstrs1_ready = ~uses_rs1 | (uses_rs1 & ~sb_q[rs1]); + assign dst_ready = dstrd_ready & dstrs1_ready; assign valid_instr = (inst_ready_i & inst_valid_o) & operands_ready & dst_ready; // the accelerator interface stalled us @@ -284,14 +292,18 @@ module snitch #( alu_op = Add; opa_select = None; opb_select = None; + opc_select = None; next_pc = Consec; + // set up rd destination rd_select = RdAlu; write_rd = 1'b1; - // if we are writing the field this cycle we need - // an int destination register + // if we are writing the field this cycle we need an int destination register uses_rd = write_rd; + // set up rs1 destination + write_rs1 = 1'b0; + uses_rs1 = write_rs1; rd_bypass = '0; zero_lsb = 1'b0; @@ -299,6 +311,7 @@ module snitch #( // LSU interface is_load = 1'b0; is_store = 1'b0; + is_postincr = 1'b0; is_fp_load = 1'b0; is_fp_store = 1'b0; is_signed = 1'b0; @@ -748,29 +761,324 @@ module snitch #( end /* Xpulpimg extension */ - // Off-load to IPU coprocessor - riscv_instr::P_ABS, // Xpulpimg: p.abs - riscv_instr::P_SLET, // Xpulpimg: p.slet - riscv_instr::P_SLETU, // Xpulpimg: p.sletu - riscv_instr::P_MIN, // Xpulpimg: p.min - riscv_instr::P_MINU, // Xpulpimg: p.minu - riscv_instr::P_MAX, // Xpulpimg: p.max - riscv_instr::P_MAXU, // Xpulpimg: p.maxu - riscv_instr::P_EXTHS, // Xpulpimg: p.exths - riscv_instr::P_EXTHZ, // Xpulpimg: p.exthz - riscv_instr::P_EXTBS, // Xpulpimg: p.extbs - riscv_instr::P_EXTBZ, // Xpulpimg: p.extbz - riscv_instr::P_CLIP, // Xpulpimg: p.clip - riscv_instr::P_CLIPU, // Xpulpimg: p.clipu - riscv_instr::P_CLIPR, // Xpulpimg: p.clipr - riscv_instr::P_CLIPUR: begin // Xpulpimg: p.clipur + // Post-increment loads/stores + riscv_instr::P_LB_IRPOST: begin // Xpulpimg: p.lb rd,iimm(rs1!) 
if (snitch_pkg::XPULPIMG) begin write_rd = 1'b0; uses_rd = 1'b1; - acc_qvalid_o = valid_instr; + write_rs1 = 1'b1; + is_load = 1'b1; + is_postincr = 1'b1; + is_signed = 1'b1; + opa_select = Reg; + opb_select = IImmediate; + end else begin + illegal_inst = 1'b1; + end + end + riscv_instr::P_LBU_IRPOST: begin // Xpulpimg: p.lbu + if (snitch_pkg::XPULPIMG) begin + write_rd = 1'b0; + uses_rd = 1'b1; + write_rs1 = 1'b1; + is_load = 1'b1; + is_postincr = 1'b1; + opa_select = Reg; + opb_select = IImmediate; + end else begin + illegal_inst = 1'b1; + end + end + riscv_instr::P_LH_IRPOST: begin // Xpulpimg: p.lh + if (snitch_pkg::XPULPIMG) begin + write_rd = 1'b0; + uses_rd = 1'b1; + write_rs1 = 1'b1; + is_load = 1'b1; + is_postincr = 1'b1; + is_signed = 1'b1; + ls_size = HalfWord; + opa_select = Reg; + opb_select = IImmediate; + end else begin + illegal_inst = 1'b1; + end + end + riscv_instr::P_LHU_IRPOST: begin // Xpulpimg: p.lhu + if (snitch_pkg::XPULPIMG) begin + write_rd = 1'b0; + uses_rd = 1'b1; + write_rs1 = 1'b1; + is_load = 1'b1; + is_postincr = 1'b1; + ls_size = HalfWord; + opa_select = Reg; + opb_select = IImmediate; + end else begin + illegal_inst = 1'b1; + end + end + riscv_instr::P_LW_IRPOST: begin // Xpulpimg: p.lw + if (snitch_pkg::XPULPIMG) begin + write_rd = 1'b0; + uses_rd = 1'b1; + write_rs1 = 1'b1; + is_load = 1'b1; + is_postincr = 1'b1; + is_signed = 1'b1; + ls_size = Word; + opa_select = Reg; + opb_select = IImmediate; + end else begin + illegal_inst = 1'b1; + end + end + riscv_instr::P_LB_RRPOST: begin // Xpulpimg: p.lb rd,rs2(rs1!) 
+ if (snitch_pkg::XPULPIMG) begin + write_rd = 1'b0; + uses_rd = 1'b1; + write_rs1 = 1'b1; + is_load = 1'b1; + is_postincr = 1'b1; + is_signed = 1'b1; + opa_select = Reg; + opb_select = Reg; + end else begin + illegal_inst = 1'b1; + end + end + riscv_instr::P_LBU_RRPOST: begin // Xpulpimg: p.lbu + if (snitch_pkg::XPULPIMG) begin + write_rd = 1'b0; + uses_rd = 1'b1; + write_rs1 = 1'b1; + is_load = 1'b1; + is_postincr = 1'b1; + opa_select = Reg; + opb_select = Reg; + end else begin + illegal_inst = 1'b1; + end + end + riscv_instr::P_LH_RRPOST: begin // Xpulpimg: p.lh + if (snitch_pkg::XPULPIMG) begin + write_rd = 1'b0; + uses_rd = 1'b1; + write_rs1 = 1'b1; + is_load = 1'b1; + is_postincr = 1'b1; + is_signed = 1'b1; + ls_size = HalfWord; + opa_select = Reg; + opb_select = Reg; + end else begin + illegal_inst = 1'b1; + end + end + riscv_instr::P_LHU_RRPOST: begin // Xpulpimg: p.lhu + if (snitch_pkg::XPULPIMG) begin + write_rd = 1'b0; + uses_rd = 1'b1; + write_rs1 = 1'b1; + is_load = 1'b1; + is_postincr = 1'b1; + ls_size = HalfWord; + opa_select = Reg; + opb_select = Reg; + end else begin + illegal_inst = 1'b1; + end + end + riscv_instr::P_LW_RRPOST: begin // Xpulpimg: p.lw + if (snitch_pkg::XPULPIMG) begin + write_rd = 1'b0; + uses_rd = 1'b1; + write_rs1 = 1'b1; + is_load = 1'b1; + is_postincr = 1'b1; + is_signed = 1'b1; + ls_size = Word; + opa_select = Reg; + opb_select = Reg; + end else begin + illegal_inst = 1'b1; + end + end + riscv_instr::P_LB_RR: begin // Xpulpimg: p.lb rd,rs2(rs1) + if (snitch_pkg::XPULPIMG) begin + write_rd = 1'b0; + uses_rd = 1'b1; + is_load = 1'b1; + is_signed = 1'b1; opa_select = Reg; opb_select = Reg; - acc_register_rd = 1'b1; + end else begin + illegal_inst = 1'b1; + end + end + riscv_instr::P_LBU_RR: begin // Xpulpimg: p.lbu + if (snitch_pkg::XPULPIMG) begin + write_rd = 1'b0; + uses_rd = 1'b1; + is_load = 1'b1; + opa_select = Reg; + opb_select = Reg; + end else begin + illegal_inst = 1'b1; + end + end + riscv_instr::P_LH_RR: begin // 
Xpulpimg: p.lh + if (snitch_pkg::XPULPIMG) begin + write_rd = 1'b0; + uses_rd = 1'b1; + is_load = 1'b1; + is_signed = 1'b1; + ls_size = HalfWord; + opa_select = Reg; + opb_select = Reg; + end else begin + illegal_inst = 1'b1; + end + end + riscv_instr::P_LHU_RR: begin // Xpulpimg: p.lhu + if (snitch_pkg::XPULPIMG) begin + write_rd = 1'b0; + uses_rd = 1'b1; + is_load = 1'b1; + ls_size = HalfWord; + opa_select = Reg; + opb_select = Reg; + end else begin + illegal_inst = 1'b1; + end + end + riscv_instr::P_LW_RR: begin // Xpulpimg: p.lw + if (snitch_pkg::XPULPIMG) begin + write_rd = 1'b0; + uses_rd = 1'b1; + is_load = 1'b1; + is_signed = 1'b1; + ls_size = Word; + opa_select = Reg; + opb_select = Reg; + end else begin + illegal_inst = 1'b1; + end + end + riscv_instr::P_SB_IRPOST: begin // Xpulpimg: p.sb rs2,simm(rs1!) + if (snitch_pkg::XPULPIMG) begin + write_rd = 1'b0; + write_rs1 = 1'b1; + is_store = 1'b1; + is_postincr = 1'b1; + opa_select = Reg; + opb_select = SImmediate; + end else begin + illegal_inst = 1'b1; + end + end + riscv_instr::P_SH_IRPOST: begin // Xpulpimg: p.sh + if (snitch_pkg::XPULPIMG) begin + write_rd = 1'b0; + write_rs1 = 1'b1; + is_store = 1'b1; + is_postincr = 1'b1; + ls_size = HalfWord; + opa_select = Reg; + opb_select = SImmediate; + end else begin + illegal_inst = 1'b1; + end + end + riscv_instr::P_SW_IRPOST: begin // Xpulpimg: p.sw + if (snitch_pkg::XPULPIMG) begin + write_rd = 1'b0; + write_rs1 = 1'b1; + is_store = 1'b1; + is_postincr = 1'b1; + ls_size = Word; + opa_select = Reg; + opb_select = SImmediate; + end else begin + illegal_inst = 1'b1; + end + end + // opb is usually assigned with the content of rs2; in stores with reg-reg + // addressing mode, however, the offset is stored in rd, so rd content is + // instead assigned to opb: if we cross such signals now (rd -> opb, + // rs2 -> opc) we don't have to do that in the ALU, with bigger muxes + riscv_instr::P_SB_RRPOST: begin // Xpulpimg: p.sb rs2,rs3(rs1!) 
+ if (snitch_pkg::XPULPIMG) begin + write_rd = 1'b0; + write_rs1 = 1'b1; + is_store = 1'b1; + is_postincr = 1'b1; + opa_select = Reg; // rs1 base address + opb_select = RegRd; // rs3 (i.e. rd) offset + opc_select = RegRs2; // rs2 source data + end else begin + illegal_inst = 1'b1; + end + end + riscv_instr::P_SH_RRPOST: begin // Xpulpimg: p.sh + if (snitch_pkg::XPULPIMG) begin + write_rd = 1'b0; + write_rs1 = 1'b1; + is_store = 1'b1; + is_postincr = 1'b1; + ls_size = HalfWord; + opa_select = Reg; + opb_select = RegRd; + opc_select = RegRs2; + end else begin + illegal_inst = 1'b1; + end + end + riscv_instr::P_SW_RRPOST: begin // Xpulpimg: p.sw + if (snitch_pkg::XPULPIMG) begin + write_rd = 1'b0; + write_rs1 = 1'b1; + is_store = 1'b1; + is_postincr = 1'b1; + ls_size = Word; + opa_select = Reg; + opb_select = RegRd; + opc_select = RegRs2; + end else begin + illegal_inst = 1'b1; + end + end + riscv_instr::P_SB_RR: begin // Xpulpimg: p.sb rs2,rs3(rs1) + if (snitch_pkg::XPULPIMG) begin + write_rd = 1'b0; + is_store = 1'b1; + opa_select = Reg; + opb_select = RegRd; + opc_select = RegRs2; + end else begin + illegal_inst = 1'b1; + end + end + riscv_instr::P_SH_RR: begin // Xpulpimg: p.sh + if (snitch_pkg::XPULPIMG) begin + write_rd = 1'b0; + is_store = 1'b1; + ls_size = HalfWord; + opa_select = Reg; + opb_select = RegRd; + opc_select = RegRs2; + end else begin + illegal_inst = 1'b1; + end + end + riscv_instr::P_SW_RR: begin // Xpulpimg: p.sw + if (snitch_pkg::XPULPIMG) begin + write_rd = 1'b0; + is_store = 1'b1; + ls_size = Word; + opa_select = Reg; + opb_select = RegRd; + opc_select = RegRs2; end else begin illegal_inst = 1'b1; end @@ -798,6 +1106,202 @@ module snitch #( illegal_inst = 1'b1; end end + // Off-load to IPU coprocessor + // 1 source register (rs1) + riscv_instr::P_ABS, // Xpulpimg: p.abs + riscv_instr::P_EXTHS, // Xpulpimg: p.exths + riscv_instr::P_EXTHZ, // Xpulpimg: p.exthz + riscv_instr::P_EXTBS, // Xpulpimg: p.extbs + riscv_instr::P_EXTBZ, // Xpulpimg: 
p.extbz + riscv_instr::P_CLIP, // Xpulpimg: p.clip + riscv_instr::P_CLIPU, // Xpulpimg: p.clipu + riscv_instr::PV_ADD_SCI_H, // Xpulpimg: pv.add.sci.h + riscv_instr::PV_ADD_SCI_B, // Xpulpimg: pv.add.sci.b + riscv_instr::PV_SUB_SCI_H, // Xpulpimg: pv.sub.sci.h + riscv_instr::PV_SUB_SCI_B, // Xpulpimg: pv.sub.sci.b + riscv_instr::PV_AVG_SCI_H, // Xpulpimg: pv.avg.sci.h + riscv_instr::PV_AVG_SCI_B, // Xpulpimg: pv.avg.sci.b + riscv_instr::PV_AVGU_SCI_H, // Xpulpimg: pv.avgu.sci.h + riscv_instr::PV_AVGU_SCI_B, // Xpulpimg: pv.avgu.sci.b + riscv_instr::PV_MIN_SCI_H, // Xpulpimg: pv.min.sci.h + riscv_instr::PV_MIN_SCI_B, // Xpulpimg: pv.min.sci.b + riscv_instr::PV_MINU_SCI_H, // Xpulpimg: pv.minu.sci.h + riscv_instr::PV_MINU_SCI_B, // Xpulpimg: pv.minu.sci.b + riscv_instr::PV_MAX_SCI_H, // Xpulpimg: pv.max.sci.h + riscv_instr::PV_MAX_SCI_B, // Xpulpimg: pv.max.sci.b + riscv_instr::PV_MAXU_SCI_H, // Xpulpimg: pv.maxu.sci.h + riscv_instr::PV_MAXU_SCI_B, // Xpulpimg: pv.maxu.sci.b + riscv_instr::PV_SRL_SCI_H, // Xpulpimg: pv.srl.sci.h + riscv_instr::PV_SRL_SCI_B, // Xpulpimg: pv.srl.sci.b + riscv_instr::PV_SRA_SCI_H, // Xpulpimg: pv.sra.sci.h + riscv_instr::PV_SRA_SCI_B, // Xpulpimg: pv.sra.sci.b + riscv_instr::PV_SLL_SCI_H, // Xpulpimg: pv.sll.sci.h + riscv_instr::PV_SLL_SCI_B, // Xpulpimg: pv.sll.sci.b + riscv_instr::PV_OR_SCI_H, // Xpulpimg: pv.or.sci.h + riscv_instr::PV_OR_SCI_B, // Xpulpimg: pv.or.sci.b + riscv_instr::PV_XOR_SCI_H, // Xpulpimg: pv.xor.sci.h + riscv_instr::PV_XOR_SCI_B, // Xpulpimg: pv.xor.sci.b + riscv_instr::PV_AND_SCI_B, // Xpulpimg: pv.and.sci.b + riscv_instr::PV_AND_SCI_H, // Xpulpimg: pv.and.sci.h + riscv_instr::PV_ABS_H, // Xpulpimg: pv.abs.h + riscv_instr::PV_ABS_B, // Xpulpimg: pv.abs.b + riscv_instr::PV_EXTRACT_H, // Xpulpimg: pv.extract.h + riscv_instr::PV_EXTRACT_B, // Xpulpimg: pv.extract.b + riscv_instr::PV_EXTRACTU_H, // Xpulpimg: pv.extractu.h + riscv_instr::PV_EXTRACTU_B, // Xpulpimg: pv.extractu.b + riscv_instr::PV_DOTUP_SCI_H, // 
Xpulpimg: pv.dotup.sci.h + riscv_instr::PV_DOTUP_SCI_B, // Xpulpimg: pv.dotup.sci.b + riscv_instr::PV_DOTUSP_SCI_H, // Xpulpimg: pv.dotusp.sci.h + riscv_instr::PV_DOTUSP_SCI_B, // Xpulpimg: pv.dotusp.sci.b + riscv_instr::PV_DOTSP_SCI_H, // Xpulpimg: pv.dotsp.sci.h + riscv_instr::PV_DOTSP_SCI_B: begin // Xpulpimg: pv.dotsp.sci.b + if (snitch_pkg::XPULPIMG) begin + write_rd = 1'b0; + uses_rd = 1'b1; + acc_qvalid_o = valid_instr; + opa_select = Reg; + acc_register_rd = 1'b1; + end else begin + illegal_inst = 1'b1; + end + end + // 2 source registers (rs1, rs2) + riscv_instr::P_SLET, // Xpulpimg: p.slet + riscv_instr::P_SLETU, // Xpulpimg: p.sletu + riscv_instr::P_MIN, // Xpulpimg: p.min + riscv_instr::P_MINU, // Xpulpimg: p.minu + riscv_instr::P_MAX, // Xpulpimg: p.max + riscv_instr::P_MAXU, // Xpulpimg: p.maxu + riscv_instr::P_CLIPR, // Xpulpimg: p.clipr + riscv_instr::P_CLIPUR, // Xpulpimg: p.clipur + riscv_instr::PV_ADD_H, // Xpulpimg: pv.add.h + riscv_instr::PV_ADD_SC_H, // Xpulpimg: pv.add.sc.h + riscv_instr::PV_ADD_B, // Xpulpimg: pv.add.b + riscv_instr::PV_ADD_SC_B, // Xpulpimg: pv.add.sc.b + riscv_instr::PV_SUB_H, // Xpulpimg: pv.sub.h + riscv_instr::PV_SUB_SC_H, // Xpulpimg: pv.sub.sc.h + riscv_instr::PV_SUB_B, // Xpulpimg: pv.sub.b + riscv_instr::PV_SUB_SC_B, // Xpulpimg: pv.sub.sc.b + riscv_instr::PV_AVG_H, // Xpulpimg: pv.avg.h + riscv_instr::PV_AVG_SC_H, // Xpulpimg: pv.avg.sc.h + riscv_instr::PV_AVG_B, // Xpulpimg: pv.avg.b + riscv_instr::PV_AVG_SC_B, // Xpulpimg: pv.avg.sc.b + riscv_instr::PV_AVGU_H, // Xpulpimg: pv.avgu.h + riscv_instr::PV_AVGU_SC_H, // Xpulpimg: pv.avgu.sc.h + riscv_instr::PV_AVGU_B, // Xpulpimg: pv.avgu.b + riscv_instr::PV_AVGU_SC_B, // Xpulpimg: pv.avgu.sc.b + riscv_instr::PV_MIN_H, // Xpulpimg: pv.min.h + riscv_instr::PV_MIN_SC_H, // Xpulpimg: pv.min.sc.h + riscv_instr::PV_MIN_B, // Xpulpimg: pv.min.b + riscv_instr::PV_MIN_SC_B, // Xpulpimg: pv.min.sc.b + riscv_instr::PV_MINU_H, // Xpulpimg: pv.minu.h + riscv_instr::PV_MINU_SC_H, 
// Xpulpimg: pv.minu.sc.h + riscv_instr::PV_MINU_B, // Xpulpimg: pv.minu.b + riscv_instr::PV_MINU_SC_B, // Xpulpimg: pv.minu.sc.b + riscv_instr::PV_MAX_H, // Xpulpimg: pv.max.h + riscv_instr::PV_MAX_SC_H, // Xpulpimg: pv.max.sc.h + riscv_instr::PV_MAX_B, // Xpulpimg: pv.max.b + riscv_instr::PV_MAX_SC_B, // Xpulpimg: pv.max.sc.b + riscv_instr::PV_MAXU_H, // Xpulpimg: pv.maxu.h + riscv_instr::PV_MAXU_SC_H, // Xpulpimg: pv.maxu.sc.h + riscv_instr::PV_MAXU_B, // Xpulpimg: pv.maxu.b + riscv_instr::PV_MAXU_SC_B, // Xpulpimg: pv.maxu.sc.b + riscv_instr::PV_SRL_H, // Xpulpimg: pv.srl.h + riscv_instr::PV_SRL_SC_H, // Xpulpimg: pv.srl.sc.h + riscv_instr::PV_SRL_B, // Xpulpimg: pv.srl.b + riscv_instr::PV_SRL_SC_B, // Xpulpimg: pv.srl.sc.b + riscv_instr::PV_SRA_H, // Xpulpimg: pv.sra.h + riscv_instr::PV_SRA_SC_H, // Xpulpimg: pv.sra.sc.h + riscv_instr::PV_SRA_B, // Xpulpimg: pv.sra.b + riscv_instr::PV_SRA_SC_B, // Xpulpimg: pv.sra.sc.b + riscv_instr::PV_SLL_H, // Xpulpimg: pv.sll.h + riscv_instr::PV_SLL_SC_H, // Xpulpimg: pv.sll.sc.h + riscv_instr::PV_SLL_B, // Xpulpimg: pv.sll.b + riscv_instr::PV_SLL_SC_B, // Xpulpimg: pv.sll.sc.b + riscv_instr::PV_OR_H, // Xpulpimg: pv.or.h + riscv_instr::PV_OR_SC_H, // Xpulpimg: pv.or.sc.h + riscv_instr::PV_OR_B, // Xpulpimg: pv.or.b + riscv_instr::PV_OR_SC_B, // Xpulpimg: pv.or.sc.b + riscv_instr::PV_XOR_H, // Xpulpimg: pv.xor.h + riscv_instr::PV_XOR_SC_H, // Xpulpimg: pv.xor.sc.h + riscv_instr::PV_XOR_B, // Xpulpimg: pv.xor.b + riscv_instr::PV_XOR_SC_B, // Xpulpimg: pv.xor.sc.b + riscv_instr::PV_AND_H, // Xpulpimg: pv.and.h + riscv_instr::PV_AND_SC_H, // Xpulpimg: pv.and.sc.h + riscv_instr::PV_AND_B, // Xpulpimg: pv.and.b + riscv_instr::PV_AND_SC_B, // Xpulpimg: pv.and.sc.b + riscv_instr::PV_DOTUP_H, // Xpulpimg: pv.dotup.h + riscv_instr::PV_DOTUP_SC_H, // Xpulpimg: pv.dotup.sc.h + riscv_instr::PV_DOTUP_B, // Xpulpimg: pv.dotup.b + riscv_instr::PV_DOTUP_SC_B, // Xpulpimg: pv.dotup.sc.b + riscv_instr::PV_DOTUSP_H, // Xpulpimg: pv.dotusp.h 
+ riscv_instr::PV_DOTUSP_SC_H, // Xpulpimg: pv.dotusp.sc.h + riscv_instr::PV_DOTUSP_B, // Xpulpimg: pv.dotusp.b + riscv_instr::PV_DOTUSP_SC_B, // Xpulpimg: pv.dotusp.sc.b + riscv_instr::PV_DOTSP_H, // Xpulpimg: pv.dotsp.h + riscv_instr::PV_DOTSP_SC_H, // Xpulpimg: pv.dotsp.sc.h + riscv_instr::PV_DOTSP_B, // Xpulpimg: pv.dotsp.b + riscv_instr::PV_DOTSP_SC_B: begin // Xpulpimg: pv.dotsp.sc.b + if (snitch_pkg::XPULPIMG) begin + write_rd = 1'b0; + uses_rd = 1'b1; + acc_qvalid_o = valid_instr; + opa_select = Reg; + opb_select = Reg; + acc_register_rd = 1'b1; + end else begin + illegal_inst = 1'b1; + end + end + // 2 source registers (rs1, rd) + riscv_instr::PV_INSERT_H, // Xpulpimg: pv.insert.h + riscv_instr::PV_INSERT_B, // Xpulpimg: pv.insert.b + riscv_instr::PV_SDOTUP_SCI_H, // Xpulpimg: pv.sdotup.sci.h + riscv_instr::PV_SDOTUP_SCI_B, // Xpulpimg: pv.sdotup.sci.b + riscv_instr::PV_SDOTUSP_SCI_H, // Xpulpimg: pv.sdotusp.sci.h + riscv_instr::PV_SDOTUSP_SCI_B, // Xpulpimg: pv.sdotusp.sci.b + riscv_instr::PV_SDOTSP_SCI_H, // Xpulpimg: pv.sdotsp.sci.h + riscv_instr::PV_SDOTSP_SCI_B: begin // Xpulpimg: pv.sdotsp.sci.b + if (snitch_pkg::XPULPIMG) begin + write_rd = 1'b0; + uses_rd = 1'b1; + acc_qvalid_o = valid_instr; + opa_select = Reg; + opc_select = Reg; + acc_register_rd = 1'b1; + end else begin + illegal_inst = 1'b1; + end + end + // 3 source registers (rs1, rs2, rd) + riscv_instr::P_MAC, // Xpulpimg: p.mac + riscv_instr::P_MSU, // Xpulpimg: p.msu + riscv_instr::PV_SDOTUP_H, // Xpulpimg: pv.sdotup.h + riscv_instr::PV_SDOTUP_SC_H, // Xpulpimg: pv.sdotup.sc.h + riscv_instr::PV_SDOTUP_B, // Xpulpimg: pv.sdotup.b + riscv_instr::PV_SDOTUP_SC_B, // Xpulpimg: pv.sdotup.sc.b + riscv_instr::PV_SDOTUSP_H, // Xpulpimg: pv.sdotusp.h + riscv_instr::PV_SDOTUSP_SC_H, // Xpulpimg: pv.sdotusp.sc.h + riscv_instr::PV_SDOTUSP_B, // Xpulpimg: pv.sdotusp.b + riscv_instr::PV_SDOTUSP_SC_B, // Xpulpimg: pv.sdotusp.sc.b + riscv_instr::PV_SDOTSP_H, // Xpulpimg: pv.sdotsp.h + 
riscv_instr::PV_SDOTSP_SC_H, // Xpulpimg: pv.sdotsp.sc.h + riscv_instr::PV_SDOTSP_B, // Xpulpimg: pv.sdotsp.b + riscv_instr::PV_SDOTSP_SC_B, // Xpulpimg: pv.sdotsp.sc.b + riscv_instr::PV_SHUFFLE2_H, // Xpulpimg: pv.shuffle2.h + riscv_instr::PV_SHUFFLE2_B: begin // Xpulpimg: pv.shuffle2.b + if (snitch_pkg::XPULPIMG) begin + write_rd = 1'b0; + uses_rd = 1'b1; + acc_qvalid_o = valid_instr; + opa_select = Reg; + opb_select = Reg; + opc_select = Reg; + acc_register_rd = 1'b1; + end else begin + illegal_inst = 1'b1; + end + end /* end of Xpulpimg extension */ // TODO(zarubaf): Illegal Instructions @@ -810,6 +1314,8 @@ module snitch #( if (exception) begin write_rd = 1'b0; uses_rd = 1'b0; + write_rs1 = 1'b0; + uses_rs1 = 1'b0; acc_qvalid_o = 1'b0; next_pc = Exception; end @@ -893,12 +1399,17 @@ module snitch #( PC: opb = pc_q; CSR: opb = csr_rvalue; PBImmediate: opb = pbimm; + RegRd: opb = gpr_rdata[2]; default: opb = '0; endcase end assign gpr_raddr[0] = rs1; assign gpr_raddr[1] = rs2; + // connect third read port only if present + if (RegNrReadPorts >= 3) begin : gpr_raddr_2 + assign gpr_raddr[2] = rd; + end // -------------------- // ALU @@ -998,7 +1509,7 @@ module snitch #( .lsu_qtag_i ( rd ), .lsu_qwrite ( is_store ), .lsu_qsigned ( is_signed ), - .lsu_qaddr_i ( alu_result ), + .lsu_qaddr_i ( lsu_qaddr ), .lsu_qdata_i ( gpr_rdata[1] ), .lsu_qsize_i ( ls_size ), .lsu_qamo_i ( ls_amo ), @@ -1022,8 +1533,18 @@ module snitch #( .data_pready_o ); + // address can be alu_result (i.e. rs1 + iimm/simm) or rs1 (for post-increment load/stores) + assign lsu_qaddr = is_postincr ? 
gpr_rdata[0] : alu_result; + assign lsu_qvalid = valid_instr & (is_load | is_store) & ~(ld_addr_misaligned | st_addr_misaligned); + // NOTE(smazzola): write-backs "on rd from non-load or non-acc instructions" and "on rs1 from + // post-increment instructions" in the same cycle should be mutually exclusive (currently valid + // assumption since write-back to rs1 happens on the cycle in which the post-increment load/store + // is issued, if that cycle is not a stall, and it is not postponed like offloaded instructions, + // so no other instructions writing back on rd can be issued in the same cycle) + // retire post-incremented address on rs1 if valid postincr instruction and LSU not stalling + assign retire_p = write_rs1 & ~stall & (rs1 != 0); // we can retire if we are not stalling and if the instruction is writing a register assign retire_i = write_rd & valid_instr & (rd != 0); @@ -1071,7 +1592,9 @@ module snitch #( if (RegNrWritePorts == 1) begin always_comb begin gpr_we[0] = 1'b0; - gpr_waddr[0] = rd; + // NOTE(smazzola): this works because write-backs on rd and rs1 in the same cycle are mutually + // exclusive; if this should change, the following statement has to be written in another form + gpr_waddr[0] = retire_p ? 
rs1 : rd; // choose whether to writeback at RF[rs1] for post-increment load/stores gpr_wdata[0] = alu_writeback; // external interfaces lsu_pready = 1'b0; @@ -1079,7 +1602,7 @@ module snitch #( retire_acc = 1'b0; retire_load = 1'b0; - if (retire_i) begin + if (retire_i | retire_p) begin gpr_we[0] = 1'b1; // if we are not retiring another instruction retire the load now end else if (lsu_pvalid) begin @@ -1099,7 +1622,9 @@ module snitch #( end else if (RegNrWritePorts == 2) begin always_comb begin gpr_we[0] = 1'b0; - gpr_waddr[0] = rd; + // NOTE(smazzola): this works because write-backs on rd and rs1 in the same cycle are mutually + // exclusive; if this should change, the following statement has to be written in another form + gpr_waddr[0] = retire_p ? rs1 : rd; // choose whether to writeback at RF[rs1] for post-increment load/stores gpr_wdata[0] = alu_writeback; gpr_we[1] = 1'b0; gpr_waddr[1] = lsu_rd; @@ -1110,7 +1635,7 @@ module snitch #( retire_acc = 1'b0; retire_load = 1'b0; - if (retire_i) begin + if (retire_i | retire_p) begin gpr_we[0] = 1'b1; if (lsu_pvalid) begin retire_load = 1'b1; diff --git a/hardware/deps/snitch/src/snitch_icache/snitch_icache.sv b/hardware/deps/snitch/src/snitch_icache/snitch_icache.sv index 8b66d80d3..9bcc4a4d6 100644 --- a/hardware/deps/snitch/src/snitch_icache/snitch_icache.sv +++ b/hardware/deps/snitch/src/snitch_icache/snitch_icache.sv @@ -52,7 +52,7 @@ module snitch_icache #( input logic clk_d2_i, input logic rst_ni, - input logic enable_prefetching_i, + input logic [NR_FETCH_PORTS-1:0] enable_prefetching_i, output snitch_icache_pkg::icache_events_t [NR_FETCH_PORTS-1:0] icache_events_o, input logic flush_valid_i, @@ -220,24 +220,24 @@ module snitch_icache #( .clk_i ( clk_d2_i ), .rst_ni, .flush_valid_i, - .enable_prefetching_i, - .icache_events_o ( icache_events_o [i] ), - .in_addr_i ( inst_addr_i [i] ), - .in_data_o ( in_cache_data [i] ), - .in_error_o ( in_cache_error [i] ), - .in_valid_i ( in_cache_valid [i] ), - .in_ready_o 
( in_cache_ready [i] ), - - .out_req_addr_o ( local_prefetch_req.addr ), - .out_req_id_o ( local_prefetch_req.id ), - .out_req_valid_o ( local_prefetch_req_valid ), - .out_req_ready_i ( local_prefetch_req_ready ), - - .out_rsp_data_i ( local_prefetch_rsp.data ), - .out_rsp_error_i ( local_prefetch_rsp.error ), - .out_rsp_id_i ( local_prefetch_rsp.id ), - .out_rsp_valid_i ( local_prefetch_rsp_valid ), - .out_rsp_ready_o ( local_prefetch_rsp_ready ) + .enable_prefetching_i ( enable_prefetching_i [i] ), + .icache_events_o ( icache_events_o [i] ), + .in_addr_i ( inst_addr_i [i] ), + .in_data_o ( in_cache_data [i] ), + .in_error_o ( in_cache_error [i] ), + .in_valid_i ( in_cache_valid [i] ), + .in_ready_o ( in_cache_ready [i] ), + + .out_req_addr_o ( local_prefetch_req.addr ), + .out_req_id_o ( local_prefetch_req.id ), + .out_req_valid_o ( local_prefetch_req_valid ), + .out_req_ready_i ( local_prefetch_req_ready ), + + .out_rsp_data_i ( local_prefetch_rsp.data ), + .out_rsp_error_i ( local_prefetch_rsp.error ), + .out_rsp_id_i ( local_prefetch_rsp.id ), + .out_rsp_valid_i ( local_prefetch_rsp_valid ), + .out_rsp_ready_o ( local_prefetch_rsp_ready ) ); isochronous_spill_register #( diff --git a/hardware/deps/snitch/src/snitch_icache/snitch_icache_l0.sv b/hardware/deps/snitch/src/snitch_icache/snitch_icache_l0.sv index 8544d3078..147dbf093 100644 --- a/hardware/deps/snitch/src/snitch_icache/snitch_icache_l0.sv +++ b/hardware/deps/snitch/src/snitch_icache/snitch_icache_l0.sv @@ -298,26 +298,28 @@ module snitch_icache_l0 import snitch_icache_pkg::*; #( always_comb begin is_branch_taken[i] = 1'b0; is_jal[i] = 1'b0; - unique casez (ins_data[i*32+:32]) - // static prediction - riscv_instr::BEQ, - riscv_instr::BNE, - riscv_instr::BLT, - riscv_instr::BGE, - riscv_instr::BLTU, - riscv_instr::BGEU: begin - // look at the sign bit of the immediate field - // backward branches (immediate negative) taken - // forward branches not taken - is_branch_taken[i] = ins_data[i*32+31]; - end 
- riscv_instr::JAL: begin - is_jal[i] = 1'b1; - end - // we can't do anything about the JALR case as we don't - // know the destination. - default:; - endcase + if (hit_early_is_onehot) begin + unique casez (ins_data[i*32+:32]) + // static prediction + riscv_instr::BEQ, + riscv_instr::BNE, + riscv_instr::BLT, + riscv_instr::BGE, + riscv_instr::BLTU, + riscv_instr::BGEU: begin + // look at the sign bit of the immediate field + // backward branches (immediate negative) taken + // forward branches not taken + is_branch_taken[i] = ins_data[i*32+31]; + end + riscv_instr::JAL: begin + is_jal[i] = 1'b1; + end + // we can't do anything about the JALR case as we don't + // know the destination. + default:; + endcase + end end end diff --git a/hardware/deps/snitch/src/snitch_ipu.sv b/hardware/deps/snitch/src/snitch_ipu.sv index 6f6dc2bcf..c7c72e52f 100644 --- a/hardware/deps/snitch/src/snitch_ipu.sv +++ b/hardware/deps/snitch/src/snitch_ipu.sv @@ -30,8 +30,10 @@ module snitch_ipu #( } result_t; // input handshake logic div_valid_op, div_ready_op; + /* verilator lint_off UNDRIVEN */ logic mul_valid_op, mul_ready_op; logic dsp_valid_op, dsp_ready_op; + /* verilator lint_on UNDRIVEN */ // output handshake logic mul_valid, mul_ready; logic div_valid, div_ready; @@ -51,8 +53,13 @@ module snitch_ipu #( riscv_instr::MULH, riscv_instr::MULHSU, riscv_instr::MULHU: begin - mul_valid_op = acc_qvalid_i; - acc_qready_o = mul_ready_op; + if (snitch_pkg::XPULPIMG) begin + dsp_valid_op = acc_qvalid_i; + acc_qready_o = dsp_ready_op; + end else begin + mul_valid_op = acc_qvalid_i; + acc_qready_o = mul_ready_op; + end end riscv_instr::DIV, riscv_instr::DIVU, @@ -61,21 +68,153 @@ module snitch_ipu #( div_valid_op = acc_qvalid_i; acc_qready_o = div_ready_op; end - riscv_instr::P_ABS, // Xpulpimg: p.abs - riscv_instr::P_SLET, // Xpulpimg: p.slet - riscv_instr::P_SLETU, // Xpulpimg: p.sletu - riscv_instr::P_MIN, // Xpulpimg: p.min - riscv_instr::P_MINU, // Xpulpimg: p.minu - riscv_instr::P_MAX, // 
Xpulpimg: p.max - riscv_instr::P_MAXU, // Xpulpimg: p.maxu - riscv_instr::P_EXTHS, // Xpulpimg: p.exths - riscv_instr::P_EXTHZ, // Xpulpimg: p.exthz - riscv_instr::P_EXTBS, // Xpulpimg: p.extbs - riscv_instr::P_EXTBZ, // Xpulpimg: p.extbz - riscv_instr::P_CLIP, // Xpulpimg: p.clip - riscv_instr::P_CLIPU, // Xpulpimg: p.clipu - riscv_instr::P_CLIPR, // Xpulpimg: p.clipr - riscv_instr::P_CLIPUR: begin // Xpulpimg: p.clipur + riscv_instr::P_ABS, // Xpulpimg: p.abs + riscv_instr::P_SLET, // Xpulpimg: p.slet + riscv_instr::P_SLETU, // Xpulpimg: p.sletu + riscv_instr::P_MIN, // Xpulpimg: p.min + riscv_instr::P_MINU, // Xpulpimg: p.minu + riscv_instr::P_MAX, // Xpulpimg: p.max + riscv_instr::P_MAXU, // Xpulpimg: p.maxu + riscv_instr::P_EXTHS, // Xpulpimg: p.exths + riscv_instr::P_EXTHZ, // Xpulpimg: p.exthz + riscv_instr::P_EXTBS, // Xpulpimg: p.extbs + riscv_instr::P_EXTBZ, // Xpulpimg: p.extbz + riscv_instr::P_CLIP, // Xpulpimg: p.clip + riscv_instr::P_CLIPU, // Xpulpimg: p.clipu + riscv_instr::P_CLIPR, // Xpulpimg: p.clipr + riscv_instr::P_CLIPUR, // Xpulpimg: p.clipur + riscv_instr::P_MAC, // Xpulpimg: p.mac + riscv_instr::P_MSU, // Xpulpimg: p.msu + riscv_instr::PV_ADD_H, // Xpulpimg: pv.add.h + riscv_instr::PV_ADD_SC_H, // Xpulpimg: pv.add.sc.h + riscv_instr::PV_ADD_SCI_H, // Xpulpimg: pv.add.sci.h + riscv_instr::PV_ADD_B, // Xpulpimg: pv.add.b + riscv_instr::PV_ADD_SC_B, // Xpulpimg: pv.add.sc.b + riscv_instr::PV_ADD_SCI_B, // Xpulpimg: pv.add.sci.b + riscv_instr::PV_SUB_H, // Xpulpimg: pv.sub.h + riscv_instr::PV_SUB_SC_H, // Xpulpimg: pv.sub.sc.h + riscv_instr::PV_SUB_SCI_H, // Xpulpimg: pv.sub.sci.h + riscv_instr::PV_SUB_B, // Xpulpimg: pv.sub.b + riscv_instr::PV_SUB_SC_B, // Xpulpimg: pv.sub.sc.b + riscv_instr::PV_SUB_SCI_B, // Xpulpimg: pv.sub.sci.b + riscv_instr::PV_AVG_H, // Xpulpimg: pv.avg.h + riscv_instr::PV_AVG_SC_H, // Xpulpimg: pv.avg.sc.h + riscv_instr::PV_AVG_SCI_H, // Xpulpimg: pv.avg.sci.h + riscv_instr::PV_AVG_B, // Xpulpimg: pv.avg.b + 
riscv_instr::PV_AVG_SC_B, // Xpulpimg: pv.avg.sc.b + riscv_instr::PV_AVG_SCI_B, // Xpulpimg: pv.avg.sci.b + riscv_instr::PV_AVGU_H, // Xpulpimg: pv.avgu.h + riscv_instr::PV_AVGU_SC_H, // Xpulpimg: pv.avgu.sc.h + riscv_instr::PV_AVGU_SCI_H, // Xpulpimg: pv.avgu.sci.h + riscv_instr::PV_AVGU_B, // Xpulpimg: pv.avgu.b + riscv_instr::PV_AVGU_SC_B, // Xpulpimg: pv.avgu.sc.b + riscv_instr::PV_AVGU_SCI_B, // Xpulpimg: pv.avgu.sci.b + riscv_instr::PV_MIN_H, // Xpulpimg: pv.min.h + riscv_instr::PV_MIN_SC_H, // Xpulpimg: pv.min.sc.h + riscv_instr::PV_MIN_SCI_H, // Xpulpimg: pv.min.sci.h + riscv_instr::PV_MIN_B, // Xpulpimg: pv.min.b + riscv_instr::PV_MIN_SC_B, // Xpulpimg: pv.min.sc.b + riscv_instr::PV_MIN_SCI_B, // Xpulpimg: pv.min.sci.b + riscv_instr::PV_MINU_H, // Xpulpimg: pv.minu.h + riscv_instr::PV_MINU_SC_H, // Xpulpimg: pv.minu.sc.h + riscv_instr::PV_MINU_SCI_H, // Xpulpimg: pv.minu.sci.h + riscv_instr::PV_MINU_B, // Xpulpimg: pv.minu.b + riscv_instr::PV_MINU_SC_B, // Xpulpimg: pv.minu.sc.b + riscv_instr::PV_MINU_SCI_B, // Xpulpimg: pv.minu.sci.b + riscv_instr::PV_MAX_H, // Xpulpimg: pv.max.h + riscv_instr::PV_MAX_SC_H, // Xpulpimg: pv.max.sc.h + riscv_instr::PV_MAX_SCI_H, // Xpulpimg: pv.max.sci.h + riscv_instr::PV_MAX_B, // Xpulpimg: pv.max.b + riscv_instr::PV_MAX_SC_B, // Xpulpimg: pv.max.sc.b + riscv_instr::PV_MAX_SCI_B, // Xpulpimg: pv.max.sci.b + riscv_instr::PV_MAXU_H, // Xpulpimg: pv.maxu.h + riscv_instr::PV_MAXU_SC_H, // Xpulpimg: pv.maxu.sc.h + riscv_instr::PV_MAXU_SCI_H, // Xpulpimg: pv.maxu.sci.h + riscv_instr::PV_MAXU_B, // Xpulpimg: pv.maxu.b + riscv_instr::PV_MAXU_SC_B, // Xpulpimg: pv.maxu.sc.b + riscv_instr::PV_MAXU_SCI_B, // Xpulpimg: pv.maxu.sci.b + riscv_instr::PV_SRL_H, // Xpulpimg: pv.srl.h + riscv_instr::PV_SRL_SC_H, // Xpulpimg: pv.srl.sc.h + riscv_instr::PV_SRL_SCI_H, // Xpulpimg: pv.srl.sci.h + riscv_instr::PV_SRL_B, // Xpulpimg: pv.srl.b + riscv_instr::PV_SRL_SC_B, // Xpulpimg: pv.srl.sc.b + riscv_instr::PV_SRL_SCI_B, // Xpulpimg: 
pv.srl.sci.b + riscv_instr::PV_SRA_H, // Xpulpimg: pv.sra.h + riscv_instr::PV_SRA_SC_H, // Xpulpimg: pv.sra.sc.h + riscv_instr::PV_SRA_SCI_H, // Xpulpimg: pv.sra.sci.h + riscv_instr::PV_SRA_B, // Xpulpimg: pv.sra.b + riscv_instr::PV_SRA_SC_B, // Xpulpimg: pv.sra.sc.b + riscv_instr::PV_SRA_SCI_B, // Xpulpimg: pv.sra.sci.b + riscv_instr::PV_SLL_H, // Xpulpimg: pv.sll.h + riscv_instr::PV_SLL_SC_H, // Xpulpimg: pv.sll.sc.h + riscv_instr::PV_SLL_SCI_H, // Xpulpimg: pv.sll.sci.h + riscv_instr::PV_SLL_B, // Xpulpimg: pv.sll.b + riscv_instr::PV_SLL_SC_B, // Xpulpimg: pv.sll.sc.b + riscv_instr::PV_SLL_SCI_B, // Xpulpimg: pv.sll.sci.b + riscv_instr::PV_OR_H, // Xpulpimg: pv.or.h + riscv_instr::PV_OR_SC_H, // Xpulpimg: pv.or.sc.h + riscv_instr::PV_OR_SCI_H, // Xpulpimg: pv.or.sci.h + riscv_instr::PV_OR_B, // Xpulpimg: pv.or.b + riscv_instr::PV_OR_SC_B, // Xpulpimg: pv.or.sc.b + riscv_instr::PV_OR_SCI_B, // Xpulpimg: pv.or.sci.b + riscv_instr::PV_XOR_H, // Xpulpimg: pv.xor.h + riscv_instr::PV_XOR_SC_H, // Xpulpimg: pv.xor.sc.h + riscv_instr::PV_XOR_SCI_H, // Xpulpimg: pv.xor.sci.h + riscv_instr::PV_XOR_B, // Xpulpimg: pv.xor.b + riscv_instr::PV_XOR_SC_B, // Xpulpimg: pv.xor.sc.b + riscv_instr::PV_XOR_SCI_B, // Xpulpimg: pv.xor.sci.b + riscv_instr::PV_AND_H, // Xpulpimg: pv.and.h + riscv_instr::PV_AND_SC_H, // Xpulpimg: pv.and.sc.h + riscv_instr::PV_AND_SCI_H, // Xpulpimg: pv.and.sci.h + riscv_instr::PV_AND_B, // Xpulpimg: pv.and.b + riscv_instr::PV_AND_SC_B, // Xpulpimg: pv.and.sc.b + riscv_instr::PV_AND_SCI_B, // Xpulpimg: pv.and.sci.b + riscv_instr::PV_ABS_H, // Xpulpimg: pv.abs.h + riscv_instr::PV_ABS_B, // Xpulpimg: pv.abs.b + riscv_instr::PV_EXTRACT_H, // Xpulpimg: pv.extract.h + riscv_instr::PV_EXTRACT_B, // Xpulpimg: pv.extract.b + riscv_instr::PV_EXTRACTU_H, // Xpulpimg: pv.extractu.h + riscv_instr::PV_EXTRACTU_B, // Xpulpimg: pv.extractu.b + riscv_instr::PV_INSERT_H, // Xpulpimg: pv.insert.h + riscv_instr::PV_INSERT_B, // Xpulpimg: pv.insert.b + 
riscv_instr::PV_DOTUP_H, // Xpulpimg: pv.dotup.h + riscv_instr::PV_DOTUP_SC_H, // Xpulpimg: pv.dotup.sc.h + riscv_instr::PV_DOTUP_SCI_H, // Xpulpimg: pv.dotup.sci.h + riscv_instr::PV_DOTUP_B, // Xpulpimg: pv.dotup.b + riscv_instr::PV_DOTUP_SC_B, // Xpulpimg: pv.dotup.sc.b + riscv_instr::PV_DOTUP_SCI_B, // Xpulpimg: pv.dotup.sci.b + riscv_instr::PV_DOTUSP_H, // Xpulpimg: pv.dotusp.h + riscv_instr::PV_DOTUSP_SC_H, // Xpulpimg: pv.dotusp.sc.h + riscv_instr::PV_DOTUSP_SCI_H, // Xpulpimg: pv.dotusp.sci.h + riscv_instr::PV_DOTUSP_B, // Xpulpimg: pv.dotusp.b + riscv_instr::PV_DOTUSP_SC_B, // Xpulpimg: pv.dotusp.sc.b + riscv_instr::PV_DOTUSP_SCI_B, // Xpulpimg: pv.dotusp.sci.b + riscv_instr::PV_DOTSP_H, // Xpulpimg: pv.dotsp.h + riscv_instr::PV_DOTSP_SC_H, // Xpulpimg: pv.dotsp.sc.h + riscv_instr::PV_DOTSP_SCI_H, // Xpulpimg: pv.dotsp.sci.h + riscv_instr::PV_DOTSP_B, // Xpulpimg: pv.dotsp.b + riscv_instr::PV_DOTSP_SC_B, // Xpulpimg: pv.dotsp.sc.b + riscv_instr::PV_DOTSP_SCI_B, // Xpulpimg: pv.dotsp.sci.b + riscv_instr::PV_SDOTUP_H, // Xpulpimg: pv.sdotup.h + riscv_instr::PV_SDOTUP_SC_H, // Xpulpimg: pv.sdotup.sc.h + riscv_instr::PV_SDOTUP_SCI_H, // Xpulpimg: pv.sdotup.sci.h + riscv_instr::PV_SDOTUP_B, // Xpulpimg: pv.sdotup.b + riscv_instr::PV_SDOTUP_SC_B, // Xpulpimg: pv.sdotup.sc.b + riscv_instr::PV_SDOTUP_SCI_B, // Xpulpimg: pv.sdotup.sci.b + riscv_instr::PV_SDOTUSP_H, // Xpulpimg: pv.sdotusp.h + riscv_instr::PV_SDOTUSP_SC_H, // Xpulpimg: pv.sdotusp.sc.h + riscv_instr::PV_SDOTUSP_SCI_H, // Xpulpimg: pv.sdotusp.sci.h + riscv_instr::PV_SDOTUSP_B, // Xpulpimg: pv.sdotusp.b + riscv_instr::PV_SDOTUSP_SC_B, // Xpulpimg: pv.sdotusp.sc.b + riscv_instr::PV_SDOTUSP_SCI_B, // Xpulpimg: pv.sdotusp.sci.b + riscv_instr::PV_SDOTSP_H, // Xpulpimg: pv.sdotsp.h + riscv_instr::PV_SDOTSP_SC_H, // Xpulpimg: pv.sdotsp.sc.h + riscv_instr::PV_SDOTSP_SCI_H, // Xpulpimg: pv.sdotsp.sci.h + riscv_instr::PV_SDOTSP_B, // Xpulpimg: pv.sdotsp.b + riscv_instr::PV_SDOTSP_SC_B, // Xpulpimg: 
pv.sdotsp.sc.b + riscv_instr::PV_SDOTSP_SCI_B, // Xpulpimg: pv.sdotsp.sci.b + riscv_instr::PV_SHUFFLE2_H, // Xpulpimg: pv.shuffle2.h + riscv_instr::PV_SHUFFLE2_B: begin // Xpulpimg: pv.shuffle2.b if (snitch_pkg::XPULPIMG) begin dsp_valid_op = acc_qvalid_i; acc_qready_o = dsp_ready_op; @@ -87,24 +226,6 @@ module snitch_ipu #( endcase end - // Multiplication - multiplier #( - .Width ( 32 ), - .IdWidth ( IdWidth ) - ) i_multiplier ( - .clk_i, - .rst_i, - .id_i ( acc_qid_i ), - .operator_i ( acc_qdata_op_i ), - .operand_a_i ( acc_qdata_arga_i ), - .operand_b_i ( acc_qdata_argb_i ), - .valid_i ( mul_valid_op ), - .ready_o ( mul_ready_op ), - .result_o ( mul.result ), - .valid_o ( mul_valid ), - .ready_i ( mul_ready ), - .id_o ( mul.id ) - ); // Serial Divider serdiv #( .WIDTH ( 32 ), @@ -123,42 +244,61 @@ module snitch_ipu #( .id_o ( div.id ), .res_o ( div.result ) ); - if (snitch_pkg::XPULPIMG) begin : gen_dspu + + if (snitch_pkg::XPULPIMG) begin : gen_xpulpimg // DSP Unit dspu #( .Width ( 32 ), .IdWidth ( IdWidth ) ) i_dspu ( - .clk_i ( clk_i ), - .rst_i ( rst_i ), - .id_i ( acc_qid_i ), - .operator_i ( acc_qdata_op_i ), - .op_a_i ( acc_qdata_arga_i ), - .op_b_i ( acc_qdata_argb_i ), - .in_valid_i ( dsp_valid_op ), - .in_ready_o ( dsp_ready_op ), - .out_valid_o ( dsp_valid ), - .out_ready_i ( dsp_ready ), - .id_o ( dsp.id ), - .result_o ( dsp.result ) + .clk_i ( clk_i ), + .rst_i ( rst_i ), + .id_i ( acc_qid_i ), + .operator_i ( acc_qdata_op_i ), + .op_a_i ( acc_qdata_arga_i ), + .op_b_i ( acc_qdata_argb_i ), + .op_c_i ( acc_qdata_argc_i ), + .in_valid_i ( dsp_valid_op ), + .in_ready_o ( dsp_ready_op ), + .out_valid_o ( dsp_valid ), + .out_ready_i ( dsp_ready ), + .id_o ( dsp.id ), + .result_o ( dsp.result ) ); - end - // Output Arbitration - if (snitch_pkg::XPULPIMG) begin : gen_3inputs + // Output Arbitration stream_arbiter #( .DATA_T ( result_t ), - .N_INP ( 3 ) + .N_INP ( 2 ) ) i_stream_arbiter ( .clk_i, - .rst_ni ( ~rst_i ), - .inp_data_i ( {div, mul, dsp} ), - 
.inp_valid_i ( {div_valid, mul_valid, dsp_valid} ), - .inp_ready_o ( {div_ready, mul_ready, dsp_ready} ), - .oup_data_o ( oup ), - .oup_valid_o ( acc_pvalid_o ), - .oup_ready_i ( acc_pready_i ) + .rst_ni ( ~rst_i ), + .inp_data_i ( {div, dsp} ), + .inp_valid_i ( {div_valid, dsp_valid} ), + .inp_ready_o ( {div_ready, dsp_ready} ), + .oup_data_o ( oup ), + .oup_valid_o ( acc_pvalid_o ), + .oup_ready_i ( acc_pready_i ) + ); + end else begin : gen_vanilla + // Multiplication + multiplier #( + .Width ( 32 ), + .IdWidth ( IdWidth ) + ) i_multiplier ( + .clk_i, + .rst_i, + .id_i ( acc_qid_i ), + .operator_i ( acc_qdata_op_i ), + .operand_a_i ( acc_qdata_arga_i ), + .operand_b_i ( acc_qdata_argb_i ), + .valid_i ( mul_valid_op ), + .ready_o ( mul_ready_op ), + .result_o ( mul.result ), + .valid_o ( mul_valid ), + .ready_i ( mul_ready ), + .id_o ( mul.id ) ); - end else begin : gen_2inputs + // Output Arbitration stream_arbiter #( .DATA_T ( result_t ), .N_INP ( 2 ) @@ -173,6 +313,7 @@ module snitch_ipu #( .oup_ready_i ( acc_pready_i ) ); end + assign acc_pdata_o = oup.result; assign acc_pid_o = oup.id; endmodule @@ -188,6 +329,7 @@ module dspu #( input logic [31:0] operator_i, input logic [Width-1:0] op_a_i, input logic [Width-1:0] op_b_i, + input logic [Width-1:0] op_c_i, input logic in_valid_i, output logic in_ready_o, output logic out_valid_o, @@ -202,19 +344,42 @@ module dspu #( assign id_o = id_i; // Decoded fields - logic [4:0] ximm; - assign ximm = operator_i[24:20]; + logic [4:0] imm5; + logic [5:0] imm6; + assign imm5 = operator_i[24:20]; + assign imm6 = {operator_i[24:20], operator_i[25]}; // Internal control signals - logic cmp_signed; // comparator operation is signed + logic cmp_signed; // comparator operation is signed enum logic [1:0] { - Reg, Zero, ClipBound - } cmp_op_b_sel; // selection of shared comparator operands - logic clip_unsigned; // clip operation has "0" as lower bound - logic clip_register; // if 1 clip operation uses rs2, else ximm + None, Reg, 
Zero, ClipBound + } cmp_op_b_sel; // selection of shared comparator operands + logic clip_unsigned; // clip operation has "0" as lower bound + logic clip_register; // if 1 clip operation uses rs2, else imm5 + enum logic [1:0] { + NoMul, MulLow, MulHigh, MulMac + } mul_op; // type of multiplication operation + logic mac_msu; // multiplication operation is MSU + logic mul_op_a_sign; // sign of multiplier operand a + logic mac_op_b_sign; // sign of multiplier operand b enum logic [3:0] { - Abs, Sle, Min, Max, Exths, Exthz, Extbs, Extbz, Clip - } res_sel; // result selection + Nop, Abs, Sle, Min, Max, Exths, Exthz, Extbs, Extbz, Clip, Mac, Simd + } res_sel; // result selection + + enum logic [4:0] { + SimdNop, SimdAdd, SimdSub, SimdAvg, SimdMin, SimdMax, SimdSrl, SimdSra, SimdSll, SimdOr, + SimdXor, SimdAnd, SimdAbs, SimdExt, SimdIns, SimdDotp, SimdShuffle + } simd_op; // SIMD operation + enum logic { + HalfWord, Byte + } simd_size; // SIMD granularity + enum logic [1:0] { + Vect, Sc, Sci + } simd_mode; // SIMD mode + logic simd_signed; // SIMD operation is signed and uses sign-extended imm6 + logic simd_dotp_op_a_signed; // signedness of SIMD dotp operand a + logic simd_dotp_op_b_signed; // signedness of SIMD dotp operand b + logic simd_dotp_acc; // accumulate result of SIMD dotp on destination reg // -------------------- // Decoder @@ -222,46 +387,90 @@ module dspu #( always_comb begin cmp_signed = 1'b1; - cmp_op_b_sel = Reg; + cmp_op_b_sel = None; clip_unsigned = 1'b0; clip_register = 1'b0; - res_sel = Abs; + mul_op = NoMul; + mac_msu = 1'b0; + mul_op_a_sign = 1'b0; + mac_op_b_sign = 1'b0; + res_sel = Nop; + simd_op = SimdNop; + simd_size = HalfWord; + simd_mode = Vect; + simd_signed = 1; + simd_dotp_op_a_signed = 1; + simd_dotp_op_b_signed = 1; + simd_dotp_acc = 0; unique casez (operator_i) + // Multiplications from M extension + riscv_instr::MUL: begin + mul_op = MulLow; + mul_op_a_sign = 1'b1; + mac_op_b_sign = 1'b1; + res_sel = Mac; + end + riscv_instr::MULH: 
begin + mul_op = MulHigh; + mul_op_a_sign = 1'b1; + mac_op_b_sign = 1'b1; + res_sel = Mac; + end + riscv_instr::MULHSU: begin + mul_op = MulHigh; + mul_op_a_sign = 1'b1; + res_sel = Mac; + end + riscv_instr::MULHU: begin + mul_op = MulHigh; + res_sel = Mac; + end + // Instructions from Xpulpimg riscv_instr::P_ABS: begin cmp_op_b_sel = Zero; res_sel = Abs; end riscv_instr::P_SLET: begin + cmp_op_b_sel = Reg; res_sel = Sle; end riscv_instr::P_SLETU: begin cmp_signed = 1'b0; + cmp_op_b_sel = Reg; res_sel = Sle; end riscv_instr::P_MIN: begin + cmp_op_b_sel = Reg; res_sel = Min; end riscv_instr::P_MINU: begin cmp_signed = 1'b0; + cmp_op_b_sel = Reg; res_sel = Min; end riscv_instr::P_MAX: begin + cmp_op_b_sel = Reg; res_sel = Max; end riscv_instr::P_MAXU: begin cmp_signed = 1'b0; + cmp_op_b_sel = Reg; res_sel = Max; end riscv_instr::P_EXTHS: begin + cmp_op_b_sel = Reg; res_sel = Exths; end riscv_instr::P_EXTHZ: begin + cmp_op_b_sel = Reg; res_sel = Exthz; end riscv_instr::P_EXTBS: begin + cmp_op_b_sel = Reg; res_sel = Extbs; end riscv_instr::P_EXTBZ: begin + cmp_op_b_sel = Reg; res_sel = Extbz; end riscv_instr::P_CLIP: begin @@ -284,6 +493,770 @@ module dspu #( cmp_op_b_sel = ClipBound; res_sel = Clip; end + riscv_instr::P_MAC: begin + mul_op = MulMac; + mul_op_a_sign = 1'b1; + mac_op_b_sign = 1'b1; + res_sel = Mac; + end + riscv_instr::P_MSU: begin + mul_op = MulMac; + mac_msu = 1'b1; + mul_op_a_sign = 1'b1; + mac_op_b_sign = 1'b1; + res_sel = Mac; + end + riscv_instr::PV_ADD_H: begin + simd_op = SimdAdd; + res_sel = Simd; + end + riscv_instr::PV_ADD_SC_H: begin + simd_op = SimdAdd; + simd_mode = Sc; + res_sel = Simd; + end + riscv_instr::PV_ADD_SCI_H: begin + simd_op = SimdAdd; + simd_mode = Sci; + res_sel = Simd; + end + riscv_instr::PV_ADD_B: begin + simd_op = SimdAdd; + simd_size = Byte; + res_sel = Simd; + end + riscv_instr::PV_ADD_SC_B: begin + simd_op = SimdAdd; + simd_size = Byte; + simd_mode = Sc; + res_sel = Simd; + end + riscv_instr::PV_ADD_SCI_B: begin + 
simd_op = SimdAdd; + simd_size = Byte; + simd_mode = Sci; + res_sel = Simd; + end + riscv_instr::PV_SUB_H: begin + simd_op = SimdSub; + res_sel = Simd; + end + riscv_instr::PV_SUB_SC_H: begin + simd_op = SimdSub; + simd_mode = Sc; + res_sel = Simd; + end + riscv_instr::PV_SUB_SCI_H: begin + simd_op = SimdSub; + simd_mode = Sci; + res_sel = Simd; + end + riscv_instr::PV_SUB_B: begin + simd_op = SimdSub; + simd_size = Byte; + res_sel = Simd; + end + riscv_instr::PV_SUB_SC_B: begin + simd_op = SimdSub; + simd_size = Byte; + simd_mode = Sc; + res_sel = Simd; + end + riscv_instr::PV_SUB_SCI_B: begin + simd_op = SimdSub; + simd_size = Byte; + simd_mode = Sci; + res_sel = Simd; + end + riscv_instr::PV_AVG_H: begin + simd_op = SimdAvg; + res_sel = Simd; + end + riscv_instr::PV_AVG_SC_H: begin + simd_op = SimdAvg; + simd_mode = Sc; + res_sel = Simd; + end + riscv_instr::PV_AVG_SCI_H: begin + simd_op = SimdAvg; + simd_mode = Sci; + res_sel = Simd; + end + riscv_instr::PV_AVG_B: begin + simd_op = SimdAvg; + simd_size = Byte; + res_sel = Simd; + end + riscv_instr::PV_AVG_SC_B: begin + simd_op = SimdAvg; + simd_size = Byte; + simd_mode = Sc; + res_sel = Simd; + end + riscv_instr::PV_AVG_SCI_B: begin + simd_op = SimdAvg; + simd_size = Byte; + simd_mode = Sci; + res_sel = Simd; + end + riscv_instr::PV_AVGU_H: begin + simd_op = SimdAvg; + simd_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_AVGU_SC_H: begin + simd_op = SimdAvg; + simd_mode = Sc; + simd_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_AVGU_SCI_H: begin + simd_op = SimdAvg; + simd_mode = Sci; + simd_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_AVGU_B: begin + simd_op = SimdAvg; + simd_size = Byte; + simd_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_AVGU_SC_B: begin + simd_op = SimdAvg; + simd_size = Byte; + simd_mode = Sc; + simd_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_AVGU_SCI_B: begin + simd_op = SimdAvg; + simd_size = Byte; + simd_mode = Sci; + simd_signed = 0; + res_sel 
= Simd; + end + riscv_instr::PV_MIN_H: begin + simd_op = SimdMin; + res_sel = Simd; + end + riscv_instr::PV_MIN_SC_H: begin + simd_op = SimdMin; + simd_mode = Sc; + res_sel = Simd; + end + riscv_instr::PV_MIN_SCI_H: begin + simd_op = SimdMin; + simd_mode = Sci; + res_sel = Simd; + end + riscv_instr::PV_MIN_B: begin + simd_op = SimdMin; + simd_size = Byte; + res_sel = Simd; + end + riscv_instr::PV_MIN_SC_B: begin + simd_op = SimdMin; + simd_size = Byte; + simd_mode = Sc; + res_sel = Simd; + end + riscv_instr::PV_MIN_SCI_B: begin + simd_op = SimdMin; + simd_size = Byte; + simd_mode = Sci; + res_sel = Simd; + end + riscv_instr::PV_MINU_H: begin + simd_op = SimdMin; + simd_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_MINU_SC_H: begin + simd_op = SimdMin; + simd_mode = Sc; + simd_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_MINU_SCI_H: begin + simd_op = SimdMin; + simd_mode = Sci; + simd_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_MINU_B: begin + simd_op = SimdMin; + simd_size = Byte; + simd_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_MINU_SC_B: begin + simd_op = SimdMin; + simd_size = Byte; + simd_mode = Sc; + simd_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_MINU_SCI_B: begin + simd_op = SimdMin; + simd_size = Byte; + simd_mode = Sci; + simd_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_MAX_H: begin + simd_op = SimdMax; + res_sel = Simd; + end + riscv_instr::PV_MAX_SC_H: begin + simd_op = SimdMax; + simd_mode = Sc; + res_sel = Simd; + end + riscv_instr::PV_MAX_SCI_H: begin + simd_op = SimdMax; + simd_mode = Sci; + res_sel = Simd; + end + riscv_instr::PV_MAX_B: begin + simd_op = SimdMax; + simd_size = Byte; + res_sel = Simd; + end + riscv_instr::PV_MAX_SC_B: begin + simd_op = SimdMax; + simd_size = Byte; + simd_mode = Sc; + res_sel = Simd; + end + riscv_instr::PV_MAX_SCI_B: begin + simd_op = SimdMax; + simd_size = Byte; + simd_mode = Sci; + res_sel = Simd; + end + riscv_instr::PV_MAXU_H: begin + simd_op = SimdMax; 
+ simd_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_MAXU_SC_H: begin + simd_op = SimdMax; + simd_mode = Sc; + simd_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_MAXU_SCI_H: begin + simd_op = SimdMax; + simd_mode = Sci; + simd_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_MAXU_B: begin + simd_op = SimdMax; + simd_size = Byte; + simd_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_MAXU_SC_B: begin + simd_op = SimdMax; + simd_size = Byte; + simd_mode = Sc; + simd_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_MAXU_SCI_B: begin + simd_op = SimdMax; + simd_size = Byte; + simd_mode = Sci; + simd_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_SRL_H: begin + simd_op = SimdSrl; + res_sel = Simd; + end + riscv_instr::PV_SRL_SC_H: begin + simd_op = SimdSrl; + simd_mode = Sc; + res_sel = Simd; + end + riscv_instr::PV_SRL_SCI_H: begin + simd_op = SimdSrl; + simd_mode = Sci; + res_sel = Simd; + end + riscv_instr::PV_SRL_B: begin + simd_op = SimdSrl; + simd_size = Byte; + res_sel = Simd; + end + riscv_instr::PV_SRL_SC_B: begin + simd_op = SimdSrl; + simd_size = Byte; + simd_mode = Sc; + res_sel = Simd; + end + riscv_instr::PV_SRL_SCI_B: begin + simd_op = SimdSrl; + simd_size = Byte; + simd_mode = Sci; + res_sel = Simd; + end + riscv_instr::PV_SRA_H: begin + simd_op = SimdSra; + res_sel = Simd; + end + riscv_instr::PV_SRA_SC_H: begin + simd_op = SimdSra; + simd_mode = Sc; + res_sel = Simd; + end + riscv_instr::PV_SRA_SCI_H: begin + simd_op = SimdSra; + simd_mode = Sci; + res_sel = Simd; + end + riscv_instr::PV_SRA_B: begin + simd_op = SimdSra; + simd_size = Byte; + res_sel = Simd; + end + riscv_instr::PV_SRA_SC_B: begin + simd_op = SimdSra; + simd_size = Byte; + simd_mode = Sc; + res_sel = Simd; + end + riscv_instr::PV_SRA_SCI_B: begin + simd_op = SimdSra; + simd_size = Byte; + simd_mode = Sci; + res_sel = Simd; + end + riscv_instr::PV_SLL_H: begin + simd_op = SimdSll; + res_sel = Simd; + end + riscv_instr::PV_SLL_SC_H: begin + simd_op 
= SimdSll; + simd_mode = Sc; + res_sel = Simd; + end + riscv_instr::PV_SLL_SCI_H: begin + simd_op = SimdSll; + simd_mode = Sci; + res_sel = Simd; + end + riscv_instr::PV_SLL_B: begin + simd_op = SimdSll; + simd_size = Byte; + res_sel = Simd; + end + riscv_instr::PV_SLL_SC_B: begin + simd_op = SimdSll; + simd_size = Byte; + simd_mode = Sc; + res_sel = Simd; + end + riscv_instr::PV_SLL_SCI_B: begin + simd_op = SimdSll; + simd_size = Byte; + simd_mode = Sci; + res_sel = Simd; + end + riscv_instr::PV_OR_H: begin + simd_op = SimdOr; + res_sel = Simd; + end + riscv_instr::PV_OR_SC_H: begin + simd_op = SimdOr; + simd_mode = Sc; + res_sel = Simd; + end + riscv_instr::PV_OR_SCI_H: begin + simd_op = SimdOr; + simd_mode = Sci; + res_sel = Simd; + end + riscv_instr::PV_OR_B: begin + simd_op = SimdOr; + simd_size = Byte; + res_sel = Simd; + end + riscv_instr::PV_OR_SC_B: begin + simd_op = SimdOr; + simd_size = Byte; + simd_mode = Sc; + res_sel = Simd; + end + riscv_instr::PV_OR_SCI_B: begin + simd_op = SimdOr; + simd_size = Byte; + simd_mode = Sci; + res_sel = Simd; + end + riscv_instr::PV_XOR_H: begin + simd_op = SimdXor; + res_sel = Simd; + end + riscv_instr::PV_XOR_SC_H: begin + simd_op = SimdXor; + simd_mode = Sc; + res_sel = Simd; + end + riscv_instr::PV_XOR_SCI_H: begin + simd_op = SimdXor; + simd_mode = Sci; + res_sel = Simd; + end + riscv_instr::PV_XOR_B: begin + simd_op = SimdXor; + simd_size = Byte; + res_sel = Simd; + end + riscv_instr::PV_XOR_SC_B: begin + simd_op = SimdXor; + simd_size = Byte; + simd_mode = Sc; + res_sel = Simd; + end + riscv_instr::PV_XOR_SCI_B: begin + simd_op = SimdXor; + simd_size = Byte; + simd_mode = Sci; + res_sel = Simd; + end + riscv_instr::PV_AND_H: begin + simd_op = SimdAnd; + res_sel = Simd; + end + riscv_instr::PV_AND_SC_H: begin + simd_op = SimdAnd; + simd_mode = Sc; + res_sel = Simd; + end + riscv_instr::PV_AND_SCI_H: begin + simd_op = SimdAnd; + simd_mode = Sci; + res_sel = Simd; + end + riscv_instr::PV_AND_B: begin + simd_op = 
SimdAnd; + simd_size = Byte; + res_sel = Simd; + end + riscv_instr::PV_AND_SC_B: begin + simd_op = SimdAnd; + simd_size = Byte; + simd_mode = Sc; + res_sel = Simd; + end + riscv_instr::PV_AND_SCI_B: begin + simd_op = SimdAnd; + simd_size = Byte; + simd_mode = Sci; + res_sel = Simd; + end + riscv_instr::PV_ABS_H: begin + simd_op = SimdAbs; + res_sel = Simd; + end + riscv_instr::PV_ABS_B: begin + simd_op = SimdAbs; + simd_size = Byte; + res_sel = Simd; + end + riscv_instr::PV_EXTRACT_H: begin + simd_op = SimdExt; + res_sel = Simd; + end + riscv_instr::PV_EXTRACT_B: begin + simd_op = SimdExt; + simd_size = Byte; + res_sel = Simd; + end + riscv_instr::PV_EXTRACTU_H: begin + simd_op = SimdExt; + simd_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_EXTRACTU_B: begin + simd_op = SimdExt; + simd_size = Byte; + simd_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_INSERT_H: begin + simd_op = SimdIns; + res_sel = Simd; + end + riscv_instr::PV_INSERT_B: begin + simd_op = SimdIns; + simd_size = Byte; + res_sel = Simd; + end + riscv_instr::PV_DOTUP_H: begin + simd_op = SimdDotp; + simd_signed = 0; + simd_dotp_op_a_signed = 0; + simd_dotp_op_b_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_DOTUP_SC_H: begin + simd_op = SimdDotp; + simd_mode = Sc; + simd_signed = 0; + simd_dotp_op_a_signed = 0; + simd_dotp_op_b_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_DOTUP_SCI_H: begin + simd_op = SimdDotp; + simd_mode = Sci; + simd_signed = 0; + simd_dotp_op_a_signed = 0; + simd_dotp_op_b_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_DOTUP_B: begin + simd_op = SimdDotp; + simd_size = Byte; + simd_signed = 0; + simd_dotp_op_a_signed = 0; + simd_dotp_op_b_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_DOTUP_SC_B: begin + simd_op = SimdDotp; + simd_size = Byte; + simd_mode = Sc; + simd_signed = 0; + simd_dotp_op_a_signed = 0; + simd_dotp_op_b_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_DOTUP_SCI_B: begin + simd_op = SimdDotp; + simd_size = 
Byte; + simd_mode = Sci; + simd_signed = 0; + simd_dotp_op_a_signed = 0; + simd_dotp_op_b_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_DOTUSP_H: begin + simd_op = SimdDotp; + simd_dotp_op_a_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_DOTUSP_SC_H: begin + simd_op = SimdDotp; + simd_mode = Sc; + simd_dotp_op_a_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_DOTUSP_SCI_H: begin + simd_op = SimdDotp; + simd_mode = Sci; + simd_dotp_op_a_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_DOTUSP_B: begin + simd_op = SimdDotp; + simd_size = Byte; + simd_dotp_op_a_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_DOTUSP_SC_B: begin + simd_op = SimdDotp; + simd_size = Byte; + simd_mode = Sc; + simd_dotp_op_a_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_DOTUSP_SCI_B: begin + simd_op = SimdDotp; + simd_size = Byte; + simd_mode = Sci; + simd_dotp_op_a_signed = 0; + res_sel = Simd; + end + riscv_instr::PV_DOTSP_H: begin + simd_op = SimdDotp; + res_sel = Simd; + end + riscv_instr::PV_DOTSP_SC_H: begin + simd_op = SimdDotp; + simd_mode = Sc; + res_sel = Simd; + end + riscv_instr::PV_DOTSP_SCI_H: begin + simd_op = SimdDotp; + simd_mode = Sci; + res_sel = Simd; + end + riscv_instr::PV_DOTSP_B: begin + simd_op = SimdDotp; + simd_size = Byte; + res_sel = Simd; + end + riscv_instr::PV_DOTSP_SC_B: begin + simd_op = SimdDotp; + simd_size = Byte; + simd_mode = Sc; + res_sel = Simd; + end + riscv_instr::PV_DOTSP_SCI_B: begin + simd_op = SimdDotp; + simd_size = Byte; + simd_mode = Sci; + res_sel = Simd; + end + riscv_instr::PV_SDOTUP_H: begin + simd_op = SimdDotp; + simd_signed = 0; + simd_dotp_op_a_signed = 0; + simd_dotp_op_b_signed = 0; + simd_dotp_acc = 1; + res_sel = Simd; + end + riscv_instr::PV_SDOTUP_SC_H: begin + simd_op = SimdDotp; + simd_mode = Sc; + simd_signed = 0; + simd_dotp_op_a_signed = 0; + simd_dotp_op_b_signed = 0; + simd_dotp_acc = 1; + res_sel = Simd; + end + riscv_instr::PV_SDOTUP_SCI_H: begin + simd_op = SimdDotp; + simd_mode 
= Sci; + simd_signed = 0; + simd_dotp_op_a_signed = 0; + simd_dotp_op_b_signed = 0; + simd_dotp_acc = 1; + res_sel = Simd; + end + riscv_instr::PV_SDOTUP_B: begin + simd_op = SimdDotp; + simd_size = Byte; + simd_signed = 0; + simd_dotp_op_a_signed = 0; + simd_dotp_op_b_signed = 0; + simd_dotp_acc = 1; + res_sel = Simd; + end + riscv_instr::PV_SDOTUP_SC_B: begin + simd_op = SimdDotp; + simd_size = Byte; + simd_mode = Sc; + simd_signed = 0; + simd_dotp_op_a_signed = 0; + simd_dotp_op_b_signed = 0; + simd_dotp_acc = 1; + res_sel = Simd; + end + riscv_instr::PV_SDOTUP_SCI_B: begin + simd_op = SimdDotp; + simd_size = Byte; + simd_mode = Sci; + simd_signed = 0; + simd_dotp_op_a_signed = 0; + simd_dotp_op_b_signed = 0; + simd_dotp_acc = 1; + res_sel = Simd; + end + riscv_instr::PV_SDOTUSP_H: begin + simd_op = SimdDotp; + simd_dotp_op_a_signed = 0; + simd_dotp_acc = 1; + res_sel = Simd; + end + riscv_instr::PV_SDOTUSP_SC_H: begin + simd_op = SimdDotp; + simd_mode = Sc; + simd_dotp_op_a_signed = 0; + simd_dotp_acc = 1; + res_sel = Simd; + end + riscv_instr::PV_SDOTUSP_SCI_H: begin + simd_op = SimdDotp; + simd_mode = Sci; + simd_dotp_op_a_signed = 0; + simd_dotp_acc = 1; + res_sel = Simd; + end + riscv_instr::PV_SDOTUSP_B: begin + simd_op = SimdDotp; + simd_size = Byte; + simd_dotp_op_a_signed = 0; + simd_dotp_acc = 1; + res_sel = Simd; + end + riscv_instr::PV_SDOTUSP_SC_B: begin + simd_op = SimdDotp; + simd_size = Byte; + simd_mode = Sc; + simd_dotp_op_a_signed = 0; + simd_dotp_acc = 1; + res_sel = Simd; + end + riscv_instr::PV_SDOTUSP_SCI_B: begin + simd_op = SimdDotp; + simd_size = Byte; + simd_mode = Sci; + simd_dotp_op_a_signed = 0; + simd_dotp_acc = 1; + res_sel = Simd; + end + riscv_instr::PV_SDOTSP_H: begin + simd_op = SimdDotp; + simd_dotp_acc = 1; + res_sel = Simd; + end + riscv_instr::PV_SDOTSP_SC_H: begin + simd_op = SimdDotp; + simd_mode = Sc; + simd_dotp_acc = 1; + res_sel = Simd; + end + riscv_instr::PV_SDOTSP_SCI_H: begin + simd_op = SimdDotp; + simd_mode = 
Sci; + simd_dotp_acc = 1; + res_sel = Simd; + end + riscv_instr::PV_SDOTSP_B: begin + simd_op = SimdDotp; + simd_size = Byte; + simd_dotp_acc = 1; + res_sel = Simd; + end + riscv_instr::PV_SDOTSP_SC_B: begin + simd_op = SimdDotp; + simd_size = Byte; + simd_mode = Sc; + simd_dotp_acc = 1; + res_sel = Simd; + end + riscv_instr::PV_SDOTSP_SCI_B: begin + simd_op = SimdDotp; + simd_size = Byte; + simd_mode = Sci; + simd_dotp_acc = 1; + res_sel = Simd; + end + riscv_instr::PV_SHUFFLE2_H: begin + simd_op = SimdShuffle; + res_sel = Simd; + end + riscv_instr::PV_SHUFFLE2_B: begin + simd_op = SimdShuffle; + simd_size = Byte; + res_sel = Simd; + end default: ; endcase end @@ -294,8 +1267,6 @@ module dspu #( // |___//_/ \_\|_|/_/ \_\|_| /_/ \_\|_| |_||_| // - logic cmp_result; - // -------------------- // Clips // -------------------- @@ -304,8 +1275,8 @@ module dspu #( logic [Width-1:0] clip_lower; logic [Width-1:0] clip_comp; - // Generate -2^(ximm-1), 2^(ximm-1)-1 for clip/clipu and -rs2-1, rs2 for clipr, clipur - assign clip_lower = ({(Width+1){1'b1}} << $unsigned(ximm)) >> 1; + // Generate -2^(imm5-1), 2^(imm5-1)-1 for clip/clipu and -rs2-1, rs2 for clipr, clipur + assign clip_lower = ({(Width+1){1'b1}} << $unsigned(imm5)) >> 1; assign clip_op_b_n = clip_unsigned ? 'b0 : (clip_register ? ~op_b_i : clip_lower); assign clip_op_b = clip_register ? op_b_i : ~clip_lower; @@ -315,13 +1286,14 @@ module dspu #( // Select operand to use in comparison for clip operations: clips would need two comparisons // to clamp the result between the two bounds; but one comparison is enough if we select the // second operand basing on op_a and clip_op_b signs (i.e. rs1 and clip upper bound, being - // either rs2 or 2^(ximm-1)-1) + // either rs2 or 2^(imm5-1)-1) assign clip_comp = clip_use_n_bound ? 
clip_op_b_n : clip_op_b; // -------------------- // Shared comparator // -------------------- logic [Width-1:0] cmp_op_a, cmp_op_b; + logic cmp_result; // Comparator operand A assignment assign cmp_op_a = op_a_i; @@ -338,6 +1310,197 @@ module dspu #( // Instantiate comparator assign cmp_result = $signed({cmp_op_a[Width-1] & cmp_signed, cmp_op_a}) <= $signed({cmp_op_b[Width-1] & cmp_signed, cmp_op_b}); + // -------------------- + // Multiplier & acc + // -------------------- + + // 32x32 into 32 bits multiplier & accumulator + logic [Width-1:0] mul_op_a; + logic [2*Width-1:0] mul_result; + logic [Width-1:0] mac_result; + + assign mul_op_a = mac_msu ? -op_a_i : op_a_i; // op_a_i is sign-inverted if mac_msu=1, to have -op_a*op_b + + // 32-bits input, 64-bits output multiplier + assign mul_result = $signed({mul_op_a[Width-1] & mul_op_a_sign, mul_op_a}) * $signed({op_b_i[Width-1] & mac_op_b_sign, op_b_i}); + + always_comb begin + unique case (mul_op) + MulLow: mac_result = mul_result[Width-1:0]; // mul, take lowest 32 bits + MulHigh: mac_result = mul_result[2*Width-1:Width]; // mul high, take highest 32 bits + MulMac: mac_result = op_c_i + mul_result[Width-1:0]; // accumulate + default: mac_result = '0; + endcase + end + + // -------------------- + // SIMD operations + // -------------------- + + logic [3:0][7:0] simd_op_a, simd_op_b, simd_op_c; + logic [1:0][7:0] simd_imm; + logic [3:0][7:0] simd_result; + + // half-word and byte immediate extensions + always_comb + if(simd_signed) simd_imm = $signed(imm6); + else simd_imm = $unsigned(imm6); + + // SIMD operands composition + always_comb begin + simd_op_a = 'b0; + simd_op_b = 'b0; + simd_op_c = 'b0; + unique case (simd_size) + // half-word granularity + HalfWord: + for (int i = 0; i < Width/16; i++) begin + simd_op_a[2*i +: 2] = op_a_i[16*i +: 16]; // operands A are the half-words of op_a_i + // operands B are the half-words of op_b_i, replicated lowest half-word of op_b_i or replicated 6-bit immediate + simd_op_b[2*i 
+: 2] = (simd_mode == Vect) ? op_b_i[16*i +: 16] : ((simd_mode == Sc) ? op_b_i[15:0] : simd_imm); + simd_op_c[2*i +: 2] = op_c_i[16*i +: 16]; // operands C are the half-words of op_c_i + end + // byte granularity + Byte: + for (int i = 0; i < Width/8; i++) begin + simd_op_a[i] = op_a_i[8*i +: 8]; // operands A are the bytes of op_a_i + // operands B are the bytes of op_b_i, replicated lowest byte of op_b_i or replicated 6-bit immediate + simd_op_b[i] = (simd_mode == Vect) ? op_b_i[8*i +: 8] : ((simd_mode == Sc) ? op_b_i[7:0] : simd_imm[0]); + simd_op_c[i] = op_c_i[8*i +: 8]; // operands C are the bytes of op_c_i + end + default: ; + endcase + end + + // SIMD unit + always_comb begin + simd_result = 'b0; + unique case (simd_size) + // half-word granularity + HalfWord: begin + unique case (simd_op) + SimdAdd: + for (int i = 0; i < Width/16; i++) + simd_result[2*i +: 2] = $signed(simd_op_a[2*i +: 2]) + $signed(simd_op_b[2*i +: 2]); + SimdSub: + for (int i = 0; i < Width/16; i++) + simd_result[2*i +: 2] = $signed(simd_op_a[2*i +: 2]) - $signed(simd_op_b[2*i +: 2]); + SimdAvg: + for (int i = 0; i < Width/16; i++) begin + simd_result[2*i +: 2] = $signed(simd_op_a[2*i +: 2]) + $signed(simd_op_b[2*i +: 2]); + simd_result[2*i +: 2] = {simd_result[2*i+1][7] & simd_signed, simd_result[2*i +: 2]} >> 1; + end + SimdMin: + for (int i = 0; i < Width/16; i++) + simd_result[2*i +: 2] = $signed({simd_op_a[2*i+1][7] & simd_signed, simd_op_a[2*i +: 2]}) <= + $signed({simd_op_b[2*i+1][7] & simd_signed, simd_op_b[2*i +: 2]}) ? + simd_op_a[2*i +: 2] : simd_op_b[2*i +: 2]; + SimdMax: + for (int i = 0; i < Width/16; i++) + simd_result[2*i +: 2] = $signed({simd_op_a[2*i+1][7] & simd_signed, simd_op_a[2*i +: 2]}) > + $signed({simd_op_b[2*i+1][7] & simd_signed, simd_op_b[2*i +: 2]}) ? 
+ simd_op_a[2*i +: 2] : simd_op_b[2*i +: 2]; + SimdSrl: + for (int i = 0; i < Width/16; i++) + simd_result[2*i +: 2] = $unsigned(simd_op_a[2*i +: 2]) >> simd_op_b[2*i][3:0]; + SimdSra: + for (int i = 0; i < Width/16; i++) + simd_result[2*i +: 2] = $signed(simd_op_a[2*i +: 2]) >>> simd_op_b[2*i][3:0]; + SimdSll: + for (int i = 0; i < Width/16; i++) + simd_result[2*i +: 2] = $unsigned(simd_op_a[2*i +: 2]) << simd_op_b[2*i][3:0]; + SimdOr: simd_result = simd_op_a | simd_op_b; + SimdXor: simd_result = simd_op_a ^ simd_op_b; + SimdAnd: simd_result = simd_op_a & simd_op_b; + SimdAbs: + for (int i = 0; i < Width/16; i++) + simd_result[2*i +: 2] = $signed(simd_op_a[2*i +: 2]) > 0 ? simd_op_a[2*i +: 2] : -$signed(simd_op_a[2*i +: 2]); + SimdExt: begin + simd_result[1:0] = simd_op_a[2*imm6[0] +: 2]; + // sign- or zero-extend + simd_result[3:2] = {16{simd_op_a[2*imm6[0]+1][7] & simd_signed}}; + end + SimdIns: begin + simd_result = op_c_i; + simd_result[2*imm6[0] +: 2] = simd_op_a[1:0]; + end + SimdDotp: begin + simd_result = op_c_i & {(Width){simd_dotp_acc}}; // accumulate on rd or start from zero + for (int i = 0; i < Width/16; i++) begin + simd_result = $signed(simd_result) + $signed({simd_op_a[2*i+1][7] & simd_dotp_op_a_signed, simd_op_a[2*i +: 2]}) * + $signed({simd_op_b[2*i+1][7] & simd_dotp_op_b_signed, simd_op_b[2*i +: 2]}); + end + end + SimdShuffle: + for (int i = 0; i < Width/16; i++) + simd_result[2*i +: 2] = simd_op_b[2*i][1] ? 
simd_op_a[2*simd_op_b[2*i][0] +: 2] : simd_op_c[2*simd_op_b[2*i][0] +: 2]; + default: ; + endcase + end + // byte granularity + Byte: begin + unique case (simd_op) + SimdAdd: + for (int i = 0; i < Width/8; i++) + simd_result[i] = $signed(simd_op_a[i]) + $signed(simd_op_b[i]); + SimdSub: + for (int i = 0; i < Width/8; i++) + simd_result[i] = $signed(simd_op_a[i]) - $signed(simd_op_b[i]); + SimdAvg: + for (int i = 0; i < Width/8; i++) begin + simd_result[i] = $signed(simd_op_a[i]) + $signed(simd_op_b[i]); + simd_result[i] = {simd_result[i][7] & simd_signed, simd_result[i]} >> 1; + end + SimdMin: + for (int i = 0; i < Width/8; i++) + simd_result[i] = $signed({simd_op_a[i][7] & simd_signed, simd_op_a[i]}) <= + $signed({simd_op_b[i][7] & simd_signed, simd_op_b[i]}) ? + simd_op_a[i] : simd_op_b[i]; + SimdMax: + for (int i = 0; i < Width/8; i++) + simd_result[i] = $signed({simd_op_a[i][7] & simd_signed, simd_op_a[i]}) > + $signed({simd_op_b[i][7] & simd_signed, simd_op_b[i]}) ? + simd_op_a[i] : simd_op_b[i]; + SimdSrl: + for (int i = 0; i < Width/8; i++) + simd_result[i] = $unsigned(simd_op_a[i]) >> simd_op_b[i][2:0]; + SimdSra: + for (int i = 0; i < Width/8; i++) + simd_result[i] = $signed(simd_op_a[i]) >>> simd_op_b[i][2:0]; + SimdSll: + for (int i = 0; i < Width/8; i++) + simd_result[i] = $unsigned(simd_op_a[i]) << simd_op_b[i][2:0]; + SimdOr: simd_result = simd_op_a | simd_op_b; + SimdXor: simd_result = simd_op_a ^ simd_op_b; + SimdAnd: simd_result = simd_op_a & simd_op_b; + SimdAbs: + for (int i = 0; i < Width/8; i++) + simd_result[i] = $signed(simd_op_a[i]) > 0 ? 
simd_op_a[i] : -$signed(simd_op_a[i]); + SimdExt: begin + simd_result[0] = simd_op_a[imm6[1:0]]; + // sign- or zero-extend + simd_result[3:1] = {24{simd_op_a[imm6[1:0]][7] & simd_signed}}; + end + SimdIns: begin + simd_result = op_c_i; + simd_result[imm6[1:0]] = simd_op_a[0]; + end + SimdDotp: begin + simd_result = op_c_i & {(Width){simd_dotp_acc}}; // accumulate on rd or start from zero + for (int i = 0; i < Width/8; i++) + simd_result = $signed(simd_result) + $signed({simd_op_a[i][7] & simd_dotp_op_a_signed, simd_op_a[i]}) * + $signed({simd_op_b[i][7] & simd_dotp_op_b_signed, simd_op_b[i]}); + end + SimdShuffle: + for (int i = 0; i < Width/8; i++) + simd_result[i] = simd_op_b[i][2] ? simd_op_a[simd_op_b[i][1:0]] : simd_op_c[simd_op_b[i][1:0]]; + default: ; + endcase + end + default: ; + endcase + end + // -------------------- // Result generation // -------------------- @@ -368,6 +1531,8 @@ module dspu #( // + if clip_op_b >= 0: clip_comp=clip_op_b (i.e. rs1>=0 and clip_op_b>=0) and the result must // be clipped to the upper bound since rs1 > clip_op_b Clip: result_o = cmp_result ? (clip_use_n_bound ? clip_op_b_n : op_a_i) : (op_a_i[Width-1] ? 
op_a_i : clip_op_b); + Mac: result_o = mac_result; + Simd: result_o = simd_result; default: result_o = '0; endcase end diff --git a/hardware/src/mempool_tile.sv b/hardware/src/mempool_tile.sv index 79e5546a2..5a20ccf51 100644 --- a/hardware/src/mempool_tile.sv +++ b/hardware/src/mempool_tile.sv @@ -173,7 +173,7 @@ module mempool_tile .clk_i (clk_i ), .clk_d2_i (clk_i ), .rst_ni (rst_ni ), - .enable_prefetching_i (1'b1 ), + .enable_prefetching_i (snitch_inst_valid[c] ), .icache_events_o (/* Unused */ ), .flush_valid_i (1'b0 ), .flush_ready_o (/* Unused */ ), diff --git a/toolchain/riscv-gnu-toolchain b/toolchain/riscv-gnu-toolchain index 42e484f35..0c46580ac 160000 --- a/toolchain/riscv-gnu-toolchain +++ b/toolchain/riscv-gnu-toolchain @@ -1 +1 @@ -Subproject commit 42e484f35b7832ae0f67eb85bf12c7844f64f089 +Subproject commit 0c46580ac5e0cb6eca97e469d61751dda3bdcabb diff --git a/toolchain/riscv-isa-sim/disasm/disasm.cc b/toolchain/riscv-isa-sim/disasm/disasm.cc index 9ee83b575..3a73ddfe3 100644 --- a/toolchain/riscv-isa-sim/disasm/disasm.cc +++ b/toolchain/riscv-isa-sim/disasm/disasm.cc @@ -367,6 +367,55 @@ struct : public arg_t { } } p_simm5; +struct : public arg_t { + std::string to_string(insn_t insn) const { + return std::to_string((uint32_t)insn.p_zimm6()); + } +} p_zimm6; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return std::to_string((int)insn.p_simm6()); + } +} p_simm6; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return std::to_string((int)insn.i_imm()) + '(' + xpr_name[insn.rs1()] + "!)"; + } +} load_address_irpost; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return std::string(xpr_name[insn.rs2()]) + '(' + xpr_name[insn.rs1()] + "!)"; + } +} load_address_rrpost; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return std::string(xpr_name[insn.rs2()]) + '(' + xpr_name[insn.rs1()] + ')'; + } +} load_address_rr; + +struct : public arg_t { + 
std::string to_string(insn_t insn) const { + return std::to_string((int)insn.s_imm()) + '(' + xpr_name[insn.rs1()] + "!)"; + } +} store_address_irpost; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return std::string(xpr_name[insn.p_rs3()]) + '(' + xpr_name[insn.rs1()] + "!)"; + } +} store_address_rrpost; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return std::string(xpr_name[insn.p_rs3()]) + '(' + xpr_name[insn.rs1()] + ')'; + } +} store_address_rr; + + typedef struct { reg_t match; reg_t mask; @@ -434,7 +483,15 @@ disassembler_t::disassembler_t(int xlen) #define DEFINE_XFTYPE(code) DISASM_INSN(#code, code, 0, {&frd, &xrs1}) #define DEFINE_SFENCE_TYPE(code) DISASM_INSN(#code, code, 0, {&xrs1, &xrs2}) // Xpulpimg - #define DEFINE_PITYPE(code) DISASM_INSN(#code, code, 0, {&xrd, &xrs1, &p_zimm5}) + #define DEFINE_PLOAD_IRPOST(code) DISASM_INSN(#code, code, 0, {&xrd, &load_address_irpost}) + #define DEFINE_PLOAD_RRPOST(code) DISASM_INSN(#code, code, 0, {&xrd, &load_address_rrpost}) + #define DEFINE_PLOAD_RR(code) DISASM_INSN(#code, code, 0, {&xrd, &load_address_rr}) + #define DEFINE_PSTORE_IRPOST(code) DISASM_INSN(#code, code, 0, {&xrs2, &store_address_irpost}) + #define DEFINE_PSTORE_RRPOST(code) DISASM_INSN(#code, code, 0, {&xrs2, &store_address_rrpost}) + #define DEFINE_PSTORE_RR(code) DISASM_INSN(#code, code, 0, {&xrs2, &store_address_rr}) + #define DEFINE_PI0TYPE(code) DISASM_INSN(#code, code, 0, {&xrd, &xrs1, &p_zimm5}) + #define DEFINE_PI1ZTYPE(code) DISASM_INSN(#code, code, 0, {&xrd, &xrs1, &p_zimm6}) + #define DEFINE_PI1STYPE(code) DISASM_INSN(#code, code, 0, {&xrd, &xrs1, &p_simm6}) #define DEFINE_PBTYPE(code) DISASM_INSN(#code, code, 0, {&xrd, &xrs1, &p_simm5, &branch_target}) DEFINE_XLOAD(lb) @@ -752,7 +809,7 @@ disassembler_t::disassembler_t(int xlen) DISASM_INSN("c.fsdsp", c_fsdsp, 0, {&rvc_fp_rs2, &rvc_sdsp_address}); DISASM_INSN("vsetvli", vsetvli, 0, {&xrd, &xrs1, &v_vtype}); - 
DISASM_INSN("vsetvl", vsetvl, 0, {&xrd, &xrs1, &xrs2}); + //DISASM_INSN("vsetvl", vsetvl, 0, {&xrd, &xrs1, &xrs2}); #define DISASM_VMEM_INSN(name, fmt, ff) \ add_insn(new disasm_insn_t(#name "8" #ff ".v", match_##name##8##ff##_v, mask_##name##8##ff##_v | mask_nf, fmt)); \ @@ -963,9 +1020,9 @@ disassembler_t::disassembler_t(int xlen) DISASM_OPIV__XI_INSN(vslidedown, 0); //0b01_0000 - DISASM_OPIV_VXIM_INSN(vadc, 1, 0); + //DISASM_OPIV_VXIM_INSN(vadc, 1, 0); DISASM_OPIV_VXIM_INSN(vmadc, 1, 1); - DISASM_OPIV_VX_M_INSN(vsbc, 1, 0); + //DISASM_OPIV_VX_M_INSN(vsbc, 1, 0); DISASM_OPIV_VX_M_INSN(vmsbc, 1, 1); DISASM_OPIV_VXIM_INSN(vmerge, 1, 0); DISASM_INSN("vmv.v.i", vmv_v_i, 0, {&vd, &v_simm5}); @@ -1008,9 +1065,9 @@ disassembler_t::disassembler_t(int xlen) //OPMVV/OPMVX //0b00_0000 - DISASM_OPIV_VX__INSN(vaaddu, 0); + //DISASM_OPIV_VX__INSN(vaaddu, 0); DISASM_OPIV_VX__INSN(vaadd, 0); - DISASM_OPIV_VX__INSN(vasubu, 0); + //DISASM_OPIV_VX__INSN(vasubu, 0); DISASM_OPIV_VX__INSN(vasub, 0); DISASM_OPIV_S___INSN(vredsum, 1); @@ -1021,7 +1078,7 @@ disassembler_t::disassembler_t(int xlen) DISASM_OPIV_S___INSN(vredmin, 1); DISASM_OPIV_S___INSN(vredmaxu, 0); DISASM_OPIV_S___INSN(vredmax, 1); - DISASM_OPIV__X__INSN(vslide1up, 1); + //DISASM_OPIV__X__INSN(vslide1up, 1); DISASM_OPIV__X__INSN(vslide1down,1); //0b01_0000 @@ -1062,13 +1119,13 @@ disassembler_t::disassembler_t(int xlen) DISASM_OPIV_M___INSN(vmxnor, 1); //0b10_0000 - DISASM_OPIV_VX__INSN(vdivu, 0); + //DISASM_OPIV_VX__INSN(vdivu, 0); DISASM_OPIV_VX__INSN(vdiv, 1); DISASM_OPIV_VX__INSN(vremu, 0); DISASM_OPIV_VX__INSN(vrem, 1); - DISASM_OPIV_VX__INSN(vmulhu, 0); + //DISASM_OPIV_VX__INSN(vmulhu, 0); DISASM_OPIV_VX__INSN(vmul, 1); - DISASM_OPIV_VX__INSN(vmulhsu, 0); + //DISASM_OPIV_VX__INSN(vmulhsu, 0); DISASM_OPIV_VX__INSN(vmulh, 1); DISASM_OPIV_VX__INSN(vmadd, 1); DISASM_OPIV_VX__INSN(vnmsub, 1); @@ -1180,7 +1237,7 @@ disassembler_t::disassembler_t(int xlen) DISASM_OPIV__F_INSN(vfrdiv); //vfunary0 - DISASM_VFUNARY0_INSN(vf, 
v); + //DISASM_VFUNARY0_INSN(vf, v); DISASM_VFUNARY0_INSN(vfw, v); DISASM_INSN("vfwcvt.f.f.v", vfwcvt_f_f_v, 0, {&vd, &vs2, &opt, &vm}); @@ -1277,6 +1334,30 @@ disassembler_t::disassembler_t(int xlen) } // Xpulpimg extension + DEFINE_PLOAD_IRPOST(p_lb_irpost); + DEFINE_PLOAD_IRPOST(p_lbu_irpost); + DEFINE_PLOAD_IRPOST(p_lh_irpost); + DEFINE_PLOAD_IRPOST(p_lhu_irpost); + DEFINE_PLOAD_IRPOST(p_lw_irpost); + DEFINE_PLOAD_RRPOST(p_lb_rrpost); + DEFINE_PLOAD_RRPOST(p_lbu_rrpost); + DEFINE_PLOAD_RRPOST(p_lh_rrpost); + DEFINE_PLOAD_RRPOST(p_lhu_rrpost); + DEFINE_PLOAD_RRPOST(p_lw_rrpost); + DEFINE_PLOAD_RR(p_lb_rr); + DEFINE_PLOAD_RR(p_lbu_rr); + DEFINE_PLOAD_RR(p_lh_rr); + DEFINE_PLOAD_RR(p_lhu_rr); + DEFINE_PLOAD_RR(p_lw_rr); + DEFINE_PSTORE_IRPOST(p_sb_irpost); + DEFINE_PSTORE_IRPOST(p_sh_irpost); + DEFINE_PSTORE_IRPOST(p_sw_irpost); + DEFINE_PSTORE_RRPOST(p_sb_rrpost); + DEFINE_PSTORE_RRPOST(p_sh_rrpost); + DEFINE_PSTORE_RRPOST(p_sw_rrpost); + DEFINE_PSTORE_RR(p_sb_rr); + DEFINE_PSTORE_RR(p_sh_rr); + DEFINE_PSTORE_RR(p_sw_rr); DEFINE_R1TYPE(p_abs); DEFINE_RTYPE(p_slet); DEFINE_RTYPE(p_sletu); @@ -1288,12 +1369,147 @@ disassembler_t::disassembler_t(int xlen) DEFINE_R1TYPE(p_exthz); DEFINE_R1TYPE(p_extbs); DEFINE_R1TYPE(p_extbz); - DEFINE_PITYPE(p_clip); - DEFINE_PITYPE(p_clipu); + DEFINE_PI0TYPE(p_clip); + DEFINE_PI0TYPE(p_clipu); DEFINE_RTYPE(p_clipr); DEFINE_RTYPE(p_clipur); DEFINE_PBTYPE(p_beqimm); DEFINE_PBTYPE(p_bneimm); + DEFINE_RTYPE(p_mac); + DEFINE_RTYPE(p_msu); + + DEFINE_RTYPE(pv_add_h); + DEFINE_RTYPE(pv_add_sc_h); + DEFINE_PI1STYPE(pv_add_sci_h); + DEFINE_RTYPE(pv_add_b); + DEFINE_RTYPE(pv_add_sc_b); + DEFINE_PI1STYPE(pv_add_sci_b); + DEFINE_RTYPE(pv_sub_h); + DEFINE_RTYPE(pv_sub_sc_h); + DEFINE_PI1STYPE(pv_sub_sci_h); + DEFINE_RTYPE(pv_sub_b); + DEFINE_RTYPE(pv_sub_sc_b); + DEFINE_PI1STYPE(pv_sub_sci_b); + DEFINE_RTYPE(pv_avg_h); + DEFINE_RTYPE(pv_avg_sc_h); + DEFINE_PI1STYPE(pv_avg_sci_h); + DEFINE_RTYPE(pv_avg_b); + DEFINE_RTYPE(pv_avg_sc_b); + 
DEFINE_PI1STYPE(pv_avg_sci_b); + DEFINE_RTYPE(pv_avgu_h); + DEFINE_RTYPE(pv_avgu_sc_h); + DEFINE_PI1ZTYPE(pv_avgu_sci_h); + DEFINE_RTYPE(pv_avgu_b); + DEFINE_RTYPE(pv_avgu_sc_b); + DEFINE_PI1ZTYPE(pv_avgu_sci_b); + DEFINE_RTYPE(pv_min_h); + DEFINE_RTYPE(pv_min_sc_h); + DEFINE_PI1STYPE(pv_min_sci_h); + DEFINE_RTYPE(pv_min_b); + DEFINE_RTYPE(pv_min_sc_b); + DEFINE_PI1STYPE(pv_min_sci_b); + DEFINE_RTYPE(pv_minu_h); + DEFINE_RTYPE(pv_minu_sc_h); + DEFINE_PI1ZTYPE(pv_minu_sci_h); + DEFINE_RTYPE(pv_minu_b); + DEFINE_RTYPE(pv_minu_sc_b); + DEFINE_PI1ZTYPE(pv_minu_sci_b); + DEFINE_RTYPE(pv_max_h); + DEFINE_RTYPE(pv_max_sc_h); + DEFINE_PI1STYPE(pv_max_sci_h); + DEFINE_RTYPE(pv_max_b); + DEFINE_RTYPE(pv_max_sc_b); + DEFINE_PI1STYPE(pv_max_sci_b); + DEFINE_RTYPE(pv_maxu_h); + DEFINE_RTYPE(pv_maxu_sc_h); + DEFINE_PI1ZTYPE(pv_maxu_sci_h); + DEFINE_RTYPE(pv_maxu_b); + DEFINE_RTYPE(pv_maxu_sc_b); + DEFINE_PI1ZTYPE(pv_maxu_sci_b); + DEFINE_RTYPE(pv_srl_h); + DEFINE_RTYPE(pv_srl_sc_h); + DEFINE_PI1ZTYPE(pv_srl_sci_h); + DEFINE_RTYPE(pv_srl_b); + DEFINE_RTYPE(pv_srl_sc_b); + DEFINE_PI1ZTYPE(pv_srl_sci_b); + DEFINE_RTYPE(pv_sra_h); + DEFINE_RTYPE(pv_sra_sc_h); + DEFINE_PI1ZTYPE(pv_sra_sci_h); + DEFINE_RTYPE(pv_sra_b); + DEFINE_RTYPE(pv_sra_sc_b); + DEFINE_PI1ZTYPE(pv_sra_sci_b); + DEFINE_RTYPE(pv_sll_h); + DEFINE_RTYPE(pv_sll_sc_h); + DEFINE_PI1ZTYPE(pv_sll_sci_h); + DEFINE_RTYPE(pv_sll_b); + DEFINE_RTYPE(pv_sll_sc_b); + DEFINE_PI1ZTYPE(pv_sll_sci_b); + DEFINE_RTYPE(pv_or_h); + DEFINE_RTYPE(pv_or_sc_h); + DEFINE_PI1ZTYPE(pv_or_sci_h); + DEFINE_RTYPE(pv_or_b); + DEFINE_RTYPE(pv_or_sc_b); + DEFINE_PI1ZTYPE(pv_or_sci_b); + DEFINE_RTYPE(pv_xor_h); + DEFINE_RTYPE(pv_xor_sc_h); + DEFINE_PI1ZTYPE(pv_xor_sci_h); + DEFINE_RTYPE(pv_xor_b); + DEFINE_RTYPE(pv_xor_sc_b); + DEFINE_PI1ZTYPE(pv_xor_sci_b); + DEFINE_RTYPE(pv_and_h); + DEFINE_RTYPE(pv_and_sc_h); + DEFINE_PI1ZTYPE(pv_and_sci_h); + DEFINE_RTYPE(pv_and_b); + DEFINE_RTYPE(pv_and_sc_b); + DEFINE_PI1ZTYPE(pv_and_sci_b); + 
DEFINE_R1TYPE(pv_abs_h); + DEFINE_R1TYPE(pv_abs_b); + DEFINE_PI1ZTYPE(pv_extract_h); + DEFINE_PI1ZTYPE(pv_extract_b); + DEFINE_PI1ZTYPE(pv_extractu_h); + DEFINE_PI1ZTYPE(pv_extractu_b); + DEFINE_PI1ZTYPE(pv_insert_h); + DEFINE_PI1ZTYPE(pv_insert_b); + + DEFINE_RTYPE(pv_dotup_h); + DEFINE_RTYPE(pv_dotup_sc_h); + DEFINE_PI1ZTYPE(pv_dotup_sci_h); + DEFINE_RTYPE(pv_dotup_b); + DEFINE_RTYPE(pv_dotup_sc_b); + DEFINE_PI1ZTYPE(pv_dotup_sci_b); + DEFINE_RTYPE(pv_dotusp_h); + DEFINE_RTYPE(pv_dotusp_sc_h); + DEFINE_PI1ZTYPE(pv_dotusp_sci_h); + DEFINE_RTYPE(pv_dotusp_b); + DEFINE_RTYPE(pv_dotusp_sc_b); + DEFINE_PI1ZTYPE(pv_dotusp_sci_b); + DEFINE_RTYPE(pv_dotsp_h); + DEFINE_RTYPE(pv_dotsp_sc_h); + DEFINE_PI1ZTYPE(pv_dotsp_sci_h); + DEFINE_RTYPE(pv_dotsp_b); + DEFINE_RTYPE(pv_dotsp_sc_b); + DEFINE_PI1ZTYPE(pv_dotsp_sci_b); + DEFINE_RTYPE(pv_sdotup_h); + DEFINE_RTYPE(pv_sdotup_sc_h); + DEFINE_PI1ZTYPE(pv_sdotup_sci_h); + DEFINE_RTYPE(pv_sdotup_b); + DEFINE_RTYPE(pv_sdotup_sc_b); + DEFINE_PI1ZTYPE(pv_sdotup_sci_b); + DEFINE_RTYPE(pv_sdotusp_h); + DEFINE_RTYPE(pv_sdotusp_sc_h); + DEFINE_PI1ZTYPE(pv_sdotusp_sci_h); + DEFINE_RTYPE(pv_sdotusp_b); + DEFINE_RTYPE(pv_sdotusp_sc_b); + DEFINE_PI1ZTYPE(pv_sdotusp_sci_b); + DEFINE_RTYPE(pv_sdotsp_h); + DEFINE_RTYPE(pv_sdotsp_sc_h); + DEFINE_PI1ZTYPE(pv_sdotsp_sci_h); + DEFINE_RTYPE(pv_sdotsp_b); + DEFINE_RTYPE(pv_sdotsp_sc_b); + DEFINE_PI1ZTYPE(pv_sdotsp_sci_b); + + DEFINE_RTYPE(pv_shuffle2_h); + DEFINE_RTYPE(pv_shuffle2_b); // provide a default disassembly for all instructions as a fallback #define DECLARE_INSN(code, match, mask) \ diff --git a/toolchain/riscv-isa-sim/riscv/decode.h b/toolchain/riscv-isa-sim/riscv/decode.h index 63b1e2676..d6d270af8 100644 --- a/toolchain/riscv-isa-sim/riscv/decode.h +++ b/toolchain/riscv-isa-sim/riscv/decode.h @@ -131,6 +131,10 @@ class insn_t // Xpulpimg uint64_t p_zimm5() { return x(20, 5); } int64_t p_simm5() { return xs(20, 5); } + uint64_t p_rs3() { return x(7, 5); } + uint64_t p_zimm6() { return 
x(25,1) + (x(20, 5) << 1); } + int64_t p_simm6() { return x(25,1) + (xs(20, 5) << 1); } + private: insn_bits_t b; @@ -284,6 +288,16 @@ class regfile_t #define sext8(x) ((sreg_t)(int8_t)(x)) #define zext8(x) ((reg_t)(uint8_t)(x)) +#define P_RS3 READ_REG(insn.p_rs3()) /* same as RD, just different semantical value */ +#define WRITE_RS1(value) WRITE_REG(insn.rs1(), value) + +#define RS1_H(i) ((RS1 >> ((xlen >> 1) * (i & 0x1))) & 0xFFFF) /* select rs1 half: i should only be 0 or 1 */ +#define RS1_B(i) ((RS1 >> ((xlen >> 2) * (i & 0x3))) & 0xFF) /* select rs1 byte: i should only be from 0 to 3 */ +#define RS2_H(i) ((RS2 >> ((xlen >> 1) * (i & 0x1))) & 0xFFFF) /* select rs2 half: i should only be 0 or 1 */ +#define RS2_B(i) ((RS2 >> ((xlen >> 2) * (i & 0x3))) & 0xFF) /* select rs2 byte: i should only be from 0 to 3 */ +#define RD_H(i) ((RD >> ((xlen >> 1) * (i & 0x1))) & 0xFFFF) /* select rd half: i should only be 0 or 1 */ +#define RD_B(i) ((RD >> ((xlen >> 2) * (i & 0x3))) & 0xFF) /* select rd byte: i should only be from 0 to 3 */ + #define sext32(x) ((sreg_t)(int32_t)(x)) #define zext32(x) ((reg_t)(uint32_t)(x)) diff --git a/toolchain/riscv-isa-sim/riscv/insns/p_lb_irpost.h b/toolchain/riscv-isa-sim/riscv/insns/p_lb_irpost.h new file mode 100644 index 000000000..ed17db162 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/p_lb_irpost.h @@ -0,0 +1,2 @@ +WRITE_RD(MMU.load_int8(RS1)); +WRITE_RS1(RS1 + insn.i_imm()); diff --git a/toolchain/riscv-isa-sim/riscv/insns/p_lb_rr.h b/toolchain/riscv-isa-sim/riscv/insns/p_lb_rr.h new file mode 100644 index 000000000..c32237fe1 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/p_lb_rr.h @@ -0,0 +1 @@ +WRITE_RD(MMU.load_int8(RS1 + sreg_t(RS2))); diff --git a/toolchain/riscv-isa-sim/riscv/insns/p_lb_rrpost.h b/toolchain/riscv-isa-sim/riscv/insns/p_lb_rrpost.h new file mode 100644 index 000000000..9dc2bd93d --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/p_lb_rrpost.h @@ -0,0 +1,2 @@ +WRITE_RD(MMU.load_int8(RS1)); 
+WRITE_RS1(RS1 + sreg_t(RS2)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/p_lbu_irpost.h b/toolchain/riscv-isa-sim/riscv/insns/p_lbu_irpost.h new file mode 100644 index 000000000..0f015c376 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/p_lbu_irpost.h @@ -0,0 +1,2 @@ +WRITE_RD(MMU.load_uint8(RS1)); +WRITE_RS1(RS1 + insn.i_imm()); diff --git a/toolchain/riscv-isa-sim/riscv/insns/p_lbu_rr.h b/toolchain/riscv-isa-sim/riscv/insns/p_lbu_rr.h new file mode 100644 index 000000000..a95ca2a9a --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/p_lbu_rr.h @@ -0,0 +1 @@ +WRITE_RD(MMU.load_uint8(RS1 + sreg_t(RS2))); diff --git a/toolchain/riscv-isa-sim/riscv/insns/p_lbu_rrpost.h b/toolchain/riscv-isa-sim/riscv/insns/p_lbu_rrpost.h new file mode 100644 index 000000000..3456c8aec --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/p_lbu_rrpost.h @@ -0,0 +1,2 @@ +WRITE_RD(MMU.load_uint8(RS1)); +WRITE_RS1(RS1 + sreg_t(RS2)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/p_lh_irpost.h b/toolchain/riscv-isa-sim/riscv/insns/p_lh_irpost.h new file mode 100644 index 000000000..3fea47c18 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/p_lh_irpost.h @@ -0,0 +1,2 @@ +WRITE_RD(MMU.load_int16(RS1)); +WRITE_RS1(RS1 + insn.i_imm()); diff --git a/toolchain/riscv-isa-sim/riscv/insns/p_lh_rr.h b/toolchain/riscv-isa-sim/riscv/insns/p_lh_rr.h new file mode 100644 index 000000000..cd5bf8219 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/p_lh_rr.h @@ -0,0 +1 @@ +WRITE_RD(MMU.load_int16(RS1 + sreg_t(RS2))); diff --git a/toolchain/riscv-isa-sim/riscv/insns/p_lh_rrpost.h b/toolchain/riscv-isa-sim/riscv/insns/p_lh_rrpost.h new file mode 100644 index 000000000..60353fd3e --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/p_lh_rrpost.h @@ -0,0 +1,2 @@ +WRITE_RD(MMU.load_int16(RS1)); +WRITE_RS1(RS1 + sreg_t(RS2)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/p_lhu_irpost.h b/toolchain/riscv-isa-sim/riscv/insns/p_lhu_irpost.h new file mode 100644 index 
000000000..8e7cfb6be --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/p_lhu_irpost.h @@ -0,0 +1,2 @@ +WRITE_RD(MMU.load_uint16(RS1)); +WRITE_RS1(RS1 + insn.i_imm()); diff --git a/toolchain/riscv-isa-sim/riscv/insns/p_lhu_rr.h b/toolchain/riscv-isa-sim/riscv/insns/p_lhu_rr.h new file mode 100644 index 000000000..6568736a7 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/p_lhu_rr.h @@ -0,0 +1 @@ +WRITE_RD(MMU.load_uint16(RS1 + sreg_t(RS2))); diff --git a/toolchain/riscv-isa-sim/riscv/insns/p_lhu_rrpost.h b/toolchain/riscv-isa-sim/riscv/insns/p_lhu_rrpost.h new file mode 100644 index 000000000..195222ac0 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/p_lhu_rrpost.h @@ -0,0 +1,2 @@ +WRITE_RD(MMU.load_uint16(RS1)); +WRITE_RS1(RS1 + sreg_t(RS2)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/p_lw_irpost.h b/toolchain/riscv-isa-sim/riscv/insns/p_lw_irpost.h new file mode 100644 index 000000000..fb77d8723 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/p_lw_irpost.h @@ -0,0 +1,2 @@ +WRITE_RD(MMU.load_int32(RS1)); +WRITE_RS1(RS1 + insn.i_imm()); diff --git a/toolchain/riscv-isa-sim/riscv/insns/p_lw_rr.h b/toolchain/riscv-isa-sim/riscv/insns/p_lw_rr.h new file mode 100644 index 000000000..78fa33231 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/p_lw_rr.h @@ -0,0 +1 @@ +WRITE_RD(MMU.load_int32(RS1 + sreg_t(RS2))); diff --git a/toolchain/riscv-isa-sim/riscv/insns/p_lw_rrpost.h b/toolchain/riscv-isa-sim/riscv/insns/p_lw_rrpost.h new file mode 100644 index 000000000..e315c5dfe --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/p_lw_rrpost.h @@ -0,0 +1,2 @@ +WRITE_RD(MMU.load_int32(RS1)); +WRITE_RS1(RS1 + sreg_t(RS2)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/p_mac.h b/toolchain/riscv-isa-sim/riscv/insns/p_mac.h new file mode 100644 index 000000000..bf5c77a14 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/p_mac.h @@ -0,0 +1 @@ +WRITE_RD(sext_xlen(sreg_t(RD) + sext_xlen(sreg_t(RS1) * sreg_t(RS2)))); diff --git 
a/toolchain/riscv-isa-sim/riscv/insns/p_msu.h b/toolchain/riscv-isa-sim/riscv/insns/p_msu.h new file mode 100644 index 000000000..2a42cf05e --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/p_msu.h @@ -0,0 +1 @@ +WRITE_RD(sext_xlen(sreg_t(RD) - sext_xlen(sreg_t(RS1) * sreg_t(RS2)))); diff --git a/toolchain/riscv-isa-sim/riscv/insns/p_sb_irpost.h b/toolchain/riscv-isa-sim/riscv/insns/p_sb_irpost.h new file mode 100644 index 000000000..9339bc9ca --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/p_sb_irpost.h @@ -0,0 +1,2 @@ +MMU.store_uint8(RS1, RS2); +WRITE_RS1(RS1 + insn.s_imm()); diff --git a/toolchain/riscv-isa-sim/riscv/insns/p_sb_rr.h b/toolchain/riscv-isa-sim/riscv/insns/p_sb_rr.h new file mode 100644 index 000000000..73e49727c --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/p_sb_rr.h @@ -0,0 +1 @@ +MMU.store_uint8(RS1 + sreg_t(P_RS3), RS2); diff --git a/toolchain/riscv-isa-sim/riscv/insns/p_sb_rrpost.h b/toolchain/riscv-isa-sim/riscv/insns/p_sb_rrpost.h new file mode 100644 index 000000000..044255174 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/p_sb_rrpost.h @@ -0,0 +1,2 @@ +MMU.store_uint8(RS1, RS2); +WRITE_RS1(RS1 + sreg_t(P_RS3)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/p_sh_irpost.h b/toolchain/riscv-isa-sim/riscv/insns/p_sh_irpost.h new file mode 100644 index 000000000..f915c518d --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/p_sh_irpost.h @@ -0,0 +1,2 @@ +MMU.store_uint16(RS1, RS2); +WRITE_RS1(RS1 + insn.s_imm()); diff --git a/toolchain/riscv-isa-sim/riscv/insns/p_sh_rr.h b/toolchain/riscv-isa-sim/riscv/insns/p_sh_rr.h new file mode 100644 index 000000000..f3270bd56 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/p_sh_rr.h @@ -0,0 +1 @@ +MMU.store_uint16(RS1 + sreg_t(P_RS3), RS2); diff --git a/toolchain/riscv-isa-sim/riscv/insns/p_sh_rrpost.h b/toolchain/riscv-isa-sim/riscv/insns/p_sh_rrpost.h new file mode 100644 index 000000000..5043c6287 --- /dev/null +++ 
b/toolchain/riscv-isa-sim/riscv/insns/p_sh_rrpost.h @@ -0,0 +1,2 @@ +MMU.store_uint16(RS1, RS2); +WRITE_RS1(RS1 + sreg_t(P_RS3)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/p_sw_irpost.h b/toolchain/riscv-isa-sim/riscv/insns/p_sw_irpost.h new file mode 100644 index 000000000..7ff0406fe --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/p_sw_irpost.h @@ -0,0 +1,2 @@ +MMU.store_uint32(RS1, RS2); +WRITE_RS1(RS1 + insn.s_imm()); diff --git a/toolchain/riscv-isa-sim/riscv/insns/p_sw_rr.h b/toolchain/riscv-isa-sim/riscv/insns/p_sw_rr.h new file mode 100644 index 000000000..6bef97f73 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/p_sw_rr.h @@ -0,0 +1 @@ +MMU.store_uint32(RS1 + sreg_t(P_RS3), RS2); diff --git a/toolchain/riscv-isa-sim/riscv/insns/p_sw_rrpost.h b/toolchain/riscv-isa-sim/riscv/insns/p_sw_rrpost.h new file mode 100644 index 000000000..6382d6d80 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/p_sw_rrpost.h @@ -0,0 +1,2 @@ +MMU.store_uint32(RS1, RS2); +WRITE_RS1(RS1 + sreg_t(P_RS3)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_abs_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_abs_b.h new file mode 100644 index 000000000..c0bc089cc --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_abs_b.h @@ -0,0 +1,9 @@ +int8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = sext8(RS1_B(i)) > 0 ? RS1_B(i) : -sext8(RS1_B(i)); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_abs_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_abs_h.h new file mode 100644 index 000000000..42ca4ff3c --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_abs_h.h @@ -0,0 +1,9 @@ +int16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = sext16(RS1_H(i)) > 0 ? 
RS1_H(i) : -sext16(RS1_H(i)); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_add_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_add_b.h new file mode 100644 index 000000000..ecae63a04 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_add_b.h @@ -0,0 +1,9 @@ +int8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = sext8(RS1_B(i)) + sext8(RS2_B(i)); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_add_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_add_h.h new file mode 100644 index 000000000..0a78665af --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_add_h.h @@ -0,0 +1,9 @@ +int16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = sext16(RS1_H(i)) + sext16(RS2_H(i)); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_add_sc_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_add_sc_b.h new file mode 100644 index 000000000..572b61c07 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_add_sc_b.h @@ -0,0 +1,9 @@ +int8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = sext8(RS1_B(i)) + sext8(RS2_B(0)); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_add_sc_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_add_sc_h.h new file mode 100644 index 000000000..734a911b8 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_add_sc_h.h @@ -0,0 +1,9 @@ +int16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = sext16(RS1_H(i)) + sext16(RS2_H(0)); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git 
a/toolchain/riscv-isa-sim/riscv/insns/pv_add_sci_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_add_sci_b.h new file mode 100644 index 000000000..df47f1cb5 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_add_sci_b.h @@ -0,0 +1,9 @@ +int8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = sext8(RS1_B(i)) + insn.p_simm6(); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_add_sci_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_add_sci_h.h new file mode 100644 index 000000000..907621c09 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_add_sci_h.h @@ -0,0 +1,9 @@ +int16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = sext16(RS1_H(i)) + insn.p_simm6(); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_and_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_and_b.h new file mode 100644 index 000000000..d3711b762 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_and_b.h @@ -0,0 +1,9 @@ +uint8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = RS1_B(i) & RS2_B(i); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_and_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_and_h.h new file mode 100644 index 000000000..8bae35685 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_and_h.h @@ -0,0 +1,9 @@ +uint16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = RS1_H(i) & RS2_H(i); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_and_sc_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_and_sc_b.h new file mode 100644 index 000000000..b1e6c865e --- /dev/null +++ 
b/toolchain/riscv-isa-sim/riscv/insns/pv_and_sc_b.h @@ -0,0 +1,9 @@ +uint8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = RS1_B(i) & RS2_B(0); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_and_sc_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_and_sc_h.h new file mode 100644 index 000000000..2389d11e1 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_and_sc_h.h @@ -0,0 +1,9 @@ +uint16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = RS1_H(i) & RS2_H(0); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_and_sci_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_and_sci_b.h new file mode 100644 index 000000000..7e4e9e0ac --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_and_sci_b.h @@ -0,0 +1,9 @@ +uint8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = RS1_B(i) & insn.p_simm6(); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_and_sci_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_and_sci_h.h new file mode 100644 index 000000000..fbd57d116 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_and_sci_h.h @@ -0,0 +1,9 @@ +uint16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ /* one 16-bit lane per iteration, highest lane first */ + temp = RS1_H(i) & insn.p_simm6(); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_avg_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_avg_b.h new file mode 100644 index 000000000..3d5d6d472 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_avg_b.h @@ -0,0 +1,9 @@ +int8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = sext8(sext8(RS1_B(i)) + 
sext8(RS2_B(i))) >> 1; + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_avg_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_avg_h.h new file mode 100644 index 000000000..725f2f2e0 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_avg_h.h @@ -0,0 +1,9 @@ +int16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = sext16(sext16(RS1_H(i)) + sext16(RS2_H(i))) >> 1; + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_avg_sc_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_avg_sc_b.h new file mode 100644 index 000000000..0b7d2f8d2 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_avg_sc_b.h @@ -0,0 +1,9 @@ +int8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = sext8(sext8(RS1_B(i)) + sext8(RS2_B(0))) >> 1; + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_avg_sc_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_avg_sc_h.h new file mode 100644 index 000000000..8a6cb5e50 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_avg_sc_h.h @@ -0,0 +1,9 @@ +int16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = sext16(sext16(RS1_H(i)) + sext16(RS2_H(0))) >> 1; + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_avg_sci_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_avg_sci_b.h new file mode 100644 index 000000000..ff67065e3 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_avg_sci_b.h @@ -0,0 +1,9 @@ +int8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = sext8(sext8(RS1_B(i)) + insn.p_simm6()) >> 1; + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} 
+WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_avg_sci_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_avg_sci_h.h new file mode 100644 index 000000000..f7deefd25 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_avg_sci_h.h @@ -0,0 +1,9 @@ +int16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = sext16(sext16(RS1_H(i)) + insn.p_simm6()) >> 1; + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_avgu_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_avgu_b.h new file mode 100644 index 000000000..435c4d22c --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_avgu_b.h @@ -0,0 +1,9 @@ +uint8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = zext8(zext8(RS1_B(i)) + zext8(RS2_B(i))) >> 1; + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_avgu_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_avgu_h.h new file mode 100644 index 000000000..3fdbaf4dd --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_avgu_h.h @@ -0,0 +1,9 @@ +uint16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = zext16(zext16(RS1_H(i)) + zext16(RS2_H(i))) >> 1; + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_avgu_sc_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_avgu_sc_b.h new file mode 100644 index 000000000..47ca3888b --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_avgu_sc_b.h @@ -0,0 +1,9 @@ +uint8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = zext8(zext8(RS1_B(i)) + zext8(RS2_B(0))) >> 1; + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git 
a/toolchain/riscv-isa-sim/riscv/insns/pv_avgu_sc_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_avgu_sc_h.h new file mode 100644 index 000000000..0bf92f93b --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_avgu_sc_h.h @@ -0,0 +1,9 @@ +uint16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = zext16(zext16(RS1_H(i)) + zext16(RS2_H(0))) >> 1; + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_avgu_sci_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_avgu_sci_b.h new file mode 100644 index 000000000..fbc0dff92 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_avgu_sci_b.h @@ -0,0 +1,9 @@ +uint8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = zext8(zext8(RS1_B(i)) + insn.p_zimm6()) >> 1; + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_avgu_sci_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_avgu_sci_h.h new file mode 100644 index 000000000..dd8cd3544 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_avgu_sci_h.h @@ -0,0 +1,9 @@ +uint16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = zext16(zext16(RS1_H(i)) + insn.p_zimm6()) >> 1; + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_dotsp_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_dotsp_b.h new file mode 100644 index 000000000..93b7233cc --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_dotsp_b.h @@ -0,0 +1,6 @@ +int32_t acc = 0; + +for(int i = xlen/8 - 1; i >= 0; i--) + acc += sext8(RS1_B(i)) * sext8(RS2_B(i)); + +WRITE_RD(sext_xlen(acc)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_dotsp_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_dotsp_h.h new file mode 100644 index 000000000..9feed35ef --- /dev/null +++ 
b/toolchain/riscv-isa-sim/riscv/insns/pv_dotsp_h.h @@ -0,0 +1,6 @@ +int32_t acc = 0; + +for(int i = xlen/16 - 1; i >= 0; i--) + acc += sext16(RS1_H(i)) * sext16(RS2_H(i)); + +WRITE_RD(sext_xlen(acc)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_dotsp_sc_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_dotsp_sc_b.h new file mode 100644 index 000000000..cef11d5e7 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_dotsp_sc_b.h @@ -0,0 +1,6 @@ +int32_t acc = 0; + +for(int i = xlen/8 - 1; i >= 0; i--) + acc += sext8(RS1_B(i)) * sext8(RS2_B(0)); + +WRITE_RD(sext_xlen(acc)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_dotsp_sc_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_dotsp_sc_h.h new file mode 100644 index 000000000..ef558d39f --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_dotsp_sc_h.h @@ -0,0 +1,6 @@ +int32_t acc = 0; + +for(int i = xlen/16 - 1; i >= 0; i--) + acc += sext16(RS1_H(i)) * sext16(RS2_H(0)); + +WRITE_RD(sext_xlen(acc)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_dotsp_sci_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_dotsp_sci_b.h new file mode 100644 index 000000000..3470fd55a --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_dotsp_sci_b.h @@ -0,0 +1,6 @@ +int32_t acc = 0; + +for(int i = xlen/8 - 1; i >= 0; i--) + acc += sext8(RS1_B(i)) * insn.p_simm6(); + +WRITE_RD(sext_xlen(acc)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_dotsp_sci_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_dotsp_sci_h.h new file mode 100644 index 000000000..97e30eb29 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_dotsp_sci_h.h @@ -0,0 +1,6 @@ +int32_t acc = 0; + +for(int i = xlen/16 - 1; i >= 0; i--) + acc += sext16(RS1_H(i)) * insn.p_simm6(); + +WRITE_RD(sext_xlen(acc)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_dotup_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_dotup_b.h new file mode 100644 index 000000000..fa77f3667 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_dotup_b.h @@ -0,0 +1,6 @@ 
+uint32_t acc = 0; + +for(int i = xlen/8 - 1; i >= 0; i--) + acc += zext8(RS1_B(i)) * zext8(RS2_B(i)); + +WRITE_RD(sext_xlen(acc)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_dotup_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_dotup_h.h new file mode 100644 index 000000000..4e170b238 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_dotup_h.h @@ -0,0 +1,6 @@ +uint32_t acc = 0; + +for(int i = xlen/16 - 1; i >= 0; i--) + acc += zext16(RS1_H(i)) * zext16(RS2_H(i)); + +WRITE_RD(sext_xlen(acc)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_dotup_sc_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_dotup_sc_b.h new file mode 100644 index 000000000..a581d0162 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_dotup_sc_b.h @@ -0,0 +1,6 @@ +uint32_t acc = 0; + +for(int i = xlen/8 - 1; i >= 0; i--) + acc += zext8(RS1_B(i)) * zext8(RS2_B(0)); + +WRITE_RD(sext_xlen(acc)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_dotup_sc_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_dotup_sc_h.h new file mode 100644 index 000000000..b78762a87 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_dotup_sc_h.h @@ -0,0 +1,6 @@ +uint32_t acc = 0; + +for(int i = xlen/16 - 1; i >= 0; i--) + acc += zext16(RS1_H(i)) * zext16(RS2_H(0)); + +WRITE_RD(sext_xlen(acc)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_dotup_sci_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_dotup_sci_b.h new file mode 100644 index 000000000..0dedb1caf --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_dotup_sci_b.h @@ -0,0 +1,6 @@ +uint32_t acc = 0; + +for(int i = xlen/8 - 1; i >= 0; i--) + acc += zext8(RS1_B(i)) * insn.p_zimm6(); + +WRITE_RD(sext_xlen(acc)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_dotup_sci_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_dotup_sci_h.h new file mode 100644 index 000000000..64a36d569 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_dotup_sci_h.h @@ -0,0 +1,6 @@ +uint32_t acc = 0; + +for(int i = xlen/16 - 1; i >= 0; i--) + 
acc += zext16(RS1_H(i)) * insn.p_zimm6(); + +WRITE_RD(sext_xlen(acc)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_dotusp_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_dotusp_b.h new file mode 100644 index 000000000..1cdfc2f2c --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_dotusp_b.h @@ -0,0 +1,6 @@ +int32_t acc = 0; + +for(int i = xlen/8 - 1; i >= 0; i--) + acc += sreg_t(zext8(RS1_B(i))) * sext8(RS2_B(i)); + +WRITE_RD(sext_xlen(acc)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_dotusp_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_dotusp_h.h new file mode 100644 index 000000000..81968a14d --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_dotusp_h.h @@ -0,0 +1,6 @@ +int32_t acc = 0; + +for(int i = xlen/16 - 1; i >= 0; i--) + acc += sreg_t(zext16(RS1_H(i))) * sext16(RS2_H(i)); + +WRITE_RD(sext_xlen(acc)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_dotusp_sc_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_dotusp_sc_b.h new file mode 100644 index 000000000..d562a7d4d --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_dotusp_sc_b.h @@ -0,0 +1,6 @@ +int32_t acc = 0; + +for(int i = xlen/8 - 1; i >= 0; i--) + acc += sreg_t(zext8(RS1_B(i))) * sext8(RS2_B(0)); + +WRITE_RD(sext_xlen(acc)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_dotusp_sc_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_dotusp_sc_h.h new file mode 100644 index 000000000..3815c3721 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_dotusp_sc_h.h @@ -0,0 +1,6 @@ +int32_t acc = 0; + +for(int i = xlen/16 - 1; i >= 0; i--) + acc += sreg_t(zext16(RS1_H(i))) * sext16(RS2_H(0)); + +WRITE_RD(sext_xlen(acc)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_dotusp_sci_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_dotusp_sci_b.h new file mode 100644 index 000000000..92c229540 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_dotusp_sci_b.h @@ -0,0 +1,6 @@ +int32_t acc = 0; + +for(int i = xlen/8 - 1; i >= 0; i--) + acc += sreg_t(zext8(RS1_B(i))) 
* insn.p_simm6(); + +WRITE_RD(sext_xlen(acc)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_dotusp_sci_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_dotusp_sci_h.h new file mode 100644 index 000000000..8f91a89a3 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_dotusp_sci_h.h @@ -0,0 +1,6 @@ +int32_t acc = 0; + +for(int i = xlen/16 - 1; i >= 0; i--) + acc += sreg_t(zext16(RS1_H(i))) * insn.p_simm6(); + +WRITE_RD(sext_xlen(acc)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_extract_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_extract_b.h new file mode 100644 index 000000000..fce80bbb6 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_extract_b.h @@ -0,0 +1 @@ +WRITE_RD(sext8(RS1_B(insn.p_zimm6() & 0x03))); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_extract_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_extract_h.h new file mode 100644 index 000000000..ee35393d4 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_extract_h.h @@ -0,0 +1 @@ +WRITE_RD(sext16(RS1_H(insn.p_zimm6() & 0x01))); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_extractu_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_extractu_b.h new file mode 100644 index 000000000..c24023387 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_extractu_b.h @@ -0,0 +1 @@ +WRITE_RD(zext8(RS1_B(insn.p_zimm6() & 0x03))); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_extractu_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_extractu_h.h new file mode 100644 index 000000000..90b679afd --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_extractu_h.h @@ -0,0 +1 @@ +WRITE_RD(zext16(RS1_H(insn.p_zimm6() & 0x01))); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_insert_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_insert_b.h new file mode 100644 index 000000000..5575e7967 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_insert_b.h @@ -0,0 +1,6 @@ +uint32_t ins_rd = RD; +uint8_t i = insn.p_zimm6() & 0x03; /* select to which rd byte to write the 
8-bit value */ + +ins_rd = (ins_rd & ~(0xFFu << ((xlen >> 2) * i))) | (RS1_B(0) << ((xlen >> 2) * i)); + +WRITE_RD(sext_xlen(ins_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_insert_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_insert_h.h new file mode 100644 index 000000000..eccb0eda6 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_insert_h.h @@ -0,0 +1,6 @@ +uint32_t ins_rd = RD; +uint8_t i = insn.p_zimm6() & 0x01; /* select to which rd half to write the 16-bit value */ + +ins_rd = (ins_rd & ~(0xFFFFu << ((xlen >> 1) * i))) | ((RS1_H(0) & 0xFFFF) << ((xlen >> 1) * i)); + +WRITE_RD(sext_xlen(ins_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_max_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_max_b.h new file mode 100644 index 000000000..4dc3e6be8 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_max_b.h @@ -0,0 +1,9 @@ +int8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = sext8(RS1_B(i)) > sext8(RS2_B(i)) ? RS1_B(i) : RS2_B(i); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_max_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_max_h.h new file mode 100644 index 000000000..c65a32da6 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_max_h.h @@ -0,0 +1,9 @@ +int16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = sext16(RS1_H(i)) > sext16(RS2_H(i)) ? RS1_H(i) : RS2_H(i); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_max_sc_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_max_sc_b.h new file mode 100644 index 000000000..896087f62 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_max_sc_b.h @@ -0,0 +1,9 @@ +int8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = sext8(RS1_B(i)) > sext8(RS2_B(0)) ? 
RS1_B(i) : RS2_B(0); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_max_sc_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_max_sc_h.h new file mode 100644 index 000000000..fd55fb49b --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_max_sc_h.h @@ -0,0 +1,9 @@ +int16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = sext16(RS1_H(i)) > sext16(RS2_H(0)) ? RS1_H(i) : RS2_H(0); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_max_sci_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_max_sci_b.h new file mode 100644 index 000000000..5e06669fa --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_max_sci_b.h @@ -0,0 +1,9 @@ +int8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = sext8(RS1_B(i)) > insn.p_simm6() ? RS1_B(i) : insn.p_simm6(); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_max_sci_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_max_sci_h.h new file mode 100644 index 000000000..ce1df2ee1 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_max_sci_h.h @@ -0,0 +1,9 @@ +int16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = sext16(RS1_H(i)) > insn.p_simm6() ? RS1_H(i) : insn.p_simm6(); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_maxu_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_maxu_b.h new file mode 100644 index 000000000..5821c1726 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_maxu_b.h @@ -0,0 +1,9 @@ +uint8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = zext8(RS1_B(i)) > zext8(RS2_B(i)) ? 
RS1_B(i) : RS2_B(i); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_maxu_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_maxu_h.h new file mode 100644 index 000000000..3e587c3c9 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_maxu_h.h @@ -0,0 +1,9 @@ +uint16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = zext16(RS1_H(i)) > zext16(RS2_H(i)) ? RS1_H(i) : RS2_H(i); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_maxu_sc_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_maxu_sc_b.h new file mode 100644 index 000000000..c297b87ab --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_maxu_sc_b.h @@ -0,0 +1,9 @@ +uint8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = zext8(RS1_B(i)) > zext8(RS2_B(0)) ? RS1_B(i) : RS2_B(0); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_maxu_sc_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_maxu_sc_h.h new file mode 100644 index 000000000..fbb5c7feb --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_maxu_sc_h.h @@ -0,0 +1,9 @@ +uint16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = zext16(RS1_H(i)) > zext16(RS2_H(0)) ? RS1_H(i) : RS2_H(0); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_maxu_sci_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_maxu_sci_b.h new file mode 100644 index 000000000..ab5f6e5f9 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_maxu_sci_b.h @@ -0,0 +1,10 @@ +uint8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = zext8(RS1_B(i)) > insn.p_zimm6() ? 
RS1_B(i) : insn.p_zimm6(); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); + diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_maxu_sci_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_maxu_sci_h.h new file mode 100644 index 000000000..9aaf9effc --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_maxu_sci_h.h @@ -0,0 +1,9 @@ +uint16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = zext16(RS1_H(i)) > insn.p_zimm6() ? RS1_H(i) : insn.p_zimm6(); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_min_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_min_b.h new file mode 100644 index 000000000..1b9104b55 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_min_b.h @@ -0,0 +1,9 @@ +int8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = sext8(RS1_B(i)) <= sext8(RS2_B(i)) ? RS1_B(i) : RS2_B(i); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_min_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_min_h.h new file mode 100644 index 000000000..bbc83caea --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_min_h.h @@ -0,0 +1,9 @@ +int16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = sext16(RS1_H(i)) <= sext16(RS2_H(i)) ? RS1_H(i) : RS2_H(i); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_min_sc_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_min_sc_b.h new file mode 100644 index 000000000..1d2aac507 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_min_sc_b.h @@ -0,0 +1,9 @@ +int8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = sext8(RS1_B(i)) <= sext8(RS2_B(0)) ? 
RS1_B(i) : RS2_B(0); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_min_sc_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_min_sc_h.h new file mode 100644 index 000000000..b2b8ab110 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_min_sc_h.h @@ -0,0 +1,9 @@ +int16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = sext16(RS1_H(i)) <= sext16(RS2_H(0)) ? RS1_H(i) : RS2_H(0); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_min_sci_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_min_sci_b.h new file mode 100644 index 000000000..031b51f36 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_min_sci_b.h @@ -0,0 +1,9 @@ +int8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = sext8(RS1_B(i)) <= insn.p_simm6() ? RS1_B(i) : insn.p_simm6(); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_min_sci_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_min_sci_h.h new file mode 100644 index 000000000..d007e0662 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_min_sci_h.h @@ -0,0 +1,9 @@ +int16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = sext16(RS1_H(i)) <= insn.p_simm6() ? RS1_H(i) : insn.p_simm6(); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_minu_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_minu_b.h new file mode 100644 index 000000000..bbb92ca55 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_minu_b.h @@ -0,0 +1,9 @@ +uint8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = zext8(RS1_B(i)) <= zext8(RS2_B(i)) ? 
RS1_B(i) : RS2_B(i); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_minu_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_minu_h.h new file mode 100644 index 000000000..fa7b0a4e2 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_minu_h.h @@ -0,0 +1,9 @@ +uint16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = zext16(RS1_H(i)) <= zext16(RS2_H(i)) ? RS1_H(i) : RS2_H(i); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_minu_sc_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_minu_sc_b.h new file mode 100644 index 000000000..566bcce6d --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_minu_sc_b.h @@ -0,0 +1,9 @@ +uint8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = zext8(RS1_B(i)) <= zext8(RS2_B(0)) ? RS1_B(i) : RS2_B(0); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_minu_sc_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_minu_sc_h.h new file mode 100644 index 000000000..7471d9678 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_minu_sc_h.h @@ -0,0 +1,9 @@ +uint16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = zext16(RS1_H(i)) <= zext16(RS2_H(0)) ? RS1_H(i) : RS2_H(0); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_minu_sci_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_minu_sci_b.h new file mode 100644 index 000000000..75c43787c --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_minu_sci_b.h @@ -0,0 +1,9 @@ +uint8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = zext8(RS1_B(i)) <= insn.p_zimm6() ? 
RS1_B(i) : insn.p_zimm6(); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_minu_sci_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_minu_sci_h.h new file mode 100644 index 000000000..c665e92f4 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_minu_sci_h.h @@ -0,0 +1,9 @@ +uint16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = zext16(RS1_H(i)) <= insn.p_zimm6() ? RS1_H(i) : insn.p_zimm6(); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_or_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_or_b.h new file mode 100644 index 000000000..d27a6e5d1 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_or_b.h @@ -0,0 +1,9 @@ +uint8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = RS1_B(i) | RS2_B(i); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_or_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_or_h.h new file mode 100644 index 000000000..65b112893 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_or_h.h @@ -0,0 +1,9 @@ +uint16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = RS1_H(i) | RS2_H(i); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_or_sc_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_or_sc_b.h new file mode 100644 index 000000000..cac508744 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_or_sc_b.h @@ -0,0 +1,9 @@ +uint8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = RS1_B(i) | RS2_B(0); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git 
a/toolchain/riscv-isa-sim/riscv/insns/pv_or_sc_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_or_sc_h.h new file mode 100644 index 000000000..e6f567cf3 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_or_sc_h.h @@ -0,0 +1,9 @@ +uint16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = RS1_H(i) | RS2_H(0); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_or_sci_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_or_sci_b.h new file mode 100644 index 000000000..0cb7b5cb6 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_or_sci_b.h @@ -0,0 +1,9 @@ +uint8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = RS1_B(i) | insn.p_simm6(); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_or_sci_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_or_sci_h.h new file mode 100644 index 000000000..e95922e1e --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_or_sci_h.h @@ -0,0 +1,9 @@ +uint16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = RS1_H(i) | insn.p_simm6(); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_sdotsp_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_sdotsp_b.h new file mode 100644 index 000000000..812e3d436 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_sdotsp_b.h @@ -0,0 +1,6 @@ +int32_t acc = RD; + +for(int i = xlen/8 - 1; i >= 0; i--) + acc += sext8(RS1_B(i)) * sext8(RS2_B(i)); + +WRITE_RD(sext_xlen(acc)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_sdotsp_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_sdotsp_h.h new file mode 100644 index 000000000..9ccfae939 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_sdotsp_h.h @@ -0,0 +1,6 @@ +int32_t acc 
= RD; + +for(int i = xlen/16 - 1; i >= 0; i--) + acc += sext16(RS1_H(i)) * sext16(RS2_H(i)); + +WRITE_RD(sext_xlen(acc)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_sdotsp_sc_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_sdotsp_sc_b.h new file mode 100644 index 000000000..e665a669f --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_sdotsp_sc_b.h @@ -0,0 +1,6 @@ +int32_t acc = RD; + +for(int i = xlen/8 - 1; i >= 0; i--) + acc += sext8(RS1_B(i)) * sext8(RS2_B(0)); + +WRITE_RD(sext_xlen(acc)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_sdotsp_sc_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_sdotsp_sc_h.h new file mode 100644 index 000000000..fa1ca93fe --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_sdotsp_sc_h.h @@ -0,0 +1,6 @@ +int32_t acc = RD; + +for(int i = xlen/16 - 1; i >= 0; i--) + acc += sext16(RS1_H(i)) * sext16(RS2_H(0)); + +WRITE_RD(sext_xlen(acc)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_sdotsp_sci_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_sdotsp_sci_b.h new file mode 100644 index 000000000..31aab1fe5 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_sdotsp_sci_b.h @@ -0,0 +1,6 @@ +int32_t acc = RD; + +for(int i = xlen/8 - 1; i >= 0; i--) + acc += sext8(RS1_B(i)) * insn.p_simm6(); + +WRITE_RD(sext_xlen(acc)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_sdotsp_sci_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_sdotsp_sci_h.h new file mode 100644 index 000000000..151d16a2e --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_sdotsp_sci_h.h @@ -0,0 +1,6 @@ +int32_t acc = RD; + +for(int i = xlen/16 - 1; i >= 0; i--) + acc += sext16(RS1_H(i)) * insn.p_simm6(); + +WRITE_RD(sext_xlen(acc)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_sdotup_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_sdotup_b.h new file mode 100644 index 000000000..82e47b4f8 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_sdotup_b.h @@ -0,0 +1,6 @@ +uint32_t acc = RD; + +for(int i = xlen/8 - 1; i >= 0; i--) 
+ acc += zext8(RS1_B(i)) * zext8(RS2_B(i)); + +WRITE_RD(sext_xlen(acc)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_sdotup_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_sdotup_h.h new file mode 100644 index 000000000..de77009a0 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_sdotup_h.h @@ -0,0 +1,6 @@ +uint32_t acc = RD; + +for(int i = xlen/16 - 1; i >= 0; i--) + acc += zext16(RS1_H(i)) * zext16(RS2_H(i)); + +WRITE_RD(sext_xlen(acc)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_sdotup_sc_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_sdotup_sc_b.h new file mode 100644 index 000000000..717fffc11 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_sdotup_sc_b.h @@ -0,0 +1,6 @@ +uint32_t acc = RD; + +for(int i = xlen/8 - 1; i >= 0; i--) + acc += zext8(RS1_B(i)) * zext8(RS2_B(0)); + +WRITE_RD(sext_xlen(acc)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_sdotup_sc_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_sdotup_sc_h.h new file mode 100644 index 000000000..ecf048566 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_sdotup_sc_h.h @@ -0,0 +1,6 @@ +uint32_t acc = RD; + +for(int i = xlen/16 - 1; i >= 0; i--) + acc += zext16(RS1_H(i)) * zext16(RS2_H(0)); + +WRITE_RD(sext_xlen(acc)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_sdotup_sci_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_sdotup_sci_b.h new file mode 100644 index 000000000..bd4d850e6 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_sdotup_sci_b.h @@ -0,0 +1,6 @@ +uint32_t acc = RD; + +for(int i = xlen/8 - 1; i >= 0; i--) + acc += zext8(RS1_B(i)) * insn.p_zimm6(); + +WRITE_RD(sext_xlen(acc)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_sdotup_sci_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_sdotup_sci_h.h new file mode 100644 index 000000000..145e73717 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_sdotup_sci_h.h @@ -0,0 +1,6 @@ +uint32_t acc = RD; + +for(int i = xlen/16 - 1; i >= 0; i--) + acc += zext16(RS1_H(i)) * 
insn.p_zimm6(); + +WRITE_RD(sext_xlen(acc)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_sdotusp_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_sdotusp_b.h new file mode 100644 index 000000000..05d268ed2 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_sdotusp_b.h @@ -0,0 +1,6 @@ +int32_t acc = RD; + +for(int i = xlen/8 - 1; i >= 0; i--) + acc += sreg_t(zext8(RS1_B(i))) * sext8(RS2_B(i)); + +WRITE_RD(sext_xlen(acc)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_sdotusp_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_sdotusp_h.h new file mode 100644 index 000000000..fdc550db1 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_sdotusp_h.h @@ -0,0 +1,6 @@ +int32_t acc = RD; + +for(int i = xlen/16 - 1; i >= 0; i--) + acc += sreg_t(zext16(RS1_H(i))) * sext16(RS2_H(i)); + +WRITE_RD(sext_xlen(acc)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_sdotusp_sc_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_sdotusp_sc_b.h new file mode 100644 index 000000000..2840cd148 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_sdotusp_sc_b.h @@ -0,0 +1,6 @@ +int32_t acc = RD; + +for(int i = xlen/8 - 1; i >= 0; i--) + acc += sreg_t(zext8(RS1_B(i))) * sext8(RS2_B(0)); + +WRITE_RD(sext_xlen(acc)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_sdotusp_sc_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_sdotusp_sc_h.h new file mode 100644 index 000000000..ca4c25ac1 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_sdotusp_sc_h.h @@ -0,0 +1,6 @@ +int32_t acc = RD; + +for(int i = xlen/16 - 1; i >= 0; i--) + acc += sreg_t(zext16(RS1_H(i))) * sext16(RS2_H(0)); + +WRITE_RD(sext_xlen(acc)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_sdotusp_sci_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_sdotusp_sci_b.h new file mode 100644 index 000000000..d6823f83a --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_sdotusp_sci_b.h @@ -0,0 +1,6 @@ +int32_t acc = RD; + +for(int i = xlen/8 - 1; i >= 0; i--) + acc += sreg_t(zext8(RS1_B(i))) * 
insn.p_simm6(); + +WRITE_RD(sext_xlen(acc)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_sdotusp_sci_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_sdotusp_sci_h.h new file mode 100644 index 000000000..42c4fbe88 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_sdotusp_sci_h.h @@ -0,0 +1,6 @@ +int32_t acc = RD; + +for(int i = xlen/16 - 1; i >= 0; i--) + acc += sreg_t(zext16(RS1_H(i))) * insn.p_simm6(); + +WRITE_RD(sext_xlen(acc)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_shuffle2_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_shuffle2_b.h new file mode 100644 index 000000000..8dd4e9994 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_shuffle2_b.h @@ -0,0 +1,14 @@ +uint8_t src_sel; // select rd or rs1 as source (bit [2] of second operand) +uint8_t byte_sel; // select which byte from source (bits [1:0] of second operand) +uint8_t source; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + byte_sel = RS2_B(i) & 0x03; // bits [1:0] of RS2_B(i) + src_sel = (RS2_B(i) >> 2) & 0x01; // bit [2] of RS2_B(i) + source = src_sel ? RS1_B(byte_sel) : RD_B(byte_sel); + simd_rd <<= 8; + simd_rd += (uint32_t)source & 0x000000FF; +} + +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_shuffle2_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_shuffle2_h.h new file mode 100644 index 000000000..362a4bdc7 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_shuffle2_h.h @@ -0,0 +1,14 @@ +uint8_t src_sel; // select rd or rs1 as source (bit [1] of second operand) +uint8_t half_sel; // select which half from source (bit [0] of second operand) +uint16_t source; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + half_sel = RS2_H(i) & 0x01; // bit [0] of RS2_H(i) + src_sel = (RS2_H(i) >> 1) & 0x01; // bit [1] of RS2_H(i) + source = src_sel ? 
RS1_H(half_sel) : RD_H(half_sel); + simd_rd <<= 16; + simd_rd += (uint32_t)source & 0x0000FFFF; +} + +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_sll_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_sll_b.h new file mode 100644 index 000000000..ca8bcd688 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_sll_b.h @@ -0,0 +1,9 @@ +uint8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = zext8(RS1_B(i)) << (zext8(RS2_B(i)) & 0x07); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_sll_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_sll_h.h new file mode 100644 index 000000000..cb9200cac --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_sll_h.h @@ -0,0 +1,9 @@ +uint16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = zext16(RS1_H(i)) << (zext16(RS2_H(i)) & 0x0F); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_sll_sc_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_sll_sc_b.h new file mode 100644 index 000000000..d32051998 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_sll_sc_b.h @@ -0,0 +1,9 @@ +uint8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = zext8(RS1_B(i)) << (zext8(RS2_B(0)) & 0x07); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_sll_sc_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_sll_sc_h.h new file mode 100644 index 000000000..e84cf0214 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_sll_sc_h.h @@ -0,0 +1,9 @@ +uint16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = zext16(RS1_H(i)) << (zext16(RS2_H(0)) & 0x0F); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} 
+WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_sll_sci_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_sll_sci_b.h new file mode 100644 index 000000000..8e637bea8 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_sll_sci_b.h @@ -0,0 +1,9 @@ +uint8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = zext8(RS1_B(i)) << (insn.p_simm6() & 0x07); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_sll_sci_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_sll_sci_h.h new file mode 100644 index 000000000..ec94a2e28 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_sll_sci_h.h @@ -0,0 +1,9 @@ +uint16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = zext16(RS1_H(i)) << (insn.p_simm6() & 0x0F); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_sra_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_sra_b.h new file mode 100644 index 000000000..9525a0afc --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_sra_b.h @@ -0,0 +1,9 @@ +int8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = sext8(RS1_B(i)) >> (zext8(RS2_B(i)) & 0x07); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_sra_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_sra_h.h new file mode 100644 index 000000000..b3e8a0b94 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_sra_h.h @@ -0,0 +1,9 @@ +int16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = sext16(RS1_H(i)) >> (zext16(RS2_H(i)) & 0x0F); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_sra_sc_b.h 
b/toolchain/riscv-isa-sim/riscv/insns/pv_sra_sc_b.h new file mode 100644 index 000000000..9442d9280 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_sra_sc_b.h @@ -0,0 +1,9 @@ +int8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = sext8(RS1_B(i)) >> (zext8(RS2_B(0)) & 0x07); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_sra_sc_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_sra_sc_h.h new file mode 100644 index 000000000..1e012f750 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_sra_sc_h.h @@ -0,0 +1,9 @@ +int16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = sext16(RS1_H(i)) >> (zext16(RS2_H(0)) & 0x0F); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_sra_sci_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_sra_sci_b.h new file mode 100644 index 000000000..3dafb3cb5 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_sra_sci_b.h @@ -0,0 +1,9 @@ +int8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = sext8(RS1_B(i)) >> (insn.p_simm6() & 0x07); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_sra_sci_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_sra_sci_h.h new file mode 100644 index 000000000..4f56d0e5e --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_sra_sci_h.h @@ -0,0 +1,9 @@ +int16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = sext16(RS1_H(i)) >> (insn.p_simm6() & 0x0F); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_srl_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_srl_b.h new file mode 100644 index 
000000000..37be2e23a --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_srl_b.h @@ -0,0 +1,9 @@ +uint8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = zext8(RS1_B(i)) >> (zext8(RS2_B(i)) & 0x07); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_srl_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_srl_h.h new file mode 100644 index 000000000..1b35116d3 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_srl_h.h @@ -0,0 +1,9 @@ +uint16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = zext16(RS1_H(i)) >> (zext16(RS2_H(i)) & 0x0F); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_srl_sc_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_srl_sc_b.h new file mode 100644 index 000000000..4b04ab6f7 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_srl_sc_b.h @@ -0,0 +1,9 @@ +uint8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = zext8(RS1_B(i)) >> (zext8(RS2_B(0)) & 0x07); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_srl_sc_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_srl_sc_h.h new file mode 100644 index 000000000..f49f784db --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_srl_sc_h.h @@ -0,0 +1,9 @@ +uint16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = zext16(RS1_H(i)) >> (zext16(RS2_H(0)) & 0x0F); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_srl_sci_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_srl_sci_b.h new file mode 100644 index 000000000..b0b38f2a9 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_srl_sci_b.h @@ -0,0 
+1,9 @@ +uint8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = zext8(RS1_B(i)) >> (insn.p_simm6() & 0x07); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_srl_sci_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_srl_sci_h.h new file mode 100644 index 000000000..5aba29cc9 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_srl_sci_h.h @@ -0,0 +1,9 @@ +uint16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = zext16(RS1_H(i)) >> (insn.p_simm6() & 0x0F); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_sub_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_sub_b.h new file mode 100644 index 000000000..2ce1fe224 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_sub_b.h @@ -0,0 +1,9 @@ +int8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = sext8(RS1_B(i)) - sext8(RS2_B(i)); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_sub_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_sub_h.h new file mode 100644 index 000000000..4ec513726 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_sub_h.h @@ -0,0 +1,9 @@ +int16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = sext16(RS1_H(i)) - sext16(RS2_H(i)); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_sub_sc_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_sub_sc_b.h new file mode 100644 index 000000000..3375e64c2 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_sub_sc_b.h @@ -0,0 +1,9 @@ +int8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = sext8(RS1_B(i)) - sext8(RS2_B(0)); 
+ simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_sub_sc_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_sub_sc_h.h new file mode 100644 index 000000000..4bb12839c --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_sub_sc_h.h @@ -0,0 +1,9 @@ +int16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = sext16(RS1_H(i)) - sext16(RS2_H(0)); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_sub_sci_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_sub_sci_b.h new file mode 100644 index 000000000..20cc94123 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_sub_sci_b.h @@ -0,0 +1,9 @@ +int8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = sext8(RS1_B(i)) - insn.p_simm6(); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_sub_sci_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_sub_sci_h.h new file mode 100644 index 000000000..50b11a665 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_sub_sci_h.h @@ -0,0 +1,9 @@ +int16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = sext16(RS1_H(i)) - insn.p_simm6(); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_xor_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_xor_b.h new file mode 100644 index 000000000..2fc203b4d --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_xor_b.h @@ -0,0 +1,9 @@ +uint8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = RS1_B(i) ^ RS2_B(i); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_xor_h.h 
b/toolchain/riscv-isa-sim/riscv/insns/pv_xor_h.h new file mode 100644 index 000000000..56cf0b7c9 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_xor_h.h @@ -0,0 +1,9 @@ +uint16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = RS1_H(i) ^ RS2_H(i); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_xor_sc_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_xor_sc_b.h new file mode 100644 index 000000000..ed3d5075a --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_xor_sc_b.h @@ -0,0 +1,9 @@ +uint8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = RS1_B(i) ^ RS2_B(0); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_xor_sc_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_xor_sc_h.h new file mode 100644 index 000000000..9d632f367 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_xor_sc_h.h @@ -0,0 +1,9 @@ +uint16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = RS1_H(i) ^ RS2_H(0); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_xor_sci_b.h b/toolchain/riscv-isa-sim/riscv/insns/pv_xor_sci_b.h new file mode 100644 index 000000000..7ecbf94fc --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_xor_sci_b.h @@ -0,0 +1,9 @@ +uint8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = RS1_B(i) ^ insn.p_simm6(); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/insns/pv_xor_sci_h.h b/toolchain/riscv-isa-sim/riscv/insns/pv_xor_sci_h.h new file mode 100644 index 000000000..0a02ced60 --- /dev/null +++ b/toolchain/riscv-isa-sim/riscv/insns/pv_xor_sci_h.h @@ -0,0 +1,9 
@@ +uint16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = RS1_H(i) ^ insn.p_simm6(); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/toolchain/riscv-isa-sim/riscv/riscv.mk.in b/toolchain/riscv-isa-sim/riscv/riscv.mk.in index af69ae8f0..20c11ab2a 100644 --- a/toolchain/riscv-isa-sim/riscv/riscv.mk.in +++ b/toolchain/riscv-isa-sim/riscv/riscv.mk.in @@ -333,14 +333,23 @@ riscv_insn_ext_q = \ fsqrt_q \ fsub_q \ +# Disabled riscv_insn_ext_v_alu_int instructions for opcode overlap: +#vasubu_vx +#vslide1up_vx +#vaaddu_vx +#vadc_vvm +#vadc_vxm +#vsbc_vvm +#vsbc_vxm +#vmulhu_vx +#vdivu_vx +#vmulhsu_vx + riscv_insn_ext_v_alu_int = \ vaadd_vv \ vaaddu_vv \ vaadd_vx \ - vaaddu_vx \ vadc_vim \ - vadc_vvm \ - vadc_vxm \ vadd_vi \ vadd_vv \ vadd_vx \ @@ -350,12 +359,10 @@ riscv_insn_ext_v_alu_int = \ vasub_vv \ vasubu_vv \ vasub_vx \ - vasubu_vx \ vcompress_vm \ vdiv_vv \ vdiv_vx \ vdivu_vv \ - vdivu_vx \ vdot_vv \ vdotu_vv \ vid_v \ @@ -416,9 +423,7 @@ riscv_insn_ext_v_alu_int = \ vmulh_vv \ vmulh_vx \ vmulhsu_vv \ - vmulhsu_vx \ vmulhu_vv \ - vmulhu_vx \ vmv_s_x \ vmv_v_i \ vmv_v_v \ @@ -473,13 +478,10 @@ riscv_insn_ext_v_alu_int = \ vsaddu_vi \ vsaddu_vv \ vsaddu_vx \ - vsbc_vvm \ - vsbc_vxm \ vsext_vf2 \ vsext_vf4 \ vsext_vf8 \ vslide1down_vx \ - vslide1up_vx \ vslidedown_vi \ vslidedown_vx \ vslideup_vi \ @@ -545,6 +547,9 @@ riscv_insn_ext_v_alu_int = \ vzext_vf4 \ vzext_vf8 \ +# Disabled riscv_insn_ext_v_alu_fp instructions for opcode overlap: +#vfcvt_x_f_v + riscv_insn_ext_v_alu_fp = \ vfadd_vf \ vfadd_vv \ @@ -553,7 +558,6 @@ riscv_insn_ext_v_alu_fp = \ vfcvt_f_xu_v \ vfcvt_rtz_x_f_v \ vfcvt_rtz_xu_f_v \ - vfcvt_x_f_v \ vfcvt_xu_f_v \ vfdiv_vf \ vfdiv_vv \ @@ -741,9 +745,11 @@ riscv_insn_ext_v_ldst = \ vs4r_v \ vs8r_v \ +# Disabled riscv_insn_ext_v_ctrl instructions for opcode overlap: +#vsetvl + riscv_insn_ext_v_ctrl = \ vsetvli \ - vsetvl \ riscv_insn_ext_v = \ 
$(riscv_insn_ext_v_alu_fp) \ @@ -753,6 +759,30 @@ riscv_insn_ext_v = \ $(riscv_insn_ext_v_ldst) \ riscv_insn_ext_xpulpimg = \ + p_lb_irpost \ + p_lbu_irpost \ + p_lh_irpost \ + p_lhu_irpost \ + p_lw_irpost \ + p_lb_rrpost \ + p_lbu_rrpost \ + p_lh_rrpost \ + p_lhu_rrpost \ + p_lw_rrpost \ + p_lb_rr \ + p_lbu_rr \ + p_lh_rr \ + p_lhu_rr \ + p_lw_rr \ + p_sb_irpost \ + p_sh_irpost \ + p_sw_irpost \ + p_sb_rrpost \ + p_sh_rrpost \ + p_sw_rrpost \ + p_sb_rr \ + p_sh_rr \ + p_sw_rr \ p_abs \ p_slet \ p_sletu \ @@ -770,6 +800,138 @@ riscv_insn_ext_xpulpimg = \ p_clipur \ p_beqimm \ p_bneimm \ + p_mac \ + p_msu \ + pv_add_h \ + pv_add_sc_h \ + pv_add_sci_h \ + pv_add_b \ + pv_add_sc_b \ + pv_add_sci_b \ + pv_sub_h \ + pv_sub_sc_h \ + pv_sub_sci_h \ + pv_sub_b \ + pv_sub_sc_b \ + pv_sub_sci_b \ + pv_avg_h \ + pv_avg_sc_h \ + pv_avg_sci_h \ + pv_avg_b \ + pv_avg_sc_b \ + pv_avg_sci_b \ + pv_avgu_h \ + pv_avgu_sc_h \ + pv_avgu_sci_h \ + pv_avgu_b \ + pv_avgu_sc_b \ + pv_avgu_sci_b \ + pv_min_h \ + pv_min_sc_h \ + pv_min_sci_h \ + pv_min_b \ + pv_min_sc_b \ + pv_min_sci_b \ + pv_minu_h \ + pv_minu_sc_h \ + pv_minu_sci_h \ + pv_minu_b \ + pv_minu_sc_b \ + pv_minu_sci_b \ + pv_max_h \ + pv_max_sc_h \ + pv_max_sci_h \ + pv_max_b \ + pv_max_sc_b \ + pv_max_sci_b \ + pv_maxu_h \ + pv_maxu_sc_h \ + pv_maxu_sci_h \ + pv_maxu_b \ + pv_maxu_sc_b \ + pv_maxu_sci_b \ + pv_srl_h \ + pv_srl_sc_h \ + pv_srl_sci_h \ + pv_srl_b \ + pv_srl_sc_b \ + pv_srl_sci_b \ + pv_sra_h \ + pv_sra_sc_h \ + pv_sra_sci_h \ + pv_sra_b \ + pv_sra_sc_b \ + pv_sra_sci_b \ + pv_sll_h \ + pv_sll_sc_h \ + pv_sll_sci_h \ + pv_sll_b \ + pv_sll_sc_b \ + pv_sll_sci_b \ + pv_or_h \ + pv_or_sc_h \ + pv_or_sci_h \ + pv_or_b \ + pv_or_sc_b \ + pv_or_sci_b \ + pv_xor_h \ + pv_xor_sc_h \ + pv_xor_sci_h \ + pv_xor_b \ + pv_xor_sc_b \ + pv_xor_sci_b \ + pv_and_h \ + pv_and_sc_h \ + pv_and_sci_h \ + pv_and_b \ + pv_and_sc_b \ + pv_and_sci_b \ + pv_abs_h \ + pv_abs_b \ + pv_extract_h \ + pv_extract_b \ + pv_extractu_h \ + 
pv_extractu_b \ + pv_insert_h \ + pv_insert_b \ + pv_dotup_h \ + pv_dotup_sc_h \ + pv_dotup_sci_h \ + pv_dotup_b \ + pv_dotup_sc_b \ + pv_dotup_sci_b \ + pv_dotusp_h \ + pv_dotusp_sc_h \ + pv_dotusp_sci_h \ + pv_dotusp_b \ + pv_dotusp_sc_b \ + pv_dotusp_sci_b \ + pv_dotsp_h \ + pv_dotsp_sc_h \ + pv_dotsp_sci_h \ + pv_dotsp_b \ + pv_dotsp_sc_b \ + pv_dotsp_sci_b \ + pv_sdotup_h \ + pv_sdotup_sc_h \ + pv_sdotup_sci_h \ + pv_sdotup_b \ + pv_sdotup_sc_b \ + pv_sdotup_sci_b \ + pv_sdotusp_h \ + pv_sdotusp_sc_h \ + pv_sdotusp_sci_h \ + pv_sdotusp_b \ + pv_sdotusp_sc_b \ + pv_sdotusp_sci_b \ + pv_sdotsp_h \ + pv_sdotsp_sc_h \ + pv_sdotsp_sci_h \ + pv_sdotsp_b \ + pv_sdotsp_sc_b \ + pv_sdotsp_sci_b \ + pv_shuffle2_h \ + pv_shuffle2_b \ riscv_insn_ext_h = \ hfence_gvma \ diff --git a/toolchain/riscv-opcodes/Makefile b/toolchain/riscv-opcodes/Makefile index 5f39502b0..12d02b4aa 100644 --- a/toolchain/riscv-opcodes/Makefile +++ b/toolchain/riscv-opcodes/Makefile @@ -7,7 +7,7 @@ MY_OPCODES := opcodes-frep_CUSTOM opcodes-xpulpimg_CUSTOM opcodes-rv32d-zfh_DRAF ALL_OPCODES := opcodes-pseudo $(ALL_REAL_OPCODES) $(MY_OPCODES) opcodes-rvv-pseudo # Opcodes to be discarded -DISCARDED_OPCODES := +DISCARDED_OPCODES := opcodes-frep_CUSTOM OPCODES = $(filter-out $(sort $(DISCARDED_OPCODES)), $(sort $(ALL_OPCODES))) diff --git a/toolchain/riscv-opcodes/README.md b/toolchain/riscv-opcodes/README.md index f3ac222ac..4b8110225 100644 --- a/toolchain/riscv-opcodes/README.md +++ b/toolchain/riscv-opcodes/README.md @@ -32,4 +32,8 @@ starting from their high-level, human-readable description. for the parsing script execution, basing on the target architecture, by listing them in the variable `DISCARDED_OPCODES`; - opcodes files from the official 128-bit extension have not been introduced - due to the other changes which they imply to other opcodes specifications. 
+ due to the other changes which they imply to other opcodes specifications; +- some of the instructions originally declared in the vectorial extension + (`opcodes-rvv` file) have been set as pseudo-instruction due to the overlapping + of their opcodes space with the opcodes space of the SIMD instructions from + Xpulpv2, defined in `opcodes-xpulpimg_CUSTOM`. diff --git a/toolchain/riscv-opcodes/encoding_out.h b/toolchain/riscv-opcodes/encoding_out.h index 98660a4cb..f78afbd15 100644 --- a/toolchain/riscv-opcodes/encoding_out.h +++ b/toolchain/riscv-opcodes/encoding_out.h @@ -326,8 +326,6 @@ #define MASK_CUSTOM3_RD_RS1 0x707f #define MATCH_CUSTOM3_RD_RS1_RS2 0x707b #define MASK_CUSTOM3_RD_RS1_RS2 0x707f -#define MATCH_FREP 0xb -#define MASK_FREP 0x7f #define MATCH_SLLI_RV32 0x1013 #define MASK_SLLI_RV32 0xfe00707f #define MATCH_SRLI_RV32 0x5013 @@ -928,8 +926,6 @@ #define MASK_C_FSWSP 0xe003 #define MATCH_VSETVLI 0x7057 #define MASK_VSETVLI 0x8000707f -#define MATCH_VSETVL 0x80007057 -#define MASK_VSETVL 0xfe00707f #define MATCH_VLE8_V 0x7 #define MASK_VLE8_V 0x1df0707f #define MATCH_VLE16_V 0x5007 @@ -1230,8 +1226,6 @@ #define MASK_VFNMSAC_VV 0xfc00707f #define MATCH_VFCVT_XU_F_V 0x48001057 #define MASK_VFCVT_XU_F_V 0xfc0ff07f -#define MATCH_VFCVT_X_F_V 0x48009057 -#define MASK_VFCVT_X_F_V 0xfc0ff07f #define MATCH_VFCVT_F_XU_V 0x48011057 #define MASK_VFCVT_F_XU_V 0xfc0ff07f #define MATCH_VFCVT_F_X_V 0x48019057 @@ -1328,12 +1322,8 @@ #define MASK_VSLIDEUP_VX 0xfc00707f #define MATCH_VSLIDEDOWN_VX 0x3c004057 #define MASK_VSLIDEDOWN_VX 0xfc00707f -#define MATCH_VADC_VXM 0x40004057 -#define MASK_VADC_VXM 0xfe00707f #define MATCH_VMADC_VXM 0x44004057 #define MASK_VMADC_VXM 0xfc00707f -#define MATCH_VSBC_VXM 0x48004057 -#define MASK_VSBC_VXM 0xfe00707f #define MATCH_VMSBC_VXM 0x4c004057 #define MASK_VMSBC_VXM 0xfc00707f #define MATCH_VMERGE_VXM 0x5c004057 @@ -1414,12 +1404,8 @@ #define MASK_VRGATHER_VV 0xfc00707f #define MATCH_VRGATHEREI16_VV 0x38000057 #define 
MASK_VRGATHEREI16_VV 0xfc00707f -#define MATCH_VADC_VVM 0x40000057 -#define MASK_VADC_VVM 0xfe00707f #define MATCH_VMADC_VVM 0x44000057 #define MASK_VMADC_VVM 0xfc00707f -#define MATCH_VSBC_VVM 0x48000057 -#define MASK_VSBC_VVM 0xfe00707f #define MATCH_VMSBC_VVM 0x4c000057 #define MASK_VMSBC_VVM 0xfc00707f #define MATCH_VMERGE_VVM 0x5c000057 @@ -1668,34 +1654,22 @@ #define MASK_VWMACC_VV 0xfc00707f #define MATCH_VWMACCSU_VV 0xfc002057 #define MASK_VWMACCSU_VV 0xfc00707f -#define MATCH_VAADDU_VX 0x20006057 -#define MASK_VAADDU_VX 0xfc00707f #define MATCH_VAADD_VX 0x24006057 #define MASK_VAADD_VX 0xfc00707f -#define MATCH_VASUBU_VX 0x28006057 -#define MASK_VASUBU_VX 0xfc00707f #define MATCH_VASUB_VX 0x2c006057 #define MASK_VASUB_VX 0xfc00707f #define MATCH_VMV_S_X 0x42006057 #define MASK_VMV_S_X 0xfff0707f -#define MATCH_VSLIDE1UP_VX 0x38006057 -#define MASK_VSLIDE1UP_VX 0xfc00707f #define MATCH_VSLIDE1DOWN_VX 0x3c006057 #define MASK_VSLIDE1DOWN_VX 0xfc00707f -#define MATCH_VDIVU_VX 0x80006057 -#define MASK_VDIVU_VX 0xfc00707f #define MATCH_VDIV_VX 0x84006057 #define MASK_VDIV_VX 0xfc00707f #define MATCH_VREMU_VX 0x88006057 #define MASK_VREMU_VX 0xfc00707f #define MATCH_VREM_VX 0x8c006057 #define MASK_VREM_VX 0xfc00707f -#define MATCH_VMULHU_VX 0x90006057 -#define MASK_VMULHU_VX 0xfc00707f #define MATCH_VMUL_VX 0x94006057 #define MASK_VMUL_VX 0xfc00707f -#define MATCH_VMULHSU_VX 0x98006057 -#define MASK_VMULHSU_VX 0xfc00707f #define MATCH_VMULH_VX 0x9c006057 #define MASK_VMULH_VX 0xfc00707f #define MATCH_VMADD_VX 0xa4006057 @@ -1846,6 +1820,54 @@ #define MASK_CSRRSI 0x707f #define MATCH_CSRRCI 0x7073 #define MASK_CSRRCI 0x707f +#define MATCH_P_LB_IRPOST 0xb +#define MASK_P_LB_IRPOST 0x707f +#define MATCH_P_LBU_IRPOST 0x400b +#define MASK_P_LBU_IRPOST 0x707f +#define MATCH_P_LH_IRPOST 0x100b +#define MASK_P_LH_IRPOST 0x707f +#define MATCH_P_LHU_IRPOST 0x500b +#define MASK_P_LHU_IRPOST 0x707f +#define MATCH_P_LW_IRPOST 0x200b +#define MASK_P_LW_IRPOST 0x707f +#define 
MATCH_P_LB_RRPOST 0x700b +#define MASK_P_LB_RRPOST 0xfe00707f +#define MATCH_P_LBU_RRPOST 0x4000700b +#define MASK_P_LBU_RRPOST 0xfe00707f +#define MATCH_P_LH_RRPOST 0x1000700b +#define MASK_P_LH_RRPOST 0xfe00707f +#define MATCH_P_LHU_RRPOST 0x5000700b +#define MASK_P_LHU_RRPOST 0xfe00707f +#define MATCH_P_LW_RRPOST 0x2000700b +#define MASK_P_LW_RRPOST 0xfe00707f +#define MATCH_P_LB_RR 0x7003 +#define MASK_P_LB_RR 0xfe00707f +#define MATCH_P_LBU_RR 0x40007003 +#define MASK_P_LBU_RR 0xfe00707f +#define MATCH_P_LH_RR 0x10007003 +#define MASK_P_LH_RR 0xfe00707f +#define MATCH_P_LHU_RR 0x50007003 +#define MASK_P_LHU_RR 0xfe00707f +#define MATCH_P_LW_RR 0x20007003 +#define MASK_P_LW_RR 0xfe00707f +#define MATCH_P_SB_IRPOST 0x2b +#define MASK_P_SB_IRPOST 0x707f +#define MATCH_P_SH_IRPOST 0x102b +#define MASK_P_SH_IRPOST 0x707f +#define MATCH_P_SW_IRPOST 0x202b +#define MASK_P_SW_IRPOST 0x707f +#define MATCH_P_SB_RRPOST 0x402b +#define MASK_P_SB_RRPOST 0xfe00707f +#define MATCH_P_SH_RRPOST 0x502b +#define MASK_P_SH_RRPOST 0xfe00707f +#define MATCH_P_SW_RRPOST 0x602b +#define MASK_P_SW_RRPOST 0xfe00707f +#define MATCH_P_SB_RR 0x4023 +#define MASK_P_SB_RR 0xfe00707f +#define MATCH_P_SH_RR 0x5023 +#define MASK_P_SH_RR 0xfe00707f +#define MATCH_P_SW_RR 0x6023 +#define MASK_P_SW_RR 0xfe00707f #define MATCH_P_ABS 0x4000033 #define MASK_P_ABS 0xfff0707f #define MATCH_P_SLET 0x4002033 @@ -1880,6 +1902,270 @@ #define MASK_P_BEQIMM 0x707f #define MATCH_P_BNEIMM 0x3063 #define MASK_P_BNEIMM 0x707f +#define MATCH_P_MAC 0x42000033 +#define MASK_P_MAC 0xfe00707f +#define MATCH_P_MSU 0x42001033 +#define MASK_P_MSU 0xfe00707f +#define MATCH_PV_ADD_H 0x57 +#define MASK_PV_ADD_H 0xfe00707f +#define MATCH_PV_ADD_SC_H 0x4057 +#define MASK_PV_ADD_SC_H 0xfe00707f +#define MATCH_PV_ADD_SCI_H 0x6057 +#define MASK_PV_ADD_SCI_H 0xfc00707f +#define MATCH_PV_ADD_B 0x1057 +#define MASK_PV_ADD_B 0xfe00707f +#define MATCH_PV_ADD_SC_B 0x5057 +#define MASK_PV_ADD_SC_B 0xfe00707f +#define 
MATCH_PV_ADD_SCI_B 0x7057 +#define MASK_PV_ADD_SCI_B 0xfc00707f +#define MATCH_PV_SUB_H 0x8000057 +#define MASK_PV_SUB_H 0xfe00707f +#define MATCH_PV_SUB_SC_H 0x8004057 +#define MASK_PV_SUB_SC_H 0xfe00707f +#define MATCH_PV_SUB_SCI_H 0x8006057 +#define MASK_PV_SUB_SCI_H 0xfc00707f +#define MATCH_PV_SUB_B 0x8001057 +#define MASK_PV_SUB_B 0xfe00707f +#define MATCH_PV_SUB_SC_B 0x8005057 +#define MASK_PV_SUB_SC_B 0xfe00707f +#define MATCH_PV_SUB_SCI_B 0x8007057 +#define MASK_PV_SUB_SCI_B 0xfc00707f +#define MATCH_PV_AVG_H 0x10000057 +#define MASK_PV_AVG_H 0xfe00707f +#define MATCH_PV_AVG_SC_H 0x10004057 +#define MASK_PV_AVG_SC_H 0xfe00707f +#define MATCH_PV_AVG_SCI_H 0x10006057 +#define MASK_PV_AVG_SCI_H 0xfc00707f +#define MATCH_PV_AVG_B 0x10001057 +#define MASK_PV_AVG_B 0xfe00707f +#define MATCH_PV_AVG_SC_B 0x10005057 +#define MASK_PV_AVG_SC_B 0xfe00707f +#define MATCH_PV_AVG_SCI_B 0x10007057 +#define MASK_PV_AVG_SCI_B 0xfc00707f +#define MATCH_PV_AVGU_H 0x18000057 +#define MASK_PV_AVGU_H 0xfe00707f +#define MATCH_PV_AVGU_SC_H 0x18004057 +#define MASK_PV_AVGU_SC_H 0xfe00707f +#define MATCH_PV_AVGU_SCI_H 0x18006057 +#define MASK_PV_AVGU_SCI_H 0xfc00707f +#define MATCH_PV_AVGU_B 0x18001057 +#define MASK_PV_AVGU_B 0xfe00707f +#define MATCH_PV_AVGU_SC_B 0x18005057 +#define MASK_PV_AVGU_SC_B 0xfe00707f +#define MATCH_PV_AVGU_SCI_B 0x18007057 +#define MASK_PV_AVGU_SCI_B 0xfc00707f +#define MATCH_PV_MIN_H 0x20000057 +#define MASK_PV_MIN_H 0xfe00707f +#define MATCH_PV_MIN_SC_H 0x20004057 +#define MASK_PV_MIN_SC_H 0xfe00707f +#define MATCH_PV_MIN_SCI_H 0x20006057 +#define MASK_PV_MIN_SCI_H 0xfc00707f +#define MATCH_PV_MIN_B 0x20001057 +#define MASK_PV_MIN_B 0xfe00707f +#define MATCH_PV_MIN_SC_B 0x20005057 +#define MASK_PV_MIN_SC_B 0xfe00707f +#define MATCH_PV_MIN_SCI_B 0x20007057 +#define MASK_PV_MIN_SCI_B 0xfc00707f +#define MATCH_PV_MINU_H 0x28000057 +#define MASK_PV_MINU_H 0xfe00707f +#define MATCH_PV_MINU_SC_H 0x28004057 +#define MASK_PV_MINU_SC_H 0xfe00707f +#define 
MATCH_PV_MINU_SCI_H 0x28006057 +#define MASK_PV_MINU_SCI_H 0xfc00707f +#define MATCH_PV_MINU_B 0x28001057 +#define MASK_PV_MINU_B 0xfe00707f +#define MATCH_PV_MINU_SC_B 0x28005057 +#define MASK_PV_MINU_SC_B 0xfe00707f +#define MATCH_PV_MINU_SCI_B 0x28007057 +#define MASK_PV_MINU_SCI_B 0xfc00707f +#define MATCH_PV_MAX_H 0x30000057 +#define MASK_PV_MAX_H 0xfe00707f +#define MATCH_PV_MAX_SC_H 0x30004057 +#define MASK_PV_MAX_SC_H 0xfe00707f +#define MATCH_PV_MAX_SCI_H 0x30006057 +#define MASK_PV_MAX_SCI_H 0xfc00707f +#define MATCH_PV_MAX_B 0x30001057 +#define MASK_PV_MAX_B 0xfe00707f +#define MATCH_PV_MAX_SC_B 0x30005057 +#define MASK_PV_MAX_SC_B 0xfe00707f +#define MATCH_PV_MAX_SCI_B 0x30007057 +#define MASK_PV_MAX_SCI_B 0xfc00707f +#define MATCH_PV_MAXU_H 0x38000057 +#define MASK_PV_MAXU_H 0xfe00707f +#define MATCH_PV_MAXU_SC_H 0x38004057 +#define MASK_PV_MAXU_SC_H 0xfe00707f +#define MATCH_PV_MAXU_SCI_H 0x38006057 +#define MASK_PV_MAXU_SCI_H 0xfc00707f +#define MATCH_PV_MAXU_B 0x38001057 +#define MASK_PV_MAXU_B 0xfe00707f +#define MATCH_PV_MAXU_SC_B 0x38005057 +#define MASK_PV_MAXU_SC_B 0xfe00707f +#define MATCH_PV_MAXU_SCI_B 0x38007057 +#define MASK_PV_MAXU_SCI_B 0xfc00707f +#define MATCH_PV_SRL_H 0x40000057 +#define MASK_PV_SRL_H 0xfe00707f +#define MATCH_PV_SRL_SC_H 0x40004057 +#define MASK_PV_SRL_SC_H 0xfe00707f +#define MATCH_PV_SRL_SCI_H 0x40006057 +#define MASK_PV_SRL_SCI_H 0xfc00707f +#define MATCH_PV_SRL_B 0x40001057 +#define MASK_PV_SRL_B 0xfe00707f +#define MATCH_PV_SRL_SC_B 0x40005057 +#define MASK_PV_SRL_SC_B 0xfe00707f +#define MATCH_PV_SRL_SCI_B 0x40007057 +#define MASK_PV_SRL_SCI_B 0xfc00707f +#define MATCH_PV_SRA_H 0x48000057 +#define MASK_PV_SRA_H 0xfe00707f +#define MATCH_PV_SRA_SC_H 0x48004057 +#define MASK_PV_SRA_SC_H 0xfe00707f +#define MATCH_PV_SRA_SCI_H 0x48006057 +#define MASK_PV_SRA_SCI_H 0xfc00707f +#define MATCH_PV_SRA_B 0x48001057 +#define MASK_PV_SRA_B 0xfe00707f +#define MATCH_PV_SRA_SC_B 0x48005057 +#define MASK_PV_SRA_SC_B 0xfe00707f 
+#define MATCH_PV_SRA_SCI_B 0x48007057 +#define MASK_PV_SRA_SCI_B 0xfc00707f +#define MATCH_PV_SLL_H 0x50000057 +#define MASK_PV_SLL_H 0xfe00707f +#define MATCH_PV_SLL_SC_H 0x50004057 +#define MASK_PV_SLL_SC_H 0xfe00707f +#define MATCH_PV_SLL_SCI_H 0x50006057 +#define MASK_PV_SLL_SCI_H 0xfc00707f +#define MATCH_PV_SLL_B 0x50001057 +#define MASK_PV_SLL_B 0xfe00707f +#define MATCH_PV_SLL_SC_B 0x50005057 +#define MASK_PV_SLL_SC_B 0xfe00707f +#define MATCH_PV_SLL_SCI_B 0x50007057 +#define MASK_PV_SLL_SCI_B 0xfc00707f +#define MATCH_PV_OR_H 0x58000057 +#define MASK_PV_OR_H 0xfe00707f +#define MATCH_PV_OR_SC_H 0x58004057 +#define MASK_PV_OR_SC_H 0xfe00707f +#define MATCH_PV_OR_SCI_H 0x58006057 +#define MASK_PV_OR_SCI_H 0xfc00707f +#define MATCH_PV_OR_B 0x58001057 +#define MASK_PV_OR_B 0xfe00707f +#define MATCH_PV_OR_SC_B 0x58005057 +#define MASK_PV_OR_SC_B 0xfe00707f +#define MATCH_PV_OR_SCI_B 0x58007057 +#define MASK_PV_OR_SCI_B 0xfc00707f +#define MATCH_PV_XOR_H 0x60000057 +#define MASK_PV_XOR_H 0xfe00707f +#define MATCH_PV_XOR_SC_H 0x60004057 +#define MASK_PV_XOR_SC_H 0xfe00707f +#define MATCH_PV_XOR_SCI_H 0x60006057 +#define MASK_PV_XOR_SCI_H 0xfc00707f +#define MATCH_PV_XOR_B 0x60001057 +#define MASK_PV_XOR_B 0xfe00707f +#define MATCH_PV_XOR_SC_B 0x60005057 +#define MASK_PV_XOR_SC_B 0xfe00707f +#define MATCH_PV_XOR_SCI_B 0x60007057 +#define MASK_PV_XOR_SCI_B 0xfc00707f +#define MATCH_PV_AND_H 0x68000057 +#define MASK_PV_AND_H 0xfe00707f +#define MATCH_PV_AND_SC_H 0x68004057 +#define MASK_PV_AND_SC_H 0xfe00707f +#define MATCH_PV_AND_SCI_H 0x68006057 +#define MASK_PV_AND_SCI_H 0xfc00707f +#define MATCH_PV_AND_B 0x68001057 +#define MASK_PV_AND_B 0xfe00707f +#define MATCH_PV_AND_SC_B 0x68005057 +#define MASK_PV_AND_SC_B 0xfe00707f +#define MATCH_PV_AND_SCI_B 0x68007057 +#define MASK_PV_AND_SCI_B 0xfc00707f +#define MATCH_PV_ABS_H 0x70000057 +#define MASK_PV_ABS_H 0xfff0707f +#define MATCH_PV_ABS_B 0x70001057 +#define MASK_PV_ABS_B 0xfff0707f +#define MATCH_PV_EXTRACT_H 
0x78006057 +#define MASK_PV_EXTRACT_H 0xfc00707f +#define MATCH_PV_EXTRACT_B 0x78007057 +#define MASK_PV_EXTRACT_B 0xfc00707f +#define MATCH_PV_EXTRACTU_H 0x90006057 +#define MASK_PV_EXTRACTU_H 0xfc00707f +#define MATCH_PV_EXTRACTU_B 0x90007057 +#define MASK_PV_EXTRACTU_B 0xfc00707f +#define MATCH_PV_INSERT_H 0xb0006057 +#define MASK_PV_INSERT_H 0xfc00707f +#define MATCH_PV_INSERT_B 0xb0007057 +#define MASK_PV_INSERT_B 0xfc00707f +#define MATCH_PV_DOTUP_H 0x80000057 +#define MASK_PV_DOTUP_H 0xfe00707f +#define MATCH_PV_DOTUP_SC_H 0x80004057 +#define MASK_PV_DOTUP_SC_H 0xfe00707f +#define MATCH_PV_DOTUP_SCI_H 0x80006057 +#define MASK_PV_DOTUP_SCI_H 0xfc00707f +#define MATCH_PV_DOTUP_B 0x80001057 +#define MASK_PV_DOTUP_B 0xfe00707f +#define MATCH_PV_DOTUP_SC_B 0x80005057 +#define MASK_PV_DOTUP_SC_B 0xfe00707f +#define MATCH_PV_DOTUP_SCI_B 0x80007057 +#define MASK_PV_DOTUP_SCI_B 0xfc00707f +#define MATCH_PV_DOTUSP_H 0x88000057 +#define MASK_PV_DOTUSP_H 0xfe00707f +#define MATCH_PV_DOTUSP_SC_H 0x88004057 +#define MASK_PV_DOTUSP_SC_H 0xfe00707f +#define MATCH_PV_DOTUSP_SCI_H 0x88006057 +#define MASK_PV_DOTUSP_SCI_H 0xfc00707f +#define MATCH_PV_DOTUSP_B 0x88001057 +#define MASK_PV_DOTUSP_B 0xfe00707f +#define MATCH_PV_DOTUSP_SC_B 0x88005057 +#define MASK_PV_DOTUSP_SC_B 0xfe00707f +#define MATCH_PV_DOTUSP_SCI_B 0x88007057 +#define MASK_PV_DOTUSP_SCI_B 0xfc00707f +#define MATCH_PV_DOTSP_H 0x98000057 +#define MASK_PV_DOTSP_H 0xfe00707f +#define MATCH_PV_DOTSP_SC_H 0x98004057 +#define MASK_PV_DOTSP_SC_H 0xfe00707f +#define MATCH_PV_DOTSP_SCI_H 0x98006057 +#define MASK_PV_DOTSP_SCI_H 0xfc00707f +#define MATCH_PV_DOTSP_B 0x98001057 +#define MASK_PV_DOTSP_B 0xfe00707f +#define MATCH_PV_DOTSP_SC_B 0x98005057 +#define MASK_PV_DOTSP_SC_B 0xfe00707f +#define MATCH_PV_DOTSP_SCI_B 0x98007057 +#define MASK_PV_DOTSP_SCI_B 0xfc00707f +#define MATCH_PV_SDOTUP_H 0xa0000057 +#define MASK_PV_SDOTUP_H 0xfe00707f +#define MATCH_PV_SDOTUP_SC_H 0xa0004057 +#define MASK_PV_SDOTUP_SC_H 0xfe00707f 
+#define MATCH_PV_SDOTUP_SCI_H 0xa0006057 +#define MASK_PV_SDOTUP_SCI_H 0xfc00707f +#define MATCH_PV_SDOTUP_B 0xa0001057 +#define MASK_PV_SDOTUP_B 0xfe00707f +#define MATCH_PV_SDOTUP_SC_B 0xa0005057 +#define MASK_PV_SDOTUP_SC_B 0xfe00707f +#define MATCH_PV_SDOTUP_SCI_B 0xa0007057 +#define MASK_PV_SDOTUP_SCI_B 0xfc00707f +#define MATCH_PV_SDOTUSP_H 0xa8000057 +#define MASK_PV_SDOTUSP_H 0xfe00707f +#define MATCH_PV_SDOTUSP_SC_H 0xa8004057 +#define MASK_PV_SDOTUSP_SC_H 0xfe00707f +#define MATCH_PV_SDOTUSP_SCI_H 0xa8006057 +#define MASK_PV_SDOTUSP_SCI_H 0xfc00707f +#define MATCH_PV_SDOTUSP_B 0xa8001057 +#define MASK_PV_SDOTUSP_B 0xfe00707f +#define MATCH_PV_SDOTUSP_SC_B 0xa8005057 +#define MASK_PV_SDOTUSP_SC_B 0xfe00707f +#define MATCH_PV_SDOTUSP_SCI_B 0xa8007057 +#define MASK_PV_SDOTUSP_SCI_B 0xfc00707f +#define MATCH_PV_SDOTSP_H 0xb8000057 +#define MASK_PV_SDOTSP_H 0xfe00707f +#define MATCH_PV_SDOTSP_SC_H 0xb8004057 +#define MASK_PV_SDOTSP_SC_H 0xfe00707f +#define MATCH_PV_SDOTSP_SCI_H 0xb8006057 +#define MASK_PV_SDOTSP_SCI_H 0xfc00707f +#define MATCH_PV_SDOTSP_B 0xb8001057 +#define MASK_PV_SDOTSP_B 0xfe00707f +#define MATCH_PV_SDOTSP_SC_B 0xb8005057 +#define MASK_PV_SDOTSP_SC_B 0xfe00707f +#define MATCH_PV_SDOTSP_SCI_B 0xb8007057 +#define MASK_PV_SDOTSP_SCI_B 0xfc00707f +#define MATCH_PV_SHUFFLE2_H 0xc8000057 +#define MASK_PV_SHUFFLE2_H 0xfe00707f +#define MATCH_PV_SHUFFLE2_B 0xc8001057 +#define MASK_PV_SHUFFLE2_B 0xfe00707f #define MATCH_FLAH 0x1007 #define MASK_FLAH 0x707f #define MATCH_FSAH 0x1027 @@ -2848,7 +3134,6 @@ DECLARE_INSN(custom3_rs1_rs2, MATCH_CUSTOM3_RS1_RS2, MASK_CUSTOM3_RS1_RS2) DECLARE_INSN(custom3_rd, MATCH_CUSTOM3_RD, MASK_CUSTOM3_RD) DECLARE_INSN(custom3_rd_rs1, MATCH_CUSTOM3_RD_RS1, MASK_CUSTOM3_RD_RS1) DECLARE_INSN(custom3_rd_rs1_rs2, MATCH_CUSTOM3_RD_RS1_RS2, MASK_CUSTOM3_RD_RS1_RS2) -DECLARE_INSN(frep, MATCH_FREP, MASK_FREP) DECLARE_INSN(slli_rv32, MATCH_SLLI_RV32, MASK_SLLI_RV32) DECLARE_INSN(srli_rv32, MATCH_SRLI_RV32, MASK_SRLI_RV32) 
DECLARE_INSN(srai_rv32, MATCH_SRAI_RV32, MASK_SRAI_RV32) @@ -3149,7 +3434,6 @@ DECLARE_INSN(c_fsdsp, MATCH_C_FSDSP, MASK_C_FSDSP) DECLARE_INSN(c_swsp, MATCH_C_SWSP, MASK_C_SWSP) DECLARE_INSN(c_fswsp, MATCH_C_FSWSP, MASK_C_FSWSP) DECLARE_INSN(vsetvli, MATCH_VSETVLI, MASK_VSETVLI) -DECLARE_INSN(vsetvl, MATCH_VSETVL, MASK_VSETVL) DECLARE_INSN(vle8_v, MATCH_VLE8_V, MASK_VLE8_V) DECLARE_INSN(vle16_v, MATCH_VLE16_V, MASK_VLE16_V) DECLARE_INSN(vle32_v, MATCH_VLE32_V, MASK_VLE32_V) @@ -3300,7 +3584,6 @@ DECLARE_INSN(vfnmacc_vv, MATCH_VFNMACC_VV, MASK_VFNMACC_VV) DECLARE_INSN(vfmsac_vv, MATCH_VFMSAC_VV, MASK_VFMSAC_VV) DECLARE_INSN(vfnmsac_vv, MATCH_VFNMSAC_VV, MASK_VFNMSAC_VV) DECLARE_INSN(vfcvt_xu_f_v, MATCH_VFCVT_XU_F_V, MASK_VFCVT_XU_F_V) -DECLARE_INSN(vfcvt_x_f_v, MATCH_VFCVT_X_F_V, MASK_VFCVT_X_F_V) DECLARE_INSN(vfcvt_f_xu_v, MATCH_VFCVT_F_XU_V, MASK_VFCVT_F_XU_V) DECLARE_INSN(vfcvt_f_x_v, MATCH_VFCVT_F_X_V, MASK_VFCVT_F_X_V) DECLARE_INSN(vfcvt_rtz_xu_f_v, MATCH_VFCVT_RTZ_XU_F_V, MASK_VFCVT_RTZ_XU_F_V) @@ -3349,9 +3632,7 @@ DECLARE_INSN(vxor_vx, MATCH_VXOR_VX, MASK_VXOR_VX) DECLARE_INSN(vrgather_vx, MATCH_VRGATHER_VX, MASK_VRGATHER_VX) DECLARE_INSN(vslideup_vx, MATCH_VSLIDEUP_VX, MASK_VSLIDEUP_VX) DECLARE_INSN(vslidedown_vx, MATCH_VSLIDEDOWN_VX, MASK_VSLIDEDOWN_VX) -DECLARE_INSN(vadc_vxm, MATCH_VADC_VXM, MASK_VADC_VXM) DECLARE_INSN(vmadc_vxm, MATCH_VMADC_VXM, MASK_VMADC_VXM) -DECLARE_INSN(vsbc_vxm, MATCH_VSBC_VXM, MASK_VSBC_VXM) DECLARE_INSN(vmsbc_vxm, MATCH_VMSBC_VXM, MASK_VMSBC_VXM) DECLARE_INSN(vmerge_vxm, MATCH_VMERGE_VXM, MASK_VMERGE_VXM) DECLARE_INSN(vmv_v_x, MATCH_VMV_V_X, MASK_VMV_V_X) @@ -3392,9 +3673,7 @@ DECLARE_INSN(vor_vv, MATCH_VOR_VV, MASK_VOR_VV) DECLARE_INSN(vxor_vv, MATCH_VXOR_VV, MASK_VXOR_VV) DECLARE_INSN(vrgather_vv, MATCH_VRGATHER_VV, MASK_VRGATHER_VV) DECLARE_INSN(vrgatherei16_vv, MATCH_VRGATHEREI16_VV, MASK_VRGATHEREI16_VV) -DECLARE_INSN(vadc_vvm, MATCH_VADC_VVM, MASK_VADC_VVM) DECLARE_INSN(vmadc_vvm, MATCH_VMADC_VVM, MASK_VMADC_VVM) 
-DECLARE_INSN(vsbc_vvm, MATCH_VSBC_VVM, MASK_VSBC_VVM) DECLARE_INSN(vmsbc_vvm, MATCH_VMSBC_VVM, MASK_VMSBC_VVM) DECLARE_INSN(vmerge_vvm, MATCH_VMERGE_VVM, MASK_VMERGE_VVM) DECLARE_INSN(vmv_v_v, MATCH_VMV_V_V, MASK_VMV_V_V) @@ -3519,20 +3798,14 @@ DECLARE_INSN(vwmul_vv, MATCH_VWMUL_VV, MASK_VWMUL_VV) DECLARE_INSN(vwmaccu_vv, MATCH_VWMACCU_VV, MASK_VWMACCU_VV) DECLARE_INSN(vwmacc_vv, MATCH_VWMACC_VV, MASK_VWMACC_VV) DECLARE_INSN(vwmaccsu_vv, MATCH_VWMACCSU_VV, MASK_VWMACCSU_VV) -DECLARE_INSN(vaaddu_vx, MATCH_VAADDU_VX, MASK_VAADDU_VX) DECLARE_INSN(vaadd_vx, MATCH_VAADD_VX, MASK_VAADD_VX) -DECLARE_INSN(vasubu_vx, MATCH_VASUBU_VX, MASK_VASUBU_VX) DECLARE_INSN(vasub_vx, MATCH_VASUB_VX, MASK_VASUB_VX) DECLARE_INSN(vmv_s_x, MATCH_VMV_S_X, MASK_VMV_S_X) -DECLARE_INSN(vslide1up_vx, MATCH_VSLIDE1UP_VX, MASK_VSLIDE1UP_VX) DECLARE_INSN(vslide1down_vx, MATCH_VSLIDE1DOWN_VX, MASK_VSLIDE1DOWN_VX) -DECLARE_INSN(vdivu_vx, MATCH_VDIVU_VX, MASK_VDIVU_VX) DECLARE_INSN(vdiv_vx, MATCH_VDIV_VX, MASK_VDIV_VX) DECLARE_INSN(vremu_vx, MATCH_VREMU_VX, MASK_VREMU_VX) DECLARE_INSN(vrem_vx, MATCH_VREM_VX, MASK_VREM_VX) -DECLARE_INSN(vmulhu_vx, MATCH_VMULHU_VX, MASK_VMULHU_VX) DECLARE_INSN(vmul_vx, MATCH_VMUL_VX, MASK_VMUL_VX) -DECLARE_INSN(vmulhsu_vx, MATCH_VMULHSU_VX, MASK_VMULHSU_VX) DECLARE_INSN(vmulh_vx, MATCH_VMULH_VX, MASK_VMULH_VX) DECLARE_INSN(vmadd_vx, MATCH_VMADD_VX, MASK_VMADD_VX) DECLARE_INSN(vnmsub_vx, MATCH_VNMSUB_VX, MASK_VNMSUB_VX) @@ -3608,6 +3881,30 @@ DECLARE_INSN(csrrc, MATCH_CSRRC, MASK_CSRRC) DECLARE_INSN(csrrwi, MATCH_CSRRWI, MASK_CSRRWI) DECLARE_INSN(csrrsi, MATCH_CSRRSI, MASK_CSRRSI) DECLARE_INSN(csrrci, MATCH_CSRRCI, MASK_CSRRCI) +DECLARE_INSN(p_lb_irpost, MATCH_P_LB_IRPOST, MASK_P_LB_IRPOST) +DECLARE_INSN(p_lbu_irpost, MATCH_P_LBU_IRPOST, MASK_P_LBU_IRPOST) +DECLARE_INSN(p_lh_irpost, MATCH_P_LH_IRPOST, MASK_P_LH_IRPOST) +DECLARE_INSN(p_lhu_irpost, MATCH_P_LHU_IRPOST, MASK_P_LHU_IRPOST) +DECLARE_INSN(p_lw_irpost, MATCH_P_LW_IRPOST, MASK_P_LW_IRPOST) 
+DECLARE_INSN(p_lb_rrpost, MATCH_P_LB_RRPOST, MASK_P_LB_RRPOST) +DECLARE_INSN(p_lbu_rrpost, MATCH_P_LBU_RRPOST, MASK_P_LBU_RRPOST) +DECLARE_INSN(p_lh_rrpost, MATCH_P_LH_RRPOST, MASK_P_LH_RRPOST) +DECLARE_INSN(p_lhu_rrpost, MATCH_P_LHU_RRPOST, MASK_P_LHU_RRPOST) +DECLARE_INSN(p_lw_rrpost, MATCH_P_LW_RRPOST, MASK_P_LW_RRPOST) +DECLARE_INSN(p_lb_rr, MATCH_P_LB_RR, MASK_P_LB_RR) +DECLARE_INSN(p_lbu_rr, MATCH_P_LBU_RR, MASK_P_LBU_RR) +DECLARE_INSN(p_lh_rr, MATCH_P_LH_RR, MASK_P_LH_RR) +DECLARE_INSN(p_lhu_rr, MATCH_P_LHU_RR, MASK_P_LHU_RR) +DECLARE_INSN(p_lw_rr, MATCH_P_LW_RR, MASK_P_LW_RR) +DECLARE_INSN(p_sb_irpost, MATCH_P_SB_IRPOST, MASK_P_SB_IRPOST) +DECLARE_INSN(p_sh_irpost, MATCH_P_SH_IRPOST, MASK_P_SH_IRPOST) +DECLARE_INSN(p_sw_irpost, MATCH_P_SW_IRPOST, MASK_P_SW_IRPOST) +DECLARE_INSN(p_sb_rrpost, MATCH_P_SB_RRPOST, MASK_P_SB_RRPOST) +DECLARE_INSN(p_sh_rrpost, MATCH_P_SH_RRPOST, MASK_P_SH_RRPOST) +DECLARE_INSN(p_sw_rrpost, MATCH_P_SW_RRPOST, MASK_P_SW_RRPOST) +DECLARE_INSN(p_sb_rr, MATCH_P_SB_RR, MASK_P_SB_RR) +DECLARE_INSN(p_sh_rr, MATCH_P_SH_RR, MASK_P_SH_RR) +DECLARE_INSN(p_sw_rr, MATCH_P_SW_RR, MASK_P_SW_RR) DECLARE_INSN(p_abs, MATCH_P_ABS, MASK_P_ABS) DECLARE_INSN(p_slet, MATCH_P_SLET, MASK_P_SLET) DECLARE_INSN(p_sletu, MATCH_P_SLETU, MASK_P_SLETU) @@ -3625,6 +3922,138 @@ DECLARE_INSN(p_clipr, MATCH_P_CLIPR, MASK_P_CLIPR) DECLARE_INSN(p_clipur, MATCH_P_CLIPUR, MASK_P_CLIPUR) DECLARE_INSN(p_beqimm, MATCH_P_BEQIMM, MASK_P_BEQIMM) DECLARE_INSN(p_bneimm, MATCH_P_BNEIMM, MASK_P_BNEIMM) +DECLARE_INSN(p_mac, MATCH_P_MAC, MASK_P_MAC) +DECLARE_INSN(p_msu, MATCH_P_MSU, MASK_P_MSU) +DECLARE_INSN(pv_add_h, MATCH_PV_ADD_H, MASK_PV_ADD_H) +DECLARE_INSN(pv_add_sc_h, MATCH_PV_ADD_SC_H, MASK_PV_ADD_SC_H) +DECLARE_INSN(pv_add_sci_h, MATCH_PV_ADD_SCI_H, MASK_PV_ADD_SCI_H) +DECLARE_INSN(pv_add_b, MATCH_PV_ADD_B, MASK_PV_ADD_B) +DECLARE_INSN(pv_add_sc_b, MATCH_PV_ADD_SC_B, MASK_PV_ADD_SC_B) +DECLARE_INSN(pv_add_sci_b, MATCH_PV_ADD_SCI_B, MASK_PV_ADD_SCI_B) 
+DECLARE_INSN(pv_sub_h, MATCH_PV_SUB_H, MASK_PV_SUB_H) +DECLARE_INSN(pv_sub_sc_h, MATCH_PV_SUB_SC_H, MASK_PV_SUB_SC_H) +DECLARE_INSN(pv_sub_sci_h, MATCH_PV_SUB_SCI_H, MASK_PV_SUB_SCI_H) +DECLARE_INSN(pv_sub_b, MATCH_PV_SUB_B, MASK_PV_SUB_B) +DECLARE_INSN(pv_sub_sc_b, MATCH_PV_SUB_SC_B, MASK_PV_SUB_SC_B) +DECLARE_INSN(pv_sub_sci_b, MATCH_PV_SUB_SCI_B, MASK_PV_SUB_SCI_B) +DECLARE_INSN(pv_avg_h, MATCH_PV_AVG_H, MASK_PV_AVG_H) +DECLARE_INSN(pv_avg_sc_h, MATCH_PV_AVG_SC_H, MASK_PV_AVG_SC_H) +DECLARE_INSN(pv_avg_sci_h, MATCH_PV_AVG_SCI_H, MASK_PV_AVG_SCI_H) +DECLARE_INSN(pv_avg_b, MATCH_PV_AVG_B, MASK_PV_AVG_B) +DECLARE_INSN(pv_avg_sc_b, MATCH_PV_AVG_SC_B, MASK_PV_AVG_SC_B) +DECLARE_INSN(pv_avg_sci_b, MATCH_PV_AVG_SCI_B, MASK_PV_AVG_SCI_B) +DECLARE_INSN(pv_avgu_h, MATCH_PV_AVGU_H, MASK_PV_AVGU_H) +DECLARE_INSN(pv_avgu_sc_h, MATCH_PV_AVGU_SC_H, MASK_PV_AVGU_SC_H) +DECLARE_INSN(pv_avgu_sci_h, MATCH_PV_AVGU_SCI_H, MASK_PV_AVGU_SCI_H) +DECLARE_INSN(pv_avgu_b, MATCH_PV_AVGU_B, MASK_PV_AVGU_B) +DECLARE_INSN(pv_avgu_sc_b, MATCH_PV_AVGU_SC_B, MASK_PV_AVGU_SC_B) +DECLARE_INSN(pv_avgu_sci_b, MATCH_PV_AVGU_SCI_B, MASK_PV_AVGU_SCI_B) +DECLARE_INSN(pv_min_h, MATCH_PV_MIN_H, MASK_PV_MIN_H) +DECLARE_INSN(pv_min_sc_h, MATCH_PV_MIN_SC_H, MASK_PV_MIN_SC_H) +DECLARE_INSN(pv_min_sci_h, MATCH_PV_MIN_SCI_H, MASK_PV_MIN_SCI_H) +DECLARE_INSN(pv_min_b, MATCH_PV_MIN_B, MASK_PV_MIN_B) +DECLARE_INSN(pv_min_sc_b, MATCH_PV_MIN_SC_B, MASK_PV_MIN_SC_B) +DECLARE_INSN(pv_min_sci_b, MATCH_PV_MIN_SCI_B, MASK_PV_MIN_SCI_B) +DECLARE_INSN(pv_minu_h, MATCH_PV_MINU_H, MASK_PV_MINU_H) +DECLARE_INSN(pv_minu_sc_h, MATCH_PV_MINU_SC_H, MASK_PV_MINU_SC_H) +DECLARE_INSN(pv_minu_sci_h, MATCH_PV_MINU_SCI_H, MASK_PV_MINU_SCI_H) +DECLARE_INSN(pv_minu_b, MATCH_PV_MINU_B, MASK_PV_MINU_B) +DECLARE_INSN(pv_minu_sc_b, MATCH_PV_MINU_SC_B, MASK_PV_MINU_SC_B) +DECLARE_INSN(pv_minu_sci_b, MATCH_PV_MINU_SCI_B, MASK_PV_MINU_SCI_B) +DECLARE_INSN(pv_max_h, MATCH_PV_MAX_H, MASK_PV_MAX_H) +DECLARE_INSN(pv_max_sc_h, MATCH_PV_MAX_SC_H, 
MASK_PV_MAX_SC_H) +DECLARE_INSN(pv_max_sci_h, MATCH_PV_MAX_SCI_H, MASK_PV_MAX_SCI_H) +DECLARE_INSN(pv_max_b, MATCH_PV_MAX_B, MASK_PV_MAX_B) +DECLARE_INSN(pv_max_sc_b, MATCH_PV_MAX_SC_B, MASK_PV_MAX_SC_B) +DECLARE_INSN(pv_max_sci_b, MATCH_PV_MAX_SCI_B, MASK_PV_MAX_SCI_B) +DECLARE_INSN(pv_maxu_h, MATCH_PV_MAXU_H, MASK_PV_MAXU_H) +DECLARE_INSN(pv_maxu_sc_h, MATCH_PV_MAXU_SC_H, MASK_PV_MAXU_SC_H) +DECLARE_INSN(pv_maxu_sci_h, MATCH_PV_MAXU_SCI_H, MASK_PV_MAXU_SCI_H) +DECLARE_INSN(pv_maxu_b, MATCH_PV_MAXU_B, MASK_PV_MAXU_B) +DECLARE_INSN(pv_maxu_sc_b, MATCH_PV_MAXU_SC_B, MASK_PV_MAXU_SC_B) +DECLARE_INSN(pv_maxu_sci_b, MATCH_PV_MAXU_SCI_B, MASK_PV_MAXU_SCI_B) +DECLARE_INSN(pv_srl_h, MATCH_PV_SRL_H, MASK_PV_SRL_H) +DECLARE_INSN(pv_srl_sc_h, MATCH_PV_SRL_SC_H, MASK_PV_SRL_SC_H) +DECLARE_INSN(pv_srl_sci_h, MATCH_PV_SRL_SCI_H, MASK_PV_SRL_SCI_H) +DECLARE_INSN(pv_srl_b, MATCH_PV_SRL_B, MASK_PV_SRL_B) +DECLARE_INSN(pv_srl_sc_b, MATCH_PV_SRL_SC_B, MASK_PV_SRL_SC_B) +DECLARE_INSN(pv_srl_sci_b, MATCH_PV_SRL_SCI_B, MASK_PV_SRL_SCI_B) +DECLARE_INSN(pv_sra_h, MATCH_PV_SRA_H, MASK_PV_SRA_H) +DECLARE_INSN(pv_sra_sc_h, MATCH_PV_SRA_SC_H, MASK_PV_SRA_SC_H) +DECLARE_INSN(pv_sra_sci_h, MATCH_PV_SRA_SCI_H, MASK_PV_SRA_SCI_H) +DECLARE_INSN(pv_sra_b, MATCH_PV_SRA_B, MASK_PV_SRA_B) +DECLARE_INSN(pv_sra_sc_b, MATCH_PV_SRA_SC_B, MASK_PV_SRA_SC_B) +DECLARE_INSN(pv_sra_sci_b, MATCH_PV_SRA_SCI_B, MASK_PV_SRA_SCI_B) +DECLARE_INSN(pv_sll_h, MATCH_PV_SLL_H, MASK_PV_SLL_H) +DECLARE_INSN(pv_sll_sc_h, MATCH_PV_SLL_SC_H, MASK_PV_SLL_SC_H) +DECLARE_INSN(pv_sll_sci_h, MATCH_PV_SLL_SCI_H, MASK_PV_SLL_SCI_H) +DECLARE_INSN(pv_sll_b, MATCH_PV_SLL_B, MASK_PV_SLL_B) +DECLARE_INSN(pv_sll_sc_b, MATCH_PV_SLL_SC_B, MASK_PV_SLL_SC_B) +DECLARE_INSN(pv_sll_sci_b, MATCH_PV_SLL_SCI_B, MASK_PV_SLL_SCI_B) +DECLARE_INSN(pv_or_h, MATCH_PV_OR_H, MASK_PV_OR_H) +DECLARE_INSN(pv_or_sc_h, MATCH_PV_OR_SC_H, MASK_PV_OR_SC_H) +DECLARE_INSN(pv_or_sci_h, MATCH_PV_OR_SCI_H, MASK_PV_OR_SCI_H) +DECLARE_INSN(pv_or_b, MATCH_PV_OR_B, 
MASK_PV_OR_B) +DECLARE_INSN(pv_or_sc_b, MATCH_PV_OR_SC_B, MASK_PV_OR_SC_B) +DECLARE_INSN(pv_or_sci_b, MATCH_PV_OR_SCI_B, MASK_PV_OR_SCI_B) +DECLARE_INSN(pv_xor_h, MATCH_PV_XOR_H, MASK_PV_XOR_H) +DECLARE_INSN(pv_xor_sc_h, MATCH_PV_XOR_SC_H, MASK_PV_XOR_SC_H) +DECLARE_INSN(pv_xor_sci_h, MATCH_PV_XOR_SCI_H, MASK_PV_XOR_SCI_H) +DECLARE_INSN(pv_xor_b, MATCH_PV_XOR_B, MASK_PV_XOR_B) +DECLARE_INSN(pv_xor_sc_b, MATCH_PV_XOR_SC_B, MASK_PV_XOR_SC_B) +DECLARE_INSN(pv_xor_sci_b, MATCH_PV_XOR_SCI_B, MASK_PV_XOR_SCI_B) +DECLARE_INSN(pv_and_h, MATCH_PV_AND_H, MASK_PV_AND_H) +DECLARE_INSN(pv_and_sc_h, MATCH_PV_AND_SC_H, MASK_PV_AND_SC_H) +DECLARE_INSN(pv_and_sci_h, MATCH_PV_AND_SCI_H, MASK_PV_AND_SCI_H) +DECLARE_INSN(pv_and_b, MATCH_PV_AND_B, MASK_PV_AND_B) +DECLARE_INSN(pv_and_sc_b, MATCH_PV_AND_SC_B, MASK_PV_AND_SC_B) +DECLARE_INSN(pv_and_sci_b, MATCH_PV_AND_SCI_B, MASK_PV_AND_SCI_B) +DECLARE_INSN(pv_abs_h, MATCH_PV_ABS_H, MASK_PV_ABS_H) +DECLARE_INSN(pv_abs_b, MATCH_PV_ABS_B, MASK_PV_ABS_B) +DECLARE_INSN(pv_extract_h, MATCH_PV_EXTRACT_H, MASK_PV_EXTRACT_H) +DECLARE_INSN(pv_extract_b, MATCH_PV_EXTRACT_B, MASK_PV_EXTRACT_B) +DECLARE_INSN(pv_extractu_h, MATCH_PV_EXTRACTU_H, MASK_PV_EXTRACTU_H) +DECLARE_INSN(pv_extractu_b, MATCH_PV_EXTRACTU_B, MASK_PV_EXTRACTU_B) +DECLARE_INSN(pv_insert_h, MATCH_PV_INSERT_H, MASK_PV_INSERT_H) +DECLARE_INSN(pv_insert_b, MATCH_PV_INSERT_B, MASK_PV_INSERT_B) +DECLARE_INSN(pv_dotup_h, MATCH_PV_DOTUP_H, MASK_PV_DOTUP_H) +DECLARE_INSN(pv_dotup_sc_h, MATCH_PV_DOTUP_SC_H, MASK_PV_DOTUP_SC_H) +DECLARE_INSN(pv_dotup_sci_h, MATCH_PV_DOTUP_SCI_H, MASK_PV_DOTUP_SCI_H) +DECLARE_INSN(pv_dotup_b, MATCH_PV_DOTUP_B, MASK_PV_DOTUP_B) +DECLARE_INSN(pv_dotup_sc_b, MATCH_PV_DOTUP_SC_B, MASK_PV_DOTUP_SC_B) +DECLARE_INSN(pv_dotup_sci_b, MATCH_PV_DOTUP_SCI_B, MASK_PV_DOTUP_SCI_B) +DECLARE_INSN(pv_dotusp_h, MATCH_PV_DOTUSP_H, MASK_PV_DOTUSP_H) +DECLARE_INSN(pv_dotusp_sc_h, MATCH_PV_DOTUSP_SC_H, MASK_PV_DOTUSP_SC_H) +DECLARE_INSN(pv_dotusp_sci_h, MATCH_PV_DOTUSP_SCI_H, 
MASK_PV_DOTUSP_SCI_H) +DECLARE_INSN(pv_dotusp_b, MATCH_PV_DOTUSP_B, MASK_PV_DOTUSP_B) +DECLARE_INSN(pv_dotusp_sc_b, MATCH_PV_DOTUSP_SC_B, MASK_PV_DOTUSP_SC_B) +DECLARE_INSN(pv_dotusp_sci_b, MATCH_PV_DOTUSP_SCI_B, MASK_PV_DOTUSP_SCI_B) +DECLARE_INSN(pv_dotsp_h, MATCH_PV_DOTSP_H, MASK_PV_DOTSP_H) +DECLARE_INSN(pv_dotsp_sc_h, MATCH_PV_DOTSP_SC_H, MASK_PV_DOTSP_SC_H) +DECLARE_INSN(pv_dotsp_sci_h, MATCH_PV_DOTSP_SCI_H, MASK_PV_DOTSP_SCI_H) +DECLARE_INSN(pv_dotsp_b, MATCH_PV_DOTSP_B, MASK_PV_DOTSP_B) +DECLARE_INSN(pv_dotsp_sc_b, MATCH_PV_DOTSP_SC_B, MASK_PV_DOTSP_SC_B) +DECLARE_INSN(pv_dotsp_sci_b, MATCH_PV_DOTSP_SCI_B, MASK_PV_DOTSP_SCI_B) +DECLARE_INSN(pv_sdotup_h, MATCH_PV_SDOTUP_H, MASK_PV_SDOTUP_H) +DECLARE_INSN(pv_sdotup_sc_h, MATCH_PV_SDOTUP_SC_H, MASK_PV_SDOTUP_SC_H) +DECLARE_INSN(pv_sdotup_sci_h, MATCH_PV_SDOTUP_SCI_H, MASK_PV_SDOTUP_SCI_H) +DECLARE_INSN(pv_sdotup_b, MATCH_PV_SDOTUP_B, MASK_PV_SDOTUP_B) +DECLARE_INSN(pv_sdotup_sc_b, MATCH_PV_SDOTUP_SC_B, MASK_PV_SDOTUP_SC_B) +DECLARE_INSN(pv_sdotup_sci_b, MATCH_PV_SDOTUP_SCI_B, MASK_PV_SDOTUP_SCI_B) +DECLARE_INSN(pv_sdotusp_h, MATCH_PV_SDOTUSP_H, MASK_PV_SDOTUSP_H) +DECLARE_INSN(pv_sdotusp_sc_h, MATCH_PV_SDOTUSP_SC_H, MASK_PV_SDOTUSP_SC_H) +DECLARE_INSN(pv_sdotusp_sci_h, MATCH_PV_SDOTUSP_SCI_H, MASK_PV_SDOTUSP_SCI_H) +DECLARE_INSN(pv_sdotusp_b, MATCH_PV_SDOTUSP_B, MASK_PV_SDOTUSP_B) +DECLARE_INSN(pv_sdotusp_sc_b, MATCH_PV_SDOTUSP_SC_B, MASK_PV_SDOTUSP_SC_B) +DECLARE_INSN(pv_sdotusp_sci_b, MATCH_PV_SDOTUSP_SCI_B, MASK_PV_SDOTUSP_SCI_B) +DECLARE_INSN(pv_sdotsp_h, MATCH_PV_SDOTSP_H, MASK_PV_SDOTSP_H) +DECLARE_INSN(pv_sdotsp_sc_h, MATCH_PV_SDOTSP_SC_H, MASK_PV_SDOTSP_SC_H) +DECLARE_INSN(pv_sdotsp_sci_h, MATCH_PV_SDOTSP_SCI_H, MASK_PV_SDOTSP_SCI_H) +DECLARE_INSN(pv_sdotsp_b, MATCH_PV_SDOTSP_B, MASK_PV_SDOTSP_B) +DECLARE_INSN(pv_sdotsp_sc_b, MATCH_PV_SDOTSP_SC_B, MASK_PV_SDOTSP_SC_B) +DECLARE_INSN(pv_sdotsp_sci_b, MATCH_PV_SDOTSP_SCI_B, MASK_PV_SDOTSP_SCI_B) +DECLARE_INSN(pv_shuffle2_h, MATCH_PV_SHUFFLE2_H, 
MASK_PV_SHUFFLE2_H) +DECLARE_INSN(pv_shuffle2_b, MATCH_PV_SHUFFLE2_B, MASK_PV_SHUFFLE2_B) DECLARE_INSN(flah, MATCH_FLAH, MASK_FLAH) DECLARE_INSN(fsah, MATCH_FSAH, MASK_FSAH) DECLARE_INSN(fmadd_ah, MATCH_FMADD_AH, MASK_FMADD_AH) diff --git a/toolchain/riscv-opcodes/inst.sverilog b/toolchain/riscv-opcodes/inst.sverilog index 00ee613df..f50df39ca 100644 --- a/toolchain/riscv-opcodes/inst.sverilog +++ b/toolchain/riscv-opcodes/inst.sverilog @@ -24,7 +24,6 @@ package riscv_instr; localparam [31:0] CUSTOM3_RD = 32'b?????????????????100?????1111011; localparam [31:0] CUSTOM3_RD_RS1 = 32'b?????????????????110?????1111011; localparam [31:0] CUSTOM3_RD_RS1_RS2 = 32'b?????????????????111?????1111011; - localparam [31:0] FREP = 32'b?????????????????????????0001011; localparam [31:0] SLLI_RV32 = 32'b0000000??????????001?????0010011; localparam [31:0] SRLI_RV32 = 32'b0000000??????????101?????0010011; localparam [31:0] SRAI_RV32 = 32'b0100000??????????101?????0010011; @@ -325,7 +324,6 @@ package riscv_instr; localparam [31:0] C_SWSP = 32'b????????????????110???????????10; localparam [31:0] C_FSWSP = 32'b????????????????111???????????10; localparam [31:0] VSETVLI = 32'b0????????????????111?????1010111; - localparam [31:0] VSETVL = 32'b1000000??????????111?????1010111; localparam [31:0] VLE8_V = 32'b???000?00000?????000?????0000111; localparam [31:0] VLE16_V = 32'b???000?00000?????101?????0000111; localparam [31:0] VLE32_V = 32'b???000?00000?????110?????0000111; @@ -476,7 +474,6 @@ package riscv_instr; localparam [31:0] VFMSAC_VV = 32'b101110???????????001?????1010111; localparam [31:0] VFNMSAC_VV = 32'b101111???????????001?????1010111; localparam [31:0] VFCVT_XU_F_V = 32'b010010??????00000001?????1010111; - localparam [31:0] VFCVT_X_F_V = 32'b010010??????00001001?????1010111; localparam [31:0] VFCVT_F_XU_V = 32'b010010??????00010001?????1010111; localparam [31:0] VFCVT_F_X_V = 32'b010010??????00011001?????1010111; localparam [31:0] VFCVT_RTZ_XU_F_V = 
32'b010010??????00110001?????1010111; @@ -525,9 +522,7 @@ package riscv_instr; localparam [31:0] VRGATHER_VX = 32'b001100???????????100?????1010111; localparam [31:0] VSLIDEUP_VX = 32'b001110???????????100?????1010111; localparam [31:0] VSLIDEDOWN_VX = 32'b001111???????????100?????1010111; - localparam [31:0] VADC_VXM = 32'b0100000??????????100?????1010111; localparam [31:0] VMADC_VXM = 32'b010001???????????100?????1010111; - localparam [31:0] VSBC_VXM = 32'b0100100??????????100?????1010111; localparam [31:0] VMSBC_VXM = 32'b010011???????????100?????1010111; localparam [31:0] VMERGE_VXM = 32'b0101110??????????100?????1010111; localparam [31:0] VMV_V_X = 32'b010111100000?????100?????1010111; @@ -568,9 +563,7 @@ package riscv_instr; localparam [31:0] VXOR_VV = 32'b001011???????????000?????1010111; localparam [31:0] VRGATHER_VV = 32'b001100???????????000?????1010111; localparam [31:0] VRGATHEREI16_VV = 32'b001110???????????000?????1010111; - localparam [31:0] VADC_VVM = 32'b0100000??????????000?????1010111; localparam [31:0] VMADC_VVM = 32'b010001???????????000?????1010111; - localparam [31:0] VSBC_VVM = 32'b0100100??????????000?????1010111; localparam [31:0] VMSBC_VVM = 32'b010011???????????000?????1010111; localparam [31:0] VMERGE_VVM = 32'b0101110??????????000?????1010111; localparam [31:0] VMV_V_V = 32'b010111100000?????000?????1010111; @@ -695,20 +688,14 @@ package riscv_instr; localparam [31:0] VWMACCU_VV = 32'b111100???????????010?????1010111; localparam [31:0] VWMACC_VV = 32'b111101???????????010?????1010111; localparam [31:0] VWMACCSU_VV = 32'b111111???????????010?????1010111; - localparam [31:0] VAADDU_VX = 32'b001000???????????110?????1010111; localparam [31:0] VAADD_VX = 32'b001001???????????110?????1010111; - localparam [31:0] VASUBU_VX = 32'b001010???????????110?????1010111; localparam [31:0] VASUB_VX = 32'b001011???????????110?????1010111; localparam [31:0] VMV_S_X = 32'b010000100000?????110?????1010111; - localparam [31:0] VSLIDE1UP_VX = 
32'b001110???????????110?????1010111; localparam [31:0] VSLIDE1DOWN_VX = 32'b001111???????????110?????1010111; - localparam [31:0] VDIVU_VX = 32'b100000???????????110?????1010111; localparam [31:0] VDIV_VX = 32'b100001???????????110?????1010111; localparam [31:0] VREMU_VX = 32'b100010???????????110?????1010111; localparam [31:0] VREM_VX = 32'b100011???????????110?????1010111; - localparam [31:0] VMULHU_VX = 32'b100100???????????110?????1010111; localparam [31:0] VMUL_VX = 32'b100101???????????110?????1010111; - localparam [31:0] VMULHSU_VX = 32'b100110???????????110?????1010111; localparam [31:0] VMULH_VX = 32'b100111???????????110?????1010111; localparam [31:0] VMADD_VX = 32'b101001???????????110?????1010111; localparam [31:0] VNMSUB_VX = 32'b101011???????????110?????1010111; @@ -784,6 +771,30 @@ package riscv_instr; localparam [31:0] CSRRWI = 32'b?????????????????101?????1110011; localparam [31:0] CSRRSI = 32'b?????????????????110?????1110011; localparam [31:0] CSRRCI = 32'b?????????????????111?????1110011; + localparam [31:0] P_LB_IRPOST = 32'b?????????????????000?????0001011; + localparam [31:0] P_LBU_IRPOST = 32'b?????????????????100?????0001011; + localparam [31:0] P_LH_IRPOST = 32'b?????????????????001?????0001011; + localparam [31:0] P_LHU_IRPOST = 32'b?????????????????101?????0001011; + localparam [31:0] P_LW_IRPOST = 32'b?????????????????010?????0001011; + localparam [31:0] P_LB_RRPOST = 32'b0000000??????????111?????0001011; + localparam [31:0] P_LBU_RRPOST = 32'b0100000??????????111?????0001011; + localparam [31:0] P_LH_RRPOST = 32'b0001000??????????111?????0001011; + localparam [31:0] P_LHU_RRPOST = 32'b0101000??????????111?????0001011; + localparam [31:0] P_LW_RRPOST = 32'b0010000??????????111?????0001011; + localparam [31:0] P_LB_RR = 32'b0000000??????????111?????0000011; + localparam [31:0] P_LBU_RR = 32'b0100000??????????111?????0000011; + localparam [31:0] P_LH_RR = 32'b0001000??????????111?????0000011; + localparam [31:0] P_LHU_RR = 
32'b0101000??????????111?????0000011; + localparam [31:0] P_LW_RR = 32'b0010000??????????111?????0000011; + localparam [31:0] P_SB_IRPOST = 32'b?????????????????000?????0101011; + localparam [31:0] P_SH_IRPOST = 32'b?????????????????001?????0101011; + localparam [31:0] P_SW_IRPOST = 32'b?????????????????010?????0101011; + localparam [31:0] P_SB_RRPOST = 32'b0000000??????????100?????0101011; + localparam [31:0] P_SH_RRPOST = 32'b0000000??????????101?????0101011; + localparam [31:0] P_SW_RRPOST = 32'b0000000??????????110?????0101011; + localparam [31:0] P_SB_RR = 32'b0000000??????????100?????0100011; + localparam [31:0] P_SH_RR = 32'b0000000??????????101?????0100011; + localparam [31:0] P_SW_RR = 32'b0000000??????????110?????0100011; localparam [31:0] P_ABS = 32'b000001000000?????000?????0110011; localparam [31:0] P_SLET = 32'b0000010??????????010?????0110011; localparam [31:0] P_SLETU = 32'b0000010??????????011?????0110011; @@ -801,6 +812,138 @@ package riscv_instr; localparam [31:0] P_CLIPUR = 32'b0001010??????????110?????0110011; localparam [31:0] P_BEQIMM = 32'b?????????????????010?????1100011; localparam [31:0] P_BNEIMM = 32'b?????????????????011?????1100011; + localparam [31:0] P_MAC = 32'b0100001??????????000?????0110011; + localparam [31:0] P_MSU = 32'b0100001??????????001?????0110011; + localparam [31:0] PV_ADD_H = 32'b0000000??????????000?????1010111; + localparam [31:0] PV_ADD_SC_H = 32'b0000000??????????100?????1010111; + localparam [31:0] PV_ADD_SCI_H = 32'b000000???????????110?????1010111; + localparam [31:0] PV_ADD_B = 32'b0000000??????????001?????1010111; + localparam [31:0] PV_ADD_SC_B = 32'b0000000??????????101?????1010111; + localparam [31:0] PV_ADD_SCI_B = 32'b000000???????????111?????1010111; + localparam [31:0] PV_SUB_H = 32'b0000100??????????000?????1010111; + localparam [31:0] PV_SUB_SC_H = 32'b0000100??????????100?????1010111; + localparam [31:0] PV_SUB_SCI_H = 32'b000010???????????110?????1010111; + localparam [31:0] PV_SUB_B = 
32'b0000100??????????001?????1010111; + localparam [31:0] PV_SUB_SC_B = 32'b0000100??????????101?????1010111; + localparam [31:0] PV_SUB_SCI_B = 32'b000010???????????111?????1010111; + localparam [31:0] PV_AVG_H = 32'b0001000??????????000?????1010111; + localparam [31:0] PV_AVG_SC_H = 32'b0001000??????????100?????1010111; + localparam [31:0] PV_AVG_SCI_H = 32'b000100???????????110?????1010111; + localparam [31:0] PV_AVG_B = 32'b0001000??????????001?????1010111; + localparam [31:0] PV_AVG_SC_B = 32'b0001000??????????101?????1010111; + localparam [31:0] PV_AVG_SCI_B = 32'b000100???????????111?????1010111; + localparam [31:0] PV_AVGU_H = 32'b0001100??????????000?????1010111; + localparam [31:0] PV_AVGU_SC_H = 32'b0001100??????????100?????1010111; + localparam [31:0] PV_AVGU_SCI_H = 32'b000110???????????110?????1010111; + localparam [31:0] PV_AVGU_B = 32'b0001100??????????001?????1010111; + localparam [31:0] PV_AVGU_SC_B = 32'b0001100??????????101?????1010111; + localparam [31:0] PV_AVGU_SCI_B = 32'b000110???????????111?????1010111; + localparam [31:0] PV_MIN_H = 32'b0010000??????????000?????1010111; + localparam [31:0] PV_MIN_SC_H = 32'b0010000??????????100?????1010111; + localparam [31:0] PV_MIN_SCI_H = 32'b001000???????????110?????1010111; + localparam [31:0] PV_MIN_B = 32'b0010000??????????001?????1010111; + localparam [31:0] PV_MIN_SC_B = 32'b0010000??????????101?????1010111; + localparam [31:0] PV_MIN_SCI_B = 32'b001000???????????111?????1010111; + localparam [31:0] PV_MINU_H = 32'b0010100??????????000?????1010111; + localparam [31:0] PV_MINU_SC_H = 32'b0010100??????????100?????1010111; + localparam [31:0] PV_MINU_SCI_H = 32'b001010???????????110?????1010111; + localparam [31:0] PV_MINU_B = 32'b0010100??????????001?????1010111; + localparam [31:0] PV_MINU_SC_B = 32'b0010100??????????101?????1010111; + localparam [31:0] PV_MINU_SCI_B = 32'b001010???????????111?????1010111; + localparam [31:0] PV_MAX_H = 32'b0011000??????????000?????1010111; + localparam [31:0] 
PV_MAX_SC_H = 32'b0011000??????????100?????1010111; + localparam [31:0] PV_MAX_SCI_H = 32'b001100???????????110?????1010111; + localparam [31:0] PV_MAX_B = 32'b0011000??????????001?????1010111; + localparam [31:0] PV_MAX_SC_B = 32'b0011000??????????101?????1010111; + localparam [31:0] PV_MAX_SCI_B = 32'b001100???????????111?????1010111; + localparam [31:0] PV_MAXU_H = 32'b0011100??????????000?????1010111; + localparam [31:0] PV_MAXU_SC_H = 32'b0011100??????????100?????1010111; + localparam [31:0] PV_MAXU_SCI_H = 32'b001110???????????110?????1010111; + localparam [31:0] PV_MAXU_B = 32'b0011100??????????001?????1010111; + localparam [31:0] PV_MAXU_SC_B = 32'b0011100??????????101?????1010111; + localparam [31:0] PV_MAXU_SCI_B = 32'b001110???????????111?????1010111; + localparam [31:0] PV_SRL_H = 32'b0100000??????????000?????1010111; + localparam [31:0] PV_SRL_SC_H = 32'b0100000??????????100?????1010111; + localparam [31:0] PV_SRL_SCI_H = 32'b010000???????????110?????1010111; + localparam [31:0] PV_SRL_B = 32'b0100000??????????001?????1010111; + localparam [31:0] PV_SRL_SC_B = 32'b0100000??????????101?????1010111; + localparam [31:0] PV_SRL_SCI_B = 32'b010000???????????111?????1010111; + localparam [31:0] PV_SRA_H = 32'b0100100??????????000?????1010111; + localparam [31:0] PV_SRA_SC_H = 32'b0100100??????????100?????1010111; + localparam [31:0] PV_SRA_SCI_H = 32'b010010???????????110?????1010111; + localparam [31:0] PV_SRA_B = 32'b0100100??????????001?????1010111; + localparam [31:0] PV_SRA_SC_B = 32'b0100100??????????101?????1010111; + localparam [31:0] PV_SRA_SCI_B = 32'b010010???????????111?????1010111; + localparam [31:0] PV_SLL_H = 32'b0101000??????????000?????1010111; + localparam [31:0] PV_SLL_SC_H = 32'b0101000??????????100?????1010111; + localparam [31:0] PV_SLL_SCI_H = 32'b010100???????????110?????1010111; + localparam [31:0] PV_SLL_B = 32'b0101000??????????001?????1010111; + localparam [31:0] PV_SLL_SC_B = 32'b0101000??????????101?????1010111; + localparam 
[31:0] PV_SLL_SCI_B = 32'b010100???????????111?????1010111; + localparam [31:0] PV_OR_H = 32'b0101100??????????000?????1010111; + localparam [31:0] PV_OR_SC_H = 32'b0101100??????????100?????1010111; + localparam [31:0] PV_OR_SCI_H = 32'b010110???????????110?????1010111; + localparam [31:0] PV_OR_B = 32'b0101100??????????001?????1010111; + localparam [31:0] PV_OR_SC_B = 32'b0101100??????????101?????1010111; + localparam [31:0] PV_OR_SCI_B = 32'b010110???????????111?????1010111; + localparam [31:0] PV_XOR_H = 32'b0110000??????????000?????1010111; + localparam [31:0] PV_XOR_SC_H = 32'b0110000??????????100?????1010111; + localparam [31:0] PV_XOR_SCI_H = 32'b011000???????????110?????1010111; + localparam [31:0] PV_XOR_B = 32'b0110000??????????001?????1010111; + localparam [31:0] PV_XOR_SC_B = 32'b0110000??????????101?????1010111; + localparam [31:0] PV_XOR_SCI_B = 32'b011000???????????111?????1010111; + localparam [31:0] PV_AND_H = 32'b0110100??????????000?????1010111; + localparam [31:0] PV_AND_SC_H = 32'b0110100??????????100?????1010111; + localparam [31:0] PV_AND_SCI_H = 32'b011010???????????110?????1010111; + localparam [31:0] PV_AND_B = 32'b0110100??????????001?????1010111; + localparam [31:0] PV_AND_SC_B = 32'b0110100??????????101?????1010111; + localparam [31:0] PV_AND_SCI_B = 32'b011010???????????111?????1010111; + localparam [31:0] PV_ABS_H = 32'b011100000000?????000?????1010111; + localparam [31:0] PV_ABS_B = 32'b011100000000?????001?????1010111; + localparam [31:0] PV_EXTRACT_H = 32'b011110???????????110?????1010111; + localparam [31:0] PV_EXTRACT_B = 32'b011110???????????111?????1010111; + localparam [31:0] PV_EXTRACTU_H = 32'b100100???????????110?????1010111; + localparam [31:0] PV_EXTRACTU_B = 32'b100100???????????111?????1010111; + localparam [31:0] PV_INSERT_H = 32'b101100???????????110?????1010111; + localparam [31:0] PV_INSERT_B = 32'b101100???????????111?????1010111; + localparam [31:0] PV_DOTUP_H = 32'b1000000??????????000?????1010111; + localparam 
[31:0] PV_DOTUP_SC_H = 32'b1000000??????????100?????1010111; + localparam [31:0] PV_DOTUP_SCI_H = 32'b100000???????????110?????1010111; + localparam [31:0] PV_DOTUP_B = 32'b1000000??????????001?????1010111; + localparam [31:0] PV_DOTUP_SC_B = 32'b1000000??????????101?????1010111; + localparam [31:0] PV_DOTUP_SCI_B = 32'b100000???????????111?????1010111; + localparam [31:0] PV_DOTUSP_H = 32'b1000100??????????000?????1010111; + localparam [31:0] PV_DOTUSP_SC_H = 32'b1000100??????????100?????1010111; + localparam [31:0] PV_DOTUSP_SCI_H = 32'b100010???????????110?????1010111; + localparam [31:0] PV_DOTUSP_B = 32'b1000100??????????001?????1010111; + localparam [31:0] PV_DOTUSP_SC_B = 32'b1000100??????????101?????1010111; + localparam [31:0] PV_DOTUSP_SCI_B = 32'b100010???????????111?????1010111; + localparam [31:0] PV_DOTSP_H = 32'b1001100??????????000?????1010111; + localparam [31:0] PV_DOTSP_SC_H = 32'b1001100??????????100?????1010111; + localparam [31:0] PV_DOTSP_SCI_H = 32'b100110???????????110?????1010111; + localparam [31:0] PV_DOTSP_B = 32'b1001100??????????001?????1010111; + localparam [31:0] PV_DOTSP_SC_B = 32'b1001100??????????101?????1010111; + localparam [31:0] PV_DOTSP_SCI_B = 32'b100110???????????111?????1010111; + localparam [31:0] PV_SDOTUP_H = 32'b1010000??????????000?????1010111; + localparam [31:0] PV_SDOTUP_SC_H = 32'b1010000??????????100?????1010111; + localparam [31:0] PV_SDOTUP_SCI_H = 32'b101000???????????110?????1010111; + localparam [31:0] PV_SDOTUP_B = 32'b1010000??????????001?????1010111; + localparam [31:0] PV_SDOTUP_SC_B = 32'b1010000??????????101?????1010111; + localparam [31:0] PV_SDOTUP_SCI_B = 32'b101000???????????111?????1010111; + localparam [31:0] PV_SDOTUSP_H = 32'b1010100??????????000?????1010111; + localparam [31:0] PV_SDOTUSP_SC_H = 32'b1010100??????????100?????1010111; + localparam [31:0] PV_SDOTUSP_SCI_H = 32'b101010???????????110?????1010111; + localparam [31:0] PV_SDOTUSP_B = 32'b1010100??????????001?????1010111; + localparam 
[31:0] PV_SDOTUSP_SC_B = 32'b1010100??????????101?????1010111; + localparam [31:0] PV_SDOTUSP_SCI_B = 32'b101010???????????111?????1010111; + localparam [31:0] PV_SDOTSP_H = 32'b1011100??????????000?????1010111; + localparam [31:0] PV_SDOTSP_SC_H = 32'b1011100??????????100?????1010111; + localparam [31:0] PV_SDOTSP_SCI_H = 32'b101110???????????110?????1010111; + localparam [31:0] PV_SDOTSP_B = 32'b1011100??????????001?????1010111; + localparam [31:0] PV_SDOTSP_SC_B = 32'b1011100??????????101?????1010111; + localparam [31:0] PV_SDOTSP_SCI_B = 32'b101110???????????111?????1010111; + localparam [31:0] PV_SHUFFLE2_H = 32'b1100100??????????000?????1010111; + localparam [31:0] PV_SHUFFLE2_B = 32'b1100100??????????001?????1010111; localparam [31:0] FLAH = 32'b?????????????????001?????0000111; localparam [31:0] FSAH = 32'b?????????????????001?????0100111; localparam [31:0] FMADD_AH = 32'b?????10??????????101?????1000011; diff --git a/toolchain/riscv-opcodes/opcodes-rvv b/toolchain/riscv-opcodes/opcodes-rvv index d34cdd81c..f2e6ba6bc 100644 --- a/toolchain/riscv-opcodes/opcodes-rvv +++ b/toolchain/riscv-opcodes/opcodes-rvv @@ -8,8 +8,8 @@ # configuration setting # https://github.com/riscv/riscv-v-spec/blob/master/vcfg-format.adoc -vsetvli 31=0 zimm11 rs1 14..12=0x7 rd 6..0=0x57 -vsetvl 31=1 30..25=0x0 rs2 rs1 14..12=0x7 rd 6..0=0x57 +@vsetvli 31=0 zimm11 rs1 14..12=0x7 rd 6..0=0x57 +#vsetvl 31=1 30..25=0x0 rs2 rs1 14..12=0x7 rd 6..0=0x57 # # Vector Loads and Store @@ -118,37 +118,37 @@ vs8r.v 31..29=7 28=0 27..26=0 25=1 24..20=0x08 rs1 14..12=0x0 vs3 6..0=0 # Vector Floating-Point Instructions # https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#14-vector-floating-point-instructions # OPFVF -vfadd.vf 31..26=0x00 vm vs2 rs1 14..12=0x5 vd 6..0=0x57 -vfsub.vf 31..26=0x02 vm vs2 rs1 14..12=0x5 vd 6..0=0x57 -vfmin.vf 31..26=0x04 vm vs2 rs1 14..12=0x5 vd 6..0=0x57 -vfmax.vf 31..26=0x06 vm vs2 rs1 14..12=0x5 vd 6..0=0x57 -vfsgnj.vf 31..26=0x08 vm vs2 rs1 14..12=0x5 vd 
6..0=0x57 -vfsgnjn.vf 31..26=0x09 vm vs2 rs1 14..12=0x5 vd 6..0=0x57 -vfsgnjx.vf 31..26=0x0a vm vs2 rs1 14..12=0x5 vd 6..0=0x57 -vfslide1up.vf 31..26=0x0e vm vs2 rs1 14..12=0x5 vd 6..0=0x57 +@vfadd.vf 31..26=0x00 vm vs2 rs1 14..12=0x5 vd 6..0=0x57 +@vfsub.vf 31..26=0x02 vm vs2 rs1 14..12=0x5 vd 6..0=0x57 +@vfmin.vf 31..26=0x04 vm vs2 rs1 14..12=0x5 vd 6..0=0x57 +@vfmax.vf 31..26=0x06 vm vs2 rs1 14..12=0x5 vd 6..0=0x57 +@vfsgnj.vf 31..26=0x08 vm vs2 rs1 14..12=0x5 vd 6..0=0x57 +@vfsgnjn.vf 31..26=0x09 vm vs2 rs1 14..12=0x5 vd 6..0=0x57 +@vfsgnjx.vf 31..26=0x0a vm vs2 rs1 14..12=0x5 vd 6..0=0x57 +@vfslide1up.vf 31..26=0x0e vm vs2 rs1 14..12=0x5 vd 6..0=0x57 vfslide1down.vf 31..26=0x0f vm vs2 rs1 14..12=0x5 vd 6..0=0x57 vfmv.s.f 31..26=0x10 25=1 24..20=0 rs1 14..12=0x5 vd 6..0=0x57 vfmerge.vfm 31..26=0x17 25=0 vs2 rs1 14..12=0x5 vd 6..0=0x57 vfmv.v.f 31..26=0x17 25=1 24..20=0 rs1 14..12=0x5 vd 6..0=0x57 -vmfeq.vf 31..26=0x18 vm vs2 rs1 14..12=0x5 vd 6..0=0x57 +@vmfeq.vf 31..26=0x18 vm vs2 rs1 14..12=0x5 vd 6..0=0x57 vmfle.vf 31..26=0x19 vm vs2 rs1 14..12=0x5 vd 6..0=0x57 vmflt.vf 31..26=0x1b vm vs2 rs1 14..12=0x5 vd 6..0=0x57 vmfne.vf 31..26=0x1c vm vs2 rs1 14..12=0x5 vd 6..0=0x57 vmfgt.vf 31..26=0x1d vm vs2 rs1 14..12=0x5 vd 6..0=0x57 vmfge.vf 31..26=0x1f vm vs2 rs1 14..12=0x5 vd 6..0=0x57 -vfdiv.vf 31..26=0x20 vm vs2 rs1 14..12=0x5 vd 6..0=0x57 +@vfdiv.vf 31..26=0x20 vm vs2 rs1 14..12=0x5 vd 6..0=0x57 vfrdiv.vf 31..26=0x21 vm vs2 rs1 14..12=0x5 vd 6..0=0x57 vfmul.vf 31..26=0x24 vm vs2 rs1 14..12=0x5 vd 6..0=0x57 vfrsub.vf 31..26=0x27 vm vs2 rs1 14..12=0x5 vd 6..0=0x57 -vfmadd.vf 31..26=0x28 vm vs2 rs1 14..12=0x5 vd 6..0=0x57 +@vfmadd.vf 31..26=0x28 vm vs2 rs1 14..12=0x5 vd 6..0=0x57 vfnmadd.vf 31..26=0x29 vm vs2 rs1 14..12=0x5 vd 6..0=0x57 -vfmsub.vf 31..26=0x2a vm vs2 rs1 14..12=0x5 vd 6..0=0x57 +@vfmsub.vf 31..26=0x2a vm vs2 rs1 14..12=0x5 vd 6..0=0x57 vfnmsub.vf 31..26=0x2b vm vs2 rs1 14..12=0x5 vd 6..0=0x57 vfmacc.vf 31..26=0x2c vm vs2 rs1 14..12=0x5 vd 
6..0=0x57 vfnmacc.vf 31..26=0x2d vm vs2 rs1 14..12=0x5 vd 6..0=0x57 -vfmsac.vf 31..26=0x2e vm vs2 rs1 14..12=0x5 vd 6..0=0x57 +@vfmsac.vf 31..26=0x2e vm vs2 rs1 14..12=0x5 vd 6..0=0x57 vfnmsac.vf 31..26=0x2f vm vs2 rs1 14..12=0x5 vd 6..0=0x57 vfwadd.vf 31..26=0x30 vm vs2 rs1 14..12=0x5 vd 6..0=0x57 @@ -162,58 +162,58 @@ vfwmsac.vf 31..26=0x3e vm vs2 rs1 14..12=0x5 vd 6..0=0x57 vfwnmsac.vf 31..26=0x3f vm vs2 rs1 14..12=0x5 vd 6..0=0x57 # OPFVV -vfadd.vv 31..26=0x00 vm vs2 vs1 14..12=0x1 vd 6..0=0x57 +@vfadd.vv 31..26=0x00 vm vs2 vs1 14..12=0x1 vd 6..0=0x57 vfredsum.vs 31..26=0x01 vm vs2 vs1 14..12=0x1 vd 6..0=0x57 -vfsub.vv 31..26=0x02 vm vs2 vs1 14..12=0x1 vd 6..0=0x57 +@vfsub.vv 31..26=0x02 vm vs2 vs1 14..12=0x1 vd 6..0=0x57 vfredosum.vs 31..26=0x03 vm vs2 vs1 14..12=0x1 vd 6..0=0x57 -vfmin.vv 31..26=0x04 vm vs2 vs1 14..12=0x1 vd 6..0=0x57 +@vfmin.vv 31..26=0x04 vm vs2 vs1 14..12=0x1 vd 6..0=0x57 vfredmin.vs 31..26=0x05 vm vs2 vs1 14..12=0x1 vd 6..0=0x57 -vfmax.vv 31..26=0x06 vm vs2 vs1 14..12=0x1 vd 6..0=0x57 +@vfmax.vv 31..26=0x06 vm vs2 vs1 14..12=0x1 vd 6..0=0x57 vfredmax.vs 31..26=0x07 vm vs2 vs1 14..12=0x1 vd 6..0=0x57 -vfsgnj.vv 31..26=0x08 vm vs2 vs1 14..12=0x1 vd 6..0=0x57 +@vfsgnj.vv 31..26=0x08 vm vs2 vs1 14..12=0x1 vd 6..0=0x57 vfsgnjn.vv 31..26=0x09 vm vs2 vs1 14..12=0x1 vd 6..0=0x57 -vfsgnjx.vv 31..26=0x0a vm vs2 vs1 14..12=0x1 vd 6..0=0x57 +@vfsgnjx.vv 31..26=0x0a vm vs2 vs1 14..12=0x1 vd 6..0=0x57 vfmv.f.s 31..26=0x10 25=1 vs2 19..15=0 14..12=0x1 rd 6..0=0x57 -vmfeq.vv 31..26=0x18 vm vs2 vs1 14..12=0x1 vd 6..0=0x57 +@vmfeq.vv 31..26=0x18 vm vs2 vs1 14..12=0x1 vd 6..0=0x57 vmfle.vv 31..26=0x19 vm vs2 vs1 14..12=0x1 vd 6..0=0x57 vmflt.vv 31..26=0x1b vm vs2 vs1 14..12=0x1 vd 6..0=0x57 -vmfne.vv 31..26=0x1c vm vs2 vs1 14..12=0x1 vd 6..0=0x57 +@vmfne.vv 31..26=0x1c vm vs2 vs1 14..12=0x1 vd 6..0=0x57 -vfdiv.vv 31..26=0x20 vm vs2 vs1 14..12=0x1 vd 6..0=0x57 +@vfdiv.vv 31..26=0x20 vm vs2 vs1 14..12=0x1 vd 6..0=0x57 vfmul.vv 31..26=0x24 vm vs2 vs1 
14..12=0x1 vd 6..0=0x57 -vfmadd.vv 31..26=0x28 vm vs2 vs1 14..12=0x1 vd 6..0=0x57 +@vfmadd.vv 31..26=0x28 vm vs2 vs1 14..12=0x1 vd 6..0=0x57 vfnmadd.vv 31..26=0x29 vm vs2 vs1 14..12=0x1 vd 6..0=0x57 -vfmsub.vv 31..26=0x2a vm vs2 vs1 14..12=0x1 vd 6..0=0x57 +@vfmsub.vv 31..26=0x2a vm vs2 vs1 14..12=0x1 vd 6..0=0x57 vfnmsub.vv 31..26=0x2b vm vs2 vs1 14..12=0x1 vd 6..0=0x57 vfmacc.vv 31..26=0x2c vm vs2 vs1 14..12=0x1 vd 6..0=0x57 vfnmacc.vv 31..26=0x2d vm vs2 vs1 14..12=0x1 vd 6..0=0x57 -vfmsac.vv 31..26=0x2e vm vs2 vs1 14..12=0x1 vd 6..0=0x57 +@vfmsac.vv 31..26=0x2e vm vs2 vs1 14..12=0x1 vd 6..0=0x57 vfnmsac.vv 31..26=0x2f vm vs2 vs1 14..12=0x1 vd 6..0=0x57 -vfcvt.xu.f.v 31..26=0x12 vm vs2 19..15=0x00 14..12=0x1 vd 6..0=0x57 -vfcvt.x.f.v 31..26=0x12 vm vs2 19..15=0x01 14..12=0x1 vd 6..0=0x57 -vfcvt.f.xu.v 31..26=0x12 vm vs2 19..15=0x02 14..12=0x1 vd 6..0=0x57 -vfcvt.f.x.v 31..26=0x12 vm vs2 19..15=0x03 14..12=0x1 vd 6..0=0x57 -vfcvt.rtz.xu.f.v 31..26=0x12 vm vs2 19..15=0x06 14..12=0x1 vd 6..0=0x57 -vfcvt.rtz.x.f.v 31..26=0x12 vm vs2 19..15=0x07 14..12=0x1 vd 6..0=0x57 - -vfwcvt.xu.f.v 31..26=0x12 vm vs2 19..15=0x08 14..12=0x1 vd 6..0=0x57 -vfwcvt.x.f.v 31..26=0x12 vm vs2 19..15=0x09 14..12=0x1 vd 6..0=0x57 -vfwcvt.f.xu.v 31..26=0x12 vm vs2 19..15=0x0A 14..12=0x1 vd 6..0=0x57 -vfwcvt.f.x.v 31..26=0x12 vm vs2 19..15=0x0B 14..12=0x1 vd 6..0=0x57 -vfwcvt.f.f.v 31..26=0x12 vm vs2 19..15=0x0C 14..12=0x1 vd 6..0=0x57 -vfwcvt.rtz.xu.f.v 31..26=0x12 vm vs2 19..15=0x0E 14..12=0x1 vd 6..0=0x57 -vfwcvt.rtz.x.f.v 31..26=0x12 vm vs2 19..15=0x0F 14..12=0x1 vd 6..0=0x57 - -vfncvt.xu.f.w 31..26=0x12 vm vs2 19..15=0x10 14..12=0x1 vd 6..0=0x57 -vfncvt.x.f.w 31..26=0x12 vm vs2 19..15=0x11 14..12=0x1 vd 6..0=0x57 -vfncvt.f.xu.w 31..26=0x12 vm vs2 19..15=0x12 14..12=0x1 vd 6..0=0x57 -vfncvt.f.x.w 31..26=0x12 vm vs2 19..15=0x13 14..12=0x1 vd 6..0=0x57 -vfncvt.f.f.w 31..26=0x12 vm vs2 19..15=0x14 14..12=0x1 vd 6..0=0x57 -vfncvt.rod.f.f.w 31..26=0x12 vm vs2 19..15=0x15 14..12=0x1 vd 
6..0=0x57 -vfncvt.rtz.xu.f.w 31..26=0x12 vm vs2 19..15=0x16 14..12=0x1 vd 6..0=0x57 -vfncvt.rtz.x.f.w 31..26=0x12 vm vs2 19..15=0x17 14..12=0x1 vd 6..0=0x57 +@vfcvt.xu.f.v 31..26=0x12 vm vs2 19..15=0x00 14..12=0x1 vd 6..0=0x57 +#vfcvt.x.f.v 31..26=0x12 vm vs2 19..15=0x01 14..12=0x1 vd 6..0=0x57 +@vfcvt.f.xu.v 31..26=0x12 vm vs2 19..15=0x02 14..12=0x1 vd 6..0=0x57 +@vfcvt.f.x.v 31..26=0x12 vm vs2 19..15=0x03 14..12=0x1 vd 6..0=0x57 +@vfcvt.rtz.xu.f.v 31..26=0x12 vm vs2 19..15=0x06 14..12=0x1 vd 6..0=0x57 +@vfcvt.rtz.x.f.v 31..26=0x12 vm vs2 19..15=0x07 14..12=0x1 vd 6..0=0x57 + +@vfwcvt.xu.f.v 31..26=0x12 vm vs2 19..15=0x08 14..12=0x1 vd 6..0=0x57 +@vfwcvt.x.f.v 31..26=0x12 vm vs2 19..15=0x09 14..12=0x1 vd 6..0=0x57 +@vfwcvt.f.xu.v 31..26=0x12 vm vs2 19..15=0x0A 14..12=0x1 vd 6..0=0x57 +@vfwcvt.f.x.v 31..26=0x12 vm vs2 19..15=0x0B 14..12=0x1 vd 6..0=0x57 +@vfwcvt.f.f.v 31..26=0x12 vm vs2 19..15=0x0C 14..12=0x1 vd 6..0=0x57 +@vfwcvt.rtz.xu.f.v 31..26=0x12 vm vs2 19..15=0x0E 14..12=0x1 vd 6..0=0x57 +@vfwcvt.rtz.x.f.v 31..26=0x12 vm vs2 19..15=0x0F 14..12=0x1 vd 6..0=0x57 + +@vfncvt.xu.f.w 31..26=0x12 vm vs2 19..15=0x10 14..12=0x1 vd 6..0=0x57 +@vfncvt.x.f.w 31..26=0x12 vm vs2 19..15=0x11 14..12=0x1 vd 6..0=0x57 +@vfncvt.f.xu.w 31..26=0x12 vm vs2 19..15=0x12 14..12=0x1 vd 6..0=0x57 +@vfncvt.f.x.w 31..26=0x12 vm vs2 19..15=0x13 14..12=0x1 vd 6..0=0x57 +@vfncvt.f.f.w 31..26=0x12 vm vs2 19..15=0x14 14..12=0x1 vd 6..0=0x57 +@vfncvt.rod.f.f.w 31..26=0x12 vm vs2 19..15=0x15 14..12=0x1 vd 6..0=0x57 +@vfncvt.rtz.xu.f.w 31..26=0x12 vm vs2 19..15=0x16 14..12=0x1 vd 6..0=0x57 +@vfncvt.rtz.x.f.w 31..26=0x12 vm vs2 19..15=0x17 14..12=0x1 vd 6..0=0x57 vfsqrt.v 31..26=0x13 vm vs2 19..15=0x00 14..12=0x1 vd 6..0=0x57 vfrsqrte7.v 31..26=0x13 vm vs2 19..15=0x04 14..12=0x1 vd 6..0=0x57 @@ -222,7 +222,7 @@ vfclass.v 31..26=0x13 vm vs2 19..15=0x10 14..12=0x1 vd 6..0=0x57 vfwadd.vv 31..26=0x30 vm vs2 vs1 14..12=0x1 vd 6..0=0x57 vfwredsum.vs 31..26=0x31 vm vs2 vs1 14..12=0x1 vd 6..0=0x57 
-vfwsub.vv 31..26=0x32 vm vs2 vs1 14..12=0x1 vd 6..0=0x57 +@vfwsub.vv 31..26=0x32 vm vs2 vs1 14..12=0x1 vd 6..0=0x57 vfwredosum.vs 31..26=0x33 vm vs2 vs1 14..12=0x1 vd 6..0=0x57 vfwadd.wv 31..26=0x34 vm vs2 vs1 14..12=0x1 vd 6..0=0x57 vfwsub.wv 31..26=0x36 vm vs2 vs1 14..12=0x1 vd 6..0=0x57 @@ -234,48 +234,48 @@ vfwmsac.vv 31..26=0x3e vm vs2 vs1 14..12=0x1 vd 6..0=0x57 vfwnmsac.vv 31..26=0x3f vm vs2 vs1 14..12=0x1 vd 6..0=0x57 # OPIVX -vadd.vx 31..26=0x00 vm vs2 rs1 14..12=0x4 vd 6..0=0x57 -vsub.vx 31..26=0x02 vm vs2 rs1 14..12=0x4 vd 6..0=0x57 +@vadd.vx 31..26=0x00 vm vs2 rs1 14..12=0x4 vd 6..0=0x57 +@vsub.vx 31..26=0x02 vm vs2 rs1 14..12=0x4 vd 6..0=0x57 vrsub.vx 31..26=0x03 vm vs2 rs1 14..12=0x4 vd 6..0=0x57 -vminu.vx 31..26=0x04 vm vs2 rs1 14..12=0x4 vd 6..0=0x57 +@vminu.vx 31..26=0x04 vm vs2 rs1 14..12=0x4 vd 6..0=0x57 vmin.vx 31..26=0x05 vm vs2 rs1 14..12=0x4 vd 6..0=0x57 -vmaxu.vx 31..26=0x06 vm vs2 rs1 14..12=0x4 vd 6..0=0x57 +@vmaxu.vx 31..26=0x06 vm vs2 rs1 14..12=0x4 vd 6..0=0x57 vmax.vx 31..26=0x07 vm vs2 rs1 14..12=0x4 vd 6..0=0x57 vand.vx 31..26=0x09 vm vs2 rs1 14..12=0x4 vd 6..0=0x57 -vor.vx 31..26=0x0a vm vs2 rs1 14..12=0x4 vd 6..0=0x57 +@vor.vx 31..26=0x0a vm vs2 rs1 14..12=0x4 vd 6..0=0x57 vxor.vx 31..26=0x0b vm vs2 rs1 14..12=0x4 vd 6..0=0x57 -vrgather.vx 31..26=0x0c vm vs2 rs1 14..12=0x4 vd 6..0=0x57 -vslideup.vx 31..26=0x0e vm vs2 rs1 14..12=0x4 vd 6..0=0x57 +@vrgather.vx 31..26=0x0c vm vs2 rs1 14..12=0x4 vd 6..0=0x57 +@vslideup.vx 31..26=0x0e vm vs2 rs1 14..12=0x4 vd 6..0=0x57 vslidedown.vx 31..26=0x0f vm vs2 rs1 14..12=0x4 vd 6..0=0x57 -vadc.vxm 31..26=0x10 25=0 vs2 rs1 14..12=0x4 vd 6..0=0x57 +#vadc.vxm 31..26=0x10 25=0 vs2 rs1 14..12=0x4 vd 6..0=0x57 vmadc.vxm 31..26=0x11 vm vs2 rs1 14..12=0x4 vd 6..0=0x57 -vsbc.vxm 31..26=0x12 25=0 vs2 rs1 14..12=0x4 vd 6..0=0x57 +#vsbc.vxm 31..26=0x12 25=0 vs2 rs1 14..12=0x4 vd 6..0=0x57 vmsbc.vxm 31..26=0x13 vm vs2 rs1 14..12=0x4 vd 6..0=0x57 -vmerge.vxm 31..26=0x17 25=0 vs2 rs1 14..12=0x4 vd 6..0=0x57 
-vmv.v.x 31..26=0x17 25=1 24..20=0 rs1 14..12=0x4 vd 6..0=0x57 -vmseq.vx 31..26=0x18 vm vs2 rs1 14..12=0x4 vd 6..0=0x57 -vmsne.vx 31..26=0x19 vm vs2 rs1 14..12=0x4 vd 6..0=0x57 -vmsltu.vx 31..26=0x1a vm vs2 rs1 14..12=0x4 vd 6..0=0x57 +@vmerge.vxm 31..26=0x17 25=0 vs2 rs1 14..12=0x4 vd 6..0=0x57 +@vmv.v.x 31..26=0x17 25=1 24..20=0 rs1 14..12=0x4 vd 6..0=0x57 +@vmseq.vx 31..26=0x18 vm vs2 rs1 14..12=0x4 vd 6..0=0x57 +@vmsne.vx 31..26=0x19 vm vs2 rs1 14..12=0x4 vd 6..0=0x57 +@vmsltu.vx 31..26=0x1a vm vs2 rs1 14..12=0x4 vd 6..0=0x57 vmslt.vx 31..26=0x1b vm vs2 rs1 14..12=0x4 vd 6..0=0x57 vmsleu.vx 31..26=0x1c vm vs2 rs1 14..12=0x4 vd 6..0=0x57 vmsle.vx 31..26=0x1d vm vs2 rs1 14..12=0x4 vd 6..0=0x57 vmsgtu.vx 31..26=0x1e vm vs2 rs1 14..12=0x4 vd 6..0=0x57 vmsgt.vx 31..26=0x1f vm vs2 rs1 14..12=0x4 vd 6..0=0x57 -vsaddu.vx 31..26=0x20 vm vs2 rs1 14..12=0x4 vd 6..0=0x57 +@vsaddu.vx 31..26=0x20 vm vs2 rs1 14..12=0x4 vd 6..0=0x57 vsadd.vx 31..26=0x21 vm vs2 rs1 14..12=0x4 vd 6..0=0x57 -vssubu.vx 31..26=0x22 vm vs2 rs1 14..12=0x4 vd 6..0=0x57 +@vssubu.vx 31..26=0x22 vm vs2 rs1 14..12=0x4 vd 6..0=0x57 vssub.vx 31..26=0x23 vm vs2 rs1 14..12=0x4 vd 6..0=0x57 vsll.vx 31..26=0x25 vm vs2 rs1 14..12=0x4 vd 6..0=0x57 vsmul.vx 31..26=0x27 vm vs2 rs1 14..12=0x4 vd 6..0=0x57 -vsrl.vx 31..26=0x28 vm vs2 rs1 14..12=0x4 vd 6..0=0x57 +@vsrl.vx 31..26=0x28 vm vs2 rs1 14..12=0x4 vd 6..0=0x57 vsra.vx 31..26=0x29 vm vs2 rs1 14..12=0x4 vd 6..0=0x57 -vssrl.vx 31..26=0x2a vm vs2 rs1 14..12=0x4 vd 6..0=0x57 +@vssrl.vx 31..26=0x2a vm vs2 rs1 14..12=0x4 vd 6..0=0x57 vssra.vx 31..26=0x2b vm vs2 rs1 14..12=0x4 vd 6..0=0x57 vnsrl.wx 31..26=0x2c vm vs2 rs1 14..12=0x4 vd 6..0=0x57 vnsra.wx 31..26=0x2d vm vs2 rs1 14..12=0x4 vd 6..0=0x57 -vnclipu.wx 31..26=0x2e vm vs2 rs1 14..12=0x4 vd 6..0=0x57 +@vnclipu.wx 31..26=0x2e vm vs2 rs1 14..12=0x4 vd 6..0=0x57 vnclip.wx 31..26=0x2f vm vs2 rs1 14..12=0x4 vd 6..0=0x57 vqmaccu.vx 31..26=0x3c vm vs2 rs1 14..12=0x4 vd 6..0=0x57 @@ -284,44 +284,44 @@ vqmaccus.vx 
31..26=0x3e vm vs2 rs1 14..12=0x4 vd 6..0=0x57 vqmaccsu.vx 31..26=0x3f vm vs2 rs1 14..12=0x4 vd 6..0=0x57 # OPIVV -vadd.vv 31..26=0x00 vm vs2 vs1 14..12=0x0 vd 6..0=0x57 -vsub.vv 31..26=0x02 vm vs2 vs1 14..12=0x0 vd 6..0=0x57 -vminu.vv 31..26=0x04 vm vs2 vs1 14..12=0x0 vd 6..0=0x57 +@vadd.vv 31..26=0x00 vm vs2 vs1 14..12=0x0 vd 6..0=0x57 +@vsub.vv 31..26=0x02 vm vs2 vs1 14..12=0x0 vd 6..0=0x57 +@vminu.vv 31..26=0x04 vm vs2 vs1 14..12=0x0 vd 6..0=0x57 vmin.vv 31..26=0x05 vm vs2 vs1 14..12=0x0 vd 6..0=0x57 -vmaxu.vv 31..26=0x06 vm vs2 vs1 14..12=0x0 vd 6..0=0x57 +@vmaxu.vv 31..26=0x06 vm vs2 vs1 14..12=0x0 vd 6..0=0x57 vmax.vv 31..26=0x07 vm vs2 vs1 14..12=0x0 vd 6..0=0x57 vand.vv 31..26=0x09 vm vs2 vs1 14..12=0x0 vd 6..0=0x57 -vor.vv 31..26=0x0a vm vs2 vs1 14..12=0x0 vd 6..0=0x57 +@vor.vv 31..26=0x0a vm vs2 vs1 14..12=0x0 vd 6..0=0x57 vxor.vv 31..26=0x0b vm vs2 vs1 14..12=0x0 vd 6..0=0x57 -vrgather.vv 31..26=0x0c vm vs2 vs1 14..12=0x0 vd 6..0=0x57 -vrgatherei16.vv 31..26=0x0e vm vs2 vs1 14..12=0x0 vd 6..0=0x57 +@vrgather.vv 31..26=0x0c vm vs2 vs1 14..12=0x0 vd 6..0=0x57 +@vrgatherei16.vv 31..26=0x0e vm vs2 vs1 14..12=0x0 vd 6..0=0x57 -vadc.vvm 31..26=0x10 25=0 vs2 vs1 14..12=0x0 vd 6..0=0x57 +#vadc.vvm 31..26=0x10 25=0 vs2 vs1 14..12=0x0 vd 6..0=0x57 vmadc.vvm 31..26=0x11 vm vs2 vs1 14..12=0x0 vd 6..0=0x57 -vsbc.vvm 31..26=0x12 25=0 vs2 vs1 14..12=0x0 vd 6..0=0x57 +#vsbc.vvm 31..26=0x12 25=0 vs2 vs1 14..12=0x0 vd 6..0=0x57 vmsbc.vvm 31..26=0x13 vm vs2 vs1 14..12=0x0 vd 6..0=0x57 vmerge.vvm 31..26=0x17 25=0 vs2 vs1 14..12=0x0 vd 6..0=0x57 vmv.v.v 31..26=0x17 25=1 24..20=0 vs1 14..12=0x0 vd 6..0=0x57 -vmseq.vv 31..26=0x18 vm vs2 vs1 14..12=0x0 vd 6..0=0x57 +@vmseq.vv 31..26=0x18 vm vs2 vs1 14..12=0x0 vd 6..0=0x57 vmsne.vv 31..26=0x19 vm vs2 vs1 14..12=0x0 vd 6..0=0x57 -vmsltu.vv 31..26=0x1a vm vs2 vs1 14..12=0x0 vd 6..0=0x57 +@vmsltu.vv 31..26=0x1a vm vs2 vs1 14..12=0x0 vd 6..0=0x57 vmslt.vv 31..26=0x1b vm vs2 vs1 14..12=0x0 vd 6..0=0x57 -vmsleu.vv 31..26=0x1c vm vs2 
vs1 14..12=0x0 vd 6..0=0x57 +@vmsleu.vv 31..26=0x1c vm vs2 vs1 14..12=0x0 vd 6..0=0x57 vmsle.vv 31..26=0x1d vm vs2 vs1 14..12=0x0 vd 6..0=0x57 -vsaddu.vv 31..26=0x20 vm vs2 vs1 14..12=0x0 vd 6..0=0x57 +@vsaddu.vv 31..26=0x20 vm vs2 vs1 14..12=0x0 vd 6..0=0x57 vsadd.vv 31..26=0x21 vm vs2 vs1 14..12=0x0 vd 6..0=0x57 -vssubu.vv 31..26=0x22 vm vs2 vs1 14..12=0x0 vd 6..0=0x57 +@vssubu.vv 31..26=0x22 vm vs2 vs1 14..12=0x0 vd 6..0=0x57 vssub.vv 31..26=0x23 vm vs2 vs1 14..12=0x0 vd 6..0=0x57 vsll.vv 31..26=0x25 vm vs2 vs1 14..12=0x0 vd 6..0=0x57 vsmul.vv 31..26=0x27 vm vs2 vs1 14..12=0x0 vd 6..0=0x57 -vsrl.vv 31..26=0x28 vm vs2 vs1 14..12=0x0 vd 6..0=0x57 +@vsrl.vv 31..26=0x28 vm vs2 vs1 14..12=0x0 vd 6..0=0x57 vsra.vv 31..26=0x29 vm vs2 vs1 14..12=0x0 vd 6..0=0x57 -vssrl.vv 31..26=0x2a vm vs2 vs1 14..12=0x0 vd 6..0=0x57 +@vssrl.vv 31..26=0x2a vm vs2 vs1 14..12=0x0 vd 6..0=0x57 vssra.vv 31..26=0x2b vm vs2 vs1 14..12=0x0 vd 6..0=0x57 vnsrl.wv 31..26=0x2c vm vs2 vs1 14..12=0x0 vd 6..0=0x57 vnsra.wv 31..26=0x2d vm vs2 vs1 14..12=0x0 vd 6..0=0x57 -vnclipu.wv 31..26=0x2e vm vs2 vs1 14..12=0x0 vd 6..0=0x57 +@vnclipu.wv 31..26=0x2e vm vs2 vs1 14..12=0x0 vd 6..0=0x57 vnclip.wv 31..26=0x2f vm vs2 vs1 14..12=0x0 vd 6..0=0x57 vwredsumu.vs 31..26=0x30 vm vs2 vs1 14..12=0x0 vd 6..0=0x57 @@ -395,9 +395,9 @@ vsext.vf4 31..26=0x12 vm vs2 19..15=5 14..12=0x2 vd 6..0=0x57 vzext.vf2 31..26=0x12 vm vs2 19..15=6 14..12=0x2 vd 6..0=0x57 vsext.vf2 31..26=0x12 vm vs2 19..15=7 14..12=0x2 vd 6..0=0x57 -vcompress.vm 31..26=0x17 25=1 vs2 vs1 14..12=0x2 vd 6..0=0x57 +@vcompress.vm 31..26=0x17 25=1 vs2 vs1 14..12=0x2 vd 6..0=0x57 vmandnot.mm 31..26=0x18 vm vs2 vs1 14..12=0x2 vd 6..0=0x57 -vmand.mm 31..26=0x19 vm vs2 vs1 14..12=0x2 vd 6..0=0x57 +@vmand.mm 31..26=0x19 vm vs2 vs1 14..12=0x2 vd 6..0=0x57 vmor.mm 31..26=0x1a vm vs2 vs1 14..12=0x2 vd 6..0=0x57 vmxor.mm 31..26=0x1b vm vs2 vs1 14..12=0x2 vd 6..0=0x57 vmornot.mm 31..26=0x1c vm vs2 vs1 14..12=0x2 vd 6..0=0x57 @@ -442,22 +442,22 @@ vwmacc.vv 
31..26=0x3d vm vs2 vs1 14..12=0x2 vd 6..0=0x57 vwmaccsu.vv 31..26=0x3f vm vs2 vs1 14..12=0x2 vd 6..0=0x57 # OPMVX -vaaddu.vx 31..26=0x08 vm vs2 rs1 14..12=0x6 vd 6..0=0x57 +#vaaddu.vx 31..26=0x08 vm vs2 rs1 14..12=0x6 vd 6..0=0x57 vaadd.vx 31..26=0x09 vm vs2 rs1 14..12=0x6 vd 6..0=0x57 -vasubu.vx 31..26=0x0a vm vs2 rs1 14..12=0x6 vd 6..0=0x57 +#vasubu.vx 31..26=0x0a vm vs2 rs1 14..12=0x6 vd 6..0=0x57 vasub.vx 31..26=0x0b vm vs2 rs1 14..12=0x6 vd 6..0=0x57 -vmv.s.x 31..26=0x10 25=1 24..20=0 rs1 14..12=0x6 vd 6..0=0x57 -vslide1up.vx 31..26=0x0e vm vs2 rs1 14..12=0x6 vd 6..0=0x57 +@vmv.s.x 31..26=0x10 25=1 24..20=0 rs1 14..12=0x6 vd 6..0=0x57 +#vslide1up.vx 31..26=0x0e vm vs2 rs1 14..12=0x6 vd 6..0=0x57 vslide1down.vx 31..26=0x0f vm vs2 rs1 14..12=0x6 vd 6..0=0x57 -vdivu.vx 31..26=0x20 vm vs2 rs1 14..12=0x6 vd 6..0=0x57 +#vdivu.vx 31..26=0x20 vm vs2 rs1 14..12=0x6 vd 6..0=0x57 vdiv.vx 31..26=0x21 vm vs2 rs1 14..12=0x6 vd 6..0=0x57 -vremu.vx 31..26=0x22 vm vs2 rs1 14..12=0x6 vd 6..0=0x57 +@vremu.vx 31..26=0x22 vm vs2 rs1 14..12=0x6 vd 6..0=0x57 vrem.vx 31..26=0x23 vm vs2 rs1 14..12=0x6 vd 6..0=0x57 -vmulhu.vx 31..26=0x24 vm vs2 rs1 14..12=0x6 vd 6..0=0x57 +#vmulhu.vx 31..26=0x24 vm vs2 rs1 14..12=0x6 vd 6..0=0x57 vmul.vx 31..26=0x25 vm vs2 rs1 14..12=0x6 vd 6..0=0x57 -vmulhsu.vx 31..26=0x26 vm vs2 rs1 14..12=0x6 vd 6..0=0x57 +#vmulhsu.vx 31..26=0x26 vm vs2 rs1 14..12=0x6 vd 6..0=0x57 vmulh.vx 31..26=0x27 vm vs2 rs1 14..12=0x6 vd 6..0=0x57 vmadd.vx 31..26=0x29 vm vs2 rs1 14..12=0x6 vd 6..0=0x57 vnmsub.vx 31..26=0x2b vm vs2 rs1 14..12=0x6 vd 6..0=0x57 diff --git a/toolchain/riscv-opcodes/opcodes-xpulpimg_CUSTOM b/toolchain/riscv-opcodes/opcodes-xpulpimg_CUSTOM index daf000556..1e4bc4956 100644 --- a/toolchain/riscv-opcodes/opcodes-xpulpimg_CUSTOM +++ b/toolchain/riscv-opcodes/opcodes-xpulpimg_CUSTOM @@ -9,6 +9,35 @@ # Xpulpimg extension +# Post-increment and reg-reg loads +p.lb_irpost rd rs1 imm12 14..12=0 6..2=0x02 1..0=3 +p.lbu_irpost rd rs1 imm12 14..12=4 6..2=0x02 
1..0=3 +p.lh_irpost rd rs1 imm12 14..12=1 6..2=0x02 1..0=3 +p.lhu_irpost rd rs1 imm12 14..12=5 6..2=0x02 1..0=3 +p.lw_irpost rd rs1 imm12 14..12=2 6..2=0x02 1..0=3 +p.lb_rrpost rd rs1 rs2 31..25=0x00 14..12=7 6..2=0x02 1..0=3 +p.lbu_rrpost rd rs1 rs2 31..25=0x20 14..12=7 6..2=0x02 1..0=3 +p.lh_rrpost rd rs1 rs2 31..25=0x08 14..12=7 6..2=0x02 1..0=3 +p.lhu_rrpost rd rs1 rs2 31..25=0x28 14..12=7 6..2=0x02 1..0=3 +p.lw_rrpost rd rs1 rs2 31..25=0x10 14..12=7 6..2=0x02 1..0=3 +p.lb_rr rd rs1 rs2 31..25=0x00 14..12=7 6..2=0x00 1..0=3 +p.lbu_rr rd rs1 rs2 31..25=0x20 14..12=7 6..2=0x00 1..0=3 +p.lh_rr rd rs1 rs2 31..25=0x08 14..12=7 6..2=0x00 1..0=3 +p.lhu_rr rd rs1 rs2 31..25=0x28 14..12=7 6..2=0x00 1..0=3 +p.lw_rr rd rs1 rs2 31..25=0x10 14..12=7 6..2=0x00 1..0=3 + +# Post-increment and reg-reg stores +p.sb_irpost rs1 rs2 imm12hi imm12lo 14..12=0 6..2=0x0A 1..0=3 +p.sh_irpost rs1 rs2 imm12hi imm12lo 14..12=1 6..2=0x0A 1..0=3 +p.sw_irpost rs1 rs2 imm12hi imm12lo 14..12=2 6..2=0x0A 1..0=3 +p.sb_rrpost rs1 rs2 prs3 31..25=0x00 14..12=4 6..2=0x0A 1..0=3 +p.sh_rrpost rs1 rs2 prs3 31..25=0x00 14..12=5 6..2=0x0A 1..0=3 +p.sw_rrpost rs1 rs2 prs3 31..25=0x00 14..12=6 6..2=0x0A 1..0=3 +p.sb_rr rs1 rs2 prs3 31..25=0x00 14..12=4 6..2=0x08 1..0=3 +p.sh_rr rs1 rs2 prs3 31..25=0x00 14..12=5 6..2=0x08 1..0=3 +p.sw_rr rs1 rs2 prs3 31..25=0x00 14..12=6 6..2=0x08 1..0=3 + +# Generic ALU operations p.abs rd rs1 31..25=2 24..20=0 14..12=0 6..2=0x0C 1..0=3 p.slet rd rs1 rs2 31..25=2 14..12=2 6..2=0x0C 1..0=3 p.sletu rd rs1 rs2 31..25=2 14..12=3 6..2=0x0C 1..0=3 @@ -25,5 +54,156 @@ p.clipu rd rs1 imm5 31..25=10 14..12=2 6..2=0x0C 1..0=3 p.clipr rd rs1 rs2 31..25=10 14..12=5 6..2=0x0C 1..0=3 p.clipur rd rs1 rs2 31..25=10 14..12=6 6..2=0x0C 1..0=3 +# Immediate branching p.beqimm rs1 imm5 bimm12hi bimm12lo 14..12=2 6..2=0x18 1..0=3 p.bneimm rs1 imm5 bimm12hi bimm12lo 14..12=3 6..2=0x18 1..0=3 + +# MAC operations +p.mac rd rs1 rs2 31..25=33 14..12=0 6..2=0x0C 1..0=3 +p.msu rd rs1 rs2 31..25=33 
14..12=1 6..2=0x0C 1..0=3 + +# SIMD arithmetical operations +pv.add.h rd rs1 rs2 31..27=0 26=0 25=0 14..12=0 6..2=0x15 1..0=3 +pv.add.sc.h rd rs1 rs2 31..27=0 26=0 25=0 14..12=4 6..2=0x15 1..0=3 +pv.add.sci.h rd rs1 imm6 31..27=0 26=0 14..12=6 6..2=0x15 1..0=3 +pv.add.b rd rs1 rs2 31..27=0 26=0 25=0 14..12=1 6..2=0x15 1..0=3 +pv.add.sc.b rd rs1 rs2 31..27=0 26=0 25=0 14..12=5 6..2=0x15 1..0=3 +pv.add.sci.b rd rs1 imm6 31..27=0 26=0 14..12=7 6..2=0x15 1..0=3 +pv.sub.h rd rs1 rs2 31..27=1 26=0 25=0 14..12=0 6..2=0x15 1..0=3 +pv.sub.sc.h rd rs1 rs2 31..27=1 26=0 25=0 14..12=4 6..2=0x15 1..0=3 +pv.sub.sci.h rd rs1 imm6 31..27=1 26=0 14..12=6 6..2=0x15 1..0=3 +pv.sub.b rd rs1 rs2 31..27=1 26=0 25=0 14..12=1 6..2=0x15 1..0=3 +pv.sub.sc.b rd rs1 rs2 31..27=1 26=0 25=0 14..12=5 6..2=0x15 1..0=3 +pv.sub.sci.b rd rs1 imm6 31..27=1 26=0 14..12=7 6..2=0x15 1..0=3 + +pv.avg.h rd rs1 rs2 31..27=2 26=0 25=0 14..12=0 6..2=0x15 1..0=3 +pv.avg.sc.h rd rs1 rs2 31..27=2 26=0 25=0 14..12=4 6..2=0x15 1..0=3 +pv.avg.sci.h rd rs1 imm6 31..27=2 26=0 14..12=6 6..2=0x15 1..0=3 +pv.avg.b rd rs1 rs2 31..27=2 26=0 25=0 14..12=1 6..2=0x15 1..0=3 +pv.avg.sc.b rd rs1 rs2 31..27=2 26=0 25=0 14..12=5 6..2=0x15 1..0=3 +pv.avg.sci.b rd rs1 imm6 31..27=2 26=0 14..12=7 6..2=0x15 1..0=3 +pv.avgu.h rd rs1 rs2 31..27=3 26=0 25=0 14..12=0 6..2=0x15 1..0=3 +pv.avgu.sc.h rd rs1 rs2 31..27=3 26=0 25=0 14..12=4 6..2=0x15 1..0=3 +pv.avgu.sci.h rd rs1 imm6 31..27=3 26=0 14..12=6 6..2=0x15 1..0=3 +pv.avgu.b rd rs1 rs2 31..27=3 26=0 25=0 14..12=1 6..2=0x15 1..0=3 +pv.avgu.sc.b rd rs1 rs2 31..27=3 26=0 25=0 14..12=5 6..2=0x15 1..0=3 +pv.avgu.sci.b rd rs1 imm6 31..27=3 26=0 14..12=7 6..2=0x15 1..0=3 + +pv.min.h rd rs1 rs2 31..27=4 26=0 25=0 14..12=0 6..2=0x15 1..0=3 +pv.min.sc.h rd rs1 rs2 31..27=4 26=0 25=0 14..12=4 6..2=0x15 1..0=3 +pv.min.sci.h rd rs1 imm6 31..27=4 26=0 14..12=6 6..2=0x15 1..0=3 +pv.min.b rd rs1 rs2 31..27=4 26=0 25=0 14..12=1 6..2=0x15 1..0=3 +pv.min.sc.b rd rs1 rs2 31..27=4 26=0 25=0 14..12=5 
6..2=0x15 1..0=3 +pv.min.sci.b rd rs1 imm6 31..27=4 26=0 14..12=7 6..2=0x15 1..0=3 +pv.minu.h rd rs1 rs2 31..27=5 26=0 25=0 14..12=0 6..2=0x15 1..0=3 +pv.minu.sc.h rd rs1 rs2 31..27=5 26=0 25=0 14..12=4 6..2=0x15 1..0=3 +pv.minu.sci.h rd rs1 imm6 31..27=5 26=0 14..12=6 6..2=0x15 1..0=3 +pv.minu.b rd rs1 rs2 31..27=5 26=0 25=0 14..12=1 6..2=0x15 1..0=3 +pv.minu.sc.b rd rs1 rs2 31..27=5 26=0 25=0 14..12=5 6..2=0x15 1..0=3 +pv.minu.sci.b rd rs1 imm6 31..27=5 26=0 14..12=7 6..2=0x15 1..0=3 + +pv.max.h rd rs1 rs2 31..27=6 26=0 25=0 14..12=0 6..2=0x15 1..0=3 +pv.max.sc.h rd rs1 rs2 31..27=6 26=0 25=0 14..12=4 6..2=0x15 1..0=3 +pv.max.sci.h rd rs1 imm6 31..27=6 26=0 14..12=6 6..2=0x15 1..0=3 +pv.max.b rd rs1 rs2 31..27=6 26=0 25=0 14..12=1 6..2=0x15 1..0=3 +pv.max.sc.b rd rs1 rs2 31..27=6 26=0 25=0 14..12=5 6..2=0x15 1..0=3 +pv.max.sci.b rd rs1 imm6 31..27=6 26=0 14..12=7 6..2=0x15 1..0=3 +pv.maxu.h rd rs1 rs2 31..27=7 26=0 25=0 14..12=0 6..2=0x15 1..0=3 +pv.maxu.sc.h rd rs1 rs2 31..27=7 26=0 25=0 14..12=4 6..2=0x15 1..0=3 +pv.maxu.sci.h rd rs1 imm6 31..27=7 26=0 14..12=6 6..2=0x15 1..0=3 +pv.maxu.b rd rs1 rs2 31..27=7 26=0 25=0 14..12=1 6..2=0x15 1..0=3 +pv.maxu.sc.b rd rs1 rs2 31..27=7 26=0 25=0 14..12=5 6..2=0x15 1..0=3 +pv.maxu.sci.b rd rs1 imm6 31..27=7 26=0 14..12=7 6..2=0x15 1..0=3 + +pv.srl.h rd rs1 rs2 31..27=8 26=0 25=0 14..12=0 6..2=0x15 1..0=3 +pv.srl.sc.h rd rs1 rs2 31..27=8 26=0 25=0 14..12=4 6..2=0x15 1..0=3 +pv.srl.sci.h rd rs1 imm6 31..27=8 26=0 14..12=6 6..2=0x15 1..0=3 +pv.srl.b rd rs1 rs2 31..27=8 26=0 25=0 14..12=1 6..2=0x15 1..0=3 +pv.srl.sc.b rd rs1 rs2 31..27=8 26=0 25=0 14..12=5 6..2=0x15 1..0=3 +pv.srl.sci.b rd rs1 imm6 31..27=8 26=0 14..12=7 6..2=0x15 1..0=3 +pv.sra.h rd rs1 rs2 31..27=9 26=0 25=0 14..12=0 6..2=0x15 1..0=3 +pv.sra.sc.h rd rs1 rs2 31..27=9 26=0 25=0 14..12=4 6..2=0x15 1..0=3 +pv.sra.sci.h rd rs1 imm6 31..27=9 26=0 14..12=6 6..2=0x15 1..0=3 +pv.sra.b rd rs1 rs2 31..27=9 26=0 25=0 14..12=1 6..2=0x15 1..0=3 +pv.sra.sc.b rd rs1 rs2 
31..27=9 26=0 25=0 14..12=5 6..2=0x15 1..0=3 +pv.sra.sci.b rd rs1 imm6 31..27=9 26=0 14..12=7 6..2=0x15 1..0=3 +pv.sll.h rd rs1 rs2 31..27=10 26=0 25=0 14..12=0 6..2=0x15 1..0=3 +pv.sll.sc.h rd rs1 rs2 31..27=10 26=0 25=0 14..12=4 6..2=0x15 1..0=3 +pv.sll.sci.h rd rs1 imm6 31..27=10 26=0 14..12=6 6..2=0x15 1..0=3 +pv.sll.b rd rs1 rs2 31..27=10 26=0 25=0 14..12=1 6..2=0x15 1..0=3 +pv.sll.sc.b rd rs1 rs2 31..27=10 26=0 25=0 14..12=5 6..2=0x15 1..0=3 +pv.sll.sci.b rd rs1 imm6 31..27=10 26=0 14..12=7 6..2=0x15 1..0=3 + +pv.or.h rd rs1 rs2 31..27=11 26=0 25=0 14..12=0 6..2=0x15 1..0=3 +pv.or.sc.h rd rs1 rs2 31..27=11 26=0 25=0 14..12=4 6..2=0x15 1..0=3 +pv.or.sci.h rd rs1 imm6 31..27=11 26=0 14..12=6 6..2=0x15 1..0=3 +pv.or.b rd rs1 rs2 31..27=11 26=0 25=0 14..12=1 6..2=0x15 1..0=3 +pv.or.sc.b rd rs1 rs2 31..27=11 26=0 25=0 14..12=5 6..2=0x15 1..0=3 +pv.or.sci.b rd rs1 imm6 31..27=11 26=0 14..12=7 6..2=0x15 1..0=3 +pv.xor.h rd rs1 rs2 31..27=12 26=0 25=0 14..12=0 6..2=0x15 1..0=3 +pv.xor.sc.h rd rs1 rs2 31..27=12 26=0 25=0 14..12=4 6..2=0x15 1..0=3 +pv.xor.sci.h rd rs1 imm6 31..27=12 26=0 14..12=6 6..2=0x15 1..0=3 +pv.xor.b rd rs1 rs2 31..27=12 26=0 25=0 14..12=1 6..2=0x15 1..0=3 +pv.xor.sc.b rd rs1 rs2 31..27=12 26=0 25=0 14..12=5 6..2=0x15 1..0=3 +pv.xor.sci.b rd rs1 imm6 31..27=12 26=0 14..12=7 6..2=0x15 1..0=3 +pv.and.h rd rs1 rs2 31..27=13 26=0 25=0 14..12=0 6..2=0x15 1..0=3 +pv.and.sc.h rd rs1 rs2 31..27=13 26=0 25=0 14..12=4 6..2=0x15 1..0=3 +pv.and.sci.h rd rs1 imm6 31..27=13 26=0 14..12=6 6..2=0x15 1..0=3 +pv.and.b rd rs1 rs2 31..27=13 26=0 25=0 14..12=1 6..2=0x15 1..0=3 +pv.and.sc.b rd rs1 rs2 31..27=13 26=0 25=0 14..12=5 6..2=0x15 1..0=3 +pv.and.sci.b rd rs1 imm6 31..27=13 26=0 14..12=7 6..2=0x15 1..0=3 + +pv.abs.h rd rs1 31..27=14 26=0 25=0 24..20=0 14..12=0 6..2=0x15 1..0=3 +pv.abs.b rd rs1 31..27=14 26=0 25=0 24..20=0 14..12=1 6..2=0x15 1..0=3 + +pv.extract.h rd rs1 imm6 31..27=15 26=0 14..12=6 6..2=0x15 1..0=3 +pv.extract.b rd rs1 imm6 31..27=15 26=0 
14..12=7 6..2=0x15 1..0=3 +pv.extractu.h rd rs1 imm6 31..27=18 26=0 14..12=6 6..2=0x15 1..0=3 +pv.extractu.b rd rs1 imm6 31..27=18 26=0 14..12=7 6..2=0x15 1..0=3 +pv.insert.h rd rs1 imm6 31..27=22 26=0 14..12=6 6..2=0x15 1..0=3 +pv.insert.b rd rs1 imm6 31..27=22 26=0 14..12=7 6..2=0x15 1..0=3 + +pv.dotup.h rd rs1 rs2 31..27=16 26=0 25=0 14..12=0 6..2=0x15 1..0=3 +pv.dotup.sc.h rd rs1 rs2 31..27=16 26=0 25=0 14..12=4 6..2=0x15 1..0=3 +pv.dotup.sci.h rd rs1 imm6 31..27=16 26=0 14..12=6 6..2=0x15 1..0=3 +pv.dotup.b rd rs1 rs2 31..27=16 26=0 25=0 14..12=1 6..2=0x15 1..0=3 +pv.dotup.sc.b rd rs1 rs2 31..27=16 26=0 25=0 14..12=5 6..2=0x15 1..0=3 +pv.dotup.sci.b rd rs1 imm6 31..27=16 26=0 14..12=7 6..2=0x15 1..0=3 + +pv.dotusp.h rd rs1 rs2 31..27=17 26=0 25=0 14..12=0 6..2=0x15 1..0=3 +pv.dotusp.sc.h rd rs1 rs2 31..27=17 26=0 25=0 14..12=4 6..2=0x15 1..0=3 +pv.dotusp.sci.h rd rs1 imm6 31..27=17 26=0 14..12=6 6..2=0x15 1..0=3 +pv.dotusp.b rd rs1 rs2 31..27=17 26=0 25=0 14..12=1 6..2=0x15 1..0=3 +pv.dotusp.sc.b rd rs1 rs2 31..27=17 26=0 25=0 14..12=5 6..2=0x15 1..0=3 +pv.dotusp.sci.b rd rs1 imm6 31..27=17 26=0 14..12=7 6..2=0x15 1..0=3 + +pv.dotsp.h rd rs1 rs2 31..27=19 26=0 25=0 14..12=0 6..2=0x15 1..0=3 +pv.dotsp.sc.h rd rs1 rs2 31..27=19 26=0 25=0 14..12=4 6..2=0x15 1..0=3 +pv.dotsp.sci.h rd rs1 imm6 31..27=19 26=0 14..12=6 6..2=0x15 1..0=3 +pv.dotsp.b rd rs1 rs2 31..27=19 26=0 25=0 14..12=1 6..2=0x15 1..0=3 +pv.dotsp.sc.b rd rs1 rs2 31..27=19 26=0 25=0 14..12=5 6..2=0x15 1..0=3 +pv.dotsp.sci.b rd rs1 imm6 31..27=19 26=0 14..12=7 6..2=0x15 1..0=3 + +pv.sdotup.h rd rs1 rs2 31..27=20 26=0 25=0 14..12=0 6..2=0x15 1..0=3 +pv.sdotup.sc.h rd rs1 rs2 31..27=20 26=0 25=0 14..12=4 6..2=0x15 1..0=3 +pv.sdotup.sci.h rd rs1 imm6 31..27=20 26=0 14..12=6 6..2=0x15 1..0=3 +pv.sdotup.b rd rs1 rs2 31..27=20 26=0 25=0 14..12=1 6..2=0x15 1..0=3 +pv.sdotup.sc.b rd rs1 rs2 31..27=20 26=0 25=0 14..12=5 6..2=0x15 1..0=3 +pv.sdotup.sci.b rd rs1 imm6 31..27=20 26=0 14..12=7 6..2=0x15 1..0=3 + 
+pv.sdotusp.h rd rs1 rs2 31..27=21 26=0 25=0 14..12=0 6..2=0x15 1..0=3 +pv.sdotusp.sc.h rd rs1 rs2 31..27=21 26=0 25=0 14..12=4 6..2=0x15 1..0=3 +pv.sdotusp.sci.h rd rs1 imm6 31..27=21 26=0 14..12=6 6..2=0x15 1..0=3 +pv.sdotusp.b rd rs1 rs2 31..27=21 26=0 25=0 14..12=1 6..2=0x15 1..0=3 +pv.sdotusp.sc.b rd rs1 rs2 31..27=21 26=0 25=0 14..12=5 6..2=0x15 1..0=3 +pv.sdotusp.sci.b rd rs1 imm6 31..27=21 26=0 14..12=7 6..2=0x15 1..0=3 + +pv.sdotsp.h rd rs1 rs2 31..27=23 26=0 25=0 14..12=0 6..2=0x15 1..0=3 +pv.sdotsp.sc.h rd rs1 rs2 31..27=23 26=0 25=0 14..12=4 6..2=0x15 1..0=3 +pv.sdotsp.sci.h rd rs1 imm6 31..27=23 26=0 14..12=6 6..2=0x15 1..0=3 +pv.sdotsp.b rd rs1 rs2 31..27=23 26=0 25=0 14..12=1 6..2=0x15 1..0=3 +pv.sdotsp.sc.b rd rs1 rs2 31..27=23 26=0 25=0 14..12=5 6..2=0x15 1..0=3 +pv.sdotsp.sci.b rd rs1 imm6 31..27=23 26=0 14..12=7 6..2=0x15 1..0=3 + +pv.shuffle2.h rd rs1 rs2 31..27=25 26=0 25=0 14..12=0 6..2=0x15 1..0=3 +pv.shuffle2.b rd rs1 rs2 31..27=25 26=0 25=0 14..12=1 6..2=0x15 1..0=3 diff --git a/toolchain/riscv-opcodes/parse_opcodes b/toolchain/riscv-opcodes/parse_opcodes index a33c7a43c..f7b0a837e 100755 --- a/toolchain/riscv-opcodes/parse_opcodes +++ b/toolchain/riscv-opcodes/parse_opcodes @@ -38,6 +38,8 @@ arglut['shamtw'] = (24,20) # for xpulpimg arglut['imm5'] = (24,20) +arglut['prs3'] = (11,7) +arglut['imm6'] = (25,20) # for vectors arglut['vd'] = (11,7)