Skip to content

Commit

Permalink
Refactor detect_mem{set,cpy}
Browse files Browse the repository at this point in the history
This commit moves the instruction listings for mem{set,cpy} out of
src/emulate.c, improving the maintainability of these listings.
  • Loading branch information
jserv committed Oct 8, 2023
1 parent b528364 commit 24e857a
Show file tree
Hide file tree
Showing 2 changed files with 236 additions and 222 deletions.
264 changes: 42 additions & 222 deletions src/emulate.c
Original file line number Diff line number Diff line change
Expand Up @@ -620,228 +620,46 @@ static void block_translate(riscv_t *rv, block_map_t *map, block_t *block)
remove_next_nth_ir(rv, ir, block, count - 1); \
}

static bool detect_memset(riscv_t *rv, int lib)
#include "rv32_libc.h"

static bool detect_memset(riscv_t *rv, size_t type)
{
static const uint32_t memset_insn[] = {
0x00f00313, /* li t1,15 */
0x00050713, /* mv a4,a0 */
0x02c37e63, /* bgeu t1,a2,0x11828 */
0x00f77793, /* and a5,a4,15 */
0x0a079063, /* bnez a5,0x11894 */
0x08059263, /* bnez a1,0x1187c */
0xff067693, /* and a3,a2,-16 */
0x00f67613, /* and a2,a2,15 */
0x00e686b3, /* add a3,a3,a4 */
0x00b72023, /* sw a1,0(a4) */
0x00b72223, /* sw a1,4(a4) */
0x00b72423, /* sw a1,8(a4) */
0x00b72623, /* sw a1,12(a4) */
0x01070713, /* add a4,a4,16 */
0xfed766e3, /* bltu a4,a3,0x11808 */
0x00061463, /* bnez a2,0x11828 */
0x00008067, /* ret */
0x40c306b3, /* sub a3,t1,a2 */
0x00269693, /* sll a3,a3,0x2 */
0x00000297, /* auipc t0,0x0 */
0x005686b3, /* add a3,a3,t0 */
0x00c68067, /* jr 12(a3) */
0x00b70723, /* sb a1,14(a4) */
0x00b706a3, /* sb a1,13(a4) */
0x00b70623, /* sb a1,12(a4) */
0x00b705a3, /* sb a1,11(a4) */
0x00b70523, /* sb a1,10(a4) */
0x00b704a3, /* sb a1,9(a4) */
0x00b70423, /* sb a1,8(a4) */
0x00b703a3, /* sb a1,7(a4) */
0x00b70323, /* sb a1,6(a4) */
0x00b702a3, /* sb a1,5(a4) */
0x00b70223, /* sb a1,4(a4) */
0x00b701a3, /* sb a1,3(a4) */
0x00b70123, /* sb a1,2(a4) */
0x00b700a3, /* sb a1,1(a4) */
0x00b70023, /* sb a1,0(a4) */
0x00008067, /* ret */
0x0ff5f593, /* zext.b a1,a1 */
0x00859693, /* sll a3,a1,0x8 */
0x00d5e5b3, /* or a1,a1,a3 */
0x01059693, /* sll a3,a1,0x10 */
0x00d5e5b3, /* or a1,a1,a3 */
0xf6dff06f, /* j 0x117fc */
0x00279693, /* sll a3,a5,0x2 */
0x00000297, /* auipc t0,0x0 */
0x005686b3, /* add a3,a3,t0 */
0x00008293, /* mv t0,ra */
0xfa0680e7, /* jalr -96(a3) */
0x00028093, /* mv ra,t0 */
0xff078793, /* add a5,a5,-16 */
0x40f70733, /* sub a4,a4,a5 */
0x00f60633, /* add a2,a2,a5 */
0xf6c378e3, /* bgeu t1,a2,0x11828 */
0xf3dff06f, /* j 0x117f8 */
};
static const uint32_t memset2_insn[] = {
0x00050313, /* mv t1,a0 */
0x00060a63, /* beqz a2, 0x18 */
0x00b30023, /* sb a1,0(t1) */
0xfff60613, /* add a2,a2,-1 */
0x00130313, /* add t1,t1,1 */
0xfe061ae3, /* bnez a2, 0x8 */
0x00008067, /* ret */
static const struct rv32libc_impl rv32_memset[] = {
{memset0_insn, ARRAYS_SIZE(memset0_insn)},
{memset1_insn, ARRAYS_SIZE(memset1_insn)},
};
assert(type < ARRAYS_SIZE(rv32_memset));

const uint32_t *memset_insn = rv32_memset[type].insn;
const size_t memset_len = rv32_memset[type].len;

uint32_t tmp_pc = rv->PC;
if (lib == 1) {
for (uint32_t i = 0; i < ARRAYS_SIZE(memset_insn); i++) {
const uint32_t insn = rv->io.mem_ifetch(tmp_pc);
if (insn != memset_insn[i])
return false;
tmp_pc += 4;
}
} else {
for (uint32_t i = 0; i < ARRAYS_SIZE(memset2_insn); i++) {
const uint32_t insn = rv->io.mem_ifetch(tmp_pc);
if (insn != memset2_insn[i])
return false;
tmp_pc += 4;
}
for (uint32_t i = 0; i < memset_len; i++) {
const uint32_t insn = rv->io.mem_ifetch(tmp_pc);
if (unlikely(insn != memset_insn[i]))
return false;
tmp_pc += 4;
}
return true;
}

static bool detect_memcpy(riscv_t *rv, int lib)
static bool detect_memcpy(riscv_t *rv, size_t type)
{
static const uint32_t memcpy_insn[] = {
0x00b547b3, /* xor a5,a0,a1 */
0x0037f793, /* and a5,a5,3 */
0x00c508b3, /* add a7,a0,a2 */
0x06079463, /* bnez a5,0x21428 */
0x00300793, /* li a5,3 */
0x06c7f063, /* bgeu a5,a2,0x21428 */
0x00357793, /* and a5,a0,3 */
0x00050713, /* mv a4,a0 */
0x06079a63, /* bnez a5,0x21448 */
0xffc8f613, /* and a2,a7,-4 */
0x40e606b3, /* sub a3,a2,a4 */
0x02000793, /* li a5,32 */
0x08d7ce63, /* blt a5,a3,0x21480 */
0x00058693, /* mv a3,a1 */
0x00070793, /* mv a5,a4 */
0x02c77863, /* bgeu a4,a2,0x21420 */
0x0006a803, /* lw a6,0(a3) */
0x00478793, /* add a5,a5,4 */
0x00468693, /* add a3,a3,4 */
0xff07ae23, /* sw a6,-4(a5) */
0xfec7e8e3, /* bltu a5,a2,0x213f4 */
0xfff60793, /* add a5,a2,-1 */
0x40e787b3, /* sub a5,a5,a4 */
0xffc7f793, /* and a5,a5,-4 */
0x00478793, /* add a5,a5,4 */
0x00f70733, /* add a4,a4,a5 */
0x00f585b3, /* add a1,a1,a5 */
0x01176863, /* bltu a4,a7,0x21430 */
0x00008067, /* ret */
0x00050713, /* mv a4,a0 */
0x05157863, /* bgeu a0,a7,0x2147c */
0x0005c783, /* lbu a5,0(a1) */
0x00170713, /* add a4,a4,1 */
0x00158593, /* add a1,a1,1 */
0xfef70fa3, /* sb a5,-1(a4) */
0xfee898e3, /* bne a7,a4,0x21430 */
0x00008067, /* ret */
0x0005c683, /* lbu a3,0(a1) */
0x00170713, /* add a4,a4,1 */
0x00377793, /* and a5,a4,3 */
0xfed70fa3, /* sb a3,-1(a4) */
0x00158593, /* add a1,a1,1 */
0xf6078ee3, /* beqz a5,0x213d8 */
0x0005c683, /* lbu a3,0(a1) */
0x00170713, /* add a4,a4,1 */
0x00377793, /* and a5,a4,3 */
0xfed70fa3, /* sb a3,-1(a4) */
0x00158593, /* add a1,a1,1 */
0xfc079ae3, /* bnez a5,0x21448 */
0xf61ff06f, /* j 0x213d8 */
0x00008067, /* ret */
0xff010113, /* add sp,sp,-16 */
0x00812623, /* sw s0,12(sp) */
0x02000413, /* li s0,32 */
0x0005a383, /* lw t2,0(a1) */
0x0045a283, /* lw t0,4(a1) */
0x0085af83, /* lw t6,8(a1) */
0x00c5af03, /* lw t5,12(a1) */
0x0105ae83, /* lw t4,16(a1) */
0x0145ae03, /* lw t3,20(a1) */
0x0185a303, /* lw t1,24(a1) */
0x01c5a803, /* lw a6,28(a1) */
0x0205a683, /* lw a3,32(a1) */
0x02470713, /* add a4,a4,36 */
0x40e607b3, /* sub a5,a2,a4 */
0xfc772e23, /* sw t2,-36(a4) */
0xfe572023, /* sw t0,-32(a4) */
0xfff72223, /* sw t6,-28(a4) */
0xffe72423, /* sw t5,-24(a4) */
0xffd72623, /* sw t4,-20(a4) */
0xffc72823, /* sw t3,-16(a4) */
0xfe672a23, /* sw t1,-12(a4) */
0xff072c23, /* sw a6,-8(a4) */
0xfed72e23, /* sw a3,-4(a4) */
0x02458593, /* add a1,a1,36 */
0xfaf446e3, /* blt s0,a5,0x2148c */
0x00058693, /* mv a3,a1 */
0x00070793, /* mv a5,a4 */
0x02c77863, /* bgeu a4,a2,0x2151c */
0x0006a803, /* lw a6,0(a3) */
0x00478793, /* add a5,a5,4 */
0x00468693, /* add a3,a3,4 */
0xff07ae23, /* sw a6,-4(a5) */
0xfec7e8e3, /* bltu a5,a2,0x214f0 */
0xfff60793, /* add a5,a2,-1 */
0x40e787b3, /* sub a5,a5,a4 */
0xffc7f793, /* and a5,a5,-4 */
0x00478793, /* add a5,a5,4 */
0x00f70733, /* add a4,a4,a5 */
0x00f585b3, /* add a1,a1,a5 */
0x01176863, /* bltu a4,a7,0x2152c */
0x00c12403, /* lw s0,12(sp) */
0x01010113, /* add sp,sp,16 */
0x00008067, /* ret */
0x0005c783, /* lbu a5,0(a1) */
0x00170713, /* add a4,a4,1 */
0x00158593, /* add a1,a1,1 */
0xfef70fa3, /* sb a5,-1(a4) */
0xfee882e3, /* beq a7,a4,0x21520 */
0x0005c783, /* lbu a5,0(a1) */
0x00170713, /* add a4,a4,1 */
0x00158593, /* add a1,a1,1 */
0xfef70fa3, /* sb a5,-1(a4) */
0xfce89ee3, /* bne a7,a4,0x2152c */
0xfcdff06f, /* j 0x21520 */
};
static const uint32_t memcpy2_insn[] = {
0x00050313, /* mv t1,a0 */
0x00060e63, /* beqz a2,44d18 */
0x00058383, /* lb t2,0(a1) */
0x00730023, /* sb t2,0(t1) */
0xfff60613, /* add a2,a2,-1 */
0x00130313, /* add t1,t1,1 */
0x00158593, /* add a1,a1,1 */
0xfe0616e3, /* bnez a2,44d00 */
0x00008067, /* ret */
static const struct rv32libc_impl rv32_memcpy[] = {
{memcpy0_insn, ARRAYS_SIZE(memcpy0_insn)},
{memcpy1_insn, ARRAYS_SIZE(memcpy1_insn)},
};
assert(type < ARRAYS_SIZE(rv32_memcpy));

const uint32_t *memcpy_insn = rv32_memcpy[type].insn;
const size_t memcpy_len = rv32_memcpy[type].len;

uint32_t tmp_pc = rv->PC;
if (lib == 1) {
for (uint32_t i = 0; i < ARRAYS_SIZE(memcpy_insn); i++) {
const uint32_t insn = rv->io.mem_ifetch(tmp_pc);
if (insn != memcpy_insn[i])
return false;
tmp_pc += 4;
}
} else {
for (uint32_t i = 0; i < ARRAYS_SIZE(memcpy2_insn); i++) {
const uint32_t insn = rv->io.mem_ifetch(tmp_pc);
if (insn != memcpy2_insn[i])
return false;
tmp_pc += 4;
}
for (uint32_t i = 0; i < memcpy_len; i++) {
const uint32_t insn = rv->io.mem_ifetch(tmp_pc);
if (unlikely(insn != memcpy_insn[i]))
return false;
tmp_pc += 4;
}
return true;
}
Expand All @@ -868,11 +686,11 @@ static bool libc_substitute(riscv_t *rv, block_t *block)
rv_insn_t *ir = block->ir_head, *next_ir = NULL;
switch (ir->opcode) {
case rv_insn_addi:
/* Compare the target block with the first basic block of
* memset/memcpy, if two block is match, we would extract the
* instruction sequence starting from the pc_start of the basic
* block and then compare it with the pre-recorded memset/memcpy
* instruction sequence.
/* Compare the target block with the first basic block of memset and
* memcpy.
* If the two blocks match, extract the instruction sequence starting
* from pc_start of the basic block and compare it with the pre-recorded
* memset/memcpy instruction sequence.
*/
if (ir->imm == 15 && ir->rd == rv_reg_t1 && ir->rs1 == rv_reg_zero) {
next_ir = ir->next;
Expand All @@ -881,7 +699,7 @@ static bool libc_substitute(riscv_t *rv, block_t *block)
next_ir = next_ir->next;
if (next_ir->opcode == rv_insn_bgeu && next_ir->imm == 60 &&
next_ir->rs1 == rv_reg_t1 && next_ir->rs2 == rv_reg_a2) {
if (detect_memset(rv, 1)) {
if (detect_memset(rv, 0)) {
ir->opcode = rv_insn_fuse5;
ir->impl = dispatch_table[ir->opcode];
remove_next_nth_ir(rv, ir, block, 2);
Expand All @@ -894,12 +712,13 @@ static bool libc_substitute(riscv_t *rv, block_t *block)
next_ir = ir->next;
if (next_ir->opcode == rv_insn_beq && next_ir->rs1 == rv_reg_a2 &&
next_ir->rs2 == rv_reg_zero) {
if (next_ir->imm == 20 && detect_memset(rv, 2)) {
if (next_ir->imm == 20 && detect_memset(rv, 1)) {
ir->opcode = rv_insn_fuse5;
ir->impl = dispatch_table[ir->opcode];
remove_next_nth_ir(rv, ir, block, 2);
return true;
} else if (next_ir->imm == 28 && detect_memcpy(rv, 2)) {
}
if (next_ir->imm == 28 && detect_memcpy(rv, 1)) {
ir->opcode = rv_insn_fuse6;
ir->impl = dispatch_table[ir->opcode];
remove_next_nth_ir(rv, ir, block, 2);
Expand Down Expand Up @@ -927,7 +746,7 @@ static bool libc_substitute(riscv_t *rv, block_t *block)
if (next_ir->opcode == rv_insn_bne && next_ir->imm == 104 &&
next_ir->rs1 == rv_reg_a5 &&
next_ir->rs2 == rv_reg_zero) {
if (detect_memcpy(rv, 1)) {
if (detect_memcpy(rv, 0)) {
ir->opcode = rv_insn_fuse6;
ir->impl = dispatch_table[ir->opcode];
remove_next_nth_ir(rv, ir, block, 3);
Expand All @@ -939,7 +758,8 @@ static bool libc_substitute(riscv_t *rv, block_t *block)
}
break;
/* TODO: Inject other frequently used function calls from the C standard
* library */
* library.
*/
}
return false;
}
Expand Down
Loading

1 comment on commit 24e857a

@jserv
Copy link
Contributor Author

@jserv jserv commented on 24e857a Oct 8, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Benchmarks

| Benchmark suite | Current: 24e857a | Previous: b528364 | Ratio |
|-|-|-|-|
| Dhrystone | 1276.5 Average DMIPS over 10 runs | 1228.44 Average DMIPS over 10 runs | 0.96 |
| Coremark | 1020.681 Average iterations/sec over 10 runs | 897.752 Average iterations/sec over 10 runs | 0.88 |

This comment was automatically generated by workflow using github-action-benchmark.

Please sign in to comment.