Skip to content

Commit

Permalink
Merge pull request #355 from qwe661234/improve_idj
Browse files Browse the repository at this point in the history
Improve the performance of indirect jump for T1C
  • Loading branch information
jserv authored Feb 25, 2024
2 parents d99884e + 8c2a753 commit dc13bec
Show file tree
Hide file tree
Showing 4 changed files with 82 additions and 11 deletions.
6 changes: 5 additions & 1 deletion src/decode.h
Original file line number Diff line number Diff line change
Expand Up @@ -278,9 +278,13 @@ typedef struct {

#define HISTORY_SIZE 16
/* History of indirect-jump targets attached to one instruction
 * (reached through ir->branch_table).
 *
 * The layout depends on the build configuration:
 *   - interpreter only (!JIT): a cursor plus a per-slot IR pointer;
 *   - JIT: a per-slot hit counter used to pick the most frequent target.
 *
 * 'idx' is declared exactly once, inside the !JIT branch. Declaring it a
 * second time ahead of PC[] would be a duplicate member in non-JIT builds
 * and dead storage in JIT builds.
 */
typedef struct {
    /* Recorded jump-target addresses, one per slot. */
    uint32_t PC[HISTORY_SIZE];
#if !RV32_HAS(JIT)
    /* Slot cursor; the interpreter advances it modulo HISTORY_SIZE. */
    uint8_t idx;
    /* NOTE(review): presumably the IR entry matching PC[] slot by slot;
     * confirm against the interpreter's lookup code.
     */
    struct rv_insn *target[HISTORY_SIZE];
#else
    /* Hit count per slot. Zero marks an unused slot; a freshly recorded
     * target starts at 1 and is incremented on every later hit.
     */
    uint32_t times[HISTORY_SIZE];
#endif
} branch_history_table_t;

typedef struct rv_insn {
Expand Down
36 changes: 36 additions & 0 deletions src/jit.c
Original file line number Diff line number Diff line change
Expand Up @@ -1336,6 +1336,26 @@ static void ra_load2_sext(struct jit_state *state,
}
}

/* Emit a guarded direct jump to the most frequently recorded target of the
 * indirect jump 'ir'.
 *
 * The used slots of ir->branch_table are scanned (the scan stops at the first
 * slot whose hit count is zero) and the slot with the highest count is chosen;
 * on ties the earliest slot wins. If that slot holds a non-zero PC, code is
 * emitted that loads the PC into register_map[0], compares it against
 * temp_reg, and jumps to the predicted target behind a conditional jump.
 * Nothing is emitted when the chosen PC is zero.
 *
 * NOTE(review): temp_reg is expected to hold the run-time jump target when
 * this code executes, and 0x85 is presumably the "not equal" condition that
 * skips the direct jump on a misprediction; confirm against the emitters.
 */
void parse_branch_history_table(struct jit_state *state, rv_insn_t *ir)
{
    branch_history_table_t *bt = ir->branch_table;

    /* Find the hottest slot among the used ones. */
    int max_idx = 0;
    int slot = 0;
    while (slot < HISTORY_SIZE && bt->times[slot]) {
        if (bt->times[slot] > bt->times[max_idx])
            max_idx = slot;
        slot++;
    }

    const uint32_t predicted_pc = bt->PC[max_idx];
    if (!predicted_pc)
        return;

    emit_load_imm(state, register_map[0], predicted_pc);
    emit_cmp32(state, temp_reg, register_map[0]);
    /* The local must keep the name 'jump_loc': JUMP_LOC below is presumably
     * a macro defined in terms of it (verify at its definition).
     */
    uint32_t jump_loc = state->offset;
    emit_jcc_offset(state, 0x85);
    emit_jmp(state, predicted_pc);
    /* Patch the conditional jump so it lands right after the direct jump. */
    emit_jump_target_offset(state, JUMP_LOC, state->offset);
}

#define GEN(inst, code) \
static void do_##inst(struct jit_state *state UNUSED, riscv_t *rv UNUSED, \
rv_insn_t *ir UNUSED) \
Expand Down Expand Up @@ -1532,6 +1552,22 @@ static void translate_chained_block(struct jit_state *state,
if (block1->translatable)
translate_chained_block(state, rv, block1, set);
}
branch_history_table_t *bt = ir->branch_table;
if (bt) {
int max_idx = 0;
for (int i = 0; i < HISTORY_SIZE; i++) {
if (!bt->times[i])
break;
if (bt->times[max_idx] < bt->times[i])
max_idx = i;
}
if (bt->PC[max_idx] && !set_has(set, bt->PC[max_idx])) {
block_t *block1 =
cache_get(rv->block_cache, bt->PC[max_idx], false);
if (block1 && block1->translatable)
translate_chained_block(state, rv, block1, set);
}
}
}

uint32_t jit_translate(riscv_t *rv, block_t *block)
Expand Down
49 changes: 39 additions & 10 deletions src/rv32_template.c
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,11 @@
* | | store the result into dst. |
* | cond, src; | set condition if (src) |
* | end; | set the end of condition if (src) |
* | predict; | parse the branch table of indirect |
* | | jump and search the jump target with |
* | | maximal frequency. Then, compare |
* | | and jump to the target if the |
* | | program counter matches. |
* | break; | In the end of a basic block, we need |
* | | to store all VM register value to rv |
* | | data, because the register allocation |
Expand Down Expand Up @@ -197,6 +202,7 @@ RVOP(
* recorded. Additionally, the C code generator can reference the branch history
* table to link the indirect jump targets.
*/
#if !RV32_HAS(JIT)
#define LOOKUP_OR_UPDATE_BRANCH_HISTORY_TABLE() \
/* lookup branch history table */ \
for (int i = 0; i < HISTORY_SIZE; i++) { \
Expand All @@ -213,7 +219,33 @@ RVOP(
ir->branch_table->idx = (ir->branch_table->idx + 1) % HISTORY_SIZE; \
MUST_TAIL return block->ir_head->impl(rv, block->ir_head, cycle, PC); \
}

#else
/* JIT build: record the indirect-jump target PC in the instruction's branch
 * history table and tail-call into the target block.
 *
 * Nothing happens unless cache_get() returns a block for PC; in that case
 * control falls through to the code following the macro. Otherwise:
 *   - if PC is already recorded, its hit counter is incremented;
 *   - if not, PC replaces the first unused slot (counter == 0) or, when all
 *     slots are used, the slot with the smallest counter, and its counter is
 *     set to 1.
 * Either way the macro then returns through the block's first IR handler.
 */
#define LOOKUP_OR_UPDATE_BRANCH_HISTORY_TABLE()                               \
    block_t *block = cache_get(rv->block_cache, PC, true);                    \
    if (block) {                                                              \
        branch_history_table_t *const bht = ir->branch_table;                 \
        /* hit: bump the counter of the matching slot */                      \
        for (int slot = 0; slot < HISTORY_SIZE; slot++) {                     \
            if (bht->PC[slot] != PC)                                          \
                continue;                                                     \
            bht->times[slot]++;                                               \
            MUST_TAIL return block->ir_head->impl(rv, block->ir_head, cycle,  \
                                                  PC);                        \
        }                                                                     \
        /* miss: pick the slot to overwrite */                                \
        int min_idx = 0;                                                      \
        for (int slot = 0; slot < HISTORY_SIZE; slot++) {                     \
            if (bht->times[slot] == 0) {                                      \
                min_idx = slot;                                               \
                break;                                                        \
            }                                                                 \
            if (bht->times[slot] < bht->times[min_idx])                       \
                min_idx = slot;                                               \
        }                                                                     \
        bht->times[min_idx] = 1;                                              \
        bht->PC[min_idx] = PC;                                                \
        MUST_TAIL return block->ir_head->impl(rv, block->ir_head, cycle, PC); \
    }
#endif
/* The indirect jump instruction JALR uses the I-type encoding. The target
* address is obtained by adding the sign-extended 12-bit I-immediate to the
* register rs1, then setting the least-significant bit of the result to zero.
Expand All @@ -234,9 +266,7 @@ RVOP(
#if !RV32_HAS(EXT_C)
RV_EXC_MISALIGN_HANDLER(pc, insn, false, 0);
#endif
#if !RV32_HAS(JIT)
LOOKUP_OR_UPDATE_BRANCH_HISTORY_TABLE();
#endif
rv->csr_cycle = cycle;
rv->PC = PC;
return true;
Expand All @@ -250,8 +280,9 @@ RVOP(
mov, VR1, TMP;
alu32imm, 32, 0x81, 0, TMP, imm;
alu32imm, 32, 0x81, 4, TMP, ~1U;
st, S32, TMP, PC;
break;
predict;
st, S32, TMP, PC;
exit;
}))

Expand Down Expand Up @@ -2254,18 +2285,17 @@ RVOP(
cjr,
{
PC = rv->X[ir->rs1];
#if !RV32_HAS(JIT)
LOOKUP_OR_UPDATE_BRANCH_HISTORY_TABLE();
#endif
rv->csr_cycle = cycle;
rv->PC = PC;
return true;
},
GEN({
rald, VR0, rs1;
mov, VR0, TMP;
st, S32, TMP, PC;
break;
predict;
st, S32, TMP, PC;
exit;
}))

Expand Down Expand Up @@ -2307,9 +2337,7 @@ RVOP(
const int32_t jump_to = rv->X[ir->rs1];
rv->X[rv_reg_ra] = PC + 2;
PC = jump_to;
#if !RV32_HAS(JIT)
LOOKUP_OR_UPDATE_BRANCH_HISTORY_TABLE();
#endif
rv->csr_cycle = cycle;
rv->PC = PC;
return true;
Expand All @@ -2319,8 +2347,9 @@ RVOP(
ldimm, VR0, pc, 2;
rald, VR1, rs1;
mov, VR1, TMP;
st, S32, TMP, PC;
break;
predict;
st, S32, TMP, PC;
exit;
}))

Expand Down
2 changes: 2 additions & 0 deletions tools/gen-jit-template.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,8 @@ def parse_argv(EXT_LIST, SKIP_LIST):
asm = "store_back(state);"
elif items[0] == "assert":
asm = "assert(NULL);"
elif items[0] == "predict":
asm = "parse_branch_history_table(state, ir);"
output += asm + "\n"
output += "})\n"

Expand Down

0 comments on commit dc13bec

Please sign in to comment.