-
Notifications
You must be signed in to change notification settings - Fork 1
/
unrollerp.h
51 lines (45 loc) · 1.2 KB
/
unrollerp.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
// UnrollerP: calls func(i) for every i in [Begin, N), in increasing order,
// with the loop body partially unrolled.
//
// Template parameters:
//   InnerUnroll - number of consecutive func() calls emitted per pass of the
//                 main loop (must be positive).
//   Begin       - first index passed to func (must be non-negative).
//
// Indices are processed in groups of InnerUnroll for as long as a full group
// remains; the leftover (fewer than InnerUnroll) indices are handled one at a
// time by a plain loop.
template<int InnerUnroll = 8, int Begin = 0>
struct UnrollerP {
    // InnerUnroll <= 0 would make the main loop never advance (or the
    // compile-time recursion never terminate); a negative Begin would wrap
    // around when converted to size_t.
    static_assert(InnerUnroll > 0, "UnrollerP: InnerUnroll must be positive");
    static_assert(Begin >= 0, "UnrollerP: Begin must be non-negative");

public:
    // Invokes func(i) once for each i in [Begin, N). Does nothing if N <= Begin.
    //
    //   N    - one past the last index to visit.
    //   func - callable accepting a size_t. Taken as a forwarding reference so
    //          that both named callables and temporaries (e.g. a lambda
    //          written inline at the call site) are accepted; it is always
    //          invoked as an lvalue because it is called repeatedly.
    template<typename Lambda>
    static void step(size_t N, Lambda&& func) {
        const size_t width = static_cast<size_t>(InnerUnroll);
        size_t i = static_cast<size_t>(Begin);

        // Compare the remaining count (N - i) against the group width instead
        // of computing N - InnerUnroll: N is unsigned, so that subtraction
        // wraps around to a huge value whenever N < InnerUnroll.
        while (i < N && N - i >= width) {
            UnrollerInternal<0>::step(func, i);
            i += width;
        }

        // Remainder: fewer than InnerUnroll indices are left.
        for (; i < N; ++i) {
            func(i);
        }
    }

private:
    // Compile-time recursion that expands to
    //   func(i + 0); func(i + 1); ... func(i + InnerUnroll - 1);
    // Done is a helper flag used only to select the terminating
    // specialization below; users of the template leave it at its default.
    template<size_t Offset, bool Done = (Offset >= static_cast<size_t>(InnerUnroll))>
    struct UnrollerInternal {
        template<typename Lambda>
        static void step(Lambda& func, size_t i) {
            func(i + Offset);
            UnrollerInternal<Offset + 1>::step(func, i);
        }
    };

    // Recursion terminator, chosen once Offset reaches InnerUnroll. It is a
    // partial specialization (keyed on Done) because an explicit
    // specialization of a member template cannot be written inside the
    // enclosing, unspecialized class template.
    template<size_t Offset>
    struct UnrollerInternal<Offset, true> {
        template<typename Lambda>
        static void step(Lambda&, size_t) {
        }
    };
};
// Usage:
//   int numbers;                  // obtain 'numbers' at runtime
//   int *arr = new int[numbers];
//   int sum = 0, tmp;
//   // Unroll the loop body 8 times per pass. Begin defaults to 0, so the
//   // indices visited are 0 .. numbers - 1.
//   auto body = [&] (size_t i) {
//       arr[i] = i;
//       tmp = arr[i] + sum;
//       arr[i] = sum;
//       sum = tmp;
//   };
//   UnrollerP<8>::step(numbers, body);  // step() takes the callable by reference