-
Notifications
You must be signed in to change notification settings - Fork 0
/
two_arg_fn_body.h
109 lines (108 loc) · 7.32 KB
/
two_arg_fn_body.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
/**
 * two_arg_fn_body(name, _stride, itype, ivec, otype, ovec)
 *
 * Expands to the full definition of
 *
 *     void BMAS_<name>(const long n,
 *                      itype *x,   const long incx,
 *                      itype *y,   const long incy,
 *                      otype *out, const long inc_out);
 *
 * which applies a two-argument operation to n elements read from x and y
 * and writes the n results to out.  incx, incy and inc_out are the
 * distances, in elements, between consecutive items of each array.
 *
 * Macro parameters:
 *   name     suffix used to form BMAS_<name>, BMAS_vector_<name> and
 *            BMAS_scalar_<name>.
 *   _stride  number of elements handled per vector iteration (presumably
 *            the lane count of ivec/ovec -- confirm at the instantiation).
 *   itype    element type of x and y.
 *   ivec     vector type for inputs; <ivec>_load(ptr) and
 *            <ivec>_make(ptr, inc, sizeof(itype)) must be available.
 *   otype    element type of out.
 *   ovec     vector type for outputs; <ovec>_store(ptr, vec) and
 *            <ovec>_store_multi(vec, ptr, inc, sizeof(otype)) must be
 *            available.
 *
 * Structure of the generated function:
 *   1. n / _stride full vector iterations.  An operand whose increment is 1
 *      goes through _load / _store; any other increment goes through
 *      _make / _store_multi.  The eight combinations are written out as
 *      separate loops so that no increment test sits inside a loop.
 *   2. n % _stride remaining elements are handled one at a time with
 *      BMAS_scalar_<name>.
 *
 * Both phases are driven by element counts, not by end-pointer sentinels,
 * so the scalar tail still runs when inc_out is 0, and n <= 0 returns
 * without touching any array.
 *
 * The expansion ends with the closing brace of the function; no trailing
 * semicolon is emitted.
 */
#define two_arg_fn_body(name, _stride, itype, ivec, otype, ovec) \
void BMAS_##name(const long n, \
                 itype *x, const long incx, \
                 itype *y, const long incy, \
                 otype *out, const long inc_out){ \
  const int stride = (_stride); \
  const long blocks = n / stride;  /* full vector iterations */ \
  long rem = n % stride;           /* elements left for the scalar tail */ \
  long i; \
  ivec va, vb; \
  ovec vc; \
  /* Nothing to do; also keeps a negative n from driving the loops. */ \
  if (n <= 0) return; \
  if (incx == 1 && incy == 1 && inc_out == 1){ \
    for (i = 0; i < blocks; i++){ \
      va = ivec##_load(x); \
      vb = ivec##_load(y); \
      vc = BMAS_vector_##name(va, vb); \
      ovec##_store(out, vc); \
      x += stride; \
      y += stride; \
      out += stride; \
    } \
  }else if (incy == 1 && inc_out == 1){ \
    for (i = 0; i < blocks; i++){ \
      va = ivec##_make(x, incx, sizeof(itype)); \
      vb = ivec##_load(y); \
      vc = BMAS_vector_##name(va, vb); \
      ovec##_store(out, vc); \
      x += stride*incx; \
      y += stride; \
      out += stride; \
    } \
  }else if (incx == 1 && inc_out == 1){ \
    for (i = 0; i < blocks; i++){ \
      va = ivec##_load(x); \
      vb = ivec##_make(y, incy, sizeof(itype)); \
      vc = BMAS_vector_##name(va, vb); \
      ovec##_store(out, vc); \
      x += stride; \
      y += stride*incy; \
      out += stride; \
    } \
  }else if (incx == 1 && incy == 1){ \
    for (i = 0; i < blocks; i++){ \
      va = ivec##_load(x); \
      vb = ivec##_load(y); \
      vc = BMAS_vector_##name(va, vb); \
      ovec##_store_multi(vc, out, inc_out, sizeof(otype)); \
      x += stride; \
      y += stride; \
      out += stride*inc_out; \
    } \
  }else if (inc_out == 1){ \
    for (i = 0; i < blocks; i++){ \
      va = ivec##_make(x, incx, sizeof(itype)); \
      vb = ivec##_make(y, incy, sizeof(itype)); \
      vc = BMAS_vector_##name(va, vb); \
      ovec##_store(out, vc); \
      x += stride*incx; \
      y += stride*incy; \
      out += stride; \
    } \
  }else if (incy == 1){ \
    for (i = 0; i < blocks; i++){ \
      va = ivec##_make(x, incx, sizeof(itype)); \
      vb = ivec##_load(y); \
      vc = BMAS_vector_##name(va, vb); \
      ovec##_store_multi(vc, out, inc_out, sizeof(otype)); \
      x += stride*incx; \
      y += stride; \
      out += stride*inc_out; \
    } \
  }else if (incx == 1){ \
    for (i = 0; i < blocks; i++){ \
      va = ivec##_load(x); \
      vb = ivec##_make(y, incy, sizeof(itype)); \
      vc = BMAS_vector_##name(va, vb); \
      ovec##_store_multi(vc, out, inc_out, sizeof(otype)); \
      x += stride; \
      y += stride*incy; \
      out += stride*inc_out; \
    } \
  }else{ \
    for (i = 0; i < blocks; i++){ \
      va = ivec##_make(x, incx, sizeof(itype)); \
      vb = ivec##_make(y, incy, sizeof(itype)); \
      vc = BMAS_vector_##name(va, vb); \
      ovec##_store_multi(vc, out, inc_out, sizeof(otype)); \
      x += stride*incx; \
      y += stride*incy; \
      out += stride*inc_out; \
    } \
  } \
  /* Scalar tail: counted, so it does not depend on out ever reaching a */ \
  /* sentinel address (which it cannot when inc_out is 0).              */ \
  for (; rem > 0; rem--){ \
    out[0] = BMAS_scalar_##name(x[0], y[0]); \
    x += incx; \
    y += incy; \
    out += inc_out; \
  } \
}