-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest1.jl
59 lines (43 loc) · 1.06 KB
/
test1.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
using CUDA
using BenchmarkTools
# Host-side input: a 32×32×10000 array of uniform random Float64 values in [0, 1).
arr = rand(Float64, 32, 32, 10000)
"""
    mykernel1(inp)

CUDA kernel that adds 1 to one element of the 3-D array `inp` per thread.

The thread's `x` and `y` indices select the first and second dimension, and the
block's `x` index selects the third, so consecutive `threadIdx().x` values touch
consecutive elements along the first dimension. It is meant to be launched with a
2-D thread block and a 1-D grid, e.g.
`@cuda threads = (32, 32) blocks = 10000 mykernel1(arr_GPU)`.

Threads whose indices fall outside `size(inp)` do nothing. Returns `nothing`.
"""
function mykernel1(inp)
    x = threadIdx().x
    y = threadIdx().y
    z = blockIdx().x
    # Guard against the real array extents (not a hard-coded 32) in all three
    # dimensions, so the `@inbounds` access below is valid for any array size
    # and any launch configuration.
    if x <= size(inp, 1) && y <= size(inp, 2) && z <= size(inp, 3)
        @inbounds inp[x, y, z] += 1
    end
    return nothing
end
# Benchmark mykernel1: upload `arr`, launch one block per slice of the third
# dimension with a 32×32 thread block, and accumulate wall-clock time over 100 runs.
arr_GPU = CuArray(arr)
# Warm-up launch: the first `@cuda` call for a kernel triggers its compilation.
# Launches are asynchronous, so wait for completion before the timed loop starts.
CUDA.@sync @cuda threads = (32, 32) blocks = 10000 mykernel1(arr_GPU)
time_taken = 0.0  # Float64 from the start so the accumulator never changes type
for i in 1:100
    start_t = time()
    # Fresh upload for every run.
    # NOTE(review): this host-to-device copy is inside the timed region, so the
    # reported time is copy + kernel, not kernel alone — confirm that is intended.
    global arr_GPU = CuArray(arr)
    # Block until the kernel has finished; otherwise only the launch is timed.
    CUDA.@sync @cuda threads = (32, 32) blocks = 10000 mykernel1(arr_GPU)
    end_t = time()
    global time_taken += (end_t - start_t)
end
println("Time: ", time_taken, " s")
"""
    mykernel2(inp)

CUDA kernel that adds 1 to one element of the 3-D array `inp` per thread, with the
thread indices swapped relative to the array dimensions.

The thread's `y` index selects the first dimension and its `x` index the second;
the block's `x` index selects the third. Consecutive `threadIdx().x` values
therefore touch elements that are one full column apart along the second
dimension. It is meant to be launched with a 2-D thread block and a 1-D grid, e.g.
`@cuda threads = (32, 32) blocks = 10000 mykernel2(arr_GPU)`.

Threads whose indices fall outside `size(inp)` do nothing. Returns `nothing`.
"""
function mykernel2(inp)
    x = threadIdx().x
    y = threadIdx().y
    z = blockIdx().x
    # `y` indexes dimension 1 and `x` indexes dimension 2, so each is checked
    # against the matching extent (not a hard-coded 32); `z` is checked as well
    # so the `@inbounds` access below is valid for any array size and launch shape.
    if y <= size(inp, 1) && x <= size(inp, 2) && z <= size(inp, 3)
        @inbounds inp[y, x, z] += 1
    end
    return nothing
end
# Benchmark mykernel2: upload `arr`, launch one block per slice of the third
# dimension with a 32×32 thread block, and accumulate wall-clock time over 100 runs.
arr_GPU = CuArray(arr)
# Warm-up launch: the first `@cuda` call for a kernel triggers its compilation.
# Launches are asynchronous, so wait for completion before the timed loop starts.
CUDA.@sync @cuda threads = (32, 32) blocks = 10000 mykernel2(arr_GPU)
time_taken = 0.0  # Float64 from the start so the accumulator never changes type
for i in 1:100
    start_t = time()
    # Fresh upload for every run.
    # NOTE(review): this host-to-device copy is inside the timed region, so the
    # reported time is copy + kernel, not kernel alone — confirm that is intended.
    global arr_GPU = CuArray(arr)
    # Block until the kernel has finished; otherwise only the launch is timed.
    CUDA.@sync @cuda threads = (32, 32) blocks = 10000 mykernel2(arr_GPU)
    end_t = time()
    global time_taken += (end_t - start_t)
end
println("Time: ", time_taken, " s")