From d6ff74af18f3aeb5833153e6bf39ba129f700521 Mon Sep 17 00:00:00 2001
From: Leo <cacate0129@gmail.com>
Date: Mon, 10 May 2021 00:06:14 -0400
Subject: [PATCH] update CUDA (#69)

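* update CUDA

* cleanup

* update version

Summary of the squashed changes:

- Require CUDA.jl 3.1 and Julia 1.6 in [compat]; bump CuYao to 0.3.0.
- Remove the `cp2c` helper, the custom complex `CUDA.pow` methods and
  the commented-out `permutedims!` code from src/CUDApatch.jl; the
  kernel in src/kernels.jl and the tests now use `^` instead.
- Define the GPU `kron!` method as `Yao.YaoBase.kron!` and drop
  `import Yao: kron!` from src/CuYao.jl.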

Co-authored-by: Roger-Luo <rogerluo.rl18@gmail.com>
---
 Project.toml      |  6 +++---
 src/CUDApatch.jl  | 26 +-------------------------
 src/CuYao.jl      |  1 -
 src/kernels.jl    |  2 +-
 test/CUDApatch.jl |  4 ++--
 test/GPUReg.jl    |  4 ++--
 6 files changed, 9 insertions(+), 34 deletions(-)

diff --git a/Project.toml b/Project.toml
index e86f2c4..994b87f 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "CuYao"
 uuid = "b48ca7a8-dd42-11e8-2b8e-1b7706800275"
-version = "0.2.9"
+version = "0.3.0"
 
 [deps]
 BitBasis = "50ba71b6-fa0f-514d-ae9a-0916efc90dcf"
@@ -16,14 +16,14 @@ Yao = "5872b779-8223-5990-8dd0-5abbb0748c8c"
 
 [compat]
 BitBasis = "0.7"
-CUDA = "2.0, 3.0"
+CUDA = "3.1"
 LuxurySparse = "0.6"
 Reexport = "0.2, 1.0"
 StaticArrays = "0.12, 1.0"
 StatsBase = "0.33"
 TupleTools = "1"
 Yao = "0.6, 0.7"
-julia = "1"
+julia = "1.6"
 
 [extras]
 Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
diff --git a/src/CUDApatch.jl b/src/CUDApatch.jl
index c42d7d1..c45a13b 100644
--- a/src/CUDApatch.jl
+++ b/src/CUDApatch.jl
@@ -1,27 +1,3 @@
-#import CUDA: _cuview, ViewIndex, NonContiguous
-#using CUDA: genperm
-# fallback to SubArray when the view is not contiguous
-
-#=
-function LinearAlgebra.permutedims!(dest::GPUArray, src::GPUArray, perm) where N
-    perm isa Tuple || (perm = Tuple(perm))
-    gpu_call(dest, (dest, src, perm)) do state, dest, src, perm
-        I = @cartesianidx src state
-        @inbounds dest[genperm(I, perm)...] = src[I...]
-        return
-    end
-    return dest
-end
-=#
-
-import CUDA: pow, abs, angle
-for (RT, CT) in [(:Float64, :ComplexF64), (:Float32, :ComplexF32)]
-    @eval cp2c(d::$RT, a::$RT) = CUDA.Complex(d*CUDA.cos(a), d*CUDA.sin(a))
-    for NT in [RT, :Int32]
-        @eval CUDA.pow(z::$CT, n::$NT) = CUDA.Complex((CUDA.pow(CUDA.abs(z), n)*CUDA.cos(n*CUDA.angle(z))), (CUDA.pow(CUDA.abs(z), n)*CUDA.sin(n*CUDA.angle(z))))
-    end
-end
-
 @inline function bit_count(x::UInt32)
     x = ((x >> 1) & 0b01010101010101010101010101010101) + (x & 0b01010101010101010101010101010101)
     x = ((x >> 2) & 0b00110011001100110011001100110011) + (x & 0b00110011001100110011001100110011)
@@ -95,7 +71,7 @@ end
 Computes Kronecker products in-place on the GPU.
 The results are stored in 'C', overwriting the existing values of 'C'.
 """
-function kron!(C::CuArray{T3}, A::DenseCuArray{T1}, B::DenseCuArray{T2}) where {T1, T2, T3}
+function Yao.YaoBase.kron!(C::CuArray{T3}, A::DenseCuArray{T1}, B::DenseCuArray{T2}) where {T1, T2, T3}
     @boundscheck (size(C) == (size(A,1)*size(B,1), size(A,2)*size(B,2))) || throw(DimensionMismatch())
     CI = Base.CartesianIndices(C)
     @inline function kernel(C, A, B)
diff --git a/src/CuYao.jl b/src/CuYao.jl
index 46bde19..9c776bb 100644
--- a/src/CuYao.jl
+++ b/src/CuYao.jl
@@ -8,7 +8,6 @@ using Random
 
 using Yao.YaoArrayRegister
 using CUDA
-import Yao: kron!
 @reexport using Yao
 
 const Ints = NTuple{<:Any, Int}
diff --git a/src/kernels.jl b/src/kernels.jl
index fd437da..cd5351a 100644
--- a/src/kernels.jl
+++ b/src/kernels.jl
@@ -106,7 +106,7 @@ end
     mask = bmask(Int32, bits...)
     1<<nbit,@inline function kernel(state, inds)
         i = inds[1]
-        piecewise(state, inds)[i] *= CUDA.pow(d, bit_count(Int32(i-1)&mask))
+        piecewise(state, inds)[i] *= d ^ bit_count(Int32(i-1)&mask)
         return
     end
 end
diff --git a/test/CUDApatch.jl b/test/CUDApatch.jl
index 702645a..93e69c7 100644
--- a/test/CUDApatch.jl
+++ b/test/CUDApatch.jl
@@ -25,8 +25,8 @@ end
 @testset "Complex pow" begin
     for T in [ComplexF64, ComplexF32]
         a = CuArray(randn(T, 4, 4))
-        @test Array(CUDA.pow.(a, Int32(3))) ≈ Array(a).^3
-        @test Array(CUDA.pow.(a, real(T)(3))) ≈ Array(a).^3
+        @test Array(a .^ Int32(3)) ≈ Array(a).^3
+        @test Array(a .^ real(T)(3)) ≈ Array(a).^3
     end
 end
 
diff --git a/test/GPUReg.jl b/test/GPUReg.jl
index 5265a3f..3704dc9 100644
--- a/test/GPUReg.jl
+++ b/test/GPUReg.jl
@@ -108,9 +108,9 @@ end
     c = zeros(12,8)
     ca, cb, cc = cu(a), cu(b), cu(c)
     @test kron(ca, cb) |> Array ≈ kron(a, b)
-    @test kron!(cc, ca, cb) |> Array ≈ kron(a,b)
+    @test Yao.YaoBase.kron!(cc, ca, cb) |> Array ≈ kron(a,b)
 
-    kron!(c,a,b)
+    Yao.YaoBase.kron!(c,a,b)
     @test cc |> Array ≈ c
 
     v = randn(100) |> cu