From d6ff74af18f3aeb5833153e6bf39ba129f700521 Mon Sep 17 00:00:00 2001 From: Leo Date: Mon, 10 May 2021 00:06:14 -0400 Subject: [PATCH] update CUDA (#69) * update CUDA * cleanup * update version Co-authored-by: Roger-Luo --- Project.toml | 6 +++--- src/CUDApatch.jl | 26 +------------------------- src/CuYao.jl | 1 - src/kernels.jl | 2 +- test/CUDApatch.jl | 4 ++-- test/GPUReg.jl | 4 ++-- 6 files changed, 9 insertions(+), 34 deletions(-) diff --git a/Project.toml b/Project.toml index e86f2c4..994b87f 100644 --- a/Project.toml +++ b/Project.toml @@ -1,6 +1,6 @@ name = "CuYao" uuid = "b48ca7a8-dd42-11e8-2b8e-1b7706800275" -version = "0.2.9" +version = "0.3.0" [deps] BitBasis = "50ba71b6-fa0f-514d-ae9a-0916efc90dcf" @@ -16,14 +16,14 @@ Yao = "5872b779-8223-5990-8dd0-5abbb0748c8c" [compat] BitBasis = "0.7" -CUDA = "2.0, 3.0" +CUDA = "3.1" LuxurySparse = "0.6" Reexport = "0.2, 1.0" StaticArrays = "0.12, 1.0" StatsBase = "0.33" TupleTools = "1" Yao = "0.6, 0.7" -julia = "1" +julia = "1.6" [extras] Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" diff --git a/src/CUDApatch.jl b/src/CUDApatch.jl index c42d7d1..c45a13b 100644 --- a/src/CUDApatch.jl +++ b/src/CUDApatch.jl @@ -1,27 +1,3 @@ -#import CUDA: _cuview, ViewIndex, NonContiguous -#using CUDA: genperm -# fallback to SubArray when the view is not contiguous - -#= -function LinearAlgebra.permutedims!(dest::GPUArray, src::GPUArray, perm) where N - perm isa Tuple || (perm = Tuple(perm)) - gpu_call(dest, (dest, src, perm)) do state, dest, src, perm - I = @cartesianidx src state - @inbounds dest[genperm(I, perm)...] = src[I...] - return - end - return dest -end -=# - -import CUDA: pow, abs, angle -for (RT, CT) in [(:Float64, :ComplexF64), (:Float32, :ComplexF32)] - @eval cp2c(d::$RT, a::$RT) = CUDA.Complex(d*CUDA.cos(a), d*CUDA.sin(a)) - for NT in [RT, :Int32] - @eval CUDA.pow(z::$CT, n::$NT) = CUDA.Complex((CUDA.pow(CUDA.abs(z), n)*CUDA.cos(n*CUDA.angle(z))), (CUDA.pow(CUDA.abs(z), n)*CUDA.sin(n*CUDA.angle(z)))) - end -end - @inline function bit_count(x::UInt32) x = ((x >> 1) & 0b01010101010101010101010101010101) + (x & 0b01010101010101010101010101010101) x = ((x >> 2) & 0b00110011001100110011001100110011) + (x & 0b00110011001100110011001100110011) @@ -95,7 +71,7 @@ end Computes Kronecker products in-place on the GPU. The results are stored in 'C', overwriting the existing values of 'C'. """ -function kron!(C::CuArray{T3}, A::DenseCuArray{T1}, B::DenseCuArray{T2}) where {T1, T2, T3} +function Yao.YaoBase.kron!(C::CuArray{T3}, A::DenseCuArray{T1}, B::DenseCuArray{T2}) where {T1, T2, T3} @boundscheck (size(C) == (size(A,1)*size(B,1), size(A,2)*size(B,2))) || throw(DimensionMismatch()) CI = Base.CartesianIndices(C) @inline function kernel(C, A, B) diff --git a/src/CuYao.jl b/src/CuYao.jl index 46bde19..9c776bb 100644 --- a/src/CuYao.jl +++ b/src/CuYao.jl @@ -8,7 +8,6 @@ using Random using Yao.YaoArrayRegister using CUDA -import Yao: kron! @reexport using Yao const Ints = NTuple{<:Any, Int} diff --git a/src/kernels.jl b/src/kernels.jl index fd437da..cd5351a 100644 --- a/src/kernels.jl +++ b/src/kernels.jl @@ -106,7 +106,7 @@ end mask = bmask(Int32, bits...) 1< Array ≈ kron(a, b) - @test kron!(cc, ca, cb) |> Array ≈ kron(a,b) + @test Yao.YaoBase.kron!(cc, ca, cb) |> Array ≈ kron(a,b) - kron!(c,a,b) + Yao.YaoBase.kron!(c,a,b) @test cc |> Array ≈ c v = randn(100) |> cu