Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

KernelAbstractions support #147

Draft
wants to merge 9 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
*.jl.*.cov
*.jl.mem
docs/build
/Manifest.toml
*Manifest.toml
benchmark/tune.json
benchmark/results
.vscode/settings.json
7 changes: 6 additions & 1 deletion Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ Atomix = "a9b6321e-bd34-4604-b9c9-b65b8de01458"
AtomsBase = "a963bdd2-2df7-4f54-a1ee-49d51e6be12a"
AtomsCalculators = "a3e0e189-c65a-42c1-833c-339540406eb1"
BioStructures = "de9282ab-8554-53be-b2d6-f6c222edabfc"
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
CellListMap = "69e1c6dd-3888-40e6-b3c8-31ac5f578864"
ChainRules = "082447d4-558c-5d27-93f4-14fc19e9eca2"
ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
Expand All @@ -21,7 +20,9 @@ Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9"
EzXML = "8f5d6c58-4d21-5cfd-889c-e3ad7ee6a615"
FLoops = "cc61a311-1640-44b5-9fba-1b764f453329"
ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
GPUArrays = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7"
Graphs = "86223c79-3864-5bf0-83f7-82e725a168b6"
KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
KernelDensity = "5ab0869b-81aa-558d-bb23-cbf5423bbe9b"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
NearestNeighbors = "b8a86587-4115-5ab1-83bc-aa920d37bbce"
Expand All @@ -42,10 +43,12 @@ Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
Colors = "5ae59095-9a9b-59fe-a467-6f913c188581"
GLMakie = "e9467ef8-e4e7-5192-8a1a-b1aee30e663a"
PythonCall = "6099a3de-0909-46bc-b1f4-468b9a2dfc0d"
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"

[extensions]
MollyGLMakieExt = ["GLMakie", "Colors"]
MollyPythonCallExt = "PythonCall"
MollyCUDAExt = "CUDA"

[compat]
Atomix = "0.1"
Expand All @@ -67,7 +70,9 @@ EzXML = "1"
FLoops = "0.2"
ForwardDiff = "0.10.35"
GLMakie = "0.8, 0.9, 0.10"
GPUArrays = "10"
Graphs = "1.8"
KernelAbstractions = "0.9"
KernelDensity = "0.5, 0.6"
LinearAlgebra = "1.9"
NearestNeighbors = "0.4"
Expand Down
52 changes: 23 additions & 29 deletions benchmark/benchmarks.jl
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,8 @@ const starting_velocities = [random_velocity(atom_mass, 1.0u"K") for i in 1:n_at
const starting_coords_f32 = [Float32.(c) for c in starting_coords]
const starting_velocities_f32 = [Float32.(c) for c in starting_velocities]

function test_sim(nl::Bool, parallel::Bool, f32::Bool, gpu::Bool)
function test_sim(nl::Bool, parallel::Bool, f32::Bool,
ArrayType::Type{AT}) where AT <: AbstractArray
n_atoms = 400
n_steps = 200
atom_mass = f32 ? 10.0f0u"g/mol" : 10.0u"g/mol"
Expand All @@ -72,34 +73,27 @@ function test_sim(nl::Bool, parallel::Bool, f32::Bool, gpu::Bool)
r0 = f32 ? 0.2f0u"nm" : 0.2u"nm"
bonds = [HarmonicBond(k=k, r0=r0) for i in 1:(n_atoms ÷ 2)]
specific_inter_lists = (InteractionList2Atoms(
gpu ? CuArray(Int32.(collect(1:2:n_atoms))) : Int32.(collect(1:2:n_atoms)),
gpu ? CuArray(Int32.(collect(2:2:n_atoms))) : Int32.(collect(2:2:n_atoms)),
gpu ? CuArray(bonds) : bonds,
ArrayType(Int32.(collect(1:2:n_atoms))),
ArrayType(Int32.(collect(2:2:n_atoms))),
ArrayType(bonds),
),)

neighbor_finder = NoNeighborFinder()
cutoff = DistanceCutoff(f32 ? 1.0f0u"nm" : 1.0u"nm")
pairwise_inters = (LennardJones(use_neighbors=false, cutoff=cutoff),)
if nl
neighbor_finder = DistanceNeighborFinder(
eligible=gpu ? CuArray(trues(n_atoms, n_atoms)) : trues(n_atoms, n_atoms),
eligible=ArrayType(trues(n_atoms, n_atoms)),
n_steps=10,
dist_cutoff=f32 ? 1.5f0u"nm" : 1.5u"nm",
)
pairwise_inters = (LennardJones(use_neighbors=true, cutoff=cutoff),)
end

if gpu
coords = CuArray(deepcopy(f32 ? starting_coords_f32 : starting_coords))
velocities = CuArray(deepcopy(f32 ? starting_velocities_f32 : starting_velocities))
atoms = CuArray([Atom(charge=f32 ? 0.0f0 : 0.0, mass=atom_mass, σ=f32 ? 0.2f0u"nm" : 0.2u"nm",
ϵ=f32 ? 0.2f0u"kJ * mol^-1" : 0.2u"kJ * mol^-1") for i in 1:n_atoms])
else
coords = deepcopy(f32 ? starting_coords_f32 : starting_coords)
velocities = deepcopy(f32 ? starting_velocities_f32 : starting_velocities)
atoms = [Atom(charge=f32 ? 0.0f0 : 0.0, mass=atom_mass, σ=f32 ? 0.2f0u"nm" : 0.2u"nm",
ϵ=f32 ? 0.2f0u"kJ * mol^-1" : 0.2u"kJ * mol^-1") for i in 1:n_atoms]
end
coords = ArrayType(deepcopy(f32 ? starting_coords_f32 : starting_coords))
velocities = ArrayType(deepcopy(f32 ? starting_velocities_f32 : starting_velocities))
atoms = ArrayType([Atom(charge=f32 ? 0.0f0 : 0.0, mass=atom_mass, σ=f32 ? 0.2f0u"nm" : 0.2u"nm",
ϵ=f32 ? 0.2f0u"kJ * mol^-1" : 0.2u"kJ * mol^-1") for i in 1:n_atoms])

sys = System(
atoms=atoms,
Expand All @@ -117,22 +111,22 @@ function test_sim(nl::Bool, parallel::Bool, f32::Bool, gpu::Bool)
end

runs = [
("CPU" , [false, false, false, false]),
("CPU f32" , [false, false, true , false]),
("CPU NL" , [true , false, false, false]),
("CPU f32 NL", [true , false, true , false]),
("CPU" , [false, false, false, Array]),
("CPU f32" , [false, false, true , Array]),
("CPU NL" , [true , false, false, Array]),
("CPU f32 NL", [true , false, true , Array]),
]
if run_parallel_tests
push!(runs, ("CPU parallel" , [false, true , false, false]))
push!(runs, ("CPU parallel f32" , [false, true , true , false]))
push!(runs, ("CPU parallel NL" , [true , true , false, false]))
push!(runs, ("CPU parallel f32 NL", [true , true , true , false]))
push!(runs, ("CPU parallel" , [false, true , false, Array]))
push!(runs, ("CPU parallel f32" , [false, true , true , Array]))
push!(runs, ("CPU parallel NL" , [true , true , false, Array]))
push!(runs, ("CPU parallel f32 NL", [true , true , true , Array]))
end
if run_gpu_tests
push!(runs, ("GPU" , [false, false, false, true]))
push!(runs, ("GPU f32" , [false, false, true , true]))
push!(runs, ("GPU NL" , [true , false, false, true]))
push!(runs, ("GPU f32 NL", [true , false, true , true]))
if run_cuda_tests
push!(runs, ("GPU" , [false, false, false, CuArray]))
push!(runs, ("GPU f32" , [false, false, true , CuArray]))
push!(runs, ("GPU NL" , [true , false, false, CuArray]))
push!(runs, ("GPU f32 NL", [true , false, true , CuArray]))
end

for (name, args) in runs
Expand Down
22 changes: 11 additions & 11 deletions benchmark/protein.jl
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ const data_dir = normpath(dirname(pathof(Molly)), "..", "data")
const ff_dir = joinpath(data_dir, "force_fields")
const openmm_dir = joinpath(data_dir, "openmm_6mrr")

function setup_system(gpu::Bool, f32::Bool, units::Bool)
function setup_system(ArrayType::AbstractArray, f32::Bool, units::Bool)
T = f32 ? Float32 : Float64
ff = MolecularForceField(
T,
Expand All @@ -27,7 +27,7 @@ function setup_system(gpu::Bool, f32::Bool, units::Bool)
sys = System(
joinpath(data_dir, "6mrr_equil.pdb"),
ff;
velocities=gpu ? CuArray(velocities) : velocities,
velocities=ArrayType(velocities),
units=units,
gpu=gpu,
dist_cutoff=(units ? dist_cutoff * u"nm" : dist_cutoff),
Expand All @@ -42,15 +42,15 @@ end

runs = [
# run_name gpu parr f32 units
("CPU 1 thread" , false, false, false, true ),
("CPU 1 thread f32" , false, false, true , true ),
("CPU 1 thread f32 nounits" , false, false, true , false),
("CPU $n_threads threads" , false, true , false, true ),
("CPU $n_threads threads f32" , false, true , true , true ),
("CPU $n_threads threads f32 nounits", false, true , true , false),
("GPU" , true , false, false, true ),
("GPU f32" , true , false, true , true ),
("GPU f32 nounits" , true , false, true , false),
("CPU 1 thread" , Array, false, false, true ),
("CPU 1 thread f32" , Array, false, true , true ),
("CPU 1 thread f32 nounits" , Array, false, true , false),
("CPU $n_threads threads" , Array, true , false, true ),
("CPU $n_threads threads f32" , Array, true , true , true ),
("CPU $n_threads threads f32 nounits", Array, true , true , false),
("GPU" , CuArray, false, false, true ),
("GPU f32" , CuArray, false, true , true ),
("GPU f32 nounits" , CuArray, false, true , false),
]

for (run_name, gpu, parallel, f32, units) in runs
Expand Down
38 changes: 27 additions & 11 deletions src/cuda.jl → ext/MollyCUDAExt.jl
Original file line number Diff line number Diff line change
@@ -1,3 +1,12 @@
module MollyCUDAExt

using Molly
using CUDA
using ChainRulesCore
using Atomix

# Applying `CUDA.Const` to a `Molly.NoNeighborList` hands the same object back unchanged.
function CUDA.Const(nl::Molly.NoNeighborList)
    return nl
end

Check warning on line 8 in ext/MollyCUDAExt.jl

View check run for this annotation

Codecov / codecov/patch

ext/MollyCUDAExt.jl#L8

Added line #L8 was not covered by tests

# CUDA.jl kernels
const WARPSIZE = UInt32(32)

Expand Down Expand Up @@ -29,7 +38,7 @@
return n_threads_gpu, n_blocks
end

function pairwise_force_gpu(coords::AbstractArray{SVector{D, C}}, atoms, boundary,
function pairwise_force_gpu(coords::CuArray{SVector{D, C}}, atoms, boundary,

Check warning on line 41 in ext/MollyCUDAExt.jl

View check run for this annotation

Codecov / codecov/patch

ext/MollyCUDAExt.jl#L41

Added line #L41 was not covered by tests
pairwise_inters, nbs, force_units, ::Val{T}) where {D, C, T}
fs_mat = CUDA.zeros(T, D, length(atoms))

Expand Down Expand Up @@ -112,7 +121,7 @@
h | 1 2 3 4 5 6
```
=#
function pairwise_force_kernel_nonl!(forces::AbstractArray{T}, coords_var, atoms_var, boundary, inters,
function pairwise_force_kernel_nonl!(forces::CuArray{T}, coords_var, atoms_var, boundary, inters,

Check warning on line 124 in ext/MollyCUDAExt.jl

View check run for this annotation

Codecov / codecov/patch

ext/MollyCUDAExt.jl#L124

Added line #L124 was not covered by tests
::Val{D}, ::Val{F}) where {T, D, F}
coords = CUDA.Const(coords_var)
atoms = CUDA.Const(atoms_var)
Expand Down Expand Up @@ -193,7 +202,7 @@
return f
end

function specific_force_gpu(inter_list::InteractionList1Atoms, coords::AbstractArray{SVector{D, C}},
function specific_force_gpu(inter_list::InteractionList1Atoms, coords::CuArray{SVector{D, C}},

Check warning on line 205 in ext/MollyCUDAExt.jl

View check run for this annotation

Codecov / codecov/patch

ext/MollyCUDAExt.jl#L205

Added line #L205 was not covered by tests
boundary, force_units, ::Val{T}) where {D, C, T}
fs_mat = CUDA.zeros(T, D, length(coords))
n_threads_gpu, n_blocks = cuda_threads_blocks_specific(length(inter_list))
Expand All @@ -202,7 +211,7 @@
return fs_mat
end

function specific_force_gpu(inter_list::InteractionList2Atoms, coords::AbstractArray{SVector{D, C}},
function specific_force_gpu(inter_list::InteractionList2Atoms, coords::CuArray{SVector{D, C}},

Check warning on line 214 in ext/MollyCUDAExt.jl

View check run for this annotation

Codecov / codecov/patch

ext/MollyCUDAExt.jl#L214

Added line #L214 was not covered by tests
boundary, force_units, ::Val{T}) where {D, C, T}
fs_mat = CUDA.zeros(T, D, length(coords))
n_threads_gpu, n_blocks = cuda_threads_blocks_specific(length(inter_list))
Expand All @@ -212,7 +221,7 @@
return fs_mat
end

function specific_force_gpu(inter_list::InteractionList3Atoms, coords::AbstractArray{SVector{D, C}},
function specific_force_gpu(inter_list::InteractionList3Atoms, coords::CuArray{SVector{D, C}},

Check warning on line 224 in ext/MollyCUDAExt.jl

View check run for this annotation

Codecov / codecov/patch

ext/MollyCUDAExt.jl#L224

Added line #L224 was not covered by tests
boundary, force_units, ::Val{T}) where {D, C, T}
fs_mat = CUDA.zeros(T, D, length(coords))
n_threads_gpu, n_blocks = cuda_threads_blocks_specific(length(inter_list))
Expand All @@ -222,7 +231,7 @@
return fs_mat
end

function specific_force_gpu(inter_list::InteractionList4Atoms, coords::AbstractArray{SVector{D, C}},
function specific_force_gpu(inter_list::InteractionList4Atoms, coords::CuArray{SVector{D, C}},

Check warning on line 234 in ext/MollyCUDAExt.jl

View check run for this annotation

Codecov / codecov/patch

ext/MollyCUDAExt.jl#L234

Added line #L234 was not covered by tests
boundary, force_units, ::Val{T}) where {D, C, T}
fs_mat = CUDA.zeros(T, D, length(coords))
n_threads_gpu, n_blocks = cuda_threads_blocks_specific(length(inter_list))
Expand Down Expand Up @@ -328,7 +337,7 @@
return nothing
end

function pairwise_pe_gpu(coords::AbstractArray{SVector{D, C}}, atoms, boundary,
function pairwise_pe_gpu(coords::CuArray{SVector{D, C}}, atoms, boundary,

Check warning on line 340 in ext/MollyCUDAExt.jl

View check run for this annotation

Codecov / codecov/patch

ext/MollyCUDAExt.jl#L340

Added line #L340 was not covered by tests
pairwise_inters, nbs, energy_units, ::Val{T}) where {D, C, T}
pe_vec = CUDA.zeros(T, 1)
n_threads_gpu, n_blocks = cuda_threads_blocks_pairwise(length(nbs))
Expand Down Expand Up @@ -363,7 +372,7 @@
return nothing
end

function specific_pe_gpu(inter_list::InteractionList1Atoms, coords::AbstractArray{SVector{D, C}},
function specific_pe_gpu(inter_list::InteractionList1Atoms, coords::CuArray{SVector{D, C}},

Check warning on line 375 in ext/MollyCUDAExt.jl

View check run for this annotation

Codecov / codecov/patch

ext/MollyCUDAExt.jl#L375

Added line #L375 was not covered by tests
boundary, energy_units, ::Val{T}) where {D, C, T}
pe_vec = CUDA.zeros(T, 1)
n_threads_gpu, n_blocks = cuda_threads_blocks_specific(length(inter_list))
Expand All @@ -372,7 +381,7 @@
return pe_vec
end

function specific_pe_gpu(inter_list::InteractionList2Atoms, coords::AbstractArray{SVector{D, C}},
function specific_pe_gpu(inter_list::InteractionList2Atoms, coords::CuArray{SVector{D, C}},

Check warning on line 384 in ext/MollyCUDAExt.jl

View check run for this annotation

Codecov / codecov/patch

ext/MollyCUDAExt.jl#L384

Added line #L384 was not covered by tests
boundary, energy_units, ::Val{T}) where {D, C, T}
pe_vec = CUDA.zeros(T, 1)
n_threads_gpu, n_blocks = cuda_threads_blocks_specific(length(inter_list))
Expand All @@ -381,7 +390,7 @@
return pe_vec
end

function specific_pe_gpu(inter_list::InteractionList3Atoms, coords::AbstractArray{SVector{D, C}},
function specific_pe_gpu(inter_list::InteractionList3Atoms, coords::CuArray{SVector{D, C}},

Check warning on line 393 in ext/MollyCUDAExt.jl

View check run for this annotation

Codecov / codecov/patch

ext/MollyCUDAExt.jl#L393

Added line #L393 was not covered by tests
boundary, energy_units, ::Val{T}) where {D, C, T}
pe_vec = CUDA.zeros(T, 1)
n_threads_gpu, n_blocks = cuda_threads_blocks_specific(length(inter_list))
Expand All @@ -391,7 +400,7 @@
return pe_vec
end

function specific_pe_gpu(inter_list::InteractionList4Atoms, coords::AbstractArray{SVector{D, C}},
function specific_pe_gpu(inter_list::InteractionList4Atoms, coords::CuArray{SVector{D, C}},

Check warning on line 403 in ext/MollyCUDAExt.jl

View check run for this annotation

Codecov / codecov/patch

ext/MollyCUDAExt.jl#L403

Added line #L403 was not covered by tests
boundary, energy_units, ::Val{T}) where {D, C, T}
pe_vec = CUDA.zeros(T, 1)
n_threads_gpu, n_blocks = cuda_threads_blocks_specific(length(inter_list))
Expand Down Expand Up @@ -482,3 +491,10 @@
end
return nothing
end

# CUDA specific calls for Molly
@non_differentiable CUDA.zeros(args...)
@non_differentiable cuda_threads_blocks_pairwise(args...)
@non_differentiable cuda_threads_blocks_specific(args...)

end
3 changes: 1 addition & 2 deletions ext/MollyGLMakieExt.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@

using Molly
using GLMakie
using Colors
using Unitful

using LinearAlgebra
Expand Down Expand Up @@ -95,7 +94,7 @@
push!(trail_positions, Observable(PointType.(ustrip_vec.(coords_start))))
col = parse.(Colorant, color)
alpha = 1 - (trail_i / (trails + 1))
alpha_col = RGBA.(red.(col), green.(col), blue.(col), alpha)
alpha_col = GLMakie.RGBAf.(red.(col), green.(col), blue.(col), alpha)

Check warning on line 97 in ext/MollyGLMakieExt.jl

View check run for this annotation

Codecov / codecov/patch

ext/MollyGLMakieExt.jl#L97

Added line #L97 was not covered by tests
scatter!(ax, trail_positions[end]; color=alpha_col, markersize=markersize,
transparency=transparency, markerspace=:data, kwargs...)
end
Expand Down
6 changes: 3 additions & 3 deletions ext/MollyPythonCallExt.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
using Molly
using PythonCall
import AtomsCalculators
using CUDA
using GPUArrays
using StaticArrays
using Unitful

Expand Down Expand Up @@ -91,7 +91,7 @@

# Broadcast `uconvert` element-wise over the supplied arguments.
function uconvert_vec(args...)
    return uconvert.(args...)
end

function AtomsCalculators.forces(sys::System{D, G, T},
function AtomsCalculators.forces(sys::System{D, AT, T},

Check warning on line 94 in ext/MollyPythonCallExt.jl

View check run for this annotation

Codecov / codecov/patch

ext/MollyPythonCallExt.jl#L94

Added line #L94 was not covered by tests
ase_calc::ASECalculator;
kwargs...) where {D, G, T}
update_ase_calc!(ase_calc, sys)
Expand All @@ -105,7 +105,7 @@
else
fs_unit = uconvert_vec.(sys.force_units, fs * u"eV/Å")
end
return G ? CuArray(fs_unit) : fs_unit
return AT <: AbstractGPUArray ? AT(fs_unit) : fs_unit

Check warning on line 108 in ext/MollyPythonCallExt.jl

View check run for this annotation

Codecov / codecov/patch

ext/MollyPythonCallExt.jl#L108

Added line #L108 was not covered by tests
end

function AtomsCalculators.potential_energy(sys::System{D, G, T},
Expand Down
5 changes: 3 additions & 2 deletions src/Molly.jl
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@ using ChainRules
using ChainRulesCore
import Chemfiles
using Combinatorics
using CUDA
using KernelAbstractions
using GPUArrays
using DataStructures
using Distances
using Distributions
Expand Down Expand Up @@ -41,7 +42,7 @@ include("types.jl")
include("units.jl")
include("spatial.jl")
include("cutoffs.jl")
include("cuda.jl")
include("kernels.jl")
include("force.jl")
include("interactions/lennard_jones.jl")
include("interactions/soft_sphere.jl")
Expand Down
2 changes: 1 addition & 1 deletion src/analysis.jl
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@
function hydrodynamic_radius(coords::AbstractArray{SVector{D, T}}, boundary) where {D, T}
n_atoms = length(coords)
diag_cpu = Diagonal(ones(T, n_atoms))
diag = isa(coords, CuArray) ? CuArray(diag_cpu) : diag_cpu
diag = isa(coords, AbstractGPUArray) ? get_array_type(coords)((diag_cpu)) : diag_cpu

Check warning on line 165 in src/analysis.jl

View check run for this annotation

Codecov / codecov/patch

src/analysis.jl#L165

Added line #L165 was not covered by tests
# Other approaches to removing the diagonal Inf didn't work with Zygote
dists = distances(coords, boundary) .+ diag
sum_inv_dists = sum(inv.(dists)) - sum(inv(diag))
Expand Down
Loading
Loading