Skip to content

Commit 6e09958

Browse files
authored
Merge pull request #3 from JuliaGPU/jps/dagger-0.9
Update to Dagger 0.9
2 parents b1ca6d9 + 3052ded commit 6e09958

10 files changed

Lines changed: 147 additions & 202 deletions

File tree

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Manifest.toml

Manifest.toml

Lines changed: 0 additions & 128 deletions
This file was deleted.

Project.toml

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,24 @@
11
name = "DaggerGPU"
22
uuid = "68e73e28-2238-4d5a-bf97-e5d4aa3c4be2"
33
authors = ["Julian P Samaroo <jpsamaroo@jpsamaroo.me>"]
4-
version = "0.1.0"
4+
version = "0.1.1"
55

66
[deps]
7+
Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
78
Dagger = "d58978e5-989f-55fb-8d15-ea34adc7bf54"
9+
Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
810
Requires = "ae029012-a4dd-5104-9daa-d747884805df"
911

1012
[compat]
11-
Dagger = "0.8"
12-
Requires = "1.0"
13+
Dagger = "0.9"
14+
Requires = "1"
1315
julia = "1"
1416

1517
[extras]
16-
CuArrays = "3a865a2d-5b23-5a0f-bc46-62713ec82fae"
18+
AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e"
19+
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
1720
Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
18-
ROCArrays = "ddf941ca-5d6a-11e9-36cc-a3fed13dd2fc"
1921
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
2022

2123
[targets]
22-
test = ["CuArrays", "Distributed", "ROCArrays", "Test"]
24+
test = ["AMDGPU", "CUDA", "Distributed", "Test"]

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,10 @@
22

33
**GPU integrations for Dagger.jl**
44

5-
DaggerGPU.jl makes use of the `Dagger.Processor` infrastructure to dispatch Dagger kernels to NVIDIA and AMD GPUs, via CuArrays.jl and ROCArrays.jl respectively. Usage is simple: `add` or `dev` DaggerGPU.jl and CuArrays.jl/ROCArrays.jl appropriately, load it with `using DaggerGPU`, and add `DaggerGPU.CuArrayProc`/`DaggerGPU.ROCArrayProc` to your scheduler or thunk options (see Dagger.jl documentation for details on how to do this).
5+
DaggerGPU.jl makes use of the `Dagger.Processor` infrastructure to dispatch Dagger kernels to NVIDIA and AMD GPUs, via CUDA.jl and AMDGPU.jl respectively. Usage is simple: `add` or `dev` DaggerGPU.jl and CUDA.jl/AMDGPU.jl appropriately, load it with `using DaggerGPU`, and add `DaggerGPU.CuArrayProc`/`DaggerGPU.ROCArrayProc` to your scheduler or thunk options (see Dagger.jl documentation for details on how to do this).
66

77
DaggerGPU.jl is still experimental, but we welcome GPU-owning users to try it out and report back on any issues or sharp edges that they encounter. When filing an issue about DaggerGPU.jl, please provide your:
88
- The complete error message and backtrace
99
- Julia version
1010
- GPU vendor and model
11-
- CuArrays/ROCArrays version(s)
11+
- CUDA/AMDGPU version(s)

src/DaggerGPU.jl

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,20 @@
11
module DaggerGPU
22

3-
using Dagger, Requires
3+
using Dagger, Requires, Adapt
4+
using Distributed
45

56
macro gpuproc(PROC, T)
67
quote
7-
Dagger.iscompatible(proc::$PROC, opts, x::AbstractArray{AT}) where AT =
8-
isbitstype(AT)
9-
Dagger.move(ctx, from_proc::OSProc, to_proc::$PROC, x::AbstractArray) =
10-
$T(x)
11-
Dagger.move(ctx, from_proc::$PROC, to_proc::OSProc, x) = x
12-
Dagger.move(ctx, from_proc::$PROC, to_proc::OSProc, x::$T) =
13-
collect(x)
14-
Dagger.execute!(proc::$PROC, func, args...) = func(args...)
8+
# Assume that we can run anything
9+
Dagger.iscompatible_func(proc::$PROC, opts, f) = true
10+
Dagger.iscompatible_arg(proc::$PROC, opts, x) = true
11+
12+
# CPUs shouldn't process our array type
13+
Dagger.iscompatible_arg(proc::Dagger.ThreadProc, opts, x::$T) = false
14+
15+
# Adapt to/from the appropriate type
16+
Dagger.move(ctx, from_proc::OSProc, to_proc::$PROC, x) = adapt($T, x)
17+
Dagger.move(ctx, from_proc::$PROC, to_proc::OSProc, x) = adapt(Array, x)
1518
end
1619
end
1720

@@ -21,11 +24,11 @@ cancompute(kind::Symbol) = cancompute(Val(kind))
2124
cancompute(::Val) = false
2225

2326
function __init__()
24-
@require CuArrays="3a865a2d-5b23-5a0f-bc46-62713ec82fae" begin
25-
include("cuarrays.jl")
27+
@require CUDA="052768ef-5323-5732-b1bb-66c8b64840ba" begin
28+
include("cu.jl")
2629
end
2730
@require ROCArrays="ddf941ca-5d6a-11e9-36cc-a3fed13dd2fc" begin
28-
include("rocarrays.jl")
31+
include("roc.jl")
2932
end
3033
end
3134

src/cu.jl

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
using .CUDA
2+
import .CUDA: CuDevice, CuContext, devices, attribute
3+
4+
export CuArrayProc, CuArrayDeviceProc, CuArraySMProc
5+
6+
"Represents a single CUDA GPU device."
7+
struct CuArrayDeviceProc <: Dagger.Processor
8+
owner::Int
9+
#ctx::CuContext
10+
device::CuDevice
11+
end
12+
@gpuproc(CuArrayDeviceProc, CuArray)
13+
const CuArrayProc = CuArrayDeviceProc
14+
#= FIXME: CUDA IPC
15+
function Dagger.move(ctx, from::CuArrayDeviceProc, to::CuArrayDeviceProc, x)
16+
if from === to
17+
return x
18+
else
19+
error("Not implemented")
20+
end
21+
end
22+
=#
23+
function Dagger.execute!(proc::CuArrayDeviceProc, func, args...)
24+
#CUDA.context!(proc.ctx)
25+
CUDA.@sync func(args...)
26+
end
27+
28+
"Represents a single CUDA GPU Streaming Multiprocessor."
29+
struct CuArraySMProc <: Dagger.Processor
30+
owner::Int
31+
#ctx::CuContext
32+
device::CuDevice
33+
sm::Int
34+
end
35+
@gpuproc(CuArraySMProc, CuArray)
36+
#= FIXME: CUDA IPC
37+
function Dagger.move(ctx, from::CuArraySMProc, to::CuArraySMProc, x)
38+
if from.device === to.device
39+
return x
40+
else
41+
error("Not implemented")
42+
end
43+
end
44+
=#
45+
function Dagger.execute!(proc::CuArraySMProc, func, args...)
46+
#CUDA.context!(proc.ctx)
47+
CUDA.@sync func(args...)
48+
end
49+
50+
processor(::Val{:CUDA}) = CuArrayDeviceProc
51+
cancompute(::Val{:CUDA}) = CUDA.has_cuda()
52+
# TODO: CuArraySMProc
53+
54+
if CUDA.has_cuda()
55+
for dev in devices()
56+
Dagger.add_callback!(proc -> begin
57+
return CuArrayDeviceProc(Distributed.myid(), #=CuContext(dev),=# dev)
58+
end)
59+
for i in 1:attribute(dev, CUDA.CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT)
60+
Dagger.add_callback!(proc -> begin
61+
return CuArraySMProc(Distributed.myid(), #=CuContext(dev),=# dev, i)
62+
end)
63+
end
64+
end
65+
end

src/cuarrays.jl

Lines changed: 0 additions & 17 deletions
This file was deleted.

src/roc.jl

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
using .AMDGPU
2+
3+
struct ROCArrayProc <: Dagger.Processor
4+
device
5+
end
6+
7+
@gpuproc(ROCArrayProc, ROCArray)
8+
9+
Dagger.execute!(proc::ROCArrayProc, func, args...) = func(args...)
10+
11+
processor(::Val{:ROC}) = ROCArrayProc
12+
cancompute(::Val{:ROC}) = AMDGPU.configured
13+
14+
if AMDGPU.configured
15+
Dagger.add_callback!(proc -> begin
16+
return ROCArrayProc(AMDGPU.get_default_agent())
17+
end)
18+
end

src/rocarrays.jl

Lines changed: 0 additions & 17 deletions
This file was deleted.

0 commit comments

Comments (0)