Skip to content

Commit 3052ded

Browse files
committed
Update to Dagger 0.9
Change CuArrays to CUDA, ROCArrays to AMDGPU. Don't let GPU arrays be scheduled on the CPU. Split CuArrayProc into Device and SM processors.
1 parent b1ca6d9 commit 3052ded

10 files changed

Lines changed: 147 additions & 202 deletions

File tree

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1 @@
1+
Manifest.toml

Manifest.toml

Lines changed: 0 additions & 128 deletions
This file was deleted.

Project.toml

Lines changed: 8 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1,22 +1,24 @@
11
name = "DaggerGPU"
22
uuid = "68e73e28-2238-4d5a-bf97-e5d4aa3c4be2"
33
authors = ["Julian P Samaroo <jpsamaroo@jpsamaroo.me>"]
4-
version = "0.1.0"
4+
version = "0.1.1"
55

66
[deps]
7+
Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
78
Dagger = "d58978e5-989f-55fb-8d15-ea34adc7bf54"
9+
Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
810
Requires = "ae029012-a4dd-5104-9daa-d747884805df"
911

1012
[compat]
11-
Dagger = "0.8"
12-
Requires = "1.0"
13+
Dagger = "0.9"
14+
Requires = "1"
1315
julia = "1"
1416

1517
[extras]
16-
CuArrays = "3a865a2d-5b23-5a0f-bc46-62713ec82fae"
18+
AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e"
19+
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
1720
Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
18-
ROCArrays = "ddf941ca-5d6a-11e9-36cc-a3fed13dd2fc"
1921
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
2022

2123
[targets]
22-
test = ["CuArrays", "Distributed", "ROCArrays", "Test"]
24+
test = ["AMDGPU", "CUDA", "Distributed", "Test"]

README.md

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2,10 +2,10 @@
22

33
**GPU integrations for Dagger.jl**
44

5-
DaggerGPU.jl makes use of the `Dagger.Processor` infrastructure to dispatch Dagger kernels to NVIDIA and AMD GPUs, via CuArrays.jl and ROCArrays.jl respectively. Usage is simple: `add` or `dev` DaggerGPU.jl and CuArrays.jl/ROCArrays.jl appropriately, load it with `using DaggerGPU`, and add `DaggerGPU.CuArrayProc`/`DaggerGPU.ROCArrayProc` to your scheduler or thunk options (see Dagger.jl documentation for details on how to do this).
5+
DaggerGPU.jl makes use of the `Dagger.Processor` infrastructure to dispatch Dagger kernels to NVIDIA and AMD GPUs, via CUDA.jl and AMDGPU.jl respectively. Usage is simple: `add` or `dev` DaggerGPU.jl and CUDA.jl/AMDGPU.jl appropriately, load it with `using DaggerGPU`, and add `DaggerGPU.CuArrayProc`/`DaggerGPU.ROCArrayProc` to your scheduler or thunk options (see Dagger.jl documentation for details on how to do this).
66

77
DaggerGPU.jl is still experimental, but we welcome GPU-owning users to try it out and report back on any issues or sharp edges that they encounter. When filing an issue about DaggerGPU.jl, please provide your:
88
- The complete error message and backtrace
99
- Julia version
1010
- GPU vendor and model
11-
- CuArrays/ROCArrays version(s)
11+
- CUDA/AMDGPU version(s)

src/DaggerGPU.jl

Lines changed: 15 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -1,17 +1,20 @@
11
module DaggerGPU
22

3-
using Dagger, Requires
3+
using Dagger, Requires, Adapt
4+
using Distributed
45

56
macro gpuproc(PROC, T)
67
quote
7-
Dagger.iscompatible(proc::$PROC, opts, x::AbstractArray{AT}) where AT =
8-
isbitstype(AT)
9-
Dagger.move(ctx, from_proc::OSProc, to_proc::$PROC, x::AbstractArray) =
10-
$T(x)
11-
Dagger.move(ctx, from_proc::$PROC, to_proc::OSProc, x) = x
12-
Dagger.move(ctx, from_proc::$PROC, to_proc::OSProc, x::$T) =
13-
collect(x)
14-
Dagger.execute!(proc::$PROC, func, args...) = func(args...)
8+
# Assume that we can run anything
9+
Dagger.iscompatible_func(proc::$PROC, opts, f) = true
10+
Dagger.iscompatible_arg(proc::$PROC, opts, x) = true
11+
12+
# CPUs shouldn't process our array type
13+
Dagger.iscompatible_arg(proc::Dagger.ThreadProc, opts, x::$T) = false
14+
15+
# Adapt to/from the appropriate type
16+
Dagger.move(ctx, from_proc::OSProc, to_proc::$PROC, x) = adapt($T, x)
17+
Dagger.move(ctx, from_proc::$PROC, to_proc::OSProc, x) = adapt(Array, x)
1518
end
1619
end
1720

@@ -21,11 +24,11 @@ cancompute(kind::Symbol) = cancompute(Val(kind))
2124
cancompute(::Val) = false
2225

2326
function __init__()
24-
@require CuArrays="3a865a2d-5b23-5a0f-bc46-62713ec82fae" begin
25-
include("cuarrays.jl")
27+
@require CUDA="052768ef-5323-5732-b1bb-66c8b64840ba" begin
28+
include("cu.jl")
2629
end
2730
@require ROCArrays="ddf941ca-5d6a-11e9-36cc-a3fed13dd2fc" begin
28-
include("rocarrays.jl")
31+
include("roc.jl")
2932
end
3033
end
3134

src/cu.jl

Lines changed: 65 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,65 @@
1+
using .CUDA
2+
import .CUDA: CuDevice, CuContext, devices, attribute
3+
4+
export CuArrayProc, CuArrayDeviceProc, CuArraySMProc
5+
6+
"Represents a single CUDA GPU device."
7+
struct CuArrayDeviceProc <: Dagger.Processor
8+
owner::Int
9+
#ctx::CuContext
10+
device::CuDevice
11+
end
12+
@gpuproc(CuArrayDeviceProc, CuArray)
13+
const CuArrayProc = CuArrayDeviceProc
14+
#= FIXME: CUDA IPC
15+
function Dagger.move(ctx, from::CuArrayDeviceProc, to::CuArrayDeviceProc, x)
16+
if from === to
17+
return x
18+
else
19+
error("Not implemented")
20+
end
21+
end
22+
=#
23+
function Dagger.execute!(proc::CuArrayDeviceProc, func, args...)
24+
#CUDA.context!(proc.ctx)
25+
CUDA.@sync func(args...)
26+
end
27+
28+
"Represents a single CUDA GPU Streaming Multiprocessor."
29+
struct CuArraySMProc <: Dagger.Processor
30+
owner::Int
31+
#ctx::CuContext
32+
device::CuDevice
33+
sm::Int
34+
end
35+
@gpuproc(CuArraySMProc, CuArray)
36+
#= FIXME: CUDA IPC
37+
function Dagger.move(ctx, from::CuArraySMProc, to::CuArraySMProc, x)
38+
if from.device === to.device
39+
return x
40+
else
41+
error("Not implemented")
42+
end
43+
end
44+
=#
45+
function Dagger.execute!(proc::CuArraySMProc, func, args...)
46+
#CUDA.context!(proc.ctx)
47+
CUDA.@sync func(args...)
48+
end
49+
50+
processor(::Val{:CUDA}) = CuArrayDeviceProc
51+
cancompute(::Val{:CUDA}) = CUDA.has_cuda()
52+
# TODO: CuArraySMProc
53+
54+
if CUDA.has_cuda()
55+
for dev in devices()
56+
Dagger.add_callback!(proc -> begin
57+
return CuArrayDeviceProc(Distributed.myid(), #=CuContext(dev),=# dev)
58+
end)
59+
for i in 1:attribute(dev, CUDA.CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT)
60+
Dagger.add_callback!(proc -> begin
61+
return CuArraySMProc(Distributed.myid(), #=CuContext(dev),=# dev, i)
62+
end)
63+
end
64+
end
65+
end

src/cuarrays.jl

Lines changed: 0 additions & 17 deletions
This file was deleted.

src/roc.jl

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,18 @@
1+
using .AMDGPU
2+
3+
struct ROCArrayProc <: Dagger.Processor
4+
device
5+
end
6+
7+
@gpuproc(ROCArrayProc, ROCArray)
8+
9+
Dagger.execute!(proc::ROCArrayProc, func, args...) = func(args...)
10+
11+
processor(::Val{:ROC}) = ROCArrayProc
12+
cancompute(::Val{:ROC}) = AMDGPU.configured
13+
14+
if AMDGPU.configured
15+
Dagger.add_callback!(proc -> begin
16+
return ROCArrayProc(AMDGPU.get_default_agent())
17+
end)
18+
end

src/rocarrays.jl

Lines changed: 0 additions & 17 deletions
This file was deleted.

0 commit comments

Comments (0)