11using . CUDA
22import . CUDA: CuDevice, CuContext, devices, attribute
33
4+ using UUIDs
5+
46export CuArrayDeviceProc
57
"""
    CuArrayDeviceProc(owner, device, device_uuid)

Represents a single CUDA GPU device.

# Fields
- `owner::Int`: id of the worker this processor belongs to; `Dagger.get_parent`
  wraps it in a `Dagger.OSProc`.
- `device::Int`: handle of the CUDA device (taken from `dev.handle` at
  registration time); passed to `CUDA.device!` when data must be placed on
  this device.
- `device_uuid::UUID`: UUID reported by `CUDA.uuid` for the device. `Dagger.move`
  compares the UUIDs of the source and destination processors to decide
  whether an array has to be copied onto a different GPU.
"""
struct CuArrayDeviceProc <: Dagger.Processor
    owner::Int
    device::Int
    device_uuid::UUID
end
# Pair the processor type with its array type. The parenthesized form (no space
# before `(`) passes two macro arguments rather than a single tuple expression.
# NOTE(review): the exact glue `@gpuproc` generates is defined elsewhere — confirm there.
@gpuproc(CuArrayDeviceProc, CuArray)
"""
    Dagger.get_parent(proc::CuArrayDeviceProc)

Return the `Dagger.OSProc` for the worker (`proc.owner`) that owns this GPU processor.
"""
Dagger.get_parent(proc::CuArrayDeviceProc) = Dagger.OSProc(proc.owner)
@@ -49,7 +51,15 @@ function Dagger.move(from::CuArrayDeviceProc, to::CuArrayDeviceProc, x::Dagger.C
4951 finalizer (arr) do arr
5052 CUDA. cuIpcCloseMemHandle (pointer (arr))
5153 end
52- # FIXME : Deal with to_proc being a different GPU
54+ if from. device_uuid != to. device_uuid
55+ CUDA. device! (to. device) do
56+ to_arr = similar (arr)
57+ copyto! (to_arr, arr)
58+ to_arr
59+ end
60+ else
61+ arr
62+ end
5363 else
5464 # Different node, use DtoH, serialization, HtoD
5565 # TODO UCX
@@ -68,7 +78,7 @@ function Dagger.execute!(proc::CuArrayDeviceProc, func, args...)
6878 end )
6979end
"""
    Base.show(io::IO, proc::CuArrayDeviceProc)

Print a one-line description of `proc`: owning worker, device handle and device UUID.
"""
function Base.show(io::IO, proc::CuArrayDeviceProc)
    # The literal is split across two arguments only to keep the line short;
    # the printed text is a single uninterrupted line.
    print(io, "CuArrayDeviceProc on worker $(proc.owner), device $(proc.device), ",
          "uuid $(proc.device_uuid)")
end
7282
# Map the `:CUDA` backend tag to the processor type that represents it.
processor(::Val{:CUDA}) = CuArrayDeviceProc
# The `:CUDA` backend is usable exactly when `CUDA.has_cuda()` says so.
cancompute(::Val{:CUDA}) = CUDA.has_cuda()
@@ -77,7 +87,7 @@ kernel_backend(::CuArrayDeviceProc) = CUDADevice()
# Register one Dagger callback per CUDA device returned by `devices()`.
# Each callback builds the processor lazily, so `Distributed.myid()` is evaluated
# on whichever worker runs the callback, not at load time.
if CUDA.has_cuda()
    for dev in devices()
        Dagger.add_callback!() do
            # Arguments follow the struct's field order: owner, device handle, device UUID.
            return CuArrayDeviceProc(Distributed.myid(), dev.handle, CUDA.uuid(dev))
        end
    end
end
0 commit comments