@@ -201,12 +201,12 @@ def __init__(self, args):
201201 def _init_gpu_cache (self , args ):
202202
203203 if not args .create_cache_tensor :
204- logger .info (" Waiting for runners to create kv cache." )
204+ logger .info (f"[rank { self . rank } / { self . n_ranks } ] Waiting for runners to create kv cache." )
205205 while self .cache_ready_signal .value [self .rank ] != 1 :
206- time .sleep (1 )
207- logger .info (" OK! Stop waiting." )
206+ time .sleep (0.1 )
207+ logger .info (f"[rank { self . rank } / { self . n_ranks } ] OK! Stop waiting." )
208208
209- logger .info (" Initializing kv cache for all layers." )
209+ logger .info (f"[rank { self . rank } / { self . n_ranks } ] Initializing kv cache for all layers." )
210210 paddle .set_device (f"gpu:{ self .device } " )
211211 for i in range (args .num_layers + self .num_extra_layers ):
212212 num_gpu_blocks = args .num_gpu_blocks if i < args .num_layers else self .num_extra_layer_gpu_blocks
@@ -215,13 +215,13 @@ def _init_gpu_cache(self, args):
215215 val_name = f"value_caches_{ i } _rank{ self .rank } .device{ self .device } "
216216
217217 if args .create_cache_tensor :
218- logger .info (f"..creating kv cache for layer { i } : { cache_shape } " )
218+ logger .info (f"[rank { self . rank } / { self . n_ranks } ] ..creating kv cache for layer { i } : { cache_shape } " )
219219 key_cache = paddle .full (shape = cache_shape , fill_value = 0 , dtype = args .cache_dtype )
220220 val_cache = paddle .full (shape = cache_shape , fill_value = 0 , dtype = args .cache_dtype )
221221 set_data_ipc (key_cache , key_name )
222222 set_data_ipc (val_cache , val_name )
223223 else :
224- logger .info (f"..attaching kv cache for layer { i } : { cache_shape } " )
224+ logger .info (f"[rank { self . rank } / { self . n_ranks } ] ..attaching kv cache for layer { i } : { cache_shape } " )
225225 key_cache = paddle .empty (shape = [], dtype = args .cache_dtype )
226226 val_cache = paddle .empty (shape = [], dtype = args .cache_dtype )
227227 key_cache = share_external_data (key_cache , key_name , cache_shape )
@@ -233,33 +233,37 @@ def _init_gpu_cache(self, args):
233233 self .gpu_cache_v_tensors .append (self .gpu_cache_kvs [val_name ])
234234
235235 if args .create_cache_tensor :
236- logger .info ("✅ kv cache is ready!" )
236+ logger .info (f"[rank {self.rank}/{self.n_ranks}] ✅ kv cache is ready!" )
237237 self .cache_ready_signal .value [self .rank ] = 1
238238
239239 cache_kv_size_byte = sum ([tmp .numel () * 1 for key , tmp in self .gpu_cache_kvs .items ()])
240- logger .info (f"device :{ self .device } " )
241- logger .info (f"cache_kv_size_byte : { cache_kv_size_byte } " )
242- logger .info (f"done init cache (full) gmem alloc : { paddle .device .cuda .memory_allocated ()} " )
240+ logger .info (f"[rank { self .rank } /{ self .n_ranks } ] device :{ self .device } " )
241+ logger .info (f"[rank { self .rank } /{ self .n_ranks } ] cache_kv_size_byte : { cache_kv_size_byte } " )
242+ logger .info (
243+ f"[rank { self .rank } /{ self .n_ranks } ] done init cache (full) gmem alloc : { paddle .device .cuda .memory_allocated ()} "
244+ )
243245
def _init_cpu_cache(self, args):
    """
    Allocate the pinned-host swap space ("cpu cache") for all layers.

    For each of ``args.num_layers + self.num_extra_layers`` layers, allocates
    two host buffers (key and value) of
    ``args.num_cpu_blocks * args.bytes_per_layer_per_block`` bytes each via
    ``cuda_host_alloc``, stores the returned pointers in
    ``self.cpu_cache_kvs`` and mirrors them into ``self.k_dst_ptrs`` /
    ``self.v_dst_ptrs`` for later swap transfers. When done (or when no swap
    space is configured), marks this rank ready in
    ``self.swap_space_ready_signal``.

    Args:
        args: configuration namespace; this method reads ``num_cpu_blocks``,
            ``num_layers`` and ``bytes_per_layer_per_block``.
    """
    # No swap space configured: nothing to allocate, but the rank must still
    # report readiness so peers waiting on the signal do not block forever.
    if args.num_cpu_blocks == 0:
        logger.info(f"[rank {self.rank}/{self.n_ranks}] 💡 no swap space (cpu cache) is specified.")
        self.swap_space_ready_signal.value[self.rank] = 1
        return

    logger.info(f"[rank {self.rank}/{self.n_ranks}] Initializing swap space (cpu cache) for all layers.")
    paddle.set_device("cpu")
    self.k_dst_ptrs = []
    self.v_dst_ptrs = []

    # Per-layer allocation size is loop-invariant; compute it once.
    need_to_allocate_bytes = args.num_cpu_blocks * args.bytes_per_layer_per_block
    for i in range(args.num_layers + self.num_extra_layers):
        key_name = f"key_caches_{i}_rank{self.rank}"
        val_name = f"value_caches_{i}_rank{self.rank}"
        logger.info(
            f"[rank {self.rank}/{self.n_ranks}] ..creating cpu cache for layer {i}: "
            f"{2 * need_to_allocate_bytes / 1024 ** 3:.2f}GB"
        )
        self.cpu_cache_kvs[key_name] = cuda_host_alloc(need_to_allocate_bytes)
        self.k_dst_ptrs.append(self.cpu_cache_kvs[key_name])
        self.cpu_cache_kvs[val_name] = cuda_host_alloc(need_to_allocate_bytes)
        self.v_dst_ptrs.append(self.cpu_cache_kvs[val_name])

    logger.info(f"[rank {self.rank}/{self.n_ranks}] ✅ swap space (cpu cache) is ready!")
    self.swap_space_ready_signal.value[self.rank] = 1
264268
265269 def _do_swap_to_cpu_task (
@@ -473,6 +477,10 @@ def clear_or_update_caches(self, args):
473477 while True :
474478 if kv_cache_status_signal .value [0 ] == KVCacheStatus .CLEARING :
475479 try :
480+ logger .info (
481+ f"[rank { self .rank } /{ self .n_ranks } ] Start clearing caches { self .cache_ready_signal .value } "
482+ )
483+ # clear cpu caches
476484 if envs .FD_ENABLE_SWAP_SPACE_CLEARING :
477485 paddle .set_device ("cpu" )
478486 for ptrs in self .k_dst_ptrs + self .v_dst_ptrs :
@@ -486,37 +494,58 @@ def clear_or_update_caches(self, args):
486494 while np .sum (self .swap_space_ready_signal .value ) != 0 :
487495 time .sleep (0.1 )
488496
497+ # clear gpu caches
489498 paddle .set_device (f"gpu:{ self .device } " )
490499 for name , tensor in self .gpu_cache_kvs .items ():
491500 unset_data_ipc (tensor , name , True , False )
492501 self .gpu_cache_kvs .clear ()
493502 self .gpu_cache_k_tensors .clear ()
494503 self .gpu_cache_v_tensors .clear ()
504+
495505 # reset cache_ready_signal
496506 self .cache_ready_signal .value [self .rank ] = 0
497- if np .sum (self .cache_ready_signal .value ) == 0 :
507+ logger .info (
508+ f"[rank { self .rank } /{ self .n_ranks } ] Finish clearing caches { self .cache_ready_signal .value } "
509+ )
510+
511+ # wait for all ranks caches to be cleared
512+ while np .sum (self .cache_ready_signal .value ) != 0 :
498513 time .sleep (0.1 )
499514
515+ # reset kv_cache_status_signal
500516 kv_cache_status_signal .value [0 ] = KVCacheStatus .CLEARED
517+ logger .info ("All ranks finish clearing caches" )
501518
502519 except Exception as e :
503- logger .error (f"Failed to clear caches: { e } " )
520+ logger .error (f"[rank { self . rank } / { self . n_ranks } ] Failed to clear caches: { e } " )
504521
505522 elif kv_cache_status_signal .value [0 ] == KVCacheStatus .UPDATING :
506523 try :
524+ logger .info (
525+ f"[rank { self .rank } /{ self .n_ranks } ] Start restoring caches { self .cache_ready_signal .value } "
526+ )
527+ # restore cpu cache
507528 if envs .FD_ENABLE_SWAP_SPACE_CLEARING :
508529 self ._init_cpu_cache (args )
509530 while np .sum (self .swap_space_ready_signal .value ) != args .mp_num :
510531 time .sleep (0.1 )
511532
533+ # restore gpu cache and set cache_ready_signal
512534 self ._init_gpu_cache (args )
535+ logger .info (
536+ f"[rank { self .rank } /{ self .n_ranks } ] Finish restoring caches { self .cache_ready_signal .value } "
537+ )
538+
539+ # wait for all ranks caches to be ready
513540 while np .sum (self .cache_ready_signal .value ) != args .mp_num :
514541 time .sleep (0.1 )
515542
543+ # set kv_cache_status_signal
544+ logger .info ("All ranks finish restoring caches" )
516545 kv_cache_status_signal .value [0 ] = KVCacheStatus .NORMAL
517546
518547 except Exception as e :
519- logger .error (f"Failed to restore caches: { e } " )
548+ logger .error (f"[rank { self . rank } / { self . n_ranks } ] Failed to restore caches: { e } " )
520549
521550 time .sleep (0.1 )
522551
0 commit comments