Skip to content

Commit 741a015

Browse files
authored
[BugFix][Cherry-Pick] cp fix dyc8 cache bug(#5958) (#5959)
* cp fix dyc8 cache bug * update code
1 parent 37bed64 commit 741a015

4 files changed

Lines changed: 56 additions & 39 deletions

File tree

fastdeploy/cache_manager/prefix_cache_manager.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -687,6 +687,8 @@ def request_match_blocks(self, task, block_size, *args):
687687
"cpu_cache_blocks": 0,
688688
"gpu_match_token_num": 0,
689689
"cpu_match_token_num": 0,
690+
"match_gpu_block_ids": [],
691+
"match_cpu_block_ids": [],
690692
}
691693
self.metrics.req_count += 1
692694
if isinstance(task.prompt_token_ids, np.ndarray):
@@ -745,6 +747,8 @@ def request_match_blocks(self, task, block_size, *args):
745747
hit_info["cpu_cache_blocks"] = len(match_cpu_block_ids)
746748
hit_info["gpu_match_token_num"] = gpu_match_token_num
747749
hit_info["cpu_match_token_num"] = cpu_match_token_num
750+
hit_info["match_gpu_block_ids"] = match_gpu_block_ids
751+
hit_info["match_cpu_block_ids"] = match_cpu_block_ids
748752
self.metrics._update_history_hit_metrics()
749753
if self.metrics.req_count % 10000 == 0:
750754
self.metrics.reset_metrics()

fastdeploy/engine/sched/resource_manager_v1.py

Lines changed: 30 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -376,11 +376,17 @@ def revert_chunked_mm_input(self, mm_inputs, matched_token_num):
376376
if mm_inputs is None or "mm_positions" not in mm_inputs or len(mm_inputs["mm_positions"]) == 0:
377377
return matched_token_num
378378

379-
for idx in range(len(mm_inputs["mm_positions"])):
380-
position = mm_inputs["mm_positions"][idx]
379+
position_idx = len(mm_inputs["mm_positions"]) - 1
380+
while matched_token_num > 0 and position_idx >= 0:
381+
position = mm_inputs["mm_positions"][position_idx]
381382
if position.offset < matched_token_num < position.offset + position.length:
382-
return position.offset
383+
matched_token_num = (
384+
position.offset // self.config.cache_config.block_size
385+
) * self.config.cache_config.block_size
386+
position_idx -= 1
383387
elif matched_token_num < position.offset:
388+
position_idx -= 1
389+
elif matched_token_num >= position.offset + position.length:
384390
break
385391
return matched_token_num
386392

@@ -950,17 +956,9 @@ def get_prefix_cached_blocks(self, request: Request):
950956
)
951957

952958
request.num_cached_tokens = matched_token_num
953-
request.gpu_cache_token_num = hit_info["gpu_match_token_num"]
954-
request.cpu_cache_token_num = hit_info["cpu_match_token_num"]
955959
request.cache_info = (matched_block_num, no_cache_block_num)
956960
request.block_tables = common_block_ids
957961
request.skip_allocate = False
958-
959-
# Report the number of cached tokens to Prometheus metrics
960-
main_process_metrics.prefix_cache_token_num.inc(matched_token_num)
961-
main_process_metrics.prefix_gpu_cache_token_num.inc(request.gpu_cache_token_num)
962-
main_process_metrics.prefix_cpu_cache_token_num.inc(request.cpu_cache_token_num)
963-
964962
if self.config.cache_config.disable_chunked_mm_input:
965963
if matched_token_num == request.need_prefill_tokens:
966964
matched_token_num = matched_token_num - self.config.cache_config.block_size
@@ -974,7 +972,27 @@ def get_prefix_cached_blocks(self, request: Request):
974972
request.skip_allocate = True
975973
else:
976974
request.num_computed_tokens = matched_token_num
977-
llm_logger.info(f"request {request.request_id} num_computed_tokens: {request.num_computed_tokens}")
975+
976+
if request.num_cached_tokens != request.num_computed_tokens:
977+
revert_tokens_num = request.num_cached_tokens - request.num_computed_tokens
978+
llm_logger.info(
979+
f"request {request.request_id} num_cached_tokens: {request.num_cached_tokens}, revert_tokens_num: {revert_tokens_num}"
980+
)
981+
982+
revert_block_idx = revert_tokens_num // self.config.cache_config.block_size
983+
for block_idx in range(len(common_block_ids) - 1, revert_block_idx, -1):
984+
if common_block_ids[block_idx] in hit_info["match_gpu_block_ids"]:
985+
hit_info["gpu_match_token_num"] -= self.config.cache_config.block_size
986+
elif common_block_ids[block_idx] in hit_info["match_cpu_block_ids"]:
987+
hit_info["cpu_match_token_num"] -= self.config.cache_config.block_size
988+
989+
request.gpu_cache_token_num = hit_info["gpu_match_token_num"]
990+
request.cpu_cache_token_num = hit_info["cpu_match_token_num"]
991+
992+
# Report the number of cached tokens to Prometheus metrics
993+
main_process_metrics.prefix_cache_token_num.inc(request.num_computed_tokens)
994+
main_process_metrics.prefix_gpu_cache_token_num.inc(request.gpu_cache_token_num)
995+
main_process_metrics.prefix_cpu_cache_token_num.inc(request.cpu_cache_token_num)
978996
request.cache_prepare_time = time.time() - cache_prepare_time
979997
return True
980998
except Exception as e:

fastdeploy/multimodal/hasher.py

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -19,17 +19,11 @@
1919

2020
import numpy as np
2121

22-
from fastdeploy.utils import data_processor_logger
23-
2422

2523
class MultimodalHasher:
2624

2725
@classmethod
2826
def hash_features(cls, obj: object) -> str:
2927
if isinstance(obj, np.ndarray):
3028
return hashlib.sha256((obj.tobytes())).hexdigest()
31-
32-
data_processor_logger.warning(
33-
f"Unsupported type for hashing features: {type(obj)}" + ", use pickle for serialization"
34-
)
3529
return hashlib.sha256((pickle.dumps(obj))).hexdigest()

tests/v1/test_resource_manager_v1.py

Lines changed: 22 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -190,6 +190,7 @@ def setUp(self):
190190
model_cfg.max_model_len = 5120
191191
model_cfg.architectures = ["test_model"]
192192
cache_cfg.bytes_per_layer_per_block = 1
193+
cache_cfg.block_size = 64
193194
parallel_cfg = ParallelConfig(args)
194195
scheduler_cfg = SchedulerConfig(args)
195196
graph_opt_cfg = engine_args.create_graph_optimization_config()
@@ -214,58 +215,58 @@ def setUp(self):
214215
self.request.multimodal_inputs = {}
215216

216217
def test_revert_chunked_mm_input_none_input(self):
217-
result = self.manager.revert_chunked_mm_input(None, 10)
218-
self.assertEqual(result, 10)
218+
result = self.manager.revert_chunked_mm_input(None, 64)
219+
self.assertEqual(result, 64)
219220

220221
def test_revert_chunked_mm_input_no_mm_positions(self):
221222
mm_inputs = {"other_field": "value"}
222-
result = self.manager.revert_chunked_mm_input(mm_inputs, 10)
223-
self.assertEqual(result, 10)
223+
result = self.manager.revert_chunked_mm_input(mm_inputs, 128)
224+
self.assertEqual(result, 128)
224225

225226
def test_revert_chunked_mm_input_empty_positions(self):
226227
mm_inputs = {"mm_positions": []}
227-
result = self.manager.revert_chunked_mm_input(mm_inputs, 10)
228-
self.assertEqual(result, 10)
228+
result = self.manager.revert_chunked_mm_input(mm_inputs, 128)
229+
self.assertEqual(result, 128)
229230

230231
def test_revert_chunked_mm_input_matched_in_chunk(self):
231232
mm_inputs = {
232233
"mm_positions": [
233-
ImagePosition(offset=5, length=10),
234-
ImagePosition(offset=20, length=10),
234+
ImagePosition(offset=40, length=100),
235+
ImagePosition(offset=200, length=80),
235236
]
236237
}
237-
result = self.manager.revert_chunked_mm_input(mm_inputs, 8)
238-
self.assertEqual(result, 5)
238+
result = self.manager.revert_chunked_mm_input(mm_inputs, 256)
239+
self.assertEqual(result, 192)
239240

240241
def test_revert_chunked_mm_input_matched_in_second_chunk(self):
241242
mm_inputs = {
242243
"mm_positions": [
243-
ImagePosition(offset=5, length=10),
244-
ImagePosition(offset=20, length=10),
244+
ImagePosition(offset=100, length=100),
245+
ImagePosition(offset=200, length=80),
245246
]
246247
}
247-
result = self.manager.revert_chunked_mm_input(mm_inputs, 25)
248-
self.assertEqual(result, 20)
248+
result = self.manager.revert_chunked_mm_input(mm_inputs, 256)
249+
self.assertEqual(result, 64)
249250

250251
def test_revert_chunked_mm_input_before_first_chunk(self):
251252
mm_inputs = {
252253
"mm_positions": [
253-
ImagePosition(offset=5, length=10),
254-
ImagePosition(offset=20, length=10),
254+
ImagePosition(offset=60, length=100),
255+
ImagePosition(offset=180, length=100),
255256
]
256257
}
257-
result = self.manager.revert_chunked_mm_input(mm_inputs, 3)
258-
self.assertEqual(result, 3)
258+
result = self.manager.revert_chunked_mm_input(mm_inputs, 256)
259+
self.assertEqual(result, 0)
259260

260261
def test_revert_chunked_mm_input_after_last_chunk(self):
261262
mm_inputs = {
262263
"mm_positions": [
263264
ImagePosition(offset=5, length=10),
264-
ImagePosition(offset=20, length=10),
265+
ImagePosition(offset=200, length=56),
265266
]
266267
}
267-
result = self.manager.revert_chunked_mm_input(mm_inputs, 35)
268-
self.assertEqual(result, 35)
268+
result = self.manager.revert_chunked_mm_input(mm_inputs, 256)
269+
self.assertEqual(result, 256)
269270

270271

271272
if __name__ == "__main__":

0 commit comments

Comments (0)