@@ -2417,17 +2417,13 @@ def test_fp8_blockscale(self, tp_size, pp_size, ep_size, mtp_nextn, fp8kv,
24172417 if get_sm_version () == 100 or get_sm_version () == 103 :
24182418 moe_backend = "DEEPGEMM" if moe_backend == "_DEFAULT" else moe_backend
24192419 moe_config = MoeConfig (backend = moe_backend , max_num_tokens = 16384 )
2420- # TODO: Support block reuse for DeepSeek-V3.2
2421- kv_cache_config = KvCacheConfig (enable_block_reuse = False ,
2422- free_gpu_memory_fraction = 0.6 ,
2420+ kv_cache_config = KvCacheConfig (free_gpu_memory_fraction = 0.6 ,
24232421 tokens_per_block = 64 )
24242422 else :
24252423 if moe_backend != "_DEFAULT" :
24262424 pytest .skip ("Not supported MoE backend!" )
24272425 moe_config = MoeConfig ()
2428- # TODO: Support block reuse for DeepSeek-V3.2
2429- kv_cache_config = KvCacheConfig (enable_block_reuse = False ,
2430- free_gpu_memory_fraction = 0.7 ,
2426+ kv_cache_config = KvCacheConfig (free_gpu_memory_fraction = 0.7 ,
24312427 tokens_per_block = 64 )
24322428
24332429 pytorch_config = dict (
@@ -2490,8 +2486,7 @@ def test_nvfp4_multi_gpus(self, tp_size, pp_size, ep_size, mtp_nextn, fp8kv,
24902486 "MOE TRTLLM backend does not support SM version 120 or 121" )
24912487
24922488 moe_config = MoeConfig (backend = moe_backend , max_num_tokens = 16384 )
2493- kv_cache_config = KvCacheConfig (enable_block_reuse = True ,
2494- free_gpu_memory_fraction = 0.7 ,
2489+ kv_cache_config = KvCacheConfig (free_gpu_memory_fraction = 0.7 ,
24952490 tokens_per_block = 64 )
24962491 cuda_graph_config = CudaGraphConfig (
24972492 enable_padding = True ,
@@ -2550,8 +2545,7 @@ def test_nvfp4_multi_gpus_chunked_prefill(self, tp_size, pp_size, ep_size,
25502545 "MOE TRTLLM backend does not support SM version 120 or 121" )
25512546
25522547 moe_config = MoeConfig (backend = moe_backend , max_num_tokens = 16384 )
2553- kv_cache_config = KvCacheConfig (enable_block_reuse = False ,
2554- free_gpu_memory_fraction = 0.7 ,
2548+ kv_cache_config = KvCacheConfig (free_gpu_memory_fraction = 0.7 ,
25552549 tokens_per_block = 64 )
25562550 cuda_graph_config = CudaGraphConfig (
25572551 enable_padding = True ,
0 commit comments