
Commit 91a5e15

fix
1 parent 885e6fc

6 files changed (+49, -168 lines)


lightllm/server/router/model_infer/mode_backend/base_backend.py

Lines changed: 9 additions & 1 deletion
@@ -296,7 +296,7 @@ def _get_classed_reqs(
         Classify the requests and return them in groups:
         1. wait_pause_reqs: requests waiting to be paused because inference resources are insufficient.
         2. paused_reqs: requests that have already been paused and may be resumed later.
-        3. finished_reqs: requests that need to be released.
+        3. finished_reqs: requests that need to be released, including both normally finished and aborted requests.
         4. prefill_reqs: requests that need a prefill step.
         5. decode_reqs: requests that need a decode step.
         """
@@ -369,14 +369,22 @@ def _get_classed_reqs(

         g_infer_state_lock.release()

+        self._pre_handle_finished_reqs(finished_reqs=finished_reqs)
         g_infer_context.filter_reqs(finished_reqs=finished_reqs)
+
         g_infer_context.pause_reqs(wait_pause_reqs)

         if recover_paused:
             g_infer_context.recover_paused_reqs(paused_reqs=paused_reqs)

         return prefill_reqs, decode_reqs

+    def _pre_handle_finished_reqs(self, finished_reqs: List[InferReq]):
+        """
+        Inheritance hook used by the prefill node in PD-disaggregated mode to launch kv transfer tasks.
+        """
+        pass
+
     # Some reusable general-purpose helper functions
     def _pre_post_handle(self, run_reqs: List[InferReq], is_chuncked_mode: bool) -> List[InferReqUpdatePack]:
         update_func_objs: List[InferReqUpdatePack] = []

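The refactor above replaces per-variant decode() overrides with a template-method hook: the base backend invokes `_pre_handle_finished_reqs` just before finished requests are filtered out, and subclasses override only that hook. A minimal, self-contained sketch of the pattern (only `_pre_handle_finished_reqs` is the real name; the other classes and methods are hypothetical stand-ins, not lightllm APIs):

    from typing import List

    class BaseBackend:
        def _pre_handle_finished_reqs(self, finished_reqs: List[str]) -> None:
            # Default hook: do nothing. It runs before finished requests are
            # released, so an override can still inspect their kv state.
            pass

        def step(self, finished_reqs: List[str]) -> None:
            # hypothetical stand-in for the real decode/_get_classed_reqs path
            self._pre_handle_finished_reqs(finished_reqs=finished_reqs)
            print(f"released: {finished_reqs}")

    class PrefillNodeBackend(BaseBackend):
        def _pre_handle_finished_reqs(self, finished_reqs: List[str]) -> None:
            # a PD prefill node would enqueue kv transfer tasks here
            print(f"enqueue kv move for: {finished_reqs}")

    PrefillNodeBackend().step(["req-1", "req-2"])

The ordering matters: the hook fires before `filter_reqs`, so a prefill node can still pin the finished requests' kv entries (see the radix-cache comment in prefill_impl.py below) before they are released.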
lightllm/server/router/model_infer/mode_backend/continues_batch/pd_mode/prefill_node_impl/prefill_impl.py

Lines changed: 13 additions & 27 deletions
@@ -5,22 +5,21 @@
 import torch.multiprocessing as mp
 import torch.distributed as dist
 from typing import List, Tuple
-from lightllm.server.router.model_infer.mode_backend.base_backend import ModeBackend
-from lightllm.server.router.model_infer.infer_batch import InferReq, g_infer_context
+from lightllm.server.router.model_infer.infer_batch import InferReq
 from lightllm.server.pd_io_struct import KVMoveTask, DecodeNodeInfo
 from lightllm.utils.log_utils import init_logger
 from lightllm.common.basemodel.infer_lock import g_router_lock, g_infer_state_lock
-from lightllm.server.router.model_infer.mode_backend.continues_batch.impl import ContinuesBatchBackend
 from rpyc.utils.server import ThreadedServer
 from .prefill_task_cache import g_kv_move_task_cache
 from lightllm.utils.device_utils import kv_trans_use_p2p
 from lightllm.utils.envs_utils import get_unique_server_name
 from lightllm.utils.dist_utils import create_new_group_for_current_dp
+from lightllm.server.router.model_infer.mode_backend.chunked_prefill.impl import ChunkedPrefillBackend


 logger = init_logger(__name__)


-class ChunckedPrefillForPrefillNode(ModeBackend):
+class ChunckedPrefillForPrefillNode(ChunkedPrefillBackend):
     def __init__(self, info_queue: mp.Queue, mem_queue: mp.Queue) -> None:
         super().__init__()
         self.info_queue: mp.Queue = info_queue
@@ -49,36 +48,23 @@ def init_custom(self):

         return

-    def decode(self):
-        uinit_reqs, aborted_reqs, ok_finished_reqs, prefill_reqs, decode_reqs = self._get_classed_reqs(
-            g_infer_context.infer_req_ids,
-            no_decode=True,
-        )
-        assert len(decode_reqs) == 0
-
-        if aborted_reqs:
-            self._filter_reqs(aborted_reqs)
-
-        if ok_finished_reqs:
-            self.prefill_req_frozen_tokens_and_put_to_kvmove_taskqueue(ok_finished_reqs)
-            self._filter_reqs(ok_finished_reqs)
-            ok_finished_reqs.clear()
-
-        if prefill_reqs:
-            ContinuesBatchBackend.normal_prefill_reqs(
-                self, prefill_reqs=prefill_reqs, uninit_reqs=uinit_reqs, ok_finished_reqs=ok_finished_reqs
-            )
-
-        self._overlap_req_init_and_filter(uninit_reqs=uinit_reqs, ok_finished_reqs=ok_finished_reqs, clear_list=True)
+    def _pre_handle_finished_reqs(self, finished_reqs):
+        self._prefill_req_frozen_tokens_and_put_to_kvmove_taskqueue(run_reqs=finished_reqs)
         return

-    def prefill_req_frozen_tokens_and_put_to_kvmove_taskqueue(self, run_reqs: List[InferReq]):
-        # Reclaim the related entries in the radix cache ahead of time and add reference info.
+    def _prefill_req_frozen_tokens_and_put_to_kvmove_taskqueue(self, run_reqs: List[InferReq]):
+        # Reclaim the related entries in the radix cache ahead of time and pin them with a reference, so the transfer process can move the kv data.
         if self.is_master_in_dp:
             logger.info("prefill_req_handle_and_frozen_tokens")
+
         g_infer_state_lock.acquire()
         try:
             for req in run_reqs:
+
+                # Distinguish aborted from normally finished requests; only normally finished requests launch a kv transfer task.
+                if not req.finish_status.is_finished():
+                    continue
+
                 req: InferReq = req
                 key = req.get_input_token_ids()[0 : req.cur_kv_len]
                 key = torch.tensor(key, dtype=torch.int64, device="cpu")
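Since `finished_reqs` now carries both normally finished and aborted requests (see the docstring change in base_backend.py above), the kv-move producer filters on finish status before freezing tokens. A standalone sketch of that split, using a simplified stand-in for the real `InferReq`/`finish_status` objects:

    from dataclasses import dataclass
    from typing import List

    @dataclass
    class Req:
        rid: str
        finished_normally: bool  # stand-in for req.finish_status.is_finished()

    def reqs_needing_kv_move(finished_reqs: List[Req]) -> List[Req]:
        # aborted requests are simply released; only normally finished
        # requests should produce a KVMoveTask
        return [r for r in finished_reqs if r.finished_normally]

    reqs = [Req("a", finished_normally=True), Req("b", finished_normally=False)]
    assert [r.rid for r in reqs_needing_kv_move(reqs)] == ["a"]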
Lines changed: 27 additions & 0 deletions
@@ -0,0 +1,27 @@
+import torch.multiprocessing as mp
+from typing import List, Tuple
+from lightllm.server.router.model_infer.infer_batch import InferReq
+from lightllm.utils.log_utils import init_logger
+from .prefill_impl import ChunckedPrefillForPrefillNode
+from lightllm.server.router.model_infer.mode_backend.dp_backend.impl import DPChunkedPrefillBackend
+
+logger = init_logger(__name__)
+
+
+class DPChunkedForPrefillNode(DPChunkedPrefillBackend):
+    def __init__(self, info_queue: mp.Queue, mem_queue: mp.Queue) -> None:
+        super().__init__()
+        self.info_queue: mp.Queue = info_queue
+        self.mem_queue: mp.Queue = mem_queue
+
+    def init_custom(self):
+        ChunckedPrefillForPrefillNode.init_custom(self)
+        return
+
+    def _pre_handle_finished_reqs(self, finished_reqs):
+        self._prefill_req_frozen_tokens_and_put_to_kvmove_taskqueue(run_reqs=finished_reqs)
+        return
+
+    def _prefill_req_frozen_tokens_and_put_to_kvmove_taskqueue(self, run_reqs: List[InferReq]):
+        ChunckedPrefillForPrefillNode._prefill_req_frozen_tokens_and_put_to_kvmove_taskqueue(self, run_reqs=run_reqs)
+        return
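The new DP variant composes behavior from two branches of the hierarchy: it inherits DP scheduling from `DPChunkedPrefillBackend` while borrowing the kv-move setup from `ChunckedPrefillForPrefillNode` through explicit class references, since that class is not in its MRO and `super()` cannot reach it. A minimal sketch of this borrow-by-explicit-call pattern (all names here are hypothetical):

    class DpBackend:
        def init_custom(self) -> None:
            print("dp scheduling setup")

    class KvMoveBackend:
        def init_custom(self) -> None:
            print("kv move setup")

    class DpKvMoveBackend(DpBackend):
        def init_custom(self) -> None:
            # KvMoveBackend is outside this class's MRO, so its method is
            # called by explicit class reference with self passed explicitly.
            KvMoveBackend.init_custom(self)

    DpKvMoveBackend().init_custom()  # prints "kv move setup"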

lightllm/server/router/model_infer/mode_backend/continues_batch/pd_mode/prefill_node_impl/prefill_impl_for_dp_chuncked.py

Lines changed: 0 additions & 44 deletions
This file was deleted.

lightllm/server/router/model_infer/mode_backend/continues_batch/pd_mode/prefill_node_impl/prefill_impl_mtp.py

Lines changed: 0 additions & 41 deletions
This file was deleted.

lightllm/server/router/model_infer/mode_backend/continues_batch/pd_mode/prefill_node_impl/prefill_impl_mtp_for_dp_chuncked.py

Lines changed: 0 additions & 55 deletions
This file was deleted.
