13 changes: 6 additions & 7 deletions mooncake-wheel/mooncake/mooncake_connector_v1.py
@@ -22,16 +22,15 @@
 import torch
 import zmq

-from vllm.attention.selector import backend_name_to_enum, get_attn_backend
+from vllm.attention.selector import AttentionBackendEnum, get_attn_backend
 from vllm.config import VllmConfig
 from vllm.distributed.kv_transfer.kv_connector.v1.base import (
     KVConnectorBase_V1, KVConnectorMetadata, KVConnectorRole)
 from vllm.distributed.parallel_state import (get_tensor_model_parallel_rank,
                                              get_tp_group)
 from vllm.forward_context import ForwardContext
 from vllm.logger import init_logger
-from vllm.platforms import _Backend
-from vllm.utils import get_ip, make_zmq_path, make_zmq_socket
+from vllm.utils.network_utils import get_ip, make_zmq_path, make_zmq_socket
 from vllm.v1.attention.backends.utils import get_kv_cache_layout
 from vllm.v1.core.sched.output import SchedulerOutput
 from vllm.v1.request import RequestStatus
@@ -114,6 +113,7 @@ class MooncakeConnector(KVConnectorBase_V1):
     def __init__(self, vllm_config: VllmConfig, role: KVConnectorRole):
         assert vllm_config.kv_transfer_config is not None
         assert vllm_config.kv_transfer_config.engine_id is not None
+        super().__init__(vllm_config, role)
         self.engine_id: EngineId = vllm_config.kv_transfer_config.engine_id

         if role == KVConnectorRole.SCHEDULER:
@@ -425,12 +425,11 @@ def __init__(self, vllm_config: VllmConfig, engine_id: str):
             self.model_config.dtype,
             self.cache_config.cache_dtype,
             self.block_size,
-            self.model_config.is_attention_free,
             use_mla=self.use_mla)
         self.backend_name = backend.get_name()
-        attn_backend = backend_name_to_enum(self.backend_name)
-        self._use_flashinfer = attn_backend == _Backend.FLASHINFER_VLLM_V1
-        self._use_pallas_v1 = attn_backend == _Backend.PALLAS_VLLM_V1
+        attn_backend = AttentionBackendEnum[self.backend_name]
Contributor comment (critical), on the added line above:

The current implementation AttentionBackendEnum[self.backend_name] attempts to access an enum member by its name. However, self.backend_name (e.g., 'flashinfer') is the value of the enum member, not its name (e.g., 'FLASHINFER'). This will cause a KeyError at runtime, preventing the worker from initializing.

To correctly get the enum member from its value, call the enum as a function: AttentionBackendEnum(self.backend_name).

Suggested change:
-        attn_backend = AttentionBackendEnum[self.backend_name]
+        attn_backend = AttentionBackendEnum(self.backend_name)

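To make the name-versus-value distinction concrete, here is a small, self-contained Python sketch. BackendStandIn is a hypothetical stand-in enum, not vLLM's actual AttentionBackendEnum; it only mirrors the upper-case-name / lower-case-value pattern described in the comment.

from enum import Enum

# Hypothetical stand-in: member names are upper-case, member values are the
# lower-case strings a hypothetical backend.get_name() might return.
class BackendStandIn(Enum):
    FLASHINFER = "flashinfer"
    PALLAS = "pallas"

# Square brackets look a member up by NAME:
assert BackendStandIn["FLASHINFER"] is BackendStandIn.FLASHINFER

# Calling the enum looks a member up by VALUE:
assert BackendStandIn("flashinfer") is BackendStandIn.FLASHINFER

# Passing a value where a name is expected raises KeyError,
# which is the failure mode the review comment describes:
try:
    BackendStandIn["flashinfer"]
except KeyError:
    print("KeyError: value string used where a member name was expected")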
+        self._use_flashinfer = attn_backend in [AttentionBackendEnum.FLASHINFER, AttentionBackendEnum.FLASHINFER_MLA]
Collaborator comment (proposing a version gate rather than an unconditional switch):

if vllm version == xxx:
    use _Backend
elif vllm version == yyy:
    use AttentionBackendEnum
else:
    raise Exception("we don't support this vllm version")

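A minimal sketch of how that gate might look, under stated assumptions: the cutoff versions below are hypothetical stand-ins for the reviewer's xxx/yyy placeholders (not taken from this PR), the packaging library is assumed available, and the new-API branch reuses the value-based lookup from the earlier suggestion.

from packaging.version import Version  # assumed available alongside vLLM

import vllm

# Hypothetical cutoffs standing in for the reviewer's "xxx"/"yyy" placeholders.
_MIN_SUPPORTED = Version("0.6.0")           # illustrative oldest supported vLLM
_ATTENTION_ENUM_CUTOFF = Version("0.11.0")  # illustrative first release with AttentionBackendEnum


def resolve_attn_backend(backend_name: str):
    """Map backend.get_name() to the attention-backend enum of the installed vLLM."""
    installed = Version(vllm.__version__)
    if installed < _MIN_SUPPORTED:
        raise RuntimeError(f"we don't support this vllm version: {installed}")
    if installed < _ATTENTION_ENUM_CUTOFF:
        # Older API surface: _Backend members resolved via backend_name_to_enum.
        from vllm.attention.selector import backend_name_to_enum
        return backend_name_to_enum(backend_name)
    # Newer API surface: AttentionBackendEnum, looked up by value,
    # as proposed in the earlier review comment.
    from vllm.attention.selector import AttentionBackendEnum
    return AttentionBackendEnum(backend_name)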
+        self._use_pallas_v1 = attn_backend == AttentionBackendEnum.PALLAS
         self.kv_cache_layout = get_kv_cache_layout()
         logger.debug("Detected attention backend %s", self.backend_name)
         logger.debug("Detected kv cache layout %s", self.kv_cache_layout)