-
Notifications
You must be signed in to change notification settings - Fork 9
Open
Description
Exception raised in creation task: The actor died because of an error raised in its creation task, ray::ModelWorker.__init__() (pid=44490, ip=100.97.173.240, actor_id=f5984a9d5b19f763323e603d01000000, repr=<origin_reward_server.ModelWorker object at 0x7fc04e354e90>)
(ModelWorker pid=44490) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(ModelWorker pid=44490) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(ModelWorker pid=44490) File "/data/Edit-R1/reward_server/origin_reward_server.py", line 51, in __init__
(ModelWorker pid=44490) self.load_model()
(ModelWorker pid=44490) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(ModelWorker pid=44490) File "/data/Edit-R1/reward_server/origin_reward_server.py", line 55, in load_model
(ModelWorker pid=44490) self.llm = LLM(
(ModelWorker pid=44490) ^^^^
(ModelWorker pid=44490) File "/home/i-lanjinghong/miniconda3/envs/reward_server/lib/python3.11/site-packages/vllm/entrypoints/llm.py", line 271, in __init__
(ModelWorker pid=44490) self.llm_engine = LLMEngine.from_engine_args(
(ModelWorker pid=44490) ^^^^^^^^^^^^^^^^^^^^^^^^^^^
(ModelWorker pid=44490) File "/home/i-lanjinghong/miniconda3/envs/reward_server/lib/python3.11/site-packages/vllm/engine/llm_engine.py", line 501, in from_engine_args
(ModelWorker pid=44490) return engine_cls.from_vllm_config(
(ModelWorker pid=44490) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(ModelWorker pid=44490) File "/home/i-lanjinghong/miniconda3/envs/reward_server/lib/python3.11/site-packages/vllm/engine/llm_engine.py", line 477, in from_vllm_config
(ModelWorker pid=44490) return cls(
(ModelWorker pid=44490) ^^^^
(ModelWorker pid=44490) File "/home/i-lanjinghong/miniconda3/envs/reward_server/lib/python3.11/site-packages/vllm/engine/llm_engine.py", line 268, in __init__
(ModelWorker pid=44490) self._initialize_kv_caches()
(ModelWorker pid=44490) File "/home/i-lanjinghong/miniconda3/envs/reward_server/lib/python3.11/site-packages/vllm/engine/llm_engine.py", line 413, in _initialize_kv_caches
(ModelWorker pid=44490) self.model_executor.determine_num_available_blocks())
(ModelWorker pid=44490) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(ModelWorker pid=44490) File "/home/i-lanjinghong/miniconda3/envs/reward_server/lib/python3.11/site-packages/vllm/executor/executor_base.py", line 104, in determine_num_available_blocks
(ModelWorker pid=44490) results = self.collective_rpc("determine_num_available_blocks")
(ModelWorker pid=44490) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(ModelWorker pid=44490) File "/home/i-lanjinghong/miniconda3/envs/reward_server/lib/python3.11/site-packages/vllm/executor/uniproc_executor.py", line 57, in collective_rpc
(ModelWorker pid=44490) answer = run_method(self.driver_worker, method, args, kwargs)
(ModelWorker pid=44490) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(ModelWorker pid=44490) File "/home/i-lanjinghong/miniconda3/envs/reward_server/lib/python3.11/site-packages/vllm/utils/__init__.py", line 2736, in run_method
(ModelWorker pid=44490) return func(*args, **kwargs)
(ModelWorker pid=44490) ^^^^^^^^^^^^^^^^^^^^^
(ModelWorker pid=44490) File "/home/i-lanjinghong/miniconda3/envs/reward_server/lib/python3.11/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context
(ModelWorker pid=44490) return func(*args, **kwargs)
(ModelWorker pid=44490) ^^^^^^^^^^^^^^^^^^^^^
File "/home/i-lanjinghong/miniconda3/envs/reward_server/lib/python3.11/site-packages/triton/runtime/driver.py", line 9, in _create_driver
(ModelWorker pid=44490) return actives[0]()
(ModelWorker pid=44490) ^^^^^^^^^^^^
(ModelWorker pid=44490) File "/home/i-lanjinghong/miniconda3/envs/reward_server/lib/python3.11/site-packages/triton/backends/nvidia/driver.py", line 535, in __init__
(ModelWorker pid=44490) self.utils = CudaUtils() # TODO: make static
(ModelWorker pid=44490) ^^^^^^^^^^^
(ModelWorker pid=44490) File "/home/i-lanjinghong/miniconda3/envs/reward_server/lib/python3.11/site-packages/triton/backends/nvidia/driver.py", line 89, in __init__
(ModelWorker pid=44490) mod = compile_module_from_src(Path(os.path.join(dirname, "driver.c")).read_text(), "cuda_utils")
(ModelWorker pid=44490) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(ModelWorker pid=44490) File "/home/i-lanjinghong/miniconda3/envs/reward_server/lib/python3.11/site-packages/triton/backends/nvidia/driver.py", line 71, in compile_module_from_src
(ModelWorker pid=44490) mod = importlib.util.module_from_spec(spec)
(ModelWorker pid=44490) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(ModelWorker pid=44490) File "<frozen importlib._bootstrap>", line 573, in module_from_spec
(ModelWorker pid=44490) File "<frozen importlib._bootstrap_external>", line 1233, in create_module
(ModelWorker pid=44490) File "<frozen importlib._bootstrap>", line 241, in _call_with_frames_removed
(ModelWorker pid=44490) ImportError: /home/i-lanjinghong/.triton/cache/QLAEYTJR4KV5WSBGJKRUAKVP475DE47NW7P4XMI2RFXBOIE5TZ4Q/cuda_utils.so: undefined symbol: cuModuleGetFunction
The environment setup completed successfully and CUDA is available on the server. This problem has bothered me for a long time; I would really appreciate a response from anyone.
Reactions are currently unavailable
Metadata
Metadata
Assignees
Labels
No labels