We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 4082707 · commit e6b4b6e · Copy full SHA for e6b4b6e
vllm_spyre/v1/worker/spyre_model_runner.py
@@ -848,6 +848,8 @@ def _get_num_blocks_available(self) -> int:
848
# hard coded value for tensor parallel size 4 with the below model
849
# https://huggingface.co/ibm-granite/granite-3.3-8b-instruct
850
NUM_BLOCKS_SPYRE = 2080
851
+ logger.info("Model granite-3.3-8b-instruct and tensor parallel " \
852
+ "size 4 detected. Using NUM_BLOCKS_SPYRE = %d", 2080)
853
else:
854
# default value for any other model/ tensor parallel size
855
NUM_BLOCKS_SPYRE = max_batch_size * min_req_num_blocks
0 commit comments