diff --git a/docs/source/features/runtime.rst b/docs/source/features/runtime.rst index 4b9ff237..2d30a5b7 100644 --- a/docs/source/features/runtime.rst +++ b/docs/source/features/runtime.rst @@ -14,7 +14,6 @@ The AI Runtime hides various implementation details on the inference engine side ``python3 -m pip install aibrix`` - Metric Standardization ---------------------- Different inference engines will expose different metrics, and AI Runtime will standardize them. @@ -40,7 +39,7 @@ First Define the necessary environment variables for the HuggingFace model. .. code-block:: bash # General settings - export DOWNLOADER_ALLOW_FILE_SUFFIX=json, safetensors + export DOWNLOADER_ALLOW_FILE_SUFFIX="json, safetensors" export DOWNLOADER_NUM_THREADS=16 # HuggingFace settings export HF_ENDPOINT=https://hf-mirror.com # set it when env is in CN @@ -62,13 +61,13 @@ First Define the necessary environment variables for the S3 model. .. code-block:: bash # General settings - export DOWNLOADER_ALLOW_FILE_SUFFIX=json, safetensors + export DOWNLOADER_ALLOW_FILE_SUFFIX="json, safetensors" export DOWNLOADER_NUM_THREADS=16 # AWS settings export AWS_ACCESS_KEY_ID= export AWS_SECRET_ACCESS_KEY= - export AWS_ENDPOINT_URL= - export AWS_REGION= + export AWS_ENDPOINT_URL= # e.g. https://s3.us-west-2.amazonaws.com + export AWS_REGION= # e.g. us-west-2 Then use AI Runtime to download the model from AWS S3: @@ -87,13 +86,13 @@ First Define the necessary environment variables for the TOS model. .. code-block:: bash # General settings - export DOWNLOADER_ALLOW_FILE_SUFFIX=json, safetensors + export DOWNLOADER_ALLOW_FILE_SUFFIX="json, safetensors" export DOWNLOADER_NUM_THREADS=16 # AWS settings export TOS_ACCESS_KEY= export TOS_SECRET_KEY= - export TOS_ENDPOINT= - export TOS_REGION= + export TOS_ENDPOINT= # e.g. 
https://tos-s3-cn-beijing.volces.com + export TOS_REGION= # e.g. cn-beijing Then use AI Runtime to download the model from TOS: @@ -103,6 +102,20 @@ Then use AI Runtime to download the model from TOS: python -m aibrix.downloader \ --model-uri tos://aibrix-model-artifacts/deepseek-coder-6.7b-instruct/ \ --local-dir /tmp/aibrix/models_tos/ - +Model Management API +^^^^^^^^^^^^^^^^^^^^ + +.. attention:: + This requires the engine to be started with ``--enable-lora`` and the environment variable ``VLLM_ALLOW_RUNTIME_LORA_UPDATING=true`` set. + +.. code-block:: bash + curl -X POST http://localhost:8080/v1/lora_adapter/load \ + -H "Content-Type: application/json" \ + -d '{"lora_name": "lora-1", "lora_path": "bharati2324/Qwen2.5-1.5B-Instruct-Code-LoRA-r16v2"}' + +.. code-block:: bash + curl -X POST http://localhost:8080/v1/lora_adapter/unload \ + -H "Content-Type: application/json" \ + -d '{"lora_name": "lora-1"}' diff --git a/python/aibrix/aibrix/downloader/s3.py b/python/aibrix/aibrix/downloader/s3.py index 1621cc7c..f447bd65 100644 --- a/python/aibrix/aibrix/downloader/s3.py +++ b/python/aibrix/aibrix/downloader/s3.py @@ -131,7 +131,7 @@ def _get_auth_config(self) -> Dict[str, Optional[str]]: region_name: "region-name", endpoint_url: "URL_ADDRESS3.region-name.com", aws_access_key_id: "AK****", - aws_secret_access_key: "SK****",, + aws_secret_access_key: "SK****", } """ pass