diff --git a/docs/source/features/runtime.rst b/docs/source/features/runtime.rst
index 4b9ff237..2d30a5b7 100644
--- a/docs/source/features/runtime.rst
+++ b/docs/source/features/runtime.rst
@@ -14,7 +14,6 @@ The AI Runtime hides various implementation details on the inference engine side
``python3 -m pip install aibrix``
-
Metric Standardization
----------------------
Different inference engines will expose different metrics, and AI Runtime will standardize them.
@@ -40,7 +39,7 @@ First Define the necessary environment variables for the HuggingFace model.
.. code-block:: bash
# General settings
- export DOWNLOADER_ALLOW_FILE_SUFFIX=json, safetensors
+ export DOWNLOADER_ALLOW_FILE_SUFFIX="json, safetensors"
export DOWNLOADER_NUM_THREADS=16
# HuggingFace settings
export HF_ENDPOINT=https://hf-mirror.com # set it when env is in CN
@@ -62,13 +61,13 @@ First Define the necessary environment variables for the S3 model.
.. code-block:: bash
# General settings
- export DOWNLOADER_ALLOW_FILE_SUFFIX=json, safetensors
+ export DOWNLOADER_ALLOW_FILE_SUFFIX="json, safetensors"
export DOWNLOADER_NUM_THREADS=16
# AWS settings
export AWS_ACCESS_KEY_ID=
export AWS_SECRET_ACCESS_KEY=
- export AWS_ENDPOINT_URL=
- export AWS_REGION=
+ export AWS_ENDPOINT_URL= # e.g. https://s3.us-west-2.amazonaws.com
+ export AWS_REGION= # e.g. us-west-2
Then use AI Runtime to download the model from AWS S3:
@@ -87,13 +86,13 @@ First Define the necessary environment variables for the TOS model.
.. code-block:: bash
# General settings
- export DOWNLOADER_ALLOW_FILE_SUFFIX=json, safetensors
+ export DOWNLOADER_ALLOW_FILE_SUFFIX="json, safetensors"
export DOWNLOADER_NUM_THREADS=16
# AWS settings
export TOS_ACCESS_KEY=
export TOS_SECRET_KEY=
- export TOS_ENDPOINT=
- export TOS_REGION=
+ export TOS_ENDPOINT= # e.g. https://tos-s3-cn-beijing.volces.com
+ export TOS_REGION= # e.g. cn-beijing
Then use AI Runtime to download the model from TOS:
@@ -103,6 +102,20 @@ Then use AI Runtime to download the model from TOS:
python -m aibrix.downloader \
--model-uri tos://aibrix-model-artifacts/deepseek-coder-6.7b-instruct/ \
--local-dir /tmp/aibrix/models_tos/
-
+Model Management API
+^^^^^^^^^^^^^^^^^^^^
+
+.. attention::
+ This requires the engine to be started with ``--enable-lora`` and the environment variable ``VLLM_ALLOW_RUNTIME_LORA_UPDATING=true`` set.
+
+.. code-block:: bash
+ curl -X POST http://localhost:8080/v1/lora_adapter/load \
+ -H "Content-Type: application/json" \
+ -d '{"lora_name": "lora-1", "lora_path": "bharati2324/Qwen2.5-1.5B-Instruct-Code-LoRA-r16v2"}'
+
+.. code-block:: bash
+ curl -X POST http://localhost:8080/v1/lora_adapter/unload \
+ -H "Content-Type: application/json" \
+ -d '{"lora_name": "lora-1"}'
diff --git a/python/aibrix/aibrix/downloader/s3.py b/python/aibrix/aibrix/downloader/s3.py
index 1621cc7c..f447bd65 100644
--- a/python/aibrix/aibrix/downloader/s3.py
+++ b/python/aibrix/aibrix/downloader/s3.py
@@ -131,7 +131,7 @@ def _get_auth_config(self) -> Dict[str, Optional[str]]:
region_name: "region-name",
endpoint_url: "URL_ADDRESS3.region-name.com",
aws_access_key_id: "AK****",
- aws_secret_access_key: "SK****",,
+ aws_secret_access_key: "SK****",
}
"""
pass