Enable AQUA SDK & CLI to Deploy Fine-Tuned LLMs in Multi-Model Deployment (#1175)

elizjo · mrDzurb · web-flow · commit ab204733097e · 2025-05-14T14:12:53.000-07:00
Co-authored-by: Dmitrii Cherkasov <dmitrii.cherkasov@oracle.com>
diff --git a/ads/aqua/common/entities.py b/ads/aqua/common/entities.py
@@ -157,6 +157,8 @@ class AquaMultiModelRef(Serializable):
         Optional environment variables to override during deployment.
     artifact_location : Optional[str]
         Artifact path of model in the multimodel group.
+    fine_tune_weights_location : Optional[str]
+        For fine tuned models, the artifact path of the modified model weights
     """
 
     model_id: str = Field(..., description="The model OCID to deploy.")
@@ -171,6 +173,9 @@ class AquaMultiModelRef(Serializable):
     artifact_location: Optional[str] = Field(
         None, description="Artifact path of model in the multimodel group."
     )
+    fine_tune_weights_location: Optional[str] = Field(
+        None, description="For fine tuned models, the artifact path of the modified model weights"
+    )
 
     class Config:
         extra = "ignore"
diff --git a/ads/aqua/model/model.py b/ads/aqua/model/model.py
@@ -83,6 +83,10 @@
     ModelValidationResult,
 )
 from ads.aqua.model.enums import MultiModelSupportedTaskType
+from ads.aqua.model.utils import (
+    extract_base_model_from_ft,
+    extract_fine_tune_artifacts_path,
+)
 from ads.common.auth import default_signer
 from ads.common.oci_resource import SEARCH_TYPE, OCIResource
 from ads.common.utils import (
@@ -311,12 +315,21 @@ def create_multi(
             #         "Currently only service models are supported for multi model deployment."
             #     )
 
+            # check if model is a fine-tuned model and if so, add the fine tuned weights path to the fine_tune_weights_location pydantic field
+            is_fine_tuned_model = Tags.AQUA_FINE_TUNED_MODEL_TAG in source_model.freeform_tags
+
+            if is_fine_tuned_model:
+                model.model_id, model.model_name = extract_base_model_from_ft(source_model)
+                model_artifact_path, model.fine_tune_weights_location = extract_fine_tune_artifacts_path(source_model)
+
+            else:
+                # Retrieve model artifact for base models
+                model_artifact_path = source_model.artifact
+
             display_name_list.append(display_name)
 
             self._extract_model_task(model, source_model)
 
-            # Retrieve model artifact
-            model_artifact_path = source_model.artifact
             if not model_artifact_path:
                 raise AquaValueError(
                     f"Model '{display_name}' (ID: {model.model_id}) has no artifacts. "
diff --git a/ads/aqua/model/utils.py b/ads/aqua/model/utils.py
@@ -0,0 +1,52 @@
+#!/usr/bin/env python
+# Copyright (c) 2025 Oracle and/or its affiliates.
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
+"""AQUA model utils"""
+
+from typing import Dict, Optional, Tuple
+
+from ads.aqua.common.entities import AquaMultiModelRef
+from ads.aqua.common.errors import AquaValueError
+from ads.aqua.common.utils import get_model_by_reference_paths
+from ads.aqua.finetuning.constants import FineTuneCustomMetadata
+from ads.common.object_storage_details import ObjectStorageDetails
+from ads.model.datascience_model import DataScienceModel
+
+
+def extract_base_model_from_ft(aqua_model: DataScienceModel) -> Tuple[str, str]:
+    """Extracts the base model OCID and the base model name for a fine-tuned model.
+
+    Both values are read from the custom metadata of the fine-tuned model, under
+    the keys ``FineTuneCustomMetadata.FINE_TUNE_SOURCE`` and
+    ``FineTuneCustomMetadata.FINE_TUNE_SOURCE_NAME``.
+
+    Parameters
+    ----------
+    aqua_model : DataScienceModel
+        The fine-tuned model whose custom metadata is inspected.
+
+    Returns
+    -------
+    Tuple[str, str]
+        The base model OCID (config_source_id) and the base model name, in that order.
+
+    Raises
+    ------
+    AquaValueError
+        If either metadata entry cannot be looked up or holds an empty value.
+    """
+    # Built up front so both failure paths report the same text. The message names
+    # the fine-tuned model itself, because the base model OCID is exactly the value
+    # that may be absent when this error is raised.
+    missing_metadata_message = (
+        f"Either {FineTuneCustomMetadata.FINE_TUNE_SOURCE} or {FineTuneCustomMetadata.FINE_TUNE_SOURCE_NAME} is missing "
+        f"from custom metadata for the model {aqua_model.id}"
+    )
+
+    try:
+        config_source_id = aqua_model.custom_metadata_list.get(
+            FineTuneCustomMetadata.FINE_TUNE_SOURCE
+        ).value
+        model_name = aqua_model.custom_metadata_list.get(
+            FineTuneCustomMetadata.FINE_TUNE_SOURCE_NAME
+        ).value
+    except ValueError as err:
+        # A failed metadata lookup is reported as ValueError; translate it so that
+        # callers only ever have to handle AquaValueError from this helper.
+        raise AquaValueError(missing_metadata_message) from err
+
+    if not config_source_id or not model_name:
+        raise AquaValueError(missing_metadata_message)
+
+    return config_source_id, model_name
+
+
+def extract_fine_tune_artifacts_path(aqua_model: DataScienceModel) -> Tuple[str, str]:
+    """Resolves the base model path and the fine-tuned output path of a model.
+
+    The two paths are obtained by passing ``aqua_model.model_file_description``
+    to ``get_model_by_reference_paths``.
+
+    Parameters
+    ----------
+    aqua_model : DataScienceModel
+        The fine-tuned model whose file description is inspected.
+
+    Returns
+    -------
+    Tuple[str, str]
+        The base model path exactly as returned by ``get_model_by_reference_paths``,
+        followed by the fine-tuned output path reduced to the ``filepath`` of its
+        parsed object storage location, with trailing slashes removed.
+
+    Raises
+    ------
+    AquaValueError
+        If the fine-tuned output path is empty or is not an OCI path.
+    """
+    reference_paths = get_model_by_reference_paths(aqua_model.model_file_description)
+    base_model_path, ft_output_uri = reference_paths
+
+    # The emptiness check comes first so that ``is_oci_path`` is never called
+    # with an empty value.
+    has_usable_ft_path = bool(ft_output_uri) and ObjectStorageDetails.is_oci_path(
+        ft_output_uri
+    )
+    if not has_usable_ft_path:
+        raise AquaValueError(
+            "Fine tuned output path is not available in the model artifact."
+        )
+
+    ft_location = ObjectStorageDetails.from_path(ft_output_uri)
+    return base_model_path, ft_location.filepath.rstrip("/")
diff --git a/ads/aqua/modeldeployment/deployment.py b/ads/aqua/modeldeployment/deployment.py
@@ -25,7 +25,6 @@
     build_pydantic_error_message,
     get_combined_params,
     get_container_params_type,
-    get_model_by_reference_paths,
     get_ocid_substring,
     get_params_dict,
     get_params_list,
@@ -46,9 +45,12 @@
     UNKNOWN_DICT,
 )
 from ads.aqua.data import AquaResourceIdentifier
-from ads.aqua.finetuning.finetuning import FineTuneCustomMetadata
 from ads.aqua.model import AquaModelApp
 from ads.aqua.model.constants import AquaModelMetadataKeys, ModelCustomMetadataFields
+from ads.aqua.model.utils import (
+    extract_base_model_from_ft,
+    extract_fine_tune_artifacts_path,
+)
 from ads.aqua.modeldeployment.entities import (
     AquaDeployment,
     AquaDeploymentConfig,
@@ -211,6 +213,7 @@ def create(
             )
         else:
             model_ids = [model.model_id for model in create_deployment_details.models]
+
             try:
                 model_config_summary = self.get_multimodel_deployment_config(
                     model_ids=model_ids, compartment_id=compartment_id
@@ -343,22 +346,6 @@ def _create(
         config_source_id = create_deployment_details.model_id
         model_name = aqua_model.display_name
 
-        is_fine_tuned_model = Tags.AQUA_FINE_TUNED_MODEL_TAG in aqua_model.freeform_tags
-
-        if is_fine_tuned_model:
-            try:
-                config_source_id = aqua_model.custom_metadata_list.get(
-                    FineTuneCustomMetadata.FINE_TUNE_SOURCE
-                ).value
-                model_name = aqua_model.custom_metadata_list.get(
-                    FineTuneCustomMetadata.FINE_TUNE_SOURCE_NAME
-                ).value
-            except ValueError as err:
-                raise AquaValueError(
-                    f"Either {FineTuneCustomMetadata.FINE_TUNE_SOURCE} or {FineTuneCustomMetadata.FINE_TUNE_SOURCE_NAME} is missing "
-                    f"from custom metadata for the model {config_source_id}"
-                ) from err
-
         # set up env and cmd var
         env_var = create_deployment_details.env_var or {}
         cmd_var = create_deployment_details.cmd_var or []
@@ -378,19 +365,11 @@ def _create(
 
         env_var.update({"BASE_MODEL": f"{model_path_prefix}"})
 
-        if is_fine_tuned_model:
-            _, fine_tune_output_path = get_model_by_reference_paths(
-                aqua_model.model_file_description
-            )
-
-            if not fine_tune_output_path:
-                raise AquaValueError(
-                    "Fine tuned output path is not available in the model artifact."
-                )
-
-            os_path = ObjectStorageDetails.from_path(fine_tune_output_path)
-            fine_tune_output_path = os_path.filepath.rstrip("/")
+        is_fine_tuned_model = Tags.AQUA_FINE_TUNED_MODEL_TAG in aqua_model.freeform_tags
 
+        if is_fine_tuned_model:
+            config_source_id, model_name = extract_base_model_from_ft(aqua_model)
+            _, fine_tune_output_path = extract_fine_tune_artifacts_path(aqua_model)
             env_var.update({"FT_MODEL": f"{fine_tune_output_path}"})
 
         container_type_key = self._get_container_type_key(
@@ -647,6 +626,10 @@ def _create_multi(
             config_data = {"params": params, "model_path": artifact_path_prefix}
             if model.model_task:
                 config_data["model_task"] = model.model_task
+
+            if model.fine_tune_weights_location:
+                config_data["fine_tune_weights_location"] = model.fine_tune_weights_location
+
             model_config.append(config_data)
             model_name_list.append(model.model_name)
 
diff --git a/tests/unitary/with_extras/aqua/test_deployment.py b/tests/unitary/with_extras/aqua/test_deployment.py
@@ -19,22 +19,21 @@
 )
 from parameterized import parameterized
 
+import ads.aqua.modeldeployment.deployment
+import ads.config
+from ads.aqua.app import AquaApp
 from ads.aqua.common.entities import (
     AquaMultiModelRef,
     ComputeShapeSummary,
     ModelConfigResult,
 )
-from ads.aqua.app import AquaApp
-from ads.aqua.common.entities import ModelConfigResult
-import ads.aqua.modeldeployment.deployment
-import ads.config
-from ads.aqua.common.entities import AquaMultiModelRef
 from ads.aqua.common.enums import Tags
 from ads.aqua.common.errors import AquaRuntimeError, AquaValueError
 from ads.aqua.config.container_config import (
-    AquaContainerConfigItem,
     AquaContainerConfig,
+    AquaContainerConfigItem,
 )
+from ads.aqua.model.enums import MultiModelSupportedTaskType
 from ads.aqua.modeldeployment import AquaDeploymentApp, MDInferenceResponse
 from ads.aqua.modeldeployment.entities import (
     AquaDeployment,
@@ -45,7 +44,6 @@
     ModelDeploymentConfigSummary,
     ModelParams,
 )
-from ads.aqua.model.enums import MultiModelSupportedTaskType
 from ads.aqua.modeldeployment.utils import MultiModelDeploymentConfigLoader
 from ads.model.datascience_model import DataScienceModel
 from ads.model.deployment.model_deployment import ModelDeployment
@@ -277,7 +275,7 @@ class TestDataset:
                         "environment_configuration_type": "OCIR_CONTAINER",
                         "environment_variables": {
                             "MODEL_DEPLOY_PREDICT_ENDPOINT": "/v1/completions",
-                            "MULTI_MODEL_CONFIG": '{ "models": [{ "params": "--served-model-name model_one --tensor-parallel-size 1 --max-model-len 2096", "model_path": "models/model_one/5be6479/artifact/", "model_task": "text_embedding"}, {"params": "--served-model-name model_two --tensor-parallel-size 1 --max-model-len 2096", "model_path": "models/model_two/83e9aa1/artifact/", "model_task": "image_text_to_text"}, {"params": "--served-model-name model_three --tensor-parallel-size 1 --max-model-len 2096", "model_path": "models/model_three/83e9aa1/artifact/", "model_task": "code_synthesis"}]}',
+                            "MULTI_MODEL_CONFIG": '{ "models": [{ "params": "--served-model-name model_one --tensor-parallel-size 1 --max-model-len 2096", "model_path": "models/model_one/5be6479/artifact/", "model_task": "text_embedding"}, {"params": "--served-model-name model_two --tensor-parallel-size 1 --max-model-len 2096", "model_path": "models/model_two/83e9aa1/artifact/", "model_task": "image_text_to_text"}, {"params": "--served-model-name model_three --tensor-parallel-size 1 --max-model-len 2096", "model_path": "models/model_three/83e9aa1/artifact/", "model_task": "code_synthesis", "fine_tune_weights_location": "oci://test_bucket@test_namespace/models/ft-models/meta-llama-3b/ocid1.datasciencejob.oc1.iad.<ocid>"}]}',
                         },
                         "health_check_port": 8080,
                         "image": "dsmc://image-name:1.0.0.0",
@@ -489,6 +487,7 @@ class TestDataset:
                 "model_name": "test_model_1",
                 "model_task": "text_embedding",
                 "artifact_location": "test_location_1",
+                "fine_tune_weights_location" : None
             },
             {
                 "env_var": {},
@@ -497,6 +496,7 @@ class TestDataset:
                 "model_name": "test_model_2",
                 "model_task": "image_text_to_text",
                 "artifact_location": "test_location_2",
+                "fine_tune_weights_location" : None
             },
             {
                 "env_var": {},
@@ -505,12 +505,13 @@ class TestDataset:
                 "model_name": "test_model_3",
                 "model_task": "code_synthesis",
                 "artifact_location": "test_location_3",
+                "fine_tune_weights_location" : "oci://test_bucket@test_namespace/models/ft-models/meta-llama-3b/ocid1.datasciencejob.oc1.iad.<ocid>"
             },
         ],
         "model_id": "ocid1.datasciencemodel.oc1.<region>.<OCID>",
         "environment_variables": {
             "MODEL_DEPLOY_PREDICT_ENDPOINT": "/v1/completions",
-            "MULTI_MODEL_CONFIG": '{ "models": [{ "params": "--served-model-name model_one --tensor-parallel-size 1 --max-model-len 2096", "model_path": "models/model_one/5be6479/artifact/", "model_task": "text_embedding"}, {"params": "--served-model-name model_two --tensor-parallel-size 1 --max-model-len 2096", "model_path": "models/model_two/83e9aa1/artifact/", "model_task": "image_text_to_text"}, {"params": "--served-model-name model_three --tensor-parallel-size 1 --max-model-len 2096", "model_path": "models/model_three/83e9aa1/artifact/", "model_task": "code_synthesis"}]}',
+            "MULTI_MODEL_CONFIG": '{ "models": [{ "params": "--served-model-name model_one --tensor-parallel-size 1 --max-model-len 2096", "model_path": "models/model_one/5be6479/artifact/", "model_task": "text_embedding"}, {"params": "--served-model-name model_two --tensor-parallel-size 1 --max-model-len 2096", "model_path": "models/model_two/83e9aa1/artifact/", "model_task": "image_text_to_text"}, {"params": "--served-model-name model_three --tensor-parallel-size 1 --max-model-len 2096", "model_path": "models/model_three/83e9aa1/artifact/", "model_task": "code_synthesis", "fine_tune_weights_location": "oci://test_bucket@test_namespace/models/ft-models/meta-llama-3b/ocid1.datasciencejob.oc1.iad.<ocid>"}]}',
         },
         "cmd": [],
         "console_link": "https://cloud.oracle.com/data-science/model-deployments/ocid1.datasciencemodeldeployment.oc1.<region>.<MD_OCID>?region=region-name",
@@ -971,6 +972,7 @@ class TestDataset:
             "model_name": "model_one",
             "model_task": "text_embedding",
             "artifact_location": "artifact_location_one",
+            "fine_tune_weights_location": None
         },
         {
             "env_var": {"--test_key_two": "test_value_two"},
@@ -979,6 +981,7 @@ class TestDataset:
             "model_name": "model_two",
             "model_task": "image_text_to_text",
             "artifact_location": "artifact_location_two",
+            "fine_tune_weights_location": None
         },
         {
             "env_var": {"--test_key_three": "test_value_three"},
@@ -987,6 +990,7 @@ class TestDataset:
             "model_name": "model_three",
             "model_task": "code_synthesis",
             "artifact_location": "artifact_location_three",
+            "fine_tune_weights_location" : "oci://test_bucket@test_namespace/models/ft-models/meta-llama-3b/ocid1.datasciencejob.oc1.iad.<ocid>"
         },
     ]
 
@@ -1813,6 +1817,7 @@ def test_create_deployment_for_multi_model(
             model_task="code_synthesis",
             gpu_count=2,
             artifact_location="test_location_3",
+            fine_tune_weights_location= "oci://test_bucket@test_namespace/models/ft-models/meta-llama-3b/ocid1.datasciencejob.oc1.iad.<ocid>"
         )
 
         result = self.app.create(
diff --git a/tests/unitary/with_extras/aqua/test_model.py b/tests/unitary/with_extras/aqua/test_model.py