From 910e6dd60437f66f970da528fef9f8aaaee6a39b Mon Sep 17 00:00:00 2001
From: Alan Davidson
Date: Thu, 14 Nov 2024 16:33:13 -0500
Subject: [PATCH] [rsdk-8618] symlink model data without copying it over
 manually (#21)

Tried on an Orin Nano with the pepperoni detector and the COCO person
detector: seems to work fine.

* create a function to initialize directories
* add vim swap files to .gitignore
* include the directory we forgot
* make a helper to symlink the model data
* -a
* bugfixes
* it compiles now
* Revert "add vim swap files to .gitignore"

  This reverts commit 578921e89601156c7d2b338d04520f2015747e2e.

* move #include to the right paragraph
* reorder function calls
* pr feedback: symlink the whole directory, not just the TF-specific stuff
* tabs -> spaces
* debugging
* delete the old symlink if it exists
* remove mention of model_repository_path in favor of symlink
* debugging
* update readme
* debugging: need environment variable
* remove debugging info
* more debugging, will come back later
* -a
* -a
* pr feedback: rephrase comment
* pr feedback: URL in comment
* -a
* pr feedback: add comment
* pr feedback: use a version of the function that throws exceptions, don't check return value
* pr feedback: use filesystem::path and don't have boolean args
* use filesystem::path again
* put back the model repo directory, need to sort out model version still
* set the model version only if you're manually setting up the repo, otherwise use version 1
* debugging: get the model name before creating symlinks
* update readme
* pr feedback: allow either the model_path or the model_repository_path but not both

---------

Co-authored-by: robin
Co-authored-by: viam
---
 CMakeLists.txt                             |   4 +-
 README.md                                  |  23 ++-
 bin/viam-mlmodelservice-triton.sh.envsubst |   6 +-
 src/viam_mlmodelservice_triton_impl.cpp    | 157 +++++++++++++++------
 4 files changed, 140 insertions(+), 50 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index a1d72d7..0afd2b3 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -6,8 +6,8 @@ project(viam-mlmodelservice-triton
   LANGUAGES CXX
 )
 
-# Use of the C++ SDK requires C++14.
-set(CMAKE_CXX_STANDARD 14)
+# Use of the C++ SDK requires C++14 or later. We make use of std::filesystem, which requires C++17 or later.
+set(CMAKE_CXX_STANDARD 17)
 set(CMAKE_CXX_STANDARD_REQUIRED True)
 set(CMAKE_CXX_EXTENSIONS OFF)
 
diff --git a/README.md b/README.md
index 1bbb843..bc884fc 100644
--- a/README.md
+++ b/README.md
@@ -118,9 +118,10 @@ The following attributes are available for the MLModel service `viam:mlmodelserv
 | Name | Type | Inclusion | Description |
 | ---- | ---- | --------- | ----------- |
 | `model_name` | string | **Required** | The model to be loaded from the model repository. |
-| `model_repository_path` | string | **Required** | The container-side path to a model repository. Note that this must be a subdirectory of the `$HOME/.viam` directory of the user running `viam-server`. |
+| `model_repository_path` | string | **Semi-Required** | The container-side path to a model repository. Note that this must be a subdirectory of the `$HOME/.viam` directory of the user running `viam-server`. Exactly one of `model_repository_path` or `model_path` is required. |
+| `model_path` | string | **Semi-Required** | The directory in which the model to use is stored. You can use strings like `${packages.ml_model.MyModel}`, too. Exactly one of `model_repository_path` or `model_path` is required. |
 | `backend_directory` | string | Optional | A container-side path to the Triton Server "backend" directory. You normally do not need to override this; the build will set it to the backend directory of the Triton Server installation in the container. You may set it if you wish to use a different set of backends. |
-| `model_version` | int | Optional | The version of the model to be loaded. If not specified, the module will use the newest version of the model named by model_name.<br><br>Default: `-1` (newest) |

+| `model_version` | int | Optional | The version of the model to be loaded from `model_repository_path`. If not specified, the module will use the newest version of the model named by `model_name`.<br><br>Default: `-1` (newest) |

 | `preferred_input_memory_type` | string | Optional | One of `cpu`, `cpu-pinned`, or `gpu`. This controls the type of memory that will be allocated by the module for input tensors. If not specified, this will default to `cpu` if no CUDA-capable devices are detected at runtime, or to `gpu` if CUDA-capable devices are found. |
 | `preferred_input_memory_type_id` | int | Optional | The CUDA identifier on which to allocate `gpu` or `cpu-pinned` input tensors. You probably don't need to change this unless you have multiple GPUs.<br><br>Default: `0` (first device) |

 | `tensor_name_remappings` | obj | Optional | Provides two dictionaries under the `inputs` and `outputs` keys that rename the model's tensors. Other Viam services, like the [vision service](https://docs.viam.com/registry/advanced/mlmodel-design/), may expect to work with tensors with particular names. Use this map to rename the tensors from the loaded model to what the vision service expects, as needed to meet those requirements.<br><br>Default: `{}` |

@@ -134,7 +135,7 @@ An example minimal configuration would look like this, within your robot’s "se
   "type": "mlmodel",
   "attributes": {
     "model_name": "efficientdet-lite4-detection",
-    "model_repository_path": "/path/to/.viam/triton/repository"
+    "model_path": "${packages.ml_model.FaceDetector}"
   },
   "model": "viam:mlmodelservice:triton",
   "name": "mlmodel-effdet-triton"
@@ -150,7 +151,7 @@ An example detailed configuration with optional parameters specified would look
     "backend_directory": "/opt/tritonserver/backends",
     "model_name": "efficientdet-lite4-detection",
     "model_version": 1,
-    "model_repository_path": "/path/to/.viam/triton/repository",
+    "model_path": "${packages.ml_model.FaceDetector}",
     "preferred_input_memory_type_id": 0,
     "preferred_input_memory_type": "gpu",
     "tensor_name_remappings": {
@@ -170,6 +171,20 @@ An example detailed configuration with optional parameters specified would look
 }
 ```
 
+If you have your own Triton model repository, you could use it like this:
+
+```json {class="line-numbers linkable-line-numbers"}
+{
+  "type": "mlmodel",
+  "attributes": {
+    "model_name": "efficientdet-lite4-detection",
+    "model_repository_path": "/path/to/.viam/triton/repository"
+  },
+  "model": "viam:mlmodelservice:triton",
+  "name": "mlmodel-effdet-triton"
+}
+```
+
 ## Next Steps
 
 - Use the [`jtop`](https://github.com/rbonghi/jetson_stats) utility on the Jetson line to monitor GPU usage to validate that Triton is accelerating inference through the GPU.

diff --git a/bin/viam-mlmodelservice-triton.sh.envsubst b/bin/viam-mlmodelservice-triton.sh.envsubst
index bd2f595..67fb566 100755
--- a/bin/viam-mlmodelservice-triton.sh.envsubst
+++ b/bin/viam-mlmodelservice-triton.sh.envsubst
@@ -6,6 +6,10 @@ VIAM_DIR=`realpath ~/.viam`
 
 # TODO: Validate presence of NVidia container runtime
 
+# Sneaky trick alert: we need to get $VIAM_MODULE_DATA propagated into the actual server. If we set
+# the environment variable using a `-e` flag on the docker command, it would be set for the command
+# run, but that command is `sudo`, which does *not* propagate environment variables to its
+# sub-command! So, set it within the `sudo` command at the end.
 exec docker run \
     --rm \
     --runtime=nvidia --gpus=all \
@@ -14,4 +18,4 @@ exec docker run \
     -v ${DOLLAR}SOCKET_DIR:${DOLLAR}SOCKET_DIR \
     -v ${DOLLAR}VIAM_DIR:${DOLLAR}VIAM_DIR \
     $TAG \
-    sudo -u \#$(id -u) LD_PRELOAD=libjemalloc.so.2 /opt/viam/bin/viam_mlmodelservice_triton "$@" 2>&1
+    sudo -u \#$(id -u) LD_PRELOAD=libjemalloc.so.2 VIAM_MODULE_DATA="$VIAM_MODULE_DATA" /opt/viam/bin/viam_mlmodelservice_triton "$@" 2>&1

diff --git a/src/viam_mlmodelservice_triton_impl.cpp b/src/viam_mlmodelservice_triton_impl.cpp
index 0f9a3c0..88ab7b6 100644
--- a/src/viam_mlmodelservice_triton_impl.cpp
+++ b/src/viam_mlmodelservice_triton_impl.cpp
@@ -20,6 +20,8 @@
 #include 
 #include 
+#include <cstdlib>
+#include <filesystem>
 #include 
 #include 
 #include 
@@ -338,6 +340,64 @@ class Service : public vsdk::MLModelService, public vsdk::Stoppable, public vsdk
         return state_;
     }
 
+    static void symlink_mlmodel_(const struct state_& state) {
+        const auto& attributes = state.configuration.attributes();
+
+        auto model_path = attributes->find("model_path");
+        if (model_path == attributes->end()) {
+            std::ostringstream buffer;
+            buffer << service_name
+                   << ": Required parameter `model_path` not found in configuration";
+            throw std::invalid_argument(buffer.str());
+        }
+
+        const std::string* model_path_string = model_path->second->get<std::string>();
+        if (!model_path_string || model_path_string->empty()) {
+            std::ostringstream buffer;
+            buffer << service_name
+                   << ": Required non-empty string parameter `model_path` is either not "
+                      "a string or is an empty string";
+            throw std::invalid_argument(buffer.str());
+        }
+
+        // The user doesn't have a way to set the version number: they've downloaded the only
+        // version available to them. So, set the version to 1.
+        const std::string model_version = "1";
+
+        // If there exists a `saved_model.pb` file in the model path, this is a TensorFlow model.
+        // In that case, Triton uses a different directory structure compared to all other models.
+        // For details, see
+        // https://docs.nvidia.com/deeplearning/triton-inference-server/user-guide/docs/user_guide/model_repository.html#model-files
+        const std::filesystem::path saved_model_pb_path =
+            std::filesystem::path(*model_path_string) / "saved_model.pb";
+        const bool is_tf = std::filesystem::exists(saved_model_pb_path);
+        std::filesystem::path directory_name =
+            std::filesystem::path(std::getenv("VIAM_MODULE_DATA")) / state.model_name;
+        if (is_tf) {
+            directory_name /= model_version;
+        }
+        std::filesystem::create_directories(directory_name);
+
+        if (is_tf) {
+            directory_name /= "model.savedmodel";
+        } else {
+            directory_name /= model_version;
+        }
+        const std::string triton_name = directory_name.string();
+
+        if (std::filesystem::exists(triton_name)) {
+            // TODO: make a backup copy instead of deleting
+            const bool success = std::filesystem::remove(triton_name);
+            if (!success) {
+                std::ostringstream buffer;
+                buffer << service_name << ": Unable to delete old model symlink";
+                throw std::invalid_argument(buffer.str());
+            }
+        }
+        std::filesystem::create_directory_symlink(*model_path_string, triton_name);
+    }
+
     static std::shared_ptr<struct state_> reconfigure_(vsdk::Dependencies dependencies,
                                                        vsdk::ResourceConfig configuration) {
         auto state =
@@ -364,41 +424,6 @@ class Service : public vsdk::MLModelService, public vsdk::Stoppable, public vsdk
 
         const auto& attributes = state->configuration.attributes();
 
-        // Pull the model repository path out of the configuration.
-        auto model_repo_path = attributes->find("model_repository_path");
-        if (model_repo_path == attributes->end()) {
-            std::ostringstream buffer;
-            buffer << service_name
-                   << ": Required parameter `model_repository_path` not found in configuration";
-            throw std::invalid_argument(buffer.str());
-        }
-
-        auto* const model_repo_path_string = model_repo_path->second->get<std::string>();
-        if (!model_repo_path_string || model_repo_path_string->empty()) {
-            std::ostringstream buffer;
-            buffer << service_name
-                   << ": Required non-empty string parameter `model_repository_path` is either not "
-                      "a string "
-                      "or is an empty string";
-            throw std::invalid_argument(buffer.str());
-        }
-        state->model_repo_path = std::move(*model_repo_path_string);
-
-        // Pull the backend directory out of the configuration, if provided.
-        auto backend_directory = attributes->find("backend_directory");
-        if (backend_directory != attributes->end()) {
-            auto* const backend_directory_string = backend_directory->second->get<std::string>();
-            if (!backend_directory_string || backend_directory_string->empty()) {
-                std::ostringstream buffer;
-                buffer << service_name
-                       << ": Configuration parameter `backend_directory` is either not a"
-                          "string "
-                          "or is an empty string";
-                throw std::invalid_argument(buffer.str());
-            }
-            state->backend_directory = std::move(*backend_directory_string);
-        }
-
         // Pull the model name out of the configuration.
         auto model_name = attributes->find("model_name");
         if (model_name == attributes->end()) {
@@ -419,18 +444,64 @@ class Service : public vsdk::MLModelService, public vsdk::Stoppable, public vsdk
         }
         state->model_name = std::move(*model_name_string);
 
-        auto model_version = attributes->find("model_version");
-        if (model_version != attributes->end()) {
-            auto* const model_version_value = model_version->second->get<double>();
-            if (!model_version_value || (*model_version_value < 1) ||
-                (std::nearbyint(*model_version_value) != *model_version_value)) {
+        // Pull the model repository path out of the configuration.
+        auto model_repo_path = attributes->find("model_repository_path");
+        if (model_repo_path == attributes->end()) {
+            // With no model repository path, we try to construct our own by symlinking a single
+            // model path.
+            symlink_mlmodel_(*state.get());
+            state->model_repo_path = std::getenv("VIAM_MODULE_DATA");
+            state->model_version = 1;
+        } else {
+            // If the model_repository_path is specified, forbid specifying the model_path.
+            if (attributes->find("model_path") != attributes->end()) {
+                std::ostringstream buffer;
+                buffer << service_name
+                       << ": Both the `model_repository_path` and `model_path` are set, "
+                          "but we expect only one or the other.";
+                throw std::invalid_argument(buffer.str());
+            }
+
+            auto* const model_repo_path_string = model_repo_path->second->get<std::string>();
+            if (!model_repo_path_string || model_repo_path_string->empty()) {
                 std::ostringstream buffer;
                 buffer << service_name
-                       << ": Optional parameter `model_version` was provided, but is not a natural "
-                          "number";
+                       << ": Non-empty string parameter `model_repository_path` is either not "
+                          "a string or is an empty string";
                 throw std::invalid_argument(buffer.str());
             }
-            state->model_version = static_cast<std::int64_t>(*model_version_value);
+            state->model_repo_path = std::move(*model_repo_path_string);
+
+            // If you specify your own model repo path, you may specify your own model version
+            // number, too.
+            auto model_version = attributes->find("model_version");
+            if (model_version != attributes->end()) {
+                auto* const model_version_value = model_version->second->get<double>();
+                if (!model_version_value || (*model_version_value < 1) ||
+                    (std::nearbyint(*model_version_value) != *model_version_value)) {
+                    std::ostringstream buffer;
+                    buffer << service_name
+                           << ": Optional parameter `model_version` was provided, but is not a "
+                              "natural number";
+                    throw std::invalid_argument(buffer.str());
+                }
+                state->model_version = static_cast<std::int64_t>(*model_version_value);
+            }
+        }
+
+        // Pull the backend directory out of the configuration, if provided.
+        auto backend_directory = attributes->find("backend_directory");
+        if (backend_directory != attributes->end()) {
+            auto* const backend_directory_string = backend_directory->second->get<std::string>();
+            if (!backend_directory_string || backend_directory_string->empty()) {
+                std::ostringstream buffer;
+                buffer << service_name
+                       << ": Configuration parameter `backend_directory` is either not a "
+                          "string or is an empty string";
+                throw std::invalid_argument(buffer.str());
+            }
+            state->backend_directory = std::move(*backend_directory_string);
         }
 
         auto preferred_input_memory_type = attributes->find("preferred_input_memory_type");
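
A sketch for reviewers of the repository layout that `symlink_mlmodel_` assembles. This is illustrative only; the model name and package path below are hypothetical examples, not values from this patch:

```sh
#!/bin/sh
# Sketch of what symlink_mlmodel_ does, with made-up example values.
MODEL_NAME="efficientdet-lite4-detection"
MODEL_PATH="/root/.viam/packages/.data/ml_model/FaceDetector"  # assumed package dir

if [ -e "$MODEL_PATH/saved_model.pb" ]; then
    # TensorFlow SavedModel: Triton expects <repo>/<name>/<version>/model.savedmodel,
    # so the version directory is real and the symlink sits inside it.
    mkdir -p "$VIAM_MODULE_DATA/$MODEL_NAME/1"
    ln -sfn "$MODEL_PATH" "$VIAM_MODULE_DATA/$MODEL_NAME/1/model.savedmodel"
else
    # All other backends: the version directory itself is the symlink.
    mkdir -p "$VIAM_MODULE_DATA/$MODEL_NAME"
    ln -sfn "$MODEL_PATH" "$VIAM_MODULE_DATA/$MODEL_NAME/1"
fi
# Triton is then handed $VIAM_MODULE_DATA as the model repository, with the
# model version fixed at 1.
```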
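
And a second sketch, of the `sudo` environment behavior that motivates the `VIAM_MODULE_DATA="$VIAM_MODULE_DATA"` change in the wrapper script. The exported value is made up, and the exact behavior depends on the local sudoers policy:

```sh
# sudo resets the caller's environment by default, so a variable exported
# before sudo runs (or injected with docker's -e flag) does not reach the
# sub-command:
export VIAM_MODULE_DATA=/tmp/example              # hypothetical value
sudo sh -c 'echo "${VIAM_MODULE_DATA:-unset}"'    # prints: unset

# An assignment placed on sudo's own command line is applied to the
# sub-command, which is what the script now does:
sudo VIAM_MODULE_DATA="$VIAM_MODULE_DATA" sh -c 'echo "$VIAM_MODULE_DATA"'
# prints: /tmp/example
```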