From 9e756c8b49ad9b70848dd0a86d42fae491243dec Mon Sep 17 00:00:00 2001
From: Suryaprakash Shanmugam <suryaprakash.shanmugam@intel.com>
Date: Sat, 29 Apr 2023 10:21:35 +0530
Subject: [PATCH 1/5] Patch for Whisper on CPU

---
 .../cpu/transformers/beam_search_impl_t5.h    |  14 +--
 .../contrib_ops/cpu/utils/console_dumper.h    |   2 +-
 .../openvino/backends/basic_backend.cc        | 112 +++++++++---------
 .../openvino/ov_versions/capability.cc        |   6 +-
 .../openvino/ov_versions/data_ops.cc          |   2 +
 onnxruntime/core/session/inference_session.cc |  37 ++++++
 6 files changed, 103 insertions(+), 70 deletions(-)
diff --git a/onnxruntime/contrib_ops/cpu/transformers/beam_search_impl_t5.h b/onnxruntime/contrib_ops/cpu/transformers/beam_search_impl_t5.h
index 51e8ae7b13765..39087999ba957 100644
--- a/onnxruntime/contrib_ops/cpu/transformers/beam_search_impl_t5.h
+++ b/onnxruntime/contrib_ops/cpu/transformers/beam_search_impl_t5.h
@@ -302,13 +302,13 @@ Status BeamSearchT5<T>::Execute(const FeedsFetchesManager& encoder_feeds_fetches
       dumper->Print("decoder_feeds", i, true);
       dumper->Print("", decoder_feeds[i]);
     }
-    auto offset = decoder_subgraph_.GetFirstPastInputIndex() + 4 * decoder_subgraph_.num_layers;
-    dumper->Print("past_sequence_length", offset, true);
-    dumper->Print("", decoder_feeds[offset]);
-    dumper->Print("beam_width", offset + 1, true);
-    dumper->Print("", decoder_feeds[offset + 1]);
-    dumper->Print("cache_redir", offset + 2, true);
-    dumper->Print("", decoder_feeds[offset + 2]);
+    // auto offset = decoder_subgraph_.GetFirstPastInputIndex() + 4 * decoder_subgraph_.num_layers;
+    // dumper->Print("past_sequence_length", offset, true);
+    // dumper->Print("", decoder_feeds[offset]);
+    // dumper->Print("beam_width", offset + 1, true);
+    // dumper->Print("", decoder_feeds[offset + 1]);
+    // dumper->Print("cache_redir", offset + 2, true);
+    // dumper->Print("", decoder_feeds[offset + 2]);
 #endif
 
 #ifdef DEBUG_NODE_INPUTS_OUTPUTS
diff --git a/onnxruntime/contrib_ops/cpu/utils/console_dumper.h b/onnxruntime/contrib_ops/cpu/utils/console_dumper.h
index 97e0a6597db5d..0062d757f39ec 100644
--- a/onnxruntime/contrib_ops/cpu/utils/console_dumper.h
+++ b/onnxruntime/contrib_ops/cpu/utils/console_dumper.h
@@ -9,7 +9,7 @@ namespace onnxruntime {
 namespace contrib {
 namespace transformers {
 
-// #define DEBUG_GENERATION 1  // uncomment it for debugging generation (like beam search etc)
+#define DEBUG_GENERATION 1  // uncomment it for debugging generation (like beam search etc)
 #ifdef DEBUG_GENERATION
 #define DUMP_TENSOR_LEVEL 2
 #else
diff --git a/onnxruntime/core/providers/openvino/backends/basic_backend.cc b/onnxruntime/core/providers/openvino/backends/basic_backend.cc
index 9bdbdbad592ec..ca00f60f97547 100644
--- a/onnxruntime/core/providers/openvino/backends/basic_backend.cc
+++ b/onnxruntime/core/providers/openvino/backends/basic_backend.cc
@@ -157,75 +157,69 @@ BasicBackend::BasicBackend(const ONNX_NAMESPACE::ModelProto& model_proto,
     }
   }
 
-  void BasicBackend::EnableGPUThrottling(ov::AnyMap & device_config) {
-    if (global_context_.enable_opencl_throttling == true && global_context_.device_type.find("GPU") != std::string::npos) {
-      LOGS_DEFAULT(INFO) << log_tag << "Enabled OpenCL queue throttling for GPU device";
-      device_config[GPU_CONFIG_KEY(PLUGIN_THROTTLE)] = "1";
-    }
+void BasicBackend::EnableGPUThrottling(ov::AnyMap& device_config) {
+  if (global_context_.enable_opencl_throttling == true && global_context_.device_type.find("GPU") != std::string::npos) {
+    LOGS_DEFAULT(INFO) << log_tag << "Enabled OpenCL queue throttling for GPU device";
+    device_config[GPU_CONFIG_KEY(PLUGIN_THROTTLE)] = "1";
   }
+}
 
-  // Starts an asynchronous inference request for data in slice indexed by batch_slice_idx on
-  // an Infer Request indexed by infer_req_idx
-  void BasicBackend::StartAsyncInference(Ort::KernelContext & context, OVInferRequestPtr infer_request) {
-    try {
-      auto graph_input_info = exe_network_.Get().inputs();
-      int input_idx = 0;
-      for (auto input_info_iter = graph_input_info.begin();
-           input_info_iter != graph_input_info.end(); ++input_info_iter) {
-        auto input_names = input_info_iter->get_names();
-        std::string onnx_input_name;
-        std::string input_name;
-        // use names retrieved from original ONNX model to assign the right onnx input name for the graph
-        for (auto it = subgraph_context_.input_names.begin(); it != subgraph_context_.input_names.end(); ++it) {
-          if (it->second == input_idx) {
-            onnx_input_name = it->first;
-            break;
-          }
-        }
+// Starts an asynchronous inference request for data in slice indexed by batch_slice_idx on
+// an Infer Request indexed by infer_req_idx
+void BasicBackend::StartAsyncInference(Ort::KernelContext& context, OVInferRequestPtr infer_request) {
+  try {
+    auto graph_input_info = exe_network_.Get().inputs();
+    int input_idx = 0;
+    for (auto input_info_iter = graph_input_info.begin();
+         input_info_iter != graph_input_info.end(); ++input_info_iter) {
+      auto input_names = input_info_iter->get_names();
+      std::string onnx_input_name;
+      std::string input_name;
+      // use names retrieved from original ONNX model to assign the right onnx input name for the graph
+      for (auto it = subgraph_context_.input_names.begin(); it != subgraph_context_.input_names.end(); ++it) {
+        onnx_input_name = it->first;
         // using the input name retrieved from ONNX original to match with the input names returned by OV tensors
         if (input_names.find(onnx_input_name) != input_names.end()) {
           input_name = onnx_input_name;
+          break;
         } else {
+          continue;
           throw(log_tag + "Input names mismatch between OpenVINO and ONNX. " + onnx_input_name + " doesn't exist in the list of OpenVINO input tensor names");
         }
-        size_t batch_slice_idx = 0;
-        if (subgraph_context_.has_dynamic_input_shape &&
-            global_context_.enable_dynamic_shapes == true &&
-            (global_context_.device_type.find("CPU") != std::string::npos ||
-             global_context_.device_type.find("GPU") != std::string::npos)) {
-          auto tensor = context.GetInput(subgraph_context_.input_names.at(input_name));
-          auto tensor_info = tensor.GetTensorTypeAndShapeInfo();
-          auto tensor_shape = tensor_info.GetShape();
-          auto tensor_size = tensor_shape.size();
-          auto tensor_iter = 0;
-          ov::Shape input_tensor_shape = ov::Shape(tensor_size, 0);
-          for (auto i = tensor_shape.begin(); i != tensor_shape.end(); ++i) {
-            input_tensor_shape[tensor_iter] = *i;
-            tensor_iter += 1;
-          }
-          auto input = ie_cnn_network_->get_parameters().at(input_idx);
-          OVTensorPtr tensor_ptr = std::make_shared<ov::Tensor>(input->get_element_type(), input_tensor_shape);
-          FillInputBlob(tensor_ptr, batch_slice_idx, input_name, context, subgraph_context_);
-          try {
-            infer_request->SetTensor(input_name, tensor_ptr);
-          } catch (const char* msg) {
-            throw(msg);
-          }
-        } else {
-          OVTensorPtr graph_input_blob;
-          try {
-            graph_input_blob = infer_request->GetTensor(input_name);
-          } catch (const char* msg) {
-            throw(msg);
-          }
-          FillInputBlob(graph_input_blob, batch_slice_idx, input_name, context, subgraph_context_);
+      }
+      size_t batch_slice_idx = 0;
+      if (subgraph_context_.has_dynamic_input_shape &&
+          global_context_.enable_dynamic_shapes == true &&
+          (global_context_.device_type.find("CPU") != std::string::npos ||
+           global_context_.device_type.find("GPU") != std::string::npos)) {
+        auto tensor = context.GetInput(subgraph_context_.input_names.at(input_name));
+        auto tensor_info = tensor.GetTensorTypeAndShapeInfo();
+        auto tensor_shape = tensor_info.GetShape();
+        auto tensor_size = tensor_shape.size();
+        auto tensor_iter = 0;
+        ov::Shape input_tensor_shape = ov::Shape(tensor_size, 0);
+        for (auto i = tensor_shape.begin(); i != tensor_shape.end(); ++i) {
+          input_tensor_shape[tensor_iter] = *i;
+          tensor_iter += 1;
         }
-        input_idx++;
+        auto input = ie_cnn_network_->get_parameters().at(input_idx);
+        OVTensorPtr tensor_ptr = std::make_shared<ov::Tensor>(input->get_element_type(), input_tensor_shape);
+        FillInputBlob(tensor_ptr, batch_slice_idx, input_name, context, subgraph_context_);
+        try {
+          infer_request->SetTensor(input_name, tensor_ptr);
+        } catch (const char* msg) {
+          throw(msg);
+        }
+      } else {
+        OVTensorPtr graph_input_blob;
+        try {
+          graph_input_blob = infer_request->GetTensor(input_name);
+        } catch (const char* msg) {
+          throw(msg);
+        }
+        FillInputBlob(graph_input_blob, batch_slice_idx, input_name, context, subgraph_context_);
       }
-      // Start Async inference
-      infer_request->StartAsync();
-    } catch (const char* msg) {
-      throw(msg);
+      input_idx++;
     }
   }
 
diff --git a/onnxruntime/core/providers/openvino/ov_versions/capability.cc b/onnxruntime/core/providers/openvino/ov_versions/capability.cc
index 9ff0ea0bb1bf4..238524031cd34 100644
--- a/onnxruntime/core/providers/openvino/ov_versions/capability.cc
+++ b/onnxruntime/core/providers/openvino/ov_versions/capability.cc
@@ -42,9 +42,9 @@ std::vector<std::unique_ptr<ComputeCapability>> GetCapability::Execute() {
   std::vector<std::unique_ptr<ComputeCapability>> result;
 
   // Check if it is a subgraph
-  if (graph_viewer_.IsSubgraph()) {
-    return result;
-  }
+  // if (graph_viewer_.IsSubgraph()) {
+  //   return result;
+  // }
 
   // This is a list of initializers that nGraph considers as constants. Example weights, reshape shape etc.
   std::unordered_set<std::string> ng_required_initializers;
diff --git a/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc b/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc
index 88db95c1fbe92..352099a608872 100644
--- a/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc
+++ b/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc
@@ -63,6 +63,7 @@ std::set<std::string> ops_supported_only_in_model = {
 std::set<std::string> ops_supported_as_function = {
     "LessOrEqual",
     "GreaterOrEqual",
+    "LayerNormalization",
 };
 
 std::vector<SupportedOp> supported_op_mode = {
@@ -160,6 +161,7 @@ std::vector<SupportedOp> supported_op_mode = {
     {"InstanceNormalization", V_2023_0, {"VPUX"}},
     {"HardSigmoid", V_2020_4, {"CPU", "GPU"}},
     {"HardMax", V_2022_1, {"CPU", "GPU"}},
+    {"LayerNormalization", V_2023_0, {"CPU", "GPU"}},
     {"LeakyRelu", V_2020_4, {"CPU", "GPU"}},
     {"LeakyRelu", V_2023_0, {"VPUX"}},
     {"Less", V_2020_4, {"CPU", "GPU"}},
diff --git a/onnxruntime/core/session/inference_session.cc b/onnxruntime/core/session/inference_session.cc
index 6d87d2bbb40fe..d1ce84074b8ab 100644
--- a/onnxruntime/core/session/inference_session.cc
+++ b/onnxruntime/core/session/inference_session.cc
@@ -1353,6 +1353,43 @@ common::Status InferenceSession::Initialize() {
 
     // Verify that there are no external initializers in the graph if external data is disabled.
     onnxruntime::Graph& graph = model_->MainGraph();
+   
+    ///////////////////////////////////
+    /// Move initializers from the MainGraph to subgraphs
+    const std::unordered_set<std::string> initializer_names_to_preserve = {
+      "s_d_onnx::MatMul_1152",
+      "s_d_onnx::MatMul_1151",
+      "s_d_onnx::MatMul_1116",
+      "s_d_onnx::MatMul_1079",
+      "s_d_decoder.model.decoder.layers.0.fc1.bias",
+      "s_d_decoder.model.decoder.layers.1.fc1.bias",
+      "s_d_onnx::MatMul_1044",
+      "s_d_decoder.model.decoder.layers.2.fc1.bias",
+      "s_d_decoder.model.decoder.layers.3.fc1.bias",
+      "s_d_onnx::MatMul_1153",
+      "s_d_onnx::MatMul_1115",
+      "s_d_onnx::MatMul_1080",
+      "s_d_onnx::MatMul_1043",
+      "s_d_decoder.model.decoder.embed_tokens.weight"
+    };
+
+    for (auto& node: graph.Nodes()) {
+      for(auto& entry: node.GetAttributeNameToMutableSubgraphMap()) {
+        Graph *subgraph = entry.second;
+        
+        for(const auto& parent_graph_initializer: graph.GetAllInitializedTensors()) {
+          if (initializer_names_to_preserve.find(parent_graph_initializer.first) != initializer_names_to_preserve.cend()) {
+            std::cout << "Adding initializer " << parent_graph_initializer.first << " to subgraph " << entry.first << std::endl;
+            subgraph->AddInitializedTensor(*parent_graph_initializer.second);
+        }
+      }
+    }
+  }
+
+  for(auto& name: initializer_names_to_preserve)
+    graph.RemoveInitializedTensor(name);
+  ///////////////////////////////////////////
+
 #ifdef DISABLE_EXTERNAL_INITIALIZERS
     const InitializedTensorSet& initializers = graph.GetAllInitializedTensors();
     for (const auto& it : initializers) {

From a394b08bdea5c4e8a725e96a43acad69e2fddc19 Mon Sep 17 00:00:00 2001
From: Suryaprakash Shanmugam <suryaprakash.shanmugam@intel.com>
Date: Tue, 2 May 2023 18:40:38 -1200
Subject: [PATCH 2/5] Patch for Whisper VPUX

---
 onnxruntime/contrib_ops/cpu/utils/console_dumper.h          | 2 +-
 .../core/providers/openvino/backends/basic_backend.cc       | 2 +-
 .../core/providers/openvino/ov_versions/capability.cc       | 2 ++
 onnxruntime/core/providers/openvino/ov_versions/data_ops.cc | 2 +-
 onnxruntime/core/session/inference_session.cc               | 6 +++---
 5 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/onnxruntime/contrib_ops/cpu/utils/console_dumper.h b/onnxruntime/contrib_ops/cpu/utils/console_dumper.h
index 0062d757f39ec..97e0a6597db5d 100644
--- a/onnxruntime/contrib_ops/cpu/utils/console_dumper.h
+++ b/onnxruntime/contrib_ops/cpu/utils/console_dumper.h
@@ -9,7 +9,7 @@ namespace onnxruntime {
 namespace contrib {
 namespace transformers {
 
-#define DEBUG_GENERATION 1  // uncomment it for debugging generation (like beam search etc)
+// #define DEBUG_GENERATION 1  // uncomment it for debugging generation (like beam search etc)
 #ifdef DEBUG_GENERATION
 #define DUMP_TENSOR_LEVEL 2
 #else
diff --git a/onnxruntime/core/providers/openvino/backends/basic_backend.cc b/onnxruntime/core/providers/openvino/backends/basic_backend.cc
index ca00f60f97547..cd8f2ae4ba9c3 100644
--- a/onnxruntime/core/providers/openvino/backends/basic_backend.cc
+++ b/onnxruntime/core/providers/openvino/backends/basic_backend.cc
@@ -184,7 +184,7 @@ void BasicBackend::StartAsyncInference(Ort::KernelContext& context, OVInferReque
           break;
         } else {
           continue;
-          throw(log_tag + "Input names mismatch between OpenVINO and ONNX. " + onnx_input_name + " doesn't exist in the list of OpenVINO input tensor names");
+          // throw(log_tag + "Input names mismatch between OpenVINO and ONNX. " + onnx_input_name + " doesn't exist in the list of OpenVINO input tensor names");
         }
       }
       size_t batch_slice_idx = 0;
diff --git a/onnxruntime/core/providers/openvino/ov_versions/capability.cc b/onnxruntime/core/providers/openvino/ov_versions/capability.cc
index 238524031cd34..aa81111facfa2 100644
--- a/onnxruntime/core/providers/openvino/ov_versions/capability.cc
+++ b/onnxruntime/core/providers/openvino/ov_versions/capability.cc
@@ -45,6 +45,8 @@ std::vector<std::unique_ptr<ComputeCapability>> GetCapability::Execute() {
   // if (graph_viewer_.IsSubgraph()) {
   //   return result;
   // }
+  if (graph_viewer_.Name() == "decoder subgraph" || graph_viewer_.Name() == "beam-search-test")
+    return result;
 
   // This is a list of initializers that nGraph considers as constants. Example weights, reshape shape etc.
   std::unordered_set<std::string> ng_required_initializers;
diff --git a/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc b/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc
index 352099a608872..9511226918761 100644
--- a/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc
+++ b/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc
@@ -161,7 +161,7 @@ std::vector<SupportedOp> supported_op_mode = {
     {"InstanceNormalization", V_2023_0, {"VPUX"}},
     {"HardSigmoid", V_2020_4, {"CPU", "GPU"}},
     {"HardMax", V_2022_1, {"CPU", "GPU"}},
-    {"LayerNormalization", V_2023_0, {"CPU", "GPU"}},
+    {"LayerNormalization", V_2023_0, {"CPU", "GPU", "VPUX"}},
     {"LeakyRelu", V_2020_4, {"CPU", "GPU"}},
     {"LeakyRelu", V_2023_0, {"VPUX"}},
     {"Less", V_2020_4, {"CPU", "GPU"}},
diff --git a/onnxruntime/core/session/inference_session.cc b/onnxruntime/core/session/inference_session.cc
index d1ce84074b8ab..defb2891bdb5e 100644
--- a/onnxruntime/core/session/inference_session.cc
+++ b/onnxruntime/core/session/inference_session.cc
@@ -1353,7 +1353,7 @@ common::Status InferenceSession::Initialize() {
 
     // Verify that there are no external initializers in the graph if external data is disabled.
     onnxruntime::Graph& graph = model_->MainGraph();
-   
+
     ///////////////////////////////////
     /// Move initializers from the MainGraph to subgraphs
     const std::unordered_set<std::string> initializer_names_to_preserve = {
@@ -1376,10 +1376,10 @@ common::Status InferenceSession::Initialize() {
     for (auto& node: graph.Nodes()) {
       for(auto& entry: node.GetAttributeNameToMutableSubgraphMap()) {
         Graph *subgraph = entry.second;
-        
+
         for(const auto& parent_graph_initializer: graph.GetAllInitializedTensors()) {
           if (initializer_names_to_preserve.find(parent_graph_initializer.first) != initializer_names_to_preserve.cend()) {
-            std::cout << "Adding initializer " << parent_graph_initializer.first << " to subgraph " << entry.first << std::endl;
+            // std::cout << "Adding initializer " << parent_graph_initializer.first << " to subgraph " << entry.first << std::endl;
             subgraph->AddInitializedTensor(*parent_graph_initializer.second);
         }
       }

From d680468f2c2a54ef2facad9aff5e4f62e5f8ceed Mon Sep 17 00:00:00 2001
From: Suryaprakash Shanmugam <suryaprakash.shanmugam@intel.com>
Date: Wed, 3 May 2023 18:40:58 -1200
Subject: [PATCH 3/5] Fix rebasing: restore StartAsync call in StartAsyncInference

---
 .../core/providers/openvino/backends/basic_backend.cc        | 5 +++++
 onnxruntime/core/session/inference_session.cc                | 1 -
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/onnxruntime/core/providers/openvino/backends/basic_backend.cc b/onnxruntime/core/providers/openvino/backends/basic_backend.cc
index cd8f2ae4ba9c3..4eb9c7473c922 100644
--- a/onnxruntime/core/providers/openvino/backends/basic_backend.cc
+++ b/onnxruntime/core/providers/openvino/backends/basic_backend.cc
@@ -221,7 +221,12 @@ void BasicBackend::StartAsyncInference(Ort::KernelContext& context, OVInferReque
       }
       input_idx++;
     }
+    // Start Async inference
+    infer_request->StartAsync();
+  } catch (const char* msg) {
+    throw(msg);
   }
+}
 
 #ifdef IO_BUFFER_ENABLED
   // Wait for Remote Aynchronous inference completion
diff --git a/onnxruntime/core/session/inference_session.cc b/onnxruntime/core/session/inference_session.cc
index defb2891bdb5e..2ca698e0cb5e5 100644
--- a/onnxruntime/core/session/inference_session.cc
+++ b/onnxruntime/core/session/inference_session.cc
@@ -1379,7 +1379,6 @@ common::Status InferenceSession::Initialize() {
 
         for(const auto& parent_graph_initializer: graph.GetAllInitializedTensors()) {
           if (initializer_names_to_preserve.find(parent_graph_initializer.first) != initializer_names_to_preserve.cend()) {
-            // std::cout << "Adding initializer " << parent_graph_initializer.first << " to subgraph " << entry.first << std::endl;
             subgraph->AddInitializedTensor(*parent_graph_initializer.second);
         }
       }

From 534679a81f21cfe7e1a122bdcac09f2e1f6654f9 Mon Sep 17 00:00:00 2001
From: Suryaprakash Shanmugam <suryaprakash.shanmugam@intel.com>
Date: Wed, 3 May 2023 22:11:36 -1200
Subject: [PATCH 4/5] Handle initializer movement through ImplicitInputDef
 names

---
 .../openvino/ov_versions/capability.cc        |  4 ---
 onnxruntime/core/session/inference_session.cc | 31 ++++++-------------
 2 files changed, 10 insertions(+), 25 deletions(-)

diff --git a/onnxruntime/core/providers/openvino/ov_versions/capability.cc b/onnxruntime/core/providers/openvino/ov_versions/capability.cc
index aa81111facfa2..6d2926af30604 100644
--- a/onnxruntime/core/providers/openvino/ov_versions/capability.cc
+++ b/onnxruntime/core/providers/openvino/ov_versions/capability.cc
@@ -41,10 +41,6 @@ GetCapability::GetCapability(const GraphViewer& graph_viewer_param, std::string
 std::vector<std::unique_ptr<ComputeCapability>> GetCapability::Execute() {
   std::vector<std::unique_ptr<ComputeCapability>> result;
 
-  // Check if it is a subgraph
-  // if (graph_viewer_.IsSubgraph()) {
-  //   return result;
-  // }
   if (graph_viewer_.Name() == "decoder subgraph" || graph_viewer_.Name() == "beam-search-test")
     return result;
 
diff --git a/onnxruntime/core/session/inference_session.cc b/onnxruntime/core/session/inference_session.cc
index 2ca698e0cb5e5..7cd957276c13d 100644
--- a/onnxruntime/core/session/inference_session.cc
+++ b/onnxruntime/core/session/inference_session.cc
@@ -1354,40 +1354,29 @@ common::Status InferenceSession::Initialize() {
     // Verify that there are no external initializers in the graph if external data is disabled.
     onnxruntime::Graph& graph = model_->MainGraph();
 
-    ///////////////////////////////////
+#ifdef USE_OPENVINO
     /// Move initializers from the MainGraph to subgraphs
-    const std::unordered_set<std::string> initializer_names_to_preserve = {
-      "s_d_onnx::MatMul_1152",
-      "s_d_onnx::MatMul_1151",
-      "s_d_onnx::MatMul_1116",
-      "s_d_onnx::MatMul_1079",
-      "s_d_decoder.model.decoder.layers.0.fc1.bias",
-      "s_d_decoder.model.decoder.layers.1.fc1.bias",
-      "s_d_onnx::MatMul_1044",
-      "s_d_decoder.model.decoder.layers.2.fc1.bias",
-      "s_d_decoder.model.decoder.layers.3.fc1.bias",
-      "s_d_onnx::MatMul_1153",
-      "s_d_onnx::MatMul_1115",
-      "s_d_onnx::MatMul_1080",
-      "s_d_onnx::MatMul_1043",
-      "s_d_decoder.model.decoder.embed_tokens.weight"
-    };
-
+    std::unordered_set<std::string> initializer_names_to_preserve;
     for (auto& node: graph.Nodes()) {
+
+      // Preserve implicitInputDefs in the subgraphs
+      for (auto& def : node.ImplicitInputDefs())
+        initializer_names_to_preserve.insert(def->Name());
+
       for(auto& entry: node.GetAttributeNameToMutableSubgraphMap()) {
         Graph *subgraph = entry.second;
 
         for(const auto& parent_graph_initializer: graph.GetAllInitializedTensors()) {
-          if (initializer_names_to_preserve.find(parent_graph_initializer.first) != initializer_names_to_preserve.cend()) {
+          if (initializer_names_to_preserve.find(parent_graph_initializer.first) != initializer_names_to_preserve.cend())
             subgraph->AddInitializedTensor(*parent_graph_initializer.second);
-        }
       }
     }
   }
 
+  // Now remove those initializers from the MainGraph
   for(auto& name: initializer_names_to_preserve)
     graph.RemoveInitializedTensor(name);
-  ///////////////////////////////////////////
+#endif
 
 #ifdef DISABLE_EXTERNAL_INITIALIZERS
     const InitializedTensorSet& initializers = graph.GetAllInitializedTensors();

From 6e9a6b7320000e1cd838126c0d7ef65d0ee973e8 Mon Sep 17 00:00:00 2001
From: Suryaprakash Shanmugam <suryaprakash.shanmugam@intel.com>
Date: Tue, 27 Jun 2023 21:01:25 -0700
Subject: [PATCH 5/5] Fix typo: remove unbalanced brace in GetCapability::Execute

---
 onnxruntime/core/providers/openvino/ov_versions/capability.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/onnxruntime/core/providers/openvino/ov_versions/capability.cc b/onnxruntime/core/providers/openvino/ov_versions/capability.cc
index e5a238dd1768f..bd334753e3517 100644
--- a/onnxruntime/core/providers/openvino/ov_versions/capability.cc
+++ b/onnxruntime/core/providers/openvino/ov_versions/capability.cc
@@ -44,7 +44,7 @@ std::vector<std::unique_ptr<ComputeCapability>> GetCapability::Execute() {
   if (graph_viewer_.Name() == "decoder subgraph" || graph_viewer_.Name() == "beam-search-test")
     return result;
   // Check if it is a subgraph
-  if (graph_viewer_.IsSubgraph() && graph_viewer_.Name() == "tf2onnx") {
+  if (graph_viewer_.IsSubgraph() && graph_viewer_.Name() == "tf2onnx")
     return result;
 
   // This is a list of initializers that nGraph considers as constants. Example weights, reshape shape etc.