Commit b217424
Optimize CPU time spent in the inference path
Move ONNX-to-OpenVINO input/output name resolution into ORT/OV input/output bindings built at compilation time, and replace per-inference tensor lookups by name with index lookups.
1 parent a077c79 commit b217424

File tree

3 files changed: +78 −101 lines changed
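
The gist of the change: the ONNX-to-OpenVINO tensor-name matching is resolved once, when the backend is compiled, and stored in a bindings table; every inference then uses plain index lookups instead of re-scanning tensor names. A minimal standalone sketch of that pattern follows; the type and function names here are illustrative only, not the provider's actual API.

```cpp
// Illustrative sketch only: precompute name matches once, then use plain
// index lookups in the per-inference hot path.
#include <cstddef>
#include <stdexcept>
#include <string>
#include <vector>

struct Binding {
  std::string name;   // tensor name shared by the ONNX graph and the compiled model
  size_t onnx_index;  // position in the ONNX kernel context
  size_t ov_index;    // position in the compiled model's input list
};

// Done once after compilation: the O(n*m) name matching happens here, not per run.
std::vector<Binding> BuildBindings(const std::vector<std::string>& onnx_names,
                                   const std::vector<std::string>& ov_names) {
  std::vector<Binding> bindings;
  for (size_t i = 0; i < onnx_names.size(); ++i) {
    for (size_t j = 0; j < ov_names.size(); ++j) {
      if (ov_names[j] == onnx_names[i]) {
        bindings.push_back({onnx_names[i], i, j});
        break;
      }
    }
    if (bindings.size() != i + 1)
      throw std::runtime_error("ONNX input has no matching compiled-model input: " + onnx_names[i]);
  }
  return bindings;
}
```

The per-inference loop then touches only `bindings[k].onnx_index` and `bindings[k].ov_index`, which is what the basic_backend.cc changes below do with `bindings_->network_inputs_` and `bindings_->network_outputs_`.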

onnxruntime/core/providers/openvino/backend_utils.cc

Lines changed: 2 additions & 2 deletions
```diff
@@ -121,15 +121,15 @@ std::istream& operator>>(std::istream& stream, SharedContext::SharedWeights::Met
 namespace backend_utils {
 
 bool IsDebugEnabled() {
-  const std::string env_name = onnxruntime::GetEnvironmentVar("ORT_OPENVINO_ENABLE_DEBUG");
+  static std::string env_name = onnxruntime::GetEnvironmentVar("ORT_OPENVINO_ENABLE_DEBUG");
   if (!env_name.empty()) {
     return true;
   }
   return false;
 }
 
 bool IsCILogEnabled() {
-  const std::string env_name = onnxruntime::GetEnvironmentVar("ORT_OPENVINO_ENABLE_CI_LOG");
+  static std::string env_name = onnxruntime::GetEnvironmentVar("ORT_OPENVINO_ENABLE_CI_LOG");
   if (!env_name.empty()) {
     return true;
   }
```
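
The change above leans on a C++ function-local static being initialized exactly once: the environment variable is read on the first call and the cached string is reused afterwards, removing a per-call OS lookup. A small standalone illustration of the pattern, assuming a made-up flag name and plain std::getenv rather than the ORT helper:

```cpp
// Standalone illustration of the once-only lookup pattern (hypothetical flag name).
#include <cstdlib>
#include <string>

bool IsMyFeatureEnabled() {
  // Initialized on the first call only; later calls just test the cached value.
  static const std::string cached = [] {
    const char* value = std::getenv("MY_FEATURE_FLAG");
    return value ? std::string{value} : std::string{};
  }();
  return !cached.empty();
}
```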

onnxruntime/core/providers/openvino/backends/basic_backend.cc

Lines changed: 38 additions & 98 deletions
```diff
@@ -140,6 +140,7 @@ BasicBackend::BasicBackend(std::unique_ptr<ONNX_NAMESPACE::ModelProto>& model_pr
     };
   }
   inferRequestsQueue_ = std::unique_ptr<InferRequestsQueue>(new InferRequestsQueue(exe_network_, num_infer_req, std::move(initializer)));
+  bindings_ = std::make_unique<OnnxToOvNetworkBindings>(exe_network_, subgraph_context_);
 }
 
 bool BasicBackend::ValidateSubgraph(std::map<std::string, std::shared_ptr<ov::Node>>& const_outputs_map) {
@@ -362,29 +363,16 @@ void BasicBackend::SetNumThreads(ov::AnyMap& device_config) {
 // an Infer Request indexed by infer_req_idx
 void BasicBackend::StartAsyncInference(Ort::KernelContext& context, OVInferRequestPtr infer_request) {
   try {
-    auto ov_input_info = exe_network_.Get().inputs();
-
-    // Loop over subgraph original input names to find the correspondent OV input name
-    for (const auto& [onnx_input_name, onnx_input_index] : subgraph_context_.input_names) {
-      std::string input_name{};
-      uint32_t input_idx = 0;
-      for (uint32_t index = 0; const auto& ov_input : ov_input_info) {
-        if (ov_input.get_names().contains(onnx_input_name)) {
-          input_name = onnx_input_name;
-          input_idx = index;
-          break;
-        }
-        index++;
-      }
-      ORT_ENFORCE(!input_name.empty(), log_tag,
-                  "Input names mismatch between OpenVINO and ONNX. ", onnx_input_name,
-                  " doesn't exist in the list of OpenVINO input tensor names");
+    bool cpu_or_gpu = (session_context_.device_type.find("CPU") != std::string::npos ||
+                       session_context_.device_type.find("GPU") != std::string::npos);
+    bool npu = (session_context_.device_type.find("NPU") != std::string::npos);
+
+    for (const auto& input_info : bindings_->network_inputs_) {
       size_t batch_slice_idx = 0;
       if (subgraph_context_.has_dynamic_input_shape &&
           !session_context_.disable_dynamic_shapes &&
-          (session_context_.device_type.find("CPU") != std::string::npos ||
-           session_context_.device_type.find("GPU") != std::string::npos)) {
-        auto tensor = context.GetInput(subgraph_context_.input_names.at(input_name));
+          cpu_or_gpu) {
+        auto tensor = context.GetInput(input_info.onnx_index);
         auto tensor_info = tensor.GetTensorTypeAndShapeInfo();
         auto tensor_shape = tensor_info.GetShape();
         auto tensor_size = tensor_shape.size();
@@ -395,98 +383,72 @@ void BasicBackend::StartAsyncInference(Ort::KernelContext& context, OVInferReque
           input_tensor_shape[tensor_iter] = *i;
           tensor_iter += 1;
         }
-        const auto& input = ov_input_info.at(input_idx);
         OVTensorPtr tensor_ptr;
         // avoid input copies on the CPU device
         if (session_context_.device_type.find("CPU") != std::string::npos) {
-          tensor_ptr = std::make_shared<ov::Tensor>(input.get_element_type(), input_tensor_shape,
+          tensor_ptr = std::make_shared<ov::Tensor>(input_info.type, input_tensor_shape,
                                                     (void*)tensor_data);
         } else {
-          tensor_ptr = std::make_shared<ov::Tensor>(input.get_element_type(), input_tensor_shape);
-          FillInputBlob(tensor_ptr, batch_slice_idx, input_name, context, subgraph_context_);
+          tensor_ptr = std::make_shared<ov::Tensor>(input_info.type, input_tensor_shape);
+          FillInputBlob(tensor_ptr, batch_slice_idx, input_info.name, context, subgraph_context_);
         }
 
         try {
-          infer_request->SetTensor(std::move(input_name), tensor_ptr);
+          infer_request->SetTensor(input_info.name, tensor_ptr);
         } catch (const char* msg) {
           ORT_THROW(msg);
         }
       } else {
-        if ((session_context_.device_type.find("CPU") != std::string::npos ||
-             session_context_.device_type.find("GPU") != std::string::npos)) {
+        if (cpu_or_gpu) {
          OVTensorPtr graph_input_blob;
          try {
-            graph_input_blob = infer_request->GetTensor(input_name);
+            graph_input_blob = infer_request->GetTensor(input_info.name);
           } catch (const char* msg) {
             ORT_THROW(msg);
           }
-          FillInputBlob(std::move(graph_input_blob), batch_slice_idx, std::move(input_name), context, subgraph_context_);
+          FillInputBlob(std::move(graph_input_blob), batch_slice_idx, input_info.name, context, subgraph_context_);
         } else {
-          auto tensor = context.GetInput(subgraph_context_.input_names.at(input_name));
-          ort_tensor_key_t ort_tensor_key{input_name};
+          auto tensor = context.GetInput(input_info.onnx_index);
+          ort_tensor_key_t ort_tensor_key{input_info.name};
           auto it = ort_ov_tensor_map.find(ort_tensor_key);
-          if ((it == ort_ov_tensor_map.end()) ||
-              (it != ort_ov_tensor_map.end() && (it->second.ort_ptr != tensor.GetTensorRawData()))) {
+          if ((it == ort_ov_tensor_map.end()) || it->second.ort_ptr != tensor.GetTensorRawData()) {
             ov_tensor_data_t ov_tensor_data;
-            const auto& input = ov_input_info.at(input_idx);
-            ov_tensor_data.tensor_ptr = std::make_shared<ov::Tensor>(input.get_element_type(), input.get_shape(),
+            ov_tensor_data.tensor_ptr = std::make_shared<ov::Tensor>(input_info.type, input_info.ov_shape,
                                                                      const_cast<void*>(tensor.GetTensorRawData()));
 
             ov_tensor_data.ort_ptr = tensor.GetTensorRawData();
             ort_ov_tensor_map[ort_tensor_key] = ov_tensor_data;
 
             try {
-              infer_request->SetTensor(std::move(input_name), ov_tensor_data.tensor_ptr);
+              infer_request->SetTensor(input_info.name, ov_tensor_data.tensor_ptr);
             } catch (const char* msg) {
               ORT_THROW(msg);
             }
           }
         }
       }
-    }  // Loop subgraph original input names
+    }  // Loop subgraph original input
 
-    if (session_context_.device_type.find("NPU") != std::string::npos) {
+    if (npu) {
       // Set the output blob as remote blob
-      auto graph_output_info = exe_network_.Get().outputs();
-      auto output_idx = 0;
-      for (auto output_info_iter = graph_output_info.begin();
-           output_info_iter != graph_output_info.end(); ++output_info_iter) {
-        auto output_names = output_info_iter->get_names();
-        std::string onnx_output_name;
-        std::string output_name;
-        // using the output name retrieved from ONNX original to match with the output names returned by OV tensors
-        for (auto it = subgraph_context_.output_names.begin(); it != subgraph_context_.output_names.end(); ++it) {
-          onnx_output_name = it->first;
-          if (output_names.find(onnx_output_name) != output_names.end()) {
-            // Assigning the output_name
-            output_name = it->first;
-            break;
-          }
-        }
-        size_t batch_size = 1;
-        Ort::UnownedValue tensor = GetOutputTensor(context,
-                                                   batch_size,
-                                                   infer_request,
-                                                   output_name,
-                                                   subgraph_context_.output_names);
-        ort_tensor_key_t ort_tensor_key{output_name};
+      for (const auto& output_info : bindings_->network_outputs_) {
+        Ort::UnownedValue tensor = context.GetOutput(output_info.onnx_index, output_info.onnx_shape);
+
+        ort_tensor_key_t ort_tensor_key{output_info.name};
         const auto& it = ort_ov_tensor_map.find(ort_tensor_key);
-        if ((it == ort_ov_tensor_map.end()) ||
-            (it != ort_ov_tensor_map.end() && (it->second.ort_ptr != tensor.GetTensorRawData()))) {
+        if ((it == ort_ov_tensor_map.end()) || (it->second.ort_ptr != tensor.GetTensorRawData())) {
           ov_tensor_data_t ov_tensor_data;
-          const auto& output = graph_output_info.at(output_idx);
           ov_tensor_data.ort_ptr = tensor.GetTensorRawData();
-          ov_tensor_data.tensor_ptr = std::make_shared<ov::Tensor>(output.get_element_type(), output.get_shape(),
+          ov_tensor_data.tensor_ptr = std::make_shared<ov::Tensor>(output_info.type, output_info.ov_shape,
                                                                    const_cast<void*>(tensor.GetTensorRawData()));
           ort_ov_tensor_map[ort_tensor_key] = ov_tensor_data;
 
           try {
-            infer_request->SetTensor(std::move(output_name), ov_tensor_data.tensor_ptr);
+            infer_request->SetTensor(output_info.name, ov_tensor_data.tensor_ptr);
           } catch (const char* msg) {
             ORT_THROW(msg);
           }
         }
-        output_idx++;
       }
     }
 
@@ -611,44 +573,22 @@ void BasicBackend::StartRemoteAsyncInference(Ort::KernelContext& context, OVInfe
 void BasicBackend::CompleteAsyncInference(Ort::KernelContext& context, OVInferRequestPtr infer_request) {
   // Wait for Async inference completion
   try {
+    bool cpu_or_gpu = session_context_.device_type.find("CPU") != std::string::npos ||
+                      session_context_.device_type.find("GPU") != std::string::npos;
+
     infer_request->WaitRequest();
-    auto graph_output_info = exe_network_.Get().outputs();
-    for (auto output_info_iter = graph_output_info.begin();
-         output_info_iter != graph_output_info.end(); ++output_info_iter) {
-      OVTensorPtr graph_output_blob;
-      auto output_names = output_info_iter->get_names();
-      std::string onnx_output_name;
-      std::string output_name;
-      bool output_name_found = false;
-      // using the output name retrieved from ONNX original to match with the output names returned by OV tensors
-      for (auto it = subgraph_context_.output_names.begin(); it != subgraph_context_.output_names.end(); ++it) {
-        onnx_output_name = it->first;
-        if (output_names.find(onnx_output_name) != output_names.end()) {
-          // Assigning the output_name
-          output_name = it->first;
-          output_name_found = true;
-          break;
-        }
-      }
-      if (!output_name_found) {
-        ORT_THROW(
-            log_tag +
-            "Output names mismatch between OpenVINO and ONNX. "
-            "[ONNX Output: ] " +
-            onnx_output_name +
-            " doesn't exist in the "
-            "list of OpenVINO output tensor names");
-      }
-      if ((session_context_.device_type.find("CPU") != std::string::npos ||
-           session_context_.device_type.find("GPU") != std::string::npos)) {
+
+    if (cpu_or_gpu) {
+      for (const auto& output_info : bindings_->network_outputs_) {
+        OVTensorPtr graph_output_blob;
         try {
-          graph_output_blob = infer_request->GetTensor(output_name);
+          graph_output_blob = infer_request->GetTensor(output_info.name);
         } catch (const char* msg) {
           ORT_THROW(msg);
         }
         size_t batch_size = 1;
         Ort::UnownedValue output_tensor =
-            GetOutputTensor(context, batch_size, infer_request, std::move(output_name), subgraph_context_.output_names);
+            GetOutputTensor(context, batch_size, infer_request, output_info.name, subgraph_context_.output_names);
         auto mem_info = output_tensor.GetTensorMemoryInfo();
         if (mem_info.GetAllocatorName() == OpenVINO_GPU) {
           return;
```
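
Both device paths above reuse the ort_ov_tensor_map cache: a zero-copy ov::Tensor that wraps an ORT buffer is rebuilt only when the buffer address seen this run differs from the one wrapped last time. A simplified sketch of that caching rule, with generic stand-ins for the ORT/OV tensor types:

```cpp
// Simplified sketch of the rebuild-only-on-pointer-change rule used above.
#include <map>
#include <memory>
#include <string>

struct Wrapper {
  std::shared_ptr<int> tensor;  // stand-in for an ov::Tensor wrapping external memory
  const void* wrapped_ptr = nullptr;
};

std::map<std::string, Wrapper> wrapper_cache;

// Returns the cached wrapper for `name`, recreating it only if the
// underlying buffer address differs from the one wrapped last time.
const Wrapper& GetOrRewrap(const std::string& name, const void* current_ptr) {
  auto it = wrapper_cache.find(name);
  if (it == wrapper_cache.end() || it->second.wrapped_ptr != current_ptr) {
    Wrapper w;
    w.tensor = std::make_shared<int>(0);  // real code would wrap current_ptr here
    w.wrapped_ptr = current_ptr;
    it = wrapper_cache.insert_or_assign(name, std::move(w)).first;
  }
  return it->second;
}
```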

onnxruntime/core/providers/openvino/backends/basic_backend.h

Lines changed: 38 additions & 1 deletion
```diff
@@ -18,6 +18,7 @@
 #include "core/providers/openvino/contexts.h"
 #include "core/providers/openvino/ibackend.h"
 #include "core/providers/openvino/ov_interface.h"
+#include "core/providers/openvino/backend_utils.h"
 
 namespace onnxruntime {
 namespace openvino_ep {
@@ -27,6 +28,42 @@ struct ov_tensor_data_t {
   const void* ort_ptr;
 };
 
+struct OnnxToOvNetworkBindings {
+  struct ParameterInfo {
+    std::string name;
+    uint32_t ov_index;
+    uint32_t onnx_index;
+    ov::element::Type type;
+    ov::Shape ov_shape;
+    std::vector<int64_t> onnx_shape;
+  };
+  std::vector<ParameterInfo> network_outputs_;
+  std::vector<ParameterInfo> network_inputs_;
+
+  OnnxToOvNetworkBindings(OVExeNetwork& exec_network, SubGraphContext& subgraph_context) {
+    auto populate = [&](auto& input_output_map, const SubGraphContext::string_index_map_t& onnx_input_map, const auto& ov_parameters) {
+      for (const auto& [onnx_name, onnx_param_index] : onnx_input_map) {
+        auto it = std::find_if(ov_parameters.begin(), ov_parameters.end(),
+                               [&onnx_name](const auto& ov_parameter_info) { return ov_parameter_info.get_names().contains(onnx_name); });
+        auto ov_param_index = std::distance(ov_parameters.begin(), it);
+
+        ORT_ENFORCE(it != ov_parameters.end(), backend_utils::log_tag,
+                    "Input names mismatch between OpenVINO and ONNX. ", onnx_name,
+                    " doesn't exist in the list of OpenVINO input tensor names");
+        auto shape = ov_parameters[ov_param_index].get_shape();
+        auto type = ov_parameters[ov_param_index].get_element_type();
+
+        ParameterInfo info{onnx_name, ov_param_index, onnx_param_index, type, shape};
+        std::transform(shape.begin(), shape.end(), std::back_inserter(info.onnx_shape), [](const auto& dim) { return static_cast<int64_t>(dim); });
+        input_output_map.push_back(std::move(info));
+      }
+    };
+
+    populate(network_inputs_, subgraph_context.input_names, exec_network.Get().inputs());
+    populate(network_outputs_, subgraph_context.output_names, exec_network.Get().outputs());
+  }
+};
+
 class InferRequestsQueue;
 class BasicBackend : public IBackend {
  public:
@@ -43,7 +80,6 @@ class BasicBackend : public IBackend {
   }
 
  private:
-  void PopulateCompiledDirectory(std::string, std::string&, std::string&, bool&);
   bool ValidateSubgraph(std::map<std::string, std::shared_ptr<ov::Node>>& const_outputs_map);
   void PopulateConfigValue(ov::AnyMap& device_config);
   void EnableCaching();
@@ -71,6 +107,7 @@ class BasicBackend : public IBackend {
 
   using ort_tensor_key_t = const std::string;
   std::map<ort_tensor_key_t, ov_tensor_data_t> ort_ov_tensor_map;
+  std::unique_ptr<OnnxToOvNetworkBindings> bindings_;
 };
 
 class InferRequestsQueue {
```

0 commit comments

Comments
 (0)