@@ -140,6 +140,7 @@ BasicBackend::BasicBackend(std::unique_ptr<ONNX_NAMESPACE::ModelProto>& model_pr
     };
   }
   inferRequestsQueue_ = std::unique_ptr<InferRequestsQueue>(new InferRequestsQueue(exe_network_, num_infer_req, std::move(initializer)));
+  bindings_ = std::make_unique<OnnxToOvNetworkBindings>(exe_network_, subgraph_context_);
 }

 bool BasicBackend::ValidateSubgraph(std::map<std::string, std::shared_ptr<ov::Node>>& const_outputs_map) {
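
The `OnnxToOvNetworkBindings` object created here is what the later hunks iterate over instead of re-deriving the ONNX-to-OpenVINO name mapping on every inference call. Its definition is not part of this diff; judging only from the members used below (`network_inputs_`, `network_outputs_`, and per-entry `name`, `onnx_index`, `type`, `ov_shape`, `onnx_shape`), it is roughly a precomputed binding table along these lines (a sketch under those assumptions, not the actual declaration):

struct OnnxToOvNetworkBindings {
  struct TensorBinding {
    std::string name;                 // tensor name shared by the ONNX subgraph and the OV network
    size_t onnx_index;                // index into the ORT KernelContext inputs/outputs
    ov::element::Type type;           // element type of the matching OV port
    ov::Shape ov_shape;               // static shape of the matching OV port
    std::vector<int64_t> onnx_shape;  // shape passed to context.GetOutput()
  };
  std::vector<TensorBinding> network_inputs_;
  std::vector<TensorBinding> network_outputs_;
  OnnxToOvNetworkBindings(OVExeNetwork& exe_network, SubGraphContext& subgraph_context);
};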
@@ -362,29 +363,16 @@ void BasicBackend::SetNumThreads(ov::AnyMap& device_config) {
 // an Infer Request indexed by infer_req_idx
 void BasicBackend::StartAsyncInference(Ort::KernelContext& context, OVInferRequestPtr infer_request) {
   try {
-    auto ov_input_info = exe_network_.Get().inputs();
-
-    // Loop over subgraph original input names to find the correspondent OV input name
-    for (const auto& [onnx_input_name, onnx_input_index] : subgraph_context_.input_names) {
-      std::string input_name{};
-      uint32_t input_idx = 0;
-      for (uint32_t index = 0; const auto& ov_input : ov_input_info) {
-        if (ov_input.get_names().contains(onnx_input_name)) {
-          input_name = onnx_input_name;
-          input_idx = index;
-          break;
-        }
-        index++;
-      }
-      ORT_ENFORCE(!input_name.empty(), log_tag,
-                  "Input names mismatch between OpenVINO and ONNX. ", onnx_input_name,
-                  " doesn't exist in the list of OpenVINO input tensor names");
+    bool cpu_or_gpu = (session_context_.device_type.find("CPU") != std::string::npos ||
+                       session_context_.device_type.find("GPU") != std::string::npos);
+    bool npu = (session_context_.device_type.find("NPU") != std::string::npos);
+
+    for (const auto& input_info : bindings_->network_inputs_) {
       size_t batch_slice_idx = 0;
       if (subgraph_context_.has_dynamic_input_shape &&
           !session_context_.disable_dynamic_shapes &&
-          (session_context_.device_type.find("CPU") != std::string::npos ||
-           session_context_.device_type.find("GPU") != std::string::npos)) {
-        auto tensor = context.GetInput(subgraph_context_.input_names.at(input_name));
+          cpu_or_gpu) {
+        auto tensor = context.GetInput(input_info.onnx_index);
         auto tensor_info = tensor.GetTensorTypeAndShapeInfo();
         auto tensor_shape = tensor_info.GetShape();
         auto tensor_size = tensor_shape.size();
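
Note that the per-input name-matching loop deleted above does not disappear; it would run once in the `OnnxToOvNetworkBindings` constructor rather than on every `StartAsyncInference` call. A hypothetical population sketch (the constructor body is not in this diff, so the details are assumptions):

// Assumed constructor logic: resolve each ONNX input name to its OV port once.
for (const auto& [onnx_name, onnx_index] : subgraph_context.input_names) {
  for (const auto& ov_input : exe_network.Get().inputs()) {
    if (ov_input.get_names().contains(onnx_name)) {
      network_inputs_.push_back({onnx_name, onnx_index, ov_input.get_element_type(),
                                 ov_input.get_shape(), {}});
      break;
    }
  }
}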
@@ -395,98 +383,72 @@ void BasicBackend::StartAsyncInference(Ort::KernelContext& context, OVInferRequestPtr infer_request) {
           input_tensor_shape[tensor_iter] = *i;
           tensor_iter += 1;
         }
-        const auto& input = ov_input_info.at(input_idx);
         OVTensorPtr tensor_ptr;
         // avoid input copies on the CPU device
         if (session_context_.device_type.find("CPU") != std::string::npos) {
-          tensor_ptr = std::make_shared<ov::Tensor>(input.get_element_type(), input_tensor_shape,
+          tensor_ptr = std::make_shared<ov::Tensor>(input_info.type, input_tensor_shape,
                                                     (void*)tensor_data);
         } else {
-          tensor_ptr = std::make_shared<ov::Tensor>(input.get_element_type(), input_tensor_shape);
-          FillInputBlob(tensor_ptr, batch_slice_idx, input_name, context, subgraph_context_);
+          tensor_ptr = std::make_shared<ov::Tensor>(input_info.type, input_tensor_shape);
+          FillInputBlob(tensor_ptr, batch_slice_idx, input_info.name, context, subgraph_context_);
         }

         try {
-          infer_request->SetTensor(std::move(input_name), tensor_ptr);
+          infer_request->SetTensor(input_info.name, tensor_ptr);
         } catch (const char* msg) {
           ORT_THROW(msg);
         }
       } else {
-        if ((session_context_.device_type.find("CPU") != std::string::npos ||
-             session_context_.device_type.find("GPU") != std::string::npos)) {
+        if (cpu_or_gpu) {
           OVTensorPtr graph_input_blob;
           try {
-            graph_input_blob = infer_request->GetTensor(input_name);
+            graph_input_blob = infer_request->GetTensor(input_info.name);
           } catch (const char* msg) {
             ORT_THROW(msg);
           }
-          FillInputBlob(std::move(graph_input_blob), batch_slice_idx, std::move(input_name), context, subgraph_context_);
+          FillInputBlob(std::move(graph_input_blob), batch_slice_idx, input_info.name, context, subgraph_context_);
         } else {
-          auto tensor = context.GetInput(subgraph_context_.input_names.at(input_name));
-          ort_tensor_key_t ort_tensor_key{input_name};
+          auto tensor = context.GetInput(input_info.onnx_index);
+          ort_tensor_key_t ort_tensor_key{input_info.name};
           auto it = ort_ov_tensor_map.find(ort_tensor_key);
-          if ((it == ort_ov_tensor_map.end()) ||
-              (it != ort_ov_tensor_map.end() && (it->second.ort_ptr != tensor.GetTensorRawData()))) {
+          if ((it == ort_ov_tensor_map.end()) || it->second.ort_ptr != tensor.GetTensorRawData()) {
            ov_tensor_data_t ov_tensor_data;
-            const auto& input = ov_input_info.at(input_idx);
-            ov_tensor_data.tensor_ptr = std::make_shared<ov::Tensor>(input.get_element_type(), input.get_shape(),
+            ov_tensor_data.tensor_ptr = std::make_shared<ov::Tensor>(input_info.type, input_info.ov_shape,
                                                                      const_cast<void*>(tensor.GetTensorRawData()));

             ov_tensor_data.ort_ptr = tensor.GetTensorRawData();
             ort_ov_tensor_map[ort_tensor_key] = ov_tensor_data;

             try {
-              infer_request->SetTensor(std::move(input_name), ov_tensor_data.tensor_ptr);
+              infer_request->SetTensor(input_info.name, ov_tensor_data.tensor_ptr);
             } catch (const char* msg) {
               ORT_THROW(msg);
             }
           }
         }
       }
-    }  // Loop subgraph original input names
+    }  // Loop subgraph original input

-    if (session_context_.device_type.find("NPU") != std::string::npos) {
+    if (npu) {
       // Set the output blob as remote blob
-      auto graph_output_info = exe_network_.Get().outputs();
-      auto output_idx = 0;
-      for (auto output_info_iter = graph_output_info.begin();
-           output_info_iter != graph_output_info.end(); ++output_info_iter) {
-        auto output_names = output_info_iter->get_names();
-        std::string onnx_output_name;
-        std::string output_name;
-        // using the output name retrieved from ONNX original to match with the output names returned by OV tensors
-        for (auto it = subgraph_context_.output_names.begin(); it != subgraph_context_.output_names.end(); ++it) {
-          onnx_output_name = it->first;
-          if (output_names.find(onnx_output_name) != output_names.end()) {
-            // Assigning the output_name
-            output_name = it->first;
-            break;
-          }
-        }
-        size_t batch_size = 1;
-        Ort::UnownedValue tensor = GetOutputTensor(context,
-                                                   batch_size,
-                                                   infer_request,
-                                                   output_name,
-                                                   subgraph_context_.output_names);
-        ort_tensor_key_t ort_tensor_key{output_name};
+      for (const auto& output_info : bindings_->network_outputs_) {
+        Ort::UnownedValue tensor = context.GetOutput(output_info.onnx_index, output_info.onnx_shape);
+
+        ort_tensor_key_t ort_tensor_key{output_info.name};
         const auto& it = ort_ov_tensor_map.find(ort_tensor_key);
-        if ((it == ort_ov_tensor_map.end()) ||
-            (it != ort_ov_tensor_map.end() && (it->second.ort_ptr != tensor.GetTensorRawData()))) {
+        if ((it == ort_ov_tensor_map.end()) || (it->second.ort_ptr != tensor.GetTensorRawData())) {
           ov_tensor_data_t ov_tensor_data;
-          const auto& output = graph_output_info.at(output_idx);
           ov_tensor_data.ort_ptr = tensor.GetTensorRawData();
-          ov_tensor_data.tensor_ptr = std::make_shared<ov::Tensor>(output.get_element_type(), output.get_shape(),
+          ov_tensor_data.tensor_ptr = std::make_shared<ov::Tensor>(output_info.type, output_info.ov_shape,
                                                                    const_cast<void*>(tensor.GetTensorRawData()));
           ort_ov_tensor_map[ort_tensor_key] = ov_tensor_data;

           try {
-            infer_request->SetTensor(std::move(output_name), ov_tensor_data.tensor_ptr);
+            infer_request->SetTensor(output_info.name, ov_tensor_data.tensor_ptr);
           } catch (const char* msg) {
             ORT_THROW(msg);
           }
         }
-        output_idx++;
       }
     }

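
Both the input path and the NPU output path above rely on the same zero-copy caching idea: an `ov::Tensor` is built directly over the ORT buffer (`const_cast<void*>(tensor.GetTensorRawData())`), cached in `ort_ov_tensor_map` under the tensor name, and only rebuilt when ORT hands back a different raw pointer. A minimal standalone sketch of that pattern (hypothetical names, not the provider's actual types):

#include <map>
#include <memory>
#include <string>
#include <openvino/openvino.hpp>

struct CachedBinding {
  std::shared_ptr<ov::Tensor> tensor_ptr;
  const void* ort_ptr = nullptr;  // ORT buffer the tensor currently wraps
};

std::map<std::string, CachedBinding> binding_cache;

// Wrap `data` without copying and reuse the wrapper while ORT keeps returning the same buffer.
std::shared_ptr<ov::Tensor> BindZeroCopy(const std::string& name, ov::element::Type type,
                                         const ov::Shape& shape, const void* data) {
  auto it = binding_cache.find(name);
  if (it == binding_cache.end() || it->second.ort_ptr != data) {
    CachedBinding entry;
    entry.tensor_ptr = std::make_shared<ov::Tensor>(type, shape, const_cast<void*>(data));
    entry.ort_ptr = data;
    binding_cache[name] = entry;
    return binding_cache[name].tensor_ptr;
  }
  return it->second.tensor_ptr;
}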
@@ -611,44 +573,22 @@ void BasicBackend::StartRemoteAsyncInference(Ort::KernelContext& context, OVInferRequestPtr infer_request) {
 void BasicBackend::CompleteAsyncInference(Ort::KernelContext& context, OVInferRequestPtr infer_request) {
   // Wait for Async inference completion
   try {
+    bool cpu_or_gpu = session_context_.device_type.find("CPU") != std::string::npos ||
+                      session_context_.device_type.find("GPU") != std::string::npos;
+
     infer_request->WaitRequest();
-    auto graph_output_info = exe_network_.Get().outputs();
-    for (auto output_info_iter = graph_output_info.begin();
-         output_info_iter != graph_output_info.end(); ++output_info_iter) {
-      OVTensorPtr graph_output_blob;
-      auto output_names = output_info_iter->get_names();
-      std::string onnx_output_name;
-      std::string output_name;
-      bool output_name_found = false;
-      // using the output name retrieved from ONNX original to match with the output names returned by OV tensors
-      for (auto it = subgraph_context_.output_names.begin(); it != subgraph_context_.output_names.end(); ++it) {
-        onnx_output_name = it->first;
-        if (output_names.find(onnx_output_name) != output_names.end()) {
-          // Assigning the output_name
-          output_name = it->first;
-          output_name_found = true;
-          break;
-        }
-      }
-      if (!output_name_found) {
-        ORT_THROW(
-            log_tag +
-            "Output names mismatch between OpenVINO and ONNX. "
-            "[ONNX Output: ] " +
-            onnx_output_name +
-            " doesn't exist in the "
-            "list of OpenVINO output tensor names");
-      }
-      if ((session_context_.device_type.find("CPU") != std::string::npos ||
-           session_context_.device_type.find("GPU") != std::string::npos)) {
+
+    if (cpu_or_gpu) {
+      for (const auto& output_info : bindings_->network_outputs_) {
+        OVTensorPtr graph_output_blob;
         try {
-          graph_output_blob = infer_request->GetTensor(output_name);
+          graph_output_blob = infer_request->GetTensor(output_info.name);
         } catch (const char* msg) {
           ORT_THROW(msg);
         }
         size_t batch_size = 1;
         Ort::UnownedValue output_tensor =
-            GetOutputTensor(context, batch_size, infer_request, std::move(output_name), subgraph_context_.output_names);
+            GetOutputTensor(context, batch_size, infer_request, output_info.name, subgraph_context_.output_names);
         auto mem_info = output_tensor.GetTensorMemoryInfo();
         if (mem_info.GetAllocatorName() == OpenVINO_GPU) {
           return;