From f526630bb9a36c69db38f6d5e76c239db821e6d5 Mon Sep 17 00:00:00 2001 From: Eric Crawford Date: Mon, 23 Sep 2024 14:32:23 -0700 Subject: [PATCH] fix: caching lookup to behave correctly when inputs/output mapping are changed. --- .../openvino/backends/basic_backend.cc | 76 +++++-------------- .../openvino/backends/basic_backend.h | 4 +- 2 files changed, 19 insertions(+), 61 deletions(-) diff --git a/onnxruntime/core/providers/openvino/backends/basic_backend.cc b/onnxruntime/core/providers/openvino/backends/basic_backend.cc index 1f9c61780f27..0270de51251a 100644 --- a/onnxruntime/core/providers/openvino/backends/basic_backend.cc +++ b/onnxruntime/core/providers/openvino/backends/basic_backend.cc @@ -303,33 +303,18 @@ void BasicBackend::StartAsyncInference(Ort::KernelContext& context, OVInferReque FillInputBlob(std::move(graph_input_blob), batch_slice_idx, std::move(input_name), context, subgraph_context_); } else { auto tensor = context.GetInput(subgraph_context_.input_names.at(input_name)); - auto allocator_name = tensor.GetTensorMemoryInfo().GetAllocatorName(); - ov_tensor_data_t ov_tensor_key; - ort_tensor_key_t ort_tensor_key{tensor.GetTensorRawData(), allocator_name}; - if (const auto& it = ort_ov_tensor_map.find(ort_tensor_key); it != ort_ov_tensor_map.end()) { - ov_tensor_key = it->second; - } else { - // Does this make sense for both types of allocators? 
+ ort_tensor_key_t ort_tensor_key{input_name}; + auto it = ort_ov_tensor_map.find(ort_tensor_key); + if (it == ort_ov_tensor_map.end() || it->second.ort_ptr != tensor.GetTensorRawData()) { + ov_tensor_data_t ov_tensor_data; auto input = graph_input_info.at(input_idx); - if (allocator_name == OpenVINO_RT_NPU) { - ov_tensor_key.copy_needed = false; - ov_tensor_key.tensor_ptr = std::make_shared<ov::Tensor>(input.get_element_type(), input.get_shape(), - (void*)tensor.GetTensorRawData()); - } else { - ov_tensor_key.copy_needed = true; - ov_tensor_key.tensor_ptr = std::make_shared<ov::Tensor>(input.get_element_type(), input.get_shape()); - } - ort_ov_tensor_map.emplace(ort_tensor_key, ov_tensor_key); - - if (ov_tensor_key.copy_needed) { - const char* ort_tensor_data = tensor.GetTensorData<char>(); - size_t tensor_data_size = ov_tensor_key.tensor_ptr->get_byte_size(); - auto ort_batch_memory_offset = ort_tensor_data + tensor_data_size * batch_slice_idx; - std::memcpy(ov_tensor_key.tensor_ptr->data(), ort_batch_memory_offset, tensor_data_size); - } + ov_tensor_data.tensor_ptr = std::make_shared<ov::Tensor>(input.get_element_type(), input.get_shape(), + (void*)tensor.GetTensorRawData()); + ov_tensor_data.ort_ptr = tensor.GetTensorRawData(); + ort_ov_tensor_map[ort_tensor_key] = ov_tensor_data; try { - infer_request->SetTensor(input_name, ov_tensor_key.tensor_ptr); + infer_request->SetTensor(input_name, ov_tensor_data.tensor_ptr); } catch (const char* msg) { ORT_THROW(msg); } @@ -362,23 +347,15 @@ void BasicBackend::StartAsyncInference(Ort::KernelContext& context, OVInferReque infer_request, output_name, subgraph_context_.output_names); - auto allocator_name = tensor.GetTensorMemoryInfo().GetAllocatorName(); - - ov_tensor_data_t ov_tensor_data; - ort_tensor_key_t ort_tensor_key{tensor.GetTensorRawData(), allocator_name}; - if (const auto& it = ort_ov_tensor_map.find(ort_tensor_key); it != ort_ov_tensor_map.end()) { - ov_tensor_data = it->second; - } else { + ort_tensor_key_t ort_tensor_key{output_name}; + const 
auto& it = ort_ov_tensor_map.find(ort_tensor_key); + if (it == ort_ov_tensor_map.end() || it->second.ort_ptr != tensor.GetTensorRawData()) { + ov_tensor_data_t ov_tensor_data; auto output = graph_output_info.at(output_idx); - if (allocator_name == OpenVINO_RT_NPU) { - ov_tensor_data.copy_needed = false; - ov_tensor_data.tensor_ptr = std::make_shared<ov::Tensor>(output.get_element_type(), output.get_shape(), - (void*)tensor.GetTensorRawData()); - } else { - ov_tensor_data.copy_needed = true; - ov_tensor_data.tensor_ptr = std::make_shared<ov::Tensor>(output.get_element_type(), output.get_shape()); - } - ort_ov_tensor_map.emplace(ort_tensor_key, ov_tensor_data); + ov_tensor_data.ort_ptr = tensor.GetTensorRawData(); + ov_tensor_data.tensor_ptr = std::make_shared<ov::Tensor>(output.get_element_type(), output.get_shape(), + (void*)tensor.GetTensorRawData()); + ort_ov_tensor_map[ort_tensor_key] = ov_tensor_data; try { infer_request->SetTensor(output_name, ov_tensor_data.tensor_ptr); @@ -556,25 +533,6 @@ void BasicBackend::CompleteAsyncInference(Ort::KernelContext& context, OVInferRe size_t batch_slice = 0; FillOutputBlob(std::move(graph_output_blob), output_tensor, batch_slice); } - } else { - size_t batch_size = 1; - Ort::UnownedValue output_tensor = - GetOutputTensor(context, batch_size, infer_request, std::move(output_name), subgraph_context_.output_names); - auto allocator_name = output_tensor.GetTensorMemoryInfo().GetAllocatorName(); - ov_tensor_data_t ov_tensor_data; - ort_tensor_key_t ort_tensor_key{output_tensor.GetTensorRawData(), allocator_name}; - if (const auto& it = ort_ov_tensor_map.find(ort_tensor_key); it != ort_ov_tensor_map.end()) { - ov_tensor_data = it->second; - } else { - ORT_THROW(log_tag + "Expected all outputs to have associated OV::Tensor's"); - } - - if (ov_tensor_data.copy_needed) { - auto ort_tensor_data = output_tensor.GetTensorMutableData<char>(); - size_t tensor_data_size = ov_tensor_data.tensor_ptr->get_byte_size(); - auto ort_batch_memory_offset = ort_tensor_data /*+ 
tensor_data_size * batch_size*/; - std::memcpy(ort_batch_memory_offset, ov_tensor_data.tensor_ptr->data(), tensor_data_size); - } } } diff --git a/onnxruntime/core/providers/openvino/backends/basic_backend.h b/onnxruntime/core/providers/openvino/backends/basic_backend.h index cd69e88f994b..875e5941a4f1 100644 --- a/onnxruntime/core/providers/openvino/backends/basic_backend.h +++ b/onnxruntime/core/providers/openvino/backends/basic_backend.h @@ -23,7 +23,7 @@ namespace openvino_ep { struct ov_tensor_data_t { OVTensorPtr tensor_ptr; - bool copy_needed; + const void *ort_ptr; }; class InferRequestsQueue; @@ -67,7 +67,7 @@ class BasicBackend : public IBackend { OVRemoteContextPtr remote_context_; #endif - using ort_tensor_key_t = std::pair<const void*, std::string>; + using ort_tensor_key_t = const std::string; std::map<ort_tensor_key_t, ov_tensor_data_t> ort_ov_tensor_map;
};