Bug fix 1.18 #369

Draft
wants to merge 4 commits into master
8 changes: 8 additions & 0 deletions onnxruntime/core/providers/openvino/backend_manager.cc
@@ -125,6 +125,14 @@ BackendManager::BackendManager(const GlobalContext& global_context,
// the EPContext node.
Status BackendManager::ExportCompiledBlobAsEPCtxNode(const onnxruntime::GraphViewer& graph_body_viewer,
const logging::Logger& logger) {
if (GetGlobalContext().disable_dynamic_shapes && subgraph_context_.has_dynamic_input_shape) {
Review comment (author): @sspintel Is it feasible to add an assert statement at the time of session options parsing in openvino_provider_options.cc rather than at the export stage?

std::string exception_str =
"Exporting dynamically compiled models at runtime is not supported. "
"Cannot export blobs of dynamic models that request static shape inference. "
"To export this model, set disable_dynamic_shapes to False";
ORT_THROW(exception_str);
}

std::string model_blob_str;
auto compiled_model = concrete_backend_->GetOVCompiledModel();
auto graph_name = global_context_.onnx_model_path_name;
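Note on the hunk above: the export path now refuses to serialize an EPContext blob when the compiled subgraph has dynamic input shapes but disable_dynamic_shapes was requested. Below is a minimal standalone restatement of that guard; the helper name and bool parameters are illustrative only (the PR keeps the check inline in ExportCompiledBlobAsEPCtxNode), and the same check could in principle run earlier, at options-parsing time as the review comment suggests, once the model's dynamic-shape status is known.

```cpp
// Hypothetical sketch: the disable_dynamic_shapes / dynamic-input guard factored
// into a helper. Only the error text comes from the diff above; everything else
// (names, use of std::runtime_error instead of ORT_THROW) is illustrative.
#include <stdexcept>

void ValidateEpCtxExportOptions(bool disable_dynamic_shapes,
                                bool has_dynamic_input_shape) {
  if (disable_dynamic_shapes && has_dynamic_input_shape) {
    throw std::runtime_error(
        "Exporting dynamically compiled models at runtime is not supported. "
        "Cannot export blobs of dynamic models that request static shape inference. "
        "To export this model, set disable_dynamic_shapes to False");
  }
}
```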
2 changes: 2 additions & 0 deletions onnxruntime/core/providers/openvino/backends/basic_backend.cc
@@ -93,8 +93,10 @@
} else if (!subgraph_context_.has_dynamic_input_shape &&
global_context_.onnx_model_path_name.find(".onnx") != std::string ::npos) {
// Inputs with static dimensions
std::string prec_str = (global_context_.precision_str != "ACCURACY") ? global_context_.precision_str : global_context_.model_precision;

[GitHub Actions / cpplint] onnxruntime/core/providers/openvino/backends/basic_backend.cc:96: Lines should be <= 120 characters long [whitespace/line_length] [2]
exe_network_ = global_context_.ie_core.CompileModel(global_context_.onnx_model_path_name,
hw_target,
prec_str,
global_context_.cache_dir,
device_config,
subgraph_context_.subgraph_name);
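For context, the new prec_str argument forwarded to CompileModel() is the configured precision string unless the user selected ACCURACY, in which case the precision derived from the model is used instead. A hedged restatement of that one-liner as a free function (SelectPrecision is an illustrative name, not part of the PR):

```cpp
// Illustrative sketch of the precision string this hunk passes to CompileModel():
// global_context_.precision_str, unless it is "ACCURACY", in which case
// global_context_.model_precision is forwarded.
#include <string>

std::string SelectPrecision(const std::string& precision_str,
                            const std::string& model_precision) {
  return (precision_str != "ACCURACY") ? precision_str : model_precision;
}
```

For example, with precision_str == "ACCURACY" and model_precision == "FP16", the model is compiled with an FP16 precision string.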
@@ -105,7 +105,8 @@ OpenVINOExecutionProvider::GetCapability(const GraphViewer& graph_viewer,
return "";
} else {
auto input_type = graph_viewer.GetInputs()[0]->TypeAsProto()->tensor_type().elem_type();
if (global_context_->precision_str == "ACCURACY" && global_context_->device_type == "GPU") {
if (global_context_->precision_str == "ACCURACY" &&
global_context_->device_type.find("GPU") != std::string::npos) {
if (input_type == ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT) {
return "FP32";
} else if (input_type == ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT16) {
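The hunk above widens the ACCURACY handling from the exact device string "GPU" to any device string containing "GPU" (for example "GPU.1" or "AUTO:GPU,CPU"). A self-contained sketch of the resulting mapping, under the assumption that the element-type codes below match ONNX TensorProto_DataType values:

```cpp
// Sketch of the broadened device match: any device string containing "GPU" now
// maps the subgraph precision from the first input's element type when the user
// selected ACCURACY. MapAccuracyPrecision is an illustrative name only.
#include <string>

std::string MapAccuracyPrecision(const std::string& device_type, int elem_type) {
  constexpr int kFloat = 1;     // TensorProto_DataType_FLOAT
  constexpr int kFloat16 = 10;  // TensorProto_DataType_FLOAT16
  if (device_type.find("GPU") != std::string::npos) {
    if (elem_type == kFloat) return "FP32";
    if (elem_type == kFloat16) return "FP16";
  }
  return "";
}
```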
@@ -120,16 +120,8 @@ struct OpenVINOExecutionProviderInfo
#endif
} else if (ov_supported_device_types.find(dev_type) != ov_supported_device_types.end()) {
device_type_ = dev_type;
} else if (dev_type.find("HETERO") == 0 || dev_type.find("MULTI") == 0) {
} else if (dev_type.find("HETERO") == 0 || dev_type.find("MULTI") == 0 || dev_type.find("AUTO") == 0) {
std::vector<std::string> devices = parseDevices(dev_type);
precision_ = "FP16";
if (devices[0] == "CPU") {
precision_ = "FP32";
}
device_type_ = dev_type;
} else if (dev_type.find("AUTO") == 0) {
std::vector<std::string> devices = parseDevices(dev_type);
precision_ = "FP32";
device_type_ = dev_type;
} else {
ORT_THROW("Invalid device string: " + dev_type);
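This hunk folds the former AUTO branch into the HETERO/MULTI branch and drops the per-branch FP16/FP32 defaulting, leaving precision selection to the provider-options handling in openvino_provider_factory.cc below. A small sketch of the unified prefix test (IsCompositeDevice is a made-up name for illustration; parseDevices is the helper already used in the diff):

```cpp
// Illustrative sketch: HETERO, MULTI and AUTO composite device strings now all
// take the same branch, with no hard-coded precision default.
#include <string>

bool IsCompositeDevice(const std::string& dev_type) {
  return dev_type.find("HETERO") == 0 ||
         dev_type.find("MULTI") == 0 ||
         dev_type.find("AUTO") == 0;
}
```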
22 changes: 11 additions & 11 deletions onnxruntime/core/providers/openvino/openvino_provider_factory.cc
@@ -136,23 +136,23 @@ struct OpenVINO_Provider : Provider {
if (provider_options_map.find("precision") != provider_options_map.end()) {
precision = provider_options_map.at("precision").c_str();
}
if (device_type == "CPU") {
if (precision == "" || precision == "ACCURACY" || precision == "FP32") {
precision = "FP32";
} else {
ORT_THROW("[ERROR] [OpenVINO] Unsupported inference precision is selected. CPU only supports FP32 . \n");
if (device_type.find("GPU") != std::string::npos) {
if (precision == "") {
precision = "FP16";
} else if (precision != "ACCURACY" && precision != "FP16" && precision != "FP32") {
ORT_THROW("[ERROR] [OpenVINO] Unsupported inference precision is selected. GPU only supports FP32 / FP16. \n");
}
} else if (device_type == "NPU") {
} else if (device_type.find("NPU") != std::string::npos) {
if (precision == "" || precision == "ACCURACY" || precision == "FP16") {
precision = "FP16";
} else {
ORT_THROW("[ERROR] [OpenVINO] Unsupported inference precision is selected. NPU only supported FP16. \n");
}
} else if (device_type == "GPU") {
if (precision == "") {
precision = "FP16";
} else if (precision != "ACCURACY" && precision != "FP16" && precision != "FP32") {
ORT_THROW("[ERROR] [OpenVINO] Unsupported inference precision is selected. GPU only supports FP32 / FP16. \n");
} else if (device_type.find("CPU") != std::string::npos) {
if (precision == "" || precision == "ACCURACY" || precision == "FP32") {
precision = "FP32";
} else {
ORT_THROW("[ERROR] [OpenVINO] Unsupported inference precision is selected. CPU only supports FP32 . \n");
}
}

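Taken together, the reordered branches above default and validate precision per device family using substring matches on device_type. The sketch below restates that logic as a standalone function, assuming an empty string means "not specified"; ResolvePrecision and the plain std::runtime_error calls are illustrative stand-ins for the inline ORT_THROW-based code in this file.

```cpp
// Hedged restatement of the precision defaulting/validation in this hunk:
// GPU-containing device strings first (default FP16; FP32/FP16/ACCURACY allowed),
// then NPU (FP16 only), then CPU (FP32 only).
#include <stdexcept>
#include <string>

std::string ResolvePrecision(const std::string& device_type, std::string precision) {
  if (device_type.find("GPU") != std::string::npos) {
    if (precision.empty()) return "FP16";
    if (precision != "ACCURACY" && precision != "FP16" && precision != "FP32")
      throw std::runtime_error("GPU only supports FP32 / FP16");
    return precision;
  }
  if (device_type.find("NPU") != std::string::npos) {
    if (precision.empty() || precision == "ACCURACY" || precision == "FP16") return "FP16";
    throw std::runtime_error("NPU only supports FP16");
  }
  if (device_type.find("CPU") != std::string::npos) {
    if (precision.empty() || precision == "ACCURACY" || precision == "FP32") return "FP32";
    throw std::runtime_error("CPU only supports FP32");
  }
  return precision;  // unrecognized device families are left untouched here
}
```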
6 changes: 4 additions & 2 deletions onnxruntime/core/providers/openvino/ov_interface.cc
@@ -90,6 +90,7 @@ OVExeNetwork OVCore::CompileModel(std::shared_ptr<const OVNetwork>& ie_cnn_netwo

OVExeNetwork OVCore::CompileModel(const std::string onnx_model_path,
std::string& hw_target,
std::string precision,
std::string cache_dir,
ov::AnyMap& device_config,
std::string name) {
Expand All @@ -99,7 +100,8 @@ OVExeNetwork OVCore::CompileModel(const std::string onnx_model_path,
obj = oe.compile_model(onnx_model_path,
"AUTO",
ov::device::priorities("GPU", "CPU"),
ov::device::properties("GPU", ov::cache_dir(cache_dir)));
ov::device::properties("GPU", {ov::cache_dir(cache_dir),
ov::hint::inference_precision(precision)}));
} else {
obj = oe.compile_model(onnx_model_path, hw_target, device_config);
}
@@ -134,7 +136,7 @@ OVExeNetwork OVCore::ImportModel(std::shared_ptr<std::istringstream> model_strea
}

void OVCore::SetCache(std::string cache_dir_path, std::string device_type) {
if (device_type == "AUTO:GPU,CPU") {
if (device_type != "AUTO:GPU,CPU") {
oe.set_property(ov::cache_dir(cache_dir_path));
}
}
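The compile path for "AUTO:GPU,CPU" now attaches an inference-precision hint alongside the GPU cache directory, and SetCache() no longer sets the global cache_dir for that device string, since the directory is applied per device inside CompileModel instead. A standalone sketch of the same call, assuming the OpenVINO 2.0 C++ API already used in this file (the function name is illustrative):

```cpp
// Sketch of the AUTO:GPU,CPU compile path after this change: GPU properties carry
// both the cache directory and an inference-precision hint.
#include <openvino/openvino.hpp>
#include <string>

ov::CompiledModel CompileAutoGpuCpu(ov::Core& core,
                                    const std::string& onnx_model_path,
                                    const std::string& cache_dir,
                                    const std::string& precision) {
  return core.compile_model(onnx_model_path,
                            "AUTO",
                            ov::device::priorities("GPU", "CPU"),
                            ov::device::properties("GPU",
                                {ov::cache_dir(cache_dir),
                                 ov::hint::inference_precision(precision)}));
}
```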
1 change: 1 addition & 0 deletions onnxruntime/core/providers/openvino/ov_interface.h
@@ -46,6 +46,7 @@ class OVCore {
std::string name);
OVExeNetwork CompileModel(const std::string onnx_model_path,
std::string& hw_target,
std::string precision,
std::string cache_dir,
ov::AnyMap& device_config,
std::string name);
26 changes: 14 additions & 12 deletions onnxruntime/test/perftest/ort_test_session.cc
@@ -274,29 +274,31 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device
}
} else if (key == "precision") {
auto device_type = ov_options["device_type"];
if (device_type == "CPU") {
if (value == "" || value == "ACCURACY" || value == "FP32") {
ov_options[key] = "FP32";
if (device_type.find("GPU") != std::string::npos) {
if (value == "") {
ov_options[key] = "FP16";
continue;
} else if (value == "ACCURACY" || value == "FP16" || value == "FP32") {
ov_options[key] = value;
continue;
} else {
ORT_THROW("[ERROR] [OpenVINO] Unsupported inference precision is selected. CPU only supports FP32 . \n");
ORT_THROW(
"[ERROR] [OpenVINO] Unsupported inference precision is selected. "
"GPU only supported FP32 / FP16. \n");
}
} else if (device_type == "NPU") {
} else if (device_type.find("NPU") != std::string::npos) {
if (value == "" || value == "ACCURACY" || value == "FP16") {
ov_options[key] = "FP16";
continue;
} else {
ORT_THROW("[ERROR] [OpenVINO] Unsupported inference precision is selected. NPU only supported FP16. \n");
}
} else if (device_type == "GPU") {
if (value == "") {
ov_options[key] = "FP16";
continue;
} else if (value == "ACCURACY" || value == "FP16" || value == "FP32") {
ov_options[key] = value;
} else if (device_type.find("CPU") != std::string::npos) {
if (value == "" || value == "ACCURACY" || value == "FP32") {
ov_options[key] = "FP32";
continue;
} else {
ORT_THROW("[ERROR] [OpenVINO] Unsupported inference precision is selected. GPU only supported FP32 / FP16. \n");
ORT_THROW("[ERROR] [OpenVINO] Unsupported inference precision is selected. CPU only supports FP32 . \n");
}
}
} else if (key == "enable_npu_fast_compile") {
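The perf-test handling above mirrors the factory change: device_type is matched by substring, so composite device strings now reach the GPU/NPU/CPU precision defaults as well. A tiny self-contained check of that behavioural difference (purely illustrative):

```cpp
// Demonstrates why a composite device string such as "HETERO:GPU,CPU" now takes
// the FP16-default GPU branch, whereas the old exact comparison did not.
#include <cassert>
#include <string>

int main() {
  const std::string device_type = "HETERO:GPU,CPU";
  assert(device_type.find("GPU") != std::string::npos);  // new substring match: GPU branch
  assert(device_type != "GPU");                          // old exact match would have skipped it
  return 0;
}
```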