Skip to content

Commit

Permalink
Run QDQ stripping for OV before level1 transformations
Browse files Browse the repository at this point in the history
  • Loading branch information
ubuntu committed Apr 24, 2024
1 parent 4fd1785 commit 472110a
Showing 1 changed file with 10 additions and 0 deletions.
10 changes: 10 additions & 0 deletions onnxruntime/core/session/inference_session.cc
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
#include "core/optimizer/layout_transformation/layout_transformation.h"
#include "core/optimizer/insert_cast_transformer.h"
#include "core/optimizer/qdq_transformer/ensure_unique_dq_for_node_unit.h"
#include "core/optimizer/qdq_transformer/qdq_final_cleanup.h"
#include "core/optimizer/rule_based_graph_transformer.h"
#include "core/optimizer/selectors_actions/selector_action_transformer_apply_contexts.h"
#include "core/optimizer/transformer_memcpy.h"
Expand Down Expand Up @@ -1152,6 +1153,14 @@ common::Status InferenceSession::TransformGraph(onnxruntime::Graph& graph, bool
return transformer.Apply(graph, modified, logger);
};

#if defined(USE_OPENVINO)
const bool enable_quant_qdq_cleanup =
session_options_.config_options.GetConfigOrDefault(kOrtSessionOptionsEnableQuantQDQCleanup, "0") == "1";
// Need early removal of QDQ node pairs for OVEP
QDQFinalCleanupTransformer qdq_cleanup_transform(enable_quant_qdq_cleanup);
ORT_RETURN_IF_ERROR_SESSIONID_(apply_transformer_once(qdq_cleanup_transform, *session_logger_, graph));
#endif

// ensure potential QDQ node units have unique DQ nodes
if (const bool disable_quant_qdq =
session_options_.config_options.GetConfigOrDefault(kOrtSessionOptionsDisableQuantQDQ, "0") == "1";
Expand All @@ -1163,6 +1172,7 @@ common::Status InferenceSession::TransformGraph(onnxruntime::Graph& graph, bool
// apply execution provider independent level 1 graph optimizations.
ORT_RETURN_IF_ERROR_SESSIONID_(graph_transformer_mgr_.ApplyTransformers(graph, TransformerLevel::Level1, *session_logger_));


// if saving model to ORT format we only assign nodes a custom EP can handle and don't compile them.
// we do this to preserve the original nodes in the model but prevent optimizers from changing them.
// at runtime, the ORT format model will re-do the partitioning/compilation of these nodes, which may change
Expand Down

0 comments on commit 472110a

Please sign in to comment.