diff --git a/src/subgraph/static-mean.c b/src/subgraph/static-mean.c
index d8aadbbd98a..39416f6f18c 100644
--- a/src/subgraph/static-mean.c
+++ b/src/subgraph/static-mean.c
@@ -60,7 +60,20 @@ static enum xnn_status create_mean_operator(
       const int8_t output_zero_point = (int8_t) values[output_id].quantization.zero_point;
 
       status = xnn_create_mean_nd_qs8(
-        input_scale * output_scale, input_zero_point, output_zero_point,
+        input_scale / output_scale, input_zero_point, output_zero_point,
+        node->flags,
+        &opdata->operator_objects[0]);
+      break;
+    }
+    case xnn_datatype_quint8:
+    {
+      const float input_scale = values[input_id].quantization.scale;
+      const float output_scale = values[output_id].quantization.scale;
+      const uint8_t input_zero_point = (uint8_t) values[input_id].quantization.zero_point;
+      const uint8_t output_zero_point = (uint8_t) values[output_id].quantization.zero_point;
+
+      status = xnn_create_mean_nd_qu8(
+        input_scale / output_scale, input_zero_point, output_zero_point,
         node->flags,
         &opdata->operator_objects[0]);
       break;
@@ -126,6 +139,17 @@ static enum xnn_status reshape_mean_operator(
         &opdata->workspace_alignment,
         threadpool);
       break;
+    case xnn_operator_type_mean_nd_qu8:
+      status = xnn_reshape_mean_nd_qu8(
+        opdata->operator_objects[0],
+        opdata->num_reduction_axes,
+        opdata->reduction_axes,
+        input_value->shape.num_dims,
+        input_value->shape.dim,
+        &opdata->workspace_size,
+        &opdata->workspace_alignment,
+        threadpool);
+      break;
     default:
       XNN_UNREACHABLE;
   }
@@ -212,6 +236,11 @@ static enum xnn_status setup_mean_operator(
         opdata->operator_objects[0],
         opdata->workspace,
         input_data, output_data);
+    case xnn_operator_type_mean_nd_qu8:
+      return xnn_setup_mean_nd_qu8(
+        opdata->operator_objects[0],
+        opdata->workspace,
+        input_data, output_data);
     default:
       XNN_UNREACHABLE;
   }
@@ -245,6 +274,7 @@ enum xnn_status xnn_define_static_mean(
     case xnn_datatype_fp16:
     case xnn_datatype_fp32:
     case xnn_datatype_qint8:
+    case xnn_datatype_quint8:
       break;
     default:
       xnn_log_error(
@@ -276,6 +306,9 @@ enum xnn_status xnn_define_static_mean(
     case xnn_datatype_qint8:
       compute_type = xnn_compute_type_qs8;
       break;
+    case xnn_datatype_quint8:
+      compute_type = xnn_compute_type_qu8;
+      break;
     default:
       xnn_log_error(
         "failed to define %s operator with output ID #%" PRIu32 ": unsupported Value datatype %s (%d)",
diff --git a/test/static-mean.cc b/test/static-mean.cc
index 971eab581b4..92b7a9d309d 100644
--- a/test/static-mean.cc
+++ b/test/static-mean.cc
@@ -64,6 +64,8 @@ class MeanTestBase : public ::testing::TestWithParam<bool> {
   std::uniform_real_distribution<float> f32dist;
   std::uniform_int_distribution<int32_t> i8dist = std::uniform_int_distribution<int32_t>(
     std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max());
+  std::uniform_int_distribution<int32_t> u8dist = std::uniform_int_distribution<int32_t>(
+    std::numeric_limits<uint8_t>::min(), std::numeric_limits<uint8_t>::max());
 
   std::vector<size_t> reduction_axes;
   std::vector<size_t> input_shape;
@@ -79,10 +81,12 @@ class MeanTestBase : public ::testing::TestWithParam<bool> {
 using MeanTestF16 = MeanTestBase<uint16_t>;
 using MeanTestF32 = MeanTestBase<float>;
 using MeanTestQS8 = MeanTestBase<int8_t>;
+using MeanTestQU8 = MeanTestBase<uint8_t>;
 
 INSTANTIATE_TEST_SUITE_P(KeepDims, MeanTestF16, testing::Bool());
 INSTANTIATE_TEST_SUITE_P(KeepDims, MeanTestF32, testing::Bool());
 INSTANTIATE_TEST_SUITE_P(KeepDims, MeanTestQS8, testing::Bool());
+INSTANTIATE_TEST_SUITE_P(KeepDims, MeanTestQU8, testing::Bool());
 
 TEST_F(MeanTestF16, define)
 {
@@ -220,6 +224,58 @@ TEST_F(MeanTestQS8, define)
   ASSERT_EQ(node->flags, 0);
 }
 
+TEST_F(MeanTestQU8, define)
+{
+  ASSERT_EQ(xnn_status_success, xnn_initialize(/*allocator=*/nullptr));
+
+  xnn_subgraph_t subgraph = nullptr;
+  ASSERT_EQ(xnn_status_success, xnn_create_subgraph(2, /*flags=*/0, &subgraph));
+  std::unique_ptr<xnn_subgraph, decltype(&xnn_delete_subgraph)> auto_subgraph(subgraph, xnn_delete_subgraph);
+
+  std::uniform_real_distribution<float> scale_dist = std::uniform_real_distribution<float>(0.0f, 1.0f);
+  const int32_t input_zero_point = u8dist(rng);
+  const int32_t output_zero_point = u8dist(rng);
+  const float input_scale = scale_dist(rng);
+  const float output_scale = scale_dist(rng);
+
+  uint32_t input_id = XNN_INVALID_NODE_ID;
+  ASSERT_EQ(xnn_status_success,
+            xnn_define_quantized_tensor_value(
+              subgraph, xnn_datatype_quint8, input_zero_point, input_scale,
+              input_shape.size(), input_shape.data(), nullptr, /*external_id=*/0,
+              /*flags=*/0, &input_id));
+  ASSERT_NE(input_id, XNN_INVALID_NODE_ID);
+
+  uint32_t output_id = XNN_INVALID_NODE_ID;
+  ASSERT_EQ(xnn_status_success,
+            xnn_define_quantized_tensor_value(
+              subgraph, xnn_datatype_quint8, output_zero_point, output_scale,
+              output_shape.size(), output_shape.data(), nullptr, /*external_id=*/1,
+              /*flags=*/0, &output_id));
+  ASSERT_NE(output_id, XNN_INVALID_NODE_ID);
+
+  ASSERT_EQ(xnn_status_success,
+            xnn_define_static_mean(
+              subgraph,
+              reduction_axes.size(), reduction_axes.data(),
+              input_id, output_id,
+              /*flags=*/0));
+
+  ASSERT_EQ(subgraph->num_nodes, 1);
+  const struct xnn_node* node = &subgraph->nodes[0];
+  ASSERT_EQ(node->type, xnn_node_type_static_mean);
+  ASSERT_EQ(node->compute_type, xnn_compute_type_qu8);
+  ASSERT_EQ(node->params.reduce.num_reduction_axes, reduction_axes.size());
+  for (size_t i = 0; i < reduction_axes.size(); i++) {
+    ASSERT_EQ(node->params.reduce.reduction_axes[i], reduction_axes[i]);
+  }
+  ASSERT_EQ(node->num_inputs, 1);
+  ASSERT_EQ(node->inputs[0], input_id);
+  ASSERT_EQ(node->num_outputs, 1);
+  ASSERT_EQ(node->outputs[0], output_id);
+  ASSERT_EQ(node->flags, 0);
+}
+
 TEST_P(MeanTestF16, matches_operator_api) {
   bool keep_dims = GetParam();
   ASSERT_EQ(xnn_status_success, xnn_initialize(/*allocator=*/nullptr));
@@ -401,7 +457,7 @@ TEST_P(MeanTestQS8, matches_operator_api) {
   uint32_t flags = keep_dims ? XNN_FLAG_KEEP_DIMS : 0;
   // Call operator API.
   const xnn_status status = xnn_create_mean_nd_qs8(
-    input_scale * output_scale, input_zero_point, output_zero_point, flags, &op);
+    input_scale / output_scale, input_zero_point, output_zero_point, flags, &op);
   if (status == xnn_status_unsupported_hardware) {
     GTEST_SKIP();
   }
@@ -474,6 +530,98 @@ TEST_P(MeanTestQS8, matches_operator_api) {
   }
 }
 
+TEST_P(MeanTestQU8, matches_operator_api) {
+  bool keep_dims = GetParam();
+  ASSERT_EQ(xnn_status_success, xnn_initialize(/*allocator=*/nullptr));
+
+  xnn_operator_t op = nullptr;
+
+  std::generate(input.begin(), input.end(), [&]() { return u8dist(rng); });
+  std::fill(operator_output.begin(), operator_output.end(), UINT8_C(0));
+  std::fill(subgraph_output.begin(), subgraph_output.end(), UINT8_C(0));
+
+  std::uniform_real_distribution<float> scale_dist = std::uniform_real_distribution<float>(0.0f, 1.0f);
+  const float input_scale = scale_dist(rng);
+  const float output_scale = scale_dist(rng);
+  const int32_t input_zero_point = u8dist(rng);
+  const int32_t output_zero_point = u8dist(rng);
+
+  uint32_t flags = keep_dims ? XNN_FLAG_KEEP_DIMS : 0;
+  // Call operator API.
+  const xnn_status status = xnn_create_mean_nd_qu8(
+    input_scale / output_scale, input_zero_point, output_zero_point, flags, &op);
+  if (status == xnn_status_unsupported_hardware) {
+    GTEST_SKIP();
+  }
+  ASSERT_EQ(xnn_status_success, status);
+  ASSERT_NE(nullptr, op);
+
+  std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_op(op, xnn_delete_operator);
+
+  size_t workspace_size = SIZE_MAX;
+  size_t workspace_alignment = SIZE_MAX;
+  ASSERT_EQ(xnn_status_success,
+            xnn_reshape_mean_nd_qu8(op,
+                                    reduction_axes.size(), reduction_axes.data(),
+                                    input_shape.size(), input_shape.data(),
+                                    &workspace_size, &workspace_alignment,
+                                    /*threadpool=*/nullptr));
+
+  ASSERT_NE(workspace_size, SIZE_MAX);
+  ASSERT_LE(workspace_alignment, XNN_ALLOCATION_ALIGNMENT);
+  std::vector<char, AlignedAllocator<char, XNN_ALLOCATION_ALIGNMENT>> workspace(workspace_size);
+  ASSERT_EQ(xnn_status_success, xnn_setup_mean_nd_qu8(op, workspace.data(), input.data(), operator_output.data()));
+
+  ASSERT_EQ(xnn_status_success, xnn_run_operator(op, /*threadpool=*/nullptr));
+
+  // Call subgraph API.
+  xnn_subgraph_t subgraph = nullptr;
+  ASSERT_EQ(xnn_status_success, xnn_create_subgraph(2, /*flags=*/0, &subgraph));
+  std::unique_ptr<xnn_subgraph, decltype(&xnn_delete_subgraph)> auto_subgraph(subgraph, xnn_delete_subgraph);
+
+  uint32_t input_id = XNN_INVALID_NODE_ID;
+  ASSERT_EQ(xnn_status_success,
+            xnn_define_quantized_tensor_value(
+              subgraph, xnn_datatype_quint8, input_zero_point, input_scale,
+              input_shape.size(), input_shape.data(), nullptr, /*external_id=*/0,
+              XNN_VALUE_FLAG_EXTERNAL_INPUT, &input_id));
+  ASSERT_NE(input_id, XNN_INVALID_NODE_ID);
+
+  uint32_t output_id = XNN_INVALID_NODE_ID;
+  int output_num_dims = input_shape.size();
+  if (!keep_dims) {
+    output_num_dims -= reduction_axes.size();
+  }
+  ASSERT_EQ(xnn_status_success,
+            xnn_define_quantized_tensor_value(
+              subgraph, xnn_datatype_quint8, output_zero_point, output_scale,
+              output_num_dims, output_shape.data(), nullptr, /*external_id=*/1,
+              XNN_VALUE_FLAG_EXTERNAL_OUTPUT, &output_id));
+  ASSERT_NE(output_id, XNN_INVALID_NODE_ID);
+
+  ASSERT_EQ(xnn_status_success,
+            xnn_define_static_mean(subgraph, reduction_axes.size(),
                                   reduction_axes.data(), input_id, output_id,
                                   flags));
+
+  xnn_runtime_t runtime = nullptr;
+  ASSERT_EQ(xnn_status_success, xnn_create_runtime_v3(subgraph, nullptr, nullptr, /*flags=*/0, &runtime));
+  ASSERT_NE(nullptr, runtime);
+  std::unique_ptr<xnn_runtime, decltype(&xnn_delete_runtime)> auto_runtime(runtime, xnn_delete_runtime);
+
+  const std::array<xnn_external_value, 2> external = {
+    xnn_external_value{input_id, input.data()},
+    xnn_external_value{output_id, subgraph_output.data()}
+  };
+  ASSERT_EQ(xnn_status_success, xnn_setup_runtime(runtime, external.size(), external.data()));
+  ASSERT_EQ(xnn_status_success, xnn_invoke_runtime(runtime));
+
+  // Check outputs match.
+  for (size_t i = 0; i < subgraph_output.size(); ++i) {
+    ASSERT_EQ(subgraph_output[i], operator_output[i]) << " i " << i;
+  }
+}
+
 TEST_F(MeanTestF32, reshape_output_keep_dims)
 {
   ASSERT_EQ(xnn_status_success, xnn_initialize(/*allocator=*/nullptr));
@@ -830,4 +978,191 @@ TEST_F(MeanTestQS8, reshape_output_no_keep_dims)
   }
 }
 
+TEST_F(MeanTestQU8, reshape_output_keep_dims)
+{
+  ASSERT_EQ(xnn_status_success, xnn_initialize(/*allocator=*/nullptr));
+
+  // Call subgraph API.
+  xnn_subgraph_t subgraph = nullptr;
+  ASSERT_EQ(xnn_status_success, xnn_create_subgraph(2, /*flags=*/0, &subgraph));
+  std::unique_ptr<xnn_subgraph, decltype(&xnn_delete_subgraph)> auto_subgraph(subgraph, xnn_delete_subgraph);
+
+  std::uniform_real_distribution<float> scale_dist = std::uniform_real_distribution<float>(0.0f, 1.0f);
+  const float input_scale = scale_dist(rng);
+  const float output_scale = scale_dist(rng);
+  const int32_t input_zero_point = u8dist(rng);
+  const int32_t output_zero_point = u8dist(rng);
+
+  uint32_t input_id = XNN_INVALID_NODE_ID;
+  ASSERT_EQ(xnn_status_success,
+            xnn_define_quantized_tensor_value(
+              subgraph, xnn_datatype_quint8, input_zero_point, input_scale,
+              input_shape.size(), input_shape.data(), nullptr, /*external_id=*/0,
+              XNN_VALUE_FLAG_EXTERNAL_INPUT, &input_id));
+  ASSERT_NE(input_id, XNN_INVALID_NODE_ID);
+
+  uint32_t output_id = XNN_INVALID_NODE_ID;
+  ASSERT_EQ(xnn_status_success,
+            xnn_define_quantized_tensor_value(
+              subgraph, xnn_datatype_quint8, output_zero_point, output_scale,
+              output_shape.size(), output_shape.data(), nullptr, /*external_id=*/1,
+              XNN_VALUE_FLAG_EXTERNAL_OUTPUT, &output_id));
+  ASSERT_NE(output_id, XNN_INVALID_NODE_ID);
+
+  ASSERT_EQ(xnn_status_success,
+            xnn_define_static_mean(
+              subgraph,
+              reduction_axes.size(), reduction_axes.data(),
+              input_id, output_id,
+              /*flags=*/XNN_FLAG_KEEP_DIMS));
+
+  xnn_runtime_t runtime = nullptr;
+  ASSERT_EQ(xnn_status_success, xnn_create_runtime_v3(subgraph, nullptr, nullptr, /*flags=*/0, &runtime));
+  ASSERT_NE(nullptr, runtime);
+  std::unique_ptr<xnn_runtime, decltype(&xnn_delete_runtime)> auto_runtime(runtime, xnn_delete_runtime);
+
+  const std::array<xnn_external_value, 2> external = {
+    xnn_external_value{input_id, input.data()},
+    xnn_external_value{output_id, subgraph_output.data()}
+  };
+  ASSERT_EQ(xnn_status_success, xnn_setup_runtime(runtime, external.size(), external.data()));
+  ASSERT_EQ(xnn_status_success, xnn_invoke_runtime(runtime));
+
+  input_shape[0] += 2;
+  input_shape[1] += 4;
+  ASSERT_EQ(xnn_status_success, xnn_reshape_external_value(runtime, input_id, input_shape.size(), input_shape.data()));
+  const struct xnn_node* node = &subgraph->nodes[0];
+  std::vector<size_t> unique_reduction_axes = reduction_axes;
+  std::sort(unique_reduction_axes.begin(), unique_reduction_axes.end());
+  auto end = std::unique(unique_reduction_axes.begin(), unique_reduction_axes.end());
+  unique_reduction_axes.erase(end, unique_reduction_axes.end());
+  // There are too many parameters which influence the workspace size so
+  // knowing if reallocation is required or not is messy.
+  node->reshape(&runtime->opdata[0], runtime->values, runtime->num_values, /*threadpool=*/nullptr);
+  const xnn_shape* output_shape = &runtime->values[node->outputs[0]].shape;
+  size_t current_axes = 0;
+  for (size_t i = 0; i < output_shape->num_dims; ++i) {
+    if (unique_reduction_axes[current_axes] == i) {
+      ASSERT_EQ(output_shape->dim[i], 1);
+      ++current_axes;
+      if (current_axes == unique_reduction_axes.size()) {
+        break;
+      }
+    } else {
+      ASSERT_EQ(output_shape->dim[i], input_shape[i]);
+    }
+  }
+
+  input_shape[0] -= 1;
+  ASSERT_EQ(xnn_status_success, xnn_reshape_external_value(runtime, input_id, input_shape.size(), input_shape.data()));
+  ASSERT_EQ(node->reshape(&runtime->opdata[0], runtime->values, runtime->num_values, /*threadpool=*/nullptr), xnn_status_success);
+  current_axes = 0;
+  for (size_t i = 0; i < output_shape->num_dims; ++i) {
+    if (unique_reduction_axes[current_axes] == i) {
+      ASSERT_EQ(output_shape->dim[i], 1);
+      ++current_axes;
+      if (current_axes == unique_reduction_axes.size()) {
+        break;
+      }
+    } else {
+      ASSERT_EQ(output_shape->dim[i], input_shape[i]);
+    }
+  }
+}
+
+TEST_F(MeanTestQU8, reshape_output_no_keep_dims)
+{
+  ASSERT_EQ(xnn_status_success, xnn_initialize(/*allocator=*/nullptr));
+
+  // Call subgraph API.
+  xnn_subgraph_t subgraph = nullptr;
+  ASSERT_EQ(xnn_status_success, xnn_create_subgraph(2, /*flags=*/0, &subgraph));
+  std::unique_ptr<xnn_subgraph, decltype(&xnn_delete_subgraph)> auto_subgraph(subgraph, xnn_delete_subgraph);
+
+  std::uniform_real_distribution<float> scale_dist = std::uniform_real_distribution<float>(0.0f, 1.0f);
+  const float input_scale = scale_dist(rng);
+  const float output_scale = scale_dist(rng);
+  const int32_t input_zero_point = u8dist(rng);
+  const int32_t output_zero_point = u8dist(rng);
+
+  uint32_t input_id = XNN_INVALID_NODE_ID;
+  ASSERT_EQ(xnn_status_success,
+            xnn_define_quantized_tensor_value(
+              subgraph, xnn_datatype_quint8, input_zero_point, input_scale,
+              input_shape.size(), input_shape.data(), nullptr, /*external_id=*/0,
+              XNN_VALUE_FLAG_EXTERNAL_INPUT, &input_id));
+  ASSERT_NE(input_id, XNN_INVALID_NODE_ID);
+
+  uint32_t output_id = XNN_INVALID_NODE_ID;
+  int output_num_dims = input_shape.size() - reduction_axes.size();
+  ASSERT_EQ(xnn_status_success,
+            xnn_define_quantized_tensor_value(
+              subgraph, xnn_datatype_quint8, output_zero_point, output_scale,
+              output_num_dims, output_shape.data(), nullptr, /*external_id=*/1,
+              XNN_VALUE_FLAG_EXTERNAL_OUTPUT, &output_id));
+  ASSERT_NE(output_id, XNN_INVALID_NODE_ID);
+
+  ASSERT_EQ(xnn_status_success,
+            xnn_define_static_mean(
+              subgraph,
+              reduction_axes.size(), reduction_axes.data(),
+              input_id, output_id,
+              /*flags=*/0));
+
+  xnn_runtime_t runtime = nullptr;
+  ASSERT_EQ(xnn_status_success, xnn_create_runtime_v3(subgraph, nullptr, nullptr, /*flags=*/0, &runtime));
+  ASSERT_NE(nullptr, runtime);
+  std::unique_ptr<xnn_runtime, decltype(&xnn_delete_runtime)> auto_runtime(runtime, xnn_delete_runtime);
+
+  const std::array<xnn_external_value, 2> external = {
+    xnn_external_value{input_id, input.data()},
+    xnn_external_value{output_id, subgraph_output.data()}
+  };
+  ASSERT_EQ(xnn_status_success, xnn_setup_runtime(runtime, external.size(), external.data()));
+  ASSERT_EQ(xnn_status_success, xnn_invoke_runtime(runtime));
+
+  input_shape[0] += 2;
+  input_shape[1] += 4;
+  ASSERT_EQ(xnn_status_success, xnn_reshape_external_value(runtime, input_id, input_shape.size(), input_shape.data()));
+  const struct xnn_node* node = &subgraph->nodes[0];
+  std::vector<size_t> unique_reduction_axes = reduction_axes;
+  std::sort(unique_reduction_axes.begin(), unique_reduction_axes.end());
+  auto end = std::unique(unique_reduction_axes.begin(), unique_reduction_axes.end());
+  unique_reduction_axes.erase(end, unique_reduction_axes.end());
+  // There are too many parameters which influence the workspace size so
+  // knowing if reallocation is required or not is messy.
+  node->reshape(&runtime->opdata[0], runtime->values, runtime->num_values, /*threadpool=*/nullptr);
+  const xnn_shape* output_shape = &runtime->values[node->outputs[0]].shape;
+  size_t current_axes = 0;
+  size_t current_dim = 0;
+  for (size_t i = 0; i < input_shape.size(); ++i) {
+    if (unique_reduction_axes[current_axes] == i) {
+      ++current_axes;
+      if (current_axes == unique_reduction_axes.size()) {
+        break;
+      }
+    } else {
+      ASSERT_EQ(output_shape->dim[current_dim], input_shape[i]);
+      ++current_dim;
+    }
+  }
+
+  input_shape[0] -= 1;
+  ASSERT_EQ(xnn_status_success, xnn_reshape_external_value(runtime, input_id, input_shape.size(), input_shape.data()));
+  ASSERT_EQ(node->reshape(&runtime->opdata[0], runtime->values, runtime->num_values, /*threadpool=*/nullptr), xnn_status_success);
+  current_axes = 0;
+  current_dim = 0;
+  for (size_t i = 0; i < input_shape.size(); ++i) {
+    if (unique_reduction_axes[current_axes] == i) {
+      ++current_axes;
+      if (current_axes == unique_reduction_axes.size()) {
+        break;
+      }
+    } else {
+      ASSERT_EQ(output_shape->dim[current_dim], input_shape[i]);
+      ++current_dim;
+    }
+  }
+}
+
 }  // namespace xnnpack
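
Reviewer note (not part of the change): for anyone unfamiliar with the mean ND operator API, the sketch below walks the same create -> reshape -> setup -> run sequence that the new MeanTestQU8.matches_operator_api test drives on the operator side. It relies only on the xnn_create_mean_nd_qu8 / xnn_reshape_mean_nd_qu8 / xnn_setup_mean_nd_qu8 entry points exercised in this diff; the shapes, quantization parameters, main() wrapper, and the hand-rolled workspace alignment are illustrative assumptions, not values taken from the tests.

// Illustrative only: standalone QU8 mean via the operator API.
#include <xnnpack.h>

#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  if (xnn_initialize(/*allocator=*/nullptr) != xnn_status_success) {
    std::fprintf(stderr, "xnn_initialize failed\n");
    return 1;
  }

  // Hypothetical 1x8x8x4 input, reduced over the spatial axes {1, 2}.
  const std::vector<size_t> input_shape = {1, 8, 8, 4};
  const std::vector<size_t> reduction_axes = {1, 2};
  std::vector<uint8_t> input(1 * 8 * 8 * 4, 200);
  std::vector<uint8_t> output(1 * 4, 0);

  // Made-up quantization parameters; the create call takes the input/output scale ratio.
  const float input_scale = 0.5f;
  const float output_scale = 1.0f;
  const uint8_t input_zero_point = 128;
  const uint8_t output_zero_point = 128;

  xnn_operator_t op = nullptr;
  if (xnn_create_mean_nd_qu8(input_scale / output_scale, input_zero_point,
                             output_zero_point, /*flags=*/0, &op) != xnn_status_success) {
    std::fprintf(stderr, "xnn_create_mean_nd_qu8 failed\n");
    return 1;
  }

  // Reshape reports the scratch memory the operator needs for these shapes/axes.
  size_t workspace_size = 0;
  size_t workspace_alignment = 0;
  if (xnn_reshape_mean_nd_qu8(op, reduction_axes.size(), reduction_axes.data(),
                              input_shape.size(), input_shape.data(),
                              &workspace_size, &workspace_alignment,
                              /*threadpool=*/nullptr) != xnn_status_success) {
    std::fprintf(stderr, "xnn_reshape_mean_nd_qu8 failed\n");
    return 1;
  }

  // The tests use an aligned allocator for the workspace; here we over-allocate and
  // align by hand (assuming the reported alignment is a power of two).
  std::vector<char> workspace_storage(workspace_size + workspace_alignment);
  uintptr_t addr = reinterpret_cast<uintptr_t>(workspace_storage.data());
  if (workspace_alignment > 1) {
    addr = (addr + workspace_alignment - 1) & ~(uintptr_t) (workspace_alignment - 1);
  }

  if (xnn_setup_mean_nd_qu8(op, reinterpret_cast<void*>(addr), input.data(),
                            output.data()) != xnn_status_success ||
      xnn_run_operator(op, /*threadpool=*/nullptr) != xnn_status_success) {
    std::fprintf(stderr, "running the QU8 mean operator failed\n");
    return 1;
  }

  std::printf("mean[0] = %d\n", output[0]);
  xnn_delete_operator(op);
  return 0;
}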