Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

AVXVNNIINT8 remove 128 bias from input zero point #7110

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions src/qs8-gemm/MRx8c8-avxvnni.c.in
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,10 @@ void xnn_${DATATYPE_SPEC}_gemm_minmax${REQUANTIZATION_SPEC}_ukernel_${MR}x8c8__$
XNN_FORCE_REALIZATION(vsign_mask);
$if DATATYPE in ["QD8_F16", "QD8_F32", "QC4_F16", "QC4_F32"]:
$for M in range(MR):
const __m256i vinput_zero_point${M} = _mm256_set1_epi32((int) quantization_params[${M}].zero_point + 128);
$if VARIANT == "AVXVNNIINT8":
const __m256i vinput_zero_point${M} = _mm256_set1_epi32((int) quantization_params[${M}].zero_point);
$else:
const __m256i vinput_zero_point${M} = _mm256_set1_epi32((int) quantization_params[${M}].zero_point + 128);
$if "F16" in DATATYPE:
const __m256 voutput_min = _mm256_cvtph_ps(_mm_set1_epi16(*(const uint16_t*) &params->scalar.min));
const __m256 voutput_max = _mm256_cvtph_ps(_mm_set1_epi16(*(const uint16_t*) &params->scalar.max));
Expand All @@ -108,7 +111,7 @@ void xnn_${DATATYPE_SPEC}_gemm_minmax${REQUANTIZATION_SPEC}_ukernel_${MR}x8c8__$
$else:
const __m256 voutput_max_less_zero_point = _mm256_set1_ps((int32_t) params->${PARAMS_STRUCT}.output_max - (int32_t) params->${PARAMS_STRUCT}.output_zero_point);
const __m256i voutput_zero_point = _mm256_set1_epi32(params->${PARAMS_STRUCT}.output_zero_point);
const __m128i voutput_min = _mm_set1_epi8(params->${PARAMS_STRUCT}.output_min); // *** check params
const __m128i voutput_min = _mm_set1_epi8(params->${PARAMS_STRUCT}.output_min);
// XNN_FORCE_REALIZATION(voutput_max_less_zero_point);
// XNN_FORCE_REALIZATION(voutput_zero_point);
// XNN_FORCE_REALIZATION(voutput_min);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ void xnn_qs8_qc8w_gemm_minmax_fp32_ukernel_10x8c8__avx256vnni_prfm(
XNN_FORCE_REALIZATION(vsign_mask);
const __m256 voutput_max_less_zero_point = _mm256_set1_ps((int32_t) params->fp32_scalar.output_max - (int32_t) params->fp32_scalar.output_zero_point);
const __m256i voutput_zero_point = _mm256_set1_epi32(params->fp32_scalar.output_zero_point);
const __m128i voutput_min = _mm_set1_epi8(params->fp32_scalar.output_min); // *** check params
const __m128i voutput_min = _mm_set1_epi8(params->fp32_scalar.output_min);
// XNN_FORCE_REALIZATION(voutput_max_less_zero_point);
// XNN_FORCE_REALIZATION(voutput_zero_point);
// XNN_FORCE_REALIZATION(voutput_min);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ void xnn_qs8_qc8w_gemm_minmax_fp32_ukernel_10x8c8__avx256vnni(
XNN_FORCE_REALIZATION(vsign_mask);
const __m256 voutput_max_less_zero_point = _mm256_set1_ps((int32_t) params->fp32_scalar.output_max - (int32_t) params->fp32_scalar.output_zero_point);
const __m256i voutput_zero_point = _mm256_set1_epi32(params->fp32_scalar.output_zero_point);
const __m128i voutput_min = _mm_set1_epi8(params->fp32_scalar.output_min); // *** check params
const __m128i voutput_min = _mm_set1_epi8(params->fp32_scalar.output_min);
// XNN_FORCE_REALIZATION(voutput_max_less_zero_point);
// XNN_FORCE_REALIZATION(voutput_zero_point);
// XNN_FORCE_REALIZATION(voutput_min);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ void xnn_qs8_qc8w_gemm_minmax_fp32_ukernel_12x8c8__avx256vnni_prfm(
XNN_FORCE_REALIZATION(vsign_mask);
const __m256 voutput_max_less_zero_point = _mm256_set1_ps((int32_t) params->fp32_scalar.output_max - (int32_t) params->fp32_scalar.output_zero_point);
const __m256i voutput_zero_point = _mm256_set1_epi32(params->fp32_scalar.output_zero_point);
const __m128i voutput_min = _mm_set1_epi8(params->fp32_scalar.output_min); // *** check params
const __m128i voutput_min = _mm_set1_epi8(params->fp32_scalar.output_min);
// XNN_FORCE_REALIZATION(voutput_max_less_zero_point);
// XNN_FORCE_REALIZATION(voutput_zero_point);
// XNN_FORCE_REALIZATION(voutput_min);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ void xnn_qs8_qc8w_gemm_minmax_fp32_ukernel_12x8c8__avx256vnni(
XNN_FORCE_REALIZATION(vsign_mask);
const __m256 voutput_max_less_zero_point = _mm256_set1_ps((int32_t) params->fp32_scalar.output_max - (int32_t) params->fp32_scalar.output_zero_point);
const __m256i voutput_zero_point = _mm256_set1_epi32(params->fp32_scalar.output_zero_point);
const __m128i voutput_min = _mm_set1_epi8(params->fp32_scalar.output_min); // *** check params
const __m128i voutput_min = _mm_set1_epi8(params->fp32_scalar.output_min);
// XNN_FORCE_REALIZATION(voutput_max_less_zero_point);
// XNN_FORCE_REALIZATION(voutput_zero_point);
// XNN_FORCE_REALIZATION(voutput_min);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ void xnn_qs8_qc8w_gemm_minmax_fp32_ukernel_14x8c8__avx256vnni_prfm(
XNN_FORCE_REALIZATION(vsign_mask);
const __m256 voutput_max_less_zero_point = _mm256_set1_ps((int32_t) params->fp32_scalar.output_max - (int32_t) params->fp32_scalar.output_zero_point);
const __m256i voutput_zero_point = _mm256_set1_epi32(params->fp32_scalar.output_zero_point);
const __m128i voutput_min = _mm_set1_epi8(params->fp32_scalar.output_min); // *** check params
const __m128i voutput_min = _mm_set1_epi8(params->fp32_scalar.output_min);
// XNN_FORCE_REALIZATION(voutput_max_less_zero_point);
// XNN_FORCE_REALIZATION(voutput_zero_point);
// XNN_FORCE_REALIZATION(voutput_min);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ void xnn_qs8_qc8w_gemm_minmax_fp32_ukernel_14x8c8__avx256vnni(
XNN_FORCE_REALIZATION(vsign_mask);
const __m256 voutput_max_less_zero_point = _mm256_set1_ps((int32_t) params->fp32_scalar.output_max - (int32_t) params->fp32_scalar.output_zero_point);
const __m256i voutput_zero_point = _mm256_set1_epi32(params->fp32_scalar.output_zero_point);
const __m128i voutput_min = _mm_set1_epi8(params->fp32_scalar.output_min); // *** check params
const __m128i voutput_min = _mm_set1_epi8(params->fp32_scalar.output_min);
// XNN_FORCE_REALIZATION(voutput_max_less_zero_point);
// XNN_FORCE_REALIZATION(voutput_zero_point);
// XNN_FORCE_REALIZATION(voutput_min);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ void xnn_qs8_qc8w_gemm_minmax_fp32_ukernel_1x8c8__avx256vnni_prfm(
XNN_FORCE_REALIZATION(vsign_mask);
const __m256 voutput_max_less_zero_point = _mm256_set1_ps((int32_t) params->fp32_scalar.output_max - (int32_t) params->fp32_scalar.output_zero_point);
const __m256i voutput_zero_point = _mm256_set1_epi32(params->fp32_scalar.output_zero_point);
const __m128i voutput_min = _mm_set1_epi8(params->fp32_scalar.output_min); // *** check params
const __m128i voutput_min = _mm_set1_epi8(params->fp32_scalar.output_min);
// XNN_FORCE_REALIZATION(voutput_max_less_zero_point);
// XNN_FORCE_REALIZATION(voutput_zero_point);
// XNN_FORCE_REALIZATION(voutput_min);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ void xnn_qs8_qc8w_gemm_minmax_fp32_ukernel_1x8c8__avx256vnni(
XNN_FORCE_REALIZATION(vsign_mask);
const __m256 voutput_max_less_zero_point = _mm256_set1_ps((int32_t) params->fp32_scalar.output_max - (int32_t) params->fp32_scalar.output_zero_point);
const __m256i voutput_zero_point = _mm256_set1_epi32(params->fp32_scalar.output_zero_point);
const __m128i voutput_min = _mm_set1_epi8(params->fp32_scalar.output_min); // *** check params
const __m128i voutput_min = _mm_set1_epi8(params->fp32_scalar.output_min);
// XNN_FORCE_REALIZATION(voutput_max_less_zero_point);
// XNN_FORCE_REALIZATION(voutput_zero_point);
// XNN_FORCE_REALIZATION(voutput_min);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ void xnn_qs8_qc8w_gemm_minmax_fp32_ukernel_1x8c8__avxvnni_prfm(
XNN_FORCE_REALIZATION(vsign_mask);
const __m256 voutput_max_less_zero_point = _mm256_set1_ps((int32_t) params->fp32_scalar.output_max - (int32_t) params->fp32_scalar.output_zero_point);
const __m256i voutput_zero_point = _mm256_set1_epi32(params->fp32_scalar.output_zero_point);
const __m128i voutput_min = _mm_set1_epi8(params->fp32_scalar.output_min); // *** check params
const __m128i voutput_min = _mm_set1_epi8(params->fp32_scalar.output_min);
// XNN_FORCE_REALIZATION(voutput_max_less_zero_point);
// XNN_FORCE_REALIZATION(voutput_zero_point);
// XNN_FORCE_REALIZATION(voutput_min);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ void xnn_qs8_qc8w_gemm_minmax_fp32_ukernel_1x8c8__avxvnni(
XNN_FORCE_REALIZATION(vsign_mask);
const __m256 voutput_max_less_zero_point = _mm256_set1_ps((int32_t) params->fp32_scalar.output_max - (int32_t) params->fp32_scalar.output_zero_point);
const __m256i voutput_zero_point = _mm256_set1_epi32(params->fp32_scalar.output_zero_point);
const __m128i voutput_min = _mm_set1_epi8(params->fp32_scalar.output_min); // *** check params
const __m128i voutput_min = _mm_set1_epi8(params->fp32_scalar.output_min);
// XNN_FORCE_REALIZATION(voutput_max_less_zero_point);
// XNN_FORCE_REALIZATION(voutput_zero_point);
// XNN_FORCE_REALIZATION(voutput_min);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ void xnn_qs8_qc8w_gemm_minmax_fp32_ukernel_1x8c8__avxvnniint8_prfm(

const __m256 voutput_max_less_zero_point = _mm256_set1_ps((int32_t) params->fp32_scalar.output_max - (int32_t) params->fp32_scalar.output_zero_point);
const __m256i voutput_zero_point = _mm256_set1_epi32(params->fp32_scalar.output_zero_point);
const __m128i voutput_min = _mm_set1_epi8(params->fp32_scalar.output_min); // *** check params
const __m128i voutput_min = _mm_set1_epi8(params->fp32_scalar.output_min);
// XNN_FORCE_REALIZATION(voutput_max_less_zero_point);
// XNN_FORCE_REALIZATION(voutput_zero_point);
// XNN_FORCE_REALIZATION(voutput_min);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ void xnn_qs8_qc8w_gemm_minmax_fp32_ukernel_2x8c8__avxvnni_prfm(
XNN_FORCE_REALIZATION(vsign_mask);
const __m256 voutput_max_less_zero_point = _mm256_set1_ps((int32_t) params->fp32_scalar.output_max - (int32_t) params->fp32_scalar.output_zero_point);
const __m256i voutput_zero_point = _mm256_set1_epi32(params->fp32_scalar.output_zero_point);
const __m128i voutput_min = _mm_set1_epi8(params->fp32_scalar.output_min); // *** check params
const __m128i voutput_min = _mm_set1_epi8(params->fp32_scalar.output_min);
// XNN_FORCE_REALIZATION(voutput_max_less_zero_point);
// XNN_FORCE_REALIZATION(voutput_zero_point);
// XNN_FORCE_REALIZATION(voutput_min);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ void xnn_qs8_qc8w_gemm_minmax_fp32_ukernel_2x8c8__avxvnni(
XNN_FORCE_REALIZATION(vsign_mask);
const __m256 voutput_max_less_zero_point = _mm256_set1_ps((int32_t) params->fp32_scalar.output_max - (int32_t) params->fp32_scalar.output_zero_point);
const __m256i voutput_zero_point = _mm256_set1_epi32(params->fp32_scalar.output_zero_point);
const __m128i voutput_min = _mm_set1_epi8(params->fp32_scalar.output_min); // *** check params
const __m128i voutput_min = _mm_set1_epi8(params->fp32_scalar.output_min);
// XNN_FORCE_REALIZATION(voutput_max_less_zero_point);
// XNN_FORCE_REALIZATION(voutput_zero_point);
// XNN_FORCE_REALIZATION(voutput_min);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ void xnn_qs8_qc8w_gemm_minmax_fp32_ukernel_3x8c8__avxvnni_prfm(
XNN_FORCE_REALIZATION(vsign_mask);
const __m256 voutput_max_less_zero_point = _mm256_set1_ps((int32_t) params->fp32_scalar.output_max - (int32_t) params->fp32_scalar.output_zero_point);
const __m256i voutput_zero_point = _mm256_set1_epi32(params->fp32_scalar.output_zero_point);
const __m128i voutput_min = _mm_set1_epi8(params->fp32_scalar.output_min); // *** check params
const __m128i voutput_min = _mm_set1_epi8(params->fp32_scalar.output_min);
// XNN_FORCE_REALIZATION(voutput_max_less_zero_point);
// XNN_FORCE_REALIZATION(voutput_zero_point);
// XNN_FORCE_REALIZATION(voutput_min);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ void xnn_qs8_qc8w_gemm_minmax_fp32_ukernel_3x8c8__avxvnni(
XNN_FORCE_REALIZATION(vsign_mask);
const __m256 voutput_max_less_zero_point = _mm256_set1_ps((int32_t) params->fp32_scalar.output_max - (int32_t) params->fp32_scalar.output_zero_point);
const __m256i voutput_zero_point = _mm256_set1_epi32(params->fp32_scalar.output_zero_point);
const __m128i voutput_min = _mm_set1_epi8(params->fp32_scalar.output_min); // *** check params
const __m128i voutput_min = _mm_set1_epi8(params->fp32_scalar.output_min);
// XNN_FORCE_REALIZATION(voutput_max_less_zero_point);
// XNN_FORCE_REALIZATION(voutput_zero_point);
// XNN_FORCE_REALIZATION(voutput_min);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ void xnn_qs8_qc8w_gemm_minmax_fp32_ukernel_4x8c8__avxvnni_prfm(
XNN_FORCE_REALIZATION(vsign_mask);
const __m256 voutput_max_less_zero_point = _mm256_set1_ps((int32_t) params->fp32_scalar.output_max - (int32_t) params->fp32_scalar.output_zero_point);
const __m256i voutput_zero_point = _mm256_set1_epi32(params->fp32_scalar.output_zero_point);
const __m128i voutput_min = _mm_set1_epi8(params->fp32_scalar.output_min); // *** check params
const __m128i voutput_min = _mm_set1_epi8(params->fp32_scalar.output_min);
// XNN_FORCE_REALIZATION(voutput_max_less_zero_point);
// XNN_FORCE_REALIZATION(voutput_zero_point);
// XNN_FORCE_REALIZATION(voutput_min);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ void xnn_qs8_qc8w_gemm_minmax_fp32_ukernel_4x8c8__avxvnni(
XNN_FORCE_REALIZATION(vsign_mask);
const __m256 voutput_max_less_zero_point = _mm256_set1_ps((int32_t) params->fp32_scalar.output_max - (int32_t) params->fp32_scalar.output_zero_point);
const __m256i voutput_zero_point = _mm256_set1_epi32(params->fp32_scalar.output_zero_point);
const __m128i voutput_min = _mm_set1_epi8(params->fp32_scalar.output_min); // *** check params
const __m128i voutput_min = _mm_set1_epi8(params->fp32_scalar.output_min);
// XNN_FORCE_REALIZATION(voutput_max_less_zero_point);
// XNN_FORCE_REALIZATION(voutput_zero_point);
// XNN_FORCE_REALIZATION(voutput_min);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ void xnn_qs8_qc8w_gemm_minmax_fp32_ukernel_5x8c8__avx256vnni_prfm(
XNN_FORCE_REALIZATION(vsign_mask);
const __m256 voutput_max_less_zero_point = _mm256_set1_ps((int32_t) params->fp32_scalar.output_max - (int32_t) params->fp32_scalar.output_zero_point);
const __m256i voutput_zero_point = _mm256_set1_epi32(params->fp32_scalar.output_zero_point);
const __m128i voutput_min = _mm_set1_epi8(params->fp32_scalar.output_min); // *** check params
const __m128i voutput_min = _mm_set1_epi8(params->fp32_scalar.output_min);
// XNN_FORCE_REALIZATION(voutput_max_less_zero_point);
// XNN_FORCE_REALIZATION(voutput_zero_point);
// XNN_FORCE_REALIZATION(voutput_min);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ void xnn_qs8_qc8w_gemm_minmax_fp32_ukernel_5x8c8__avx256vnni(
XNN_FORCE_REALIZATION(vsign_mask);
const __m256 voutput_max_less_zero_point = _mm256_set1_ps((int32_t) params->fp32_scalar.output_max - (int32_t) params->fp32_scalar.output_zero_point);
const __m256i voutput_zero_point = _mm256_set1_epi32(params->fp32_scalar.output_zero_point);
const __m128i voutput_min = _mm_set1_epi8(params->fp32_scalar.output_min); // *** check params
const __m128i voutput_min = _mm_set1_epi8(params->fp32_scalar.output_min);
// XNN_FORCE_REALIZATION(voutput_max_less_zero_point);
// XNN_FORCE_REALIZATION(voutput_zero_point);
// XNN_FORCE_REALIZATION(voutput_min);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ void xnn_qs8_qc8w_gemm_minmax_fp32_ukernel_5x8c8__avxvnni_prfm(
XNN_FORCE_REALIZATION(vsign_mask);
const __m256 voutput_max_less_zero_point = _mm256_set1_ps((int32_t) params->fp32_scalar.output_max - (int32_t) params->fp32_scalar.output_zero_point);
const __m256i voutput_zero_point = _mm256_set1_epi32(params->fp32_scalar.output_zero_point);
const __m128i voutput_min = _mm_set1_epi8(params->fp32_scalar.output_min); // *** check params
const __m128i voutput_min = _mm_set1_epi8(params->fp32_scalar.output_min);
// XNN_FORCE_REALIZATION(voutput_max_less_zero_point);
// XNN_FORCE_REALIZATION(voutput_zero_point);
// XNN_FORCE_REALIZATION(voutput_min);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ void xnn_qs8_qc8w_gemm_minmax_fp32_ukernel_5x8c8__avxvnni(
XNN_FORCE_REALIZATION(vsign_mask);
const __m256 voutput_max_less_zero_point = _mm256_set1_ps((int32_t) params->fp32_scalar.output_max - (int32_t) params->fp32_scalar.output_zero_point);
const __m256i voutput_zero_point = _mm256_set1_epi32(params->fp32_scalar.output_zero_point);
const __m128i voutput_min = _mm_set1_epi8(params->fp32_scalar.output_min); // *** check params
const __m128i voutput_min = _mm_set1_epi8(params->fp32_scalar.output_min);
// XNN_FORCE_REALIZATION(voutput_max_less_zero_point);
// XNN_FORCE_REALIZATION(voutput_zero_point);
// XNN_FORCE_REALIZATION(voutput_min);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ void xnn_qs8_qc8w_gemm_minmax_fp32_ukernel_5x8c8__avxvnniint8_prfm(

const __m256 voutput_max_less_zero_point = _mm256_set1_ps((int32_t) params->fp32_scalar.output_max - (int32_t) params->fp32_scalar.output_zero_point);
const __m256i voutput_zero_point = _mm256_set1_epi32(params->fp32_scalar.output_zero_point);
const __m128i voutput_min = _mm_set1_epi8(params->fp32_scalar.output_min); // *** check params
const __m128i voutput_min = _mm_set1_epi8(params->fp32_scalar.output_min);
// XNN_FORCE_REALIZATION(voutput_max_less_zero_point);
// XNN_FORCE_REALIZATION(voutput_zero_point);
// XNN_FORCE_REALIZATION(voutput_min);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ void xnn_qs8_qc8w_gemm_minmax_fp32_ukernel_6x8c8__avxvnni_prfm(
XNN_FORCE_REALIZATION(vsign_mask);
const __m256 voutput_max_less_zero_point = _mm256_set1_ps((int32_t) params->fp32_scalar.output_max - (int32_t) params->fp32_scalar.output_zero_point);
const __m256i voutput_zero_point = _mm256_set1_epi32(params->fp32_scalar.output_zero_point);
const __m128i voutput_min = _mm_set1_epi8(params->fp32_scalar.output_min); // *** check params
const __m128i voutput_min = _mm_set1_epi8(params->fp32_scalar.output_min);
// XNN_FORCE_REALIZATION(voutput_max_less_zero_point);
// XNN_FORCE_REALIZATION(voutput_zero_point);
// XNN_FORCE_REALIZATION(voutput_min);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ void xnn_qs8_qc8w_gemm_minmax_fp32_ukernel_6x8c8__avxvnni(
XNN_FORCE_REALIZATION(vsign_mask);
const __m256 voutput_max_less_zero_point = _mm256_set1_ps((int32_t) params->fp32_scalar.output_max - (int32_t) params->fp32_scalar.output_zero_point);
const __m256i voutput_zero_point = _mm256_set1_epi32(params->fp32_scalar.output_zero_point);
const __m128i voutput_min = _mm_set1_epi8(params->fp32_scalar.output_min); // *** check params
const __m128i voutput_min = _mm_set1_epi8(params->fp32_scalar.output_min);
// XNN_FORCE_REALIZATION(voutput_max_less_zero_point);
// XNN_FORCE_REALIZATION(voutput_zero_point);
// XNN_FORCE_REALIZATION(voutput_min);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ void xnn_qs8_qc8w_gemm_minmax_fp32_ukernel_7x8c8__avx256vnni_prfm(
XNN_FORCE_REALIZATION(vsign_mask);
const __m256 voutput_max_less_zero_point = _mm256_set1_ps((int32_t) params->fp32_scalar.output_max - (int32_t) params->fp32_scalar.output_zero_point);
const __m256i voutput_zero_point = _mm256_set1_epi32(params->fp32_scalar.output_zero_point);
const __m128i voutput_min = _mm_set1_epi8(params->fp32_scalar.output_min); // *** check params
const __m128i voutput_min = _mm_set1_epi8(params->fp32_scalar.output_min);
// XNN_FORCE_REALIZATION(voutput_max_less_zero_point);
// XNN_FORCE_REALIZATION(voutput_zero_point);
// XNN_FORCE_REALIZATION(voutput_min);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ void xnn_qs8_qc8w_gemm_minmax_fp32_ukernel_7x8c8__avx256vnni(
XNN_FORCE_REALIZATION(vsign_mask);
const __m256 voutput_max_less_zero_point = _mm256_set1_ps((int32_t) params->fp32_scalar.output_max - (int32_t) params->fp32_scalar.output_zero_point);
const __m256i voutput_zero_point = _mm256_set1_epi32(params->fp32_scalar.output_zero_point);
const __m128i voutput_min = _mm_set1_epi8(params->fp32_scalar.output_min); // *** check params
const __m128i voutput_min = _mm_set1_epi8(params->fp32_scalar.output_min);
// XNN_FORCE_REALIZATION(voutput_max_less_zero_point);
// XNN_FORCE_REALIZATION(voutput_zero_point);
// XNN_FORCE_REALIZATION(voutput_min);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ void xnn_qs8_qc8w_gemm_minmax_fp32_ukernel_7x8c8__avxvnni_prfm(
XNN_FORCE_REALIZATION(vsign_mask);
const __m256 voutput_max_less_zero_point = _mm256_set1_ps((int32_t) params->fp32_scalar.output_max - (int32_t) params->fp32_scalar.output_zero_point);
const __m256i voutput_zero_point = _mm256_set1_epi32(params->fp32_scalar.output_zero_point);
const __m128i voutput_min = _mm_set1_epi8(params->fp32_scalar.output_min); // *** check params
const __m128i voutput_min = _mm_set1_epi8(params->fp32_scalar.output_min);
// XNN_FORCE_REALIZATION(voutput_max_less_zero_point);
// XNN_FORCE_REALIZATION(voutput_zero_point);
// XNN_FORCE_REALIZATION(voutput_min);
Expand Down
Loading