Skip to content

Commit

Permalink
qd8-f32-qc4w-gemm SSE4: use _mm_mullo_epi32, but unpack to zero-extend
Browse files Browse the repository at this point in the history
- Add prefetch versions of the SSE gemm kernels
- Replace MR=28 gemm kernels with MR=4 (for comparing code quality of sse vs avx512)
- Apply formatting to BUILD and tests

PiperOrigin-RevId: 674495954
  • Loading branch information
fbarchard authored and xnnpack-bot committed Sep 14, 2024
1 parent 6ee48c2 commit 467338f
Show file tree
Hide file tree
Showing 78 changed files with 6,219 additions and 12,868 deletions.
14 changes: 7 additions & 7 deletions BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -171,15 +171,15 @@ MICROKERNEL_DEFS = [
"src/f16-vbinary/f16-vminc.h",
"src/f16-vbinary/f16-vmul-minmax.h",
"src/f16-vbinary/f16-vmulc-minmax.h",
"src/f16-vbinary/f16-vprelu.h",
"src/f16-vbinary/f16-vpreluc.h",
"src/f16-vbinary/f16-vrdivc-minmax.h",
"src/f16-vbinary/f16-vrpreluc.h",
"src/f16-vbinary/f16-vrsubc-minmax.h",
"src/f16-vbinary/f16-vsqrdiff.h",
"src/f16-vbinary/f16-vsqrdiffc.h",
"src/f16-vbinary/f16-vsub-minmax.h",
"src/f16-vbinary/f16-vsubc-minmax.h",
"src/f16-vbinary/f16-vprelu.h",
"src/f16-vbinary/f16-vpreluc.h",
"src/f16-vbinary/f16-vrpreluc.h",
"src/f16-vclamp/f16-vclamp.h",
"src/f16-velu/f16-velu.h",
"src/f16-vhswish/f16-vhswish.h",
Expand Down Expand Up @@ -217,9 +217,12 @@ MICROKERNEL_DEFS = [
"src/f32-vbinary/f32-vmul.h",
"src/f32-vbinary/f32-vmulc-minmax.h",
"src/f32-vbinary/f32-vmulc.h",
"src/f32-vbinary/f32-vprelu.h",
"src/f32-vbinary/f32-vpreluc.h",
"src/f32-vbinary/f32-vrcopysignc.h",
"src/f32-vbinary/f32-vrdivc-minmax.h",
"src/f32-vbinary/f32-vrdivc.h",
"src/f32-vbinary/f32-vrpreluc.h",
"src/f32-vbinary/f32-vrsubc-minmax.h",
"src/f32-vbinary/f32-vrsubc.h",
"src/f32-vbinary/f32-vsqrdiff.h",
Expand All @@ -228,9 +231,6 @@ MICROKERNEL_DEFS = [
"src/f32-vbinary/f32-vsub.h",
"src/f32-vbinary/f32-vsubc-minmax.h",
"src/f32-vbinary/f32-vsubc.h",
"src/f32-vbinary/f32-vprelu.h",
"src/f32-vbinary/f32-vpreluc.h",
"src/f32-vbinary/f32-vrpreluc.h",
"src/f32-vclamp/f32-vclamp.h",
"src/f32-velu/f32-velu.h",
"src/f32-vexp/f32-vexp.h",
Expand All @@ -249,6 +249,7 @@ MICROKERNEL_DEFS = [
"src/f32-vsqr/f32-vsqr.h",
"src/f32-vsqrt/f32-vsqrt.h",
"src/f32-vtanh/f32-vtanh.h",
"src/qs8-packw/qs8-packw.h",
"src/qs8-vadd/qs8-vadd-minmax.h",
"src/qs8-vaddc/qs8-vaddc-minmax.h",
"src/qs8-vhswish/qs8-vhswish.h",
Expand All @@ -275,7 +276,6 @@ MICROKERNEL_DEFS = [
"src/u64-u32-vsqrtshift/u64-u32-vsqrtshift.h",
"src/x8-packq/x8-packq.h",
"src/x8-packw/x8-packw.h",
"src/qs8-packw/qs8-packw.h",
"src/x16-packw/x16-packw.h",
"src/x32-packb/x32-packb.h",
"src/x32-packw/x32-packw.h",
Expand Down
Loading

0 comments on commit 467338f

Please sign in to comment.