Skip to content

Commit

Permalink
Copybara import of the project:
Browse files Browse the repository at this point in the history
--
07db838 by Mihai.Olinovici <[email protected]>:

Add RVV f32-f16-vcvt kernels and configs.

FUTURE_COPYBARA_INTEGRATE_REVIEW=#7128 from imaginationtech:img_patch30_f32_f16_vcvt bd821ed
PiperOrigin-RevId: 678634358
  • Loading branch information
oliIMG authored and xnnpack-bot committed Sep 26, 2024
1 parent 2286715 commit cc5d8eb
Show file tree
Hide file tree
Showing 9 changed files with 221 additions and 2 deletions.
8 changes: 6 additions & 2 deletions cmake/gen/rvvfp16arith_microkernels.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,16 @@
# Generator: tools/update-microkernels.py


# PROD source list for the rvvfp16arith microkernels (presumably the kernels
# selected for production configs -- confirm against the generator).
SET(PROD_RVVFP16ARITH_MICROKERNEL_SRCS)
SET(PROD_RVVFP16ARITH_MICROKERNEL_SRCS
src/f32-f16-vcvt/gen/f32-f16-vcvt-rvvfp16arith-u4v.c)

# NON_PROD source list: the remaining rvvfp16arith microkernels that are not in
# the PROD list above (f16-vclamp variants and the u1v/u2v/u8v f32-f16-vcvt
# variants).
SET(NON_PROD_RVVFP16ARITH_MICROKERNEL_SRCS
src/f16-vclamp/gen/f16-vclamp-rvvfp16arith-u1v.c
src/f16-vclamp/gen/f16-vclamp-rvvfp16arith-u2v.c
src/f16-vclamp/gen/f16-vclamp-rvvfp16arith-u4v.c
src/f16-vclamp/gen/f16-vclamp-rvvfp16arith-u8v.c)
src/f16-vclamp/gen/f16-vclamp-rvvfp16arith-u8v.c
src/f32-f16-vcvt/gen/f32-f16-vcvt-rvvfp16arith-u1v.c
src/f32-f16-vcvt/gen/f32-f16-vcvt-rvvfp16arith-u2v.c
src/f32-f16-vcvt/gen/f32-f16-vcvt-rvvfp16arith-u8v.c)

# Union of the PROD and NON_PROD source lists. SET() builds a list from its
# arguments and has no "+" operator, so the two expansions are simply placed
# side by side; a "+" between them would be stored as a bogus list element.
SET(ALL_RVVFP16ARITH_MICROKERNEL_SRCS ${PROD_RVVFP16ARITH_MICROKERNEL_SRCS} ${NON_PROD_RVVFP16ARITH_MICROKERNEL_SRCS})
4 changes: 4 additions & 0 deletions gen/rvvfp16arith_microkernels.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,17 @@ Auto-generated file. Do not edit!
"""

# PROD source list for the rvvfp16arith microkernels.
PROD_RVVFP16ARITH_MICROKERNEL_SRCS = [
"src/f32-f16-vcvt/gen/f32-f16-vcvt-rvvfp16arith-u4v.c",
]

# NON_PROD source list: the rvvfp16arith microkernels not listed in PROD.
NON_PROD_RVVFP16ARITH_MICROKERNEL_SRCS = [
"src/f16-vclamp/gen/f16-vclamp-rvvfp16arith-u1v.c",
"src/f16-vclamp/gen/f16-vclamp-rvvfp16arith-u2v.c",
"src/f16-vclamp/gen/f16-vclamp-rvvfp16arith-u4v.c",
"src/f16-vclamp/gen/f16-vclamp-rvvfp16arith-u8v.c",
"src/f32-f16-vcvt/gen/f32-f16-vcvt-rvvfp16arith-u1v.c",
"src/f32-f16-vcvt/gen/f32-f16-vcvt-rvvfp16arith-u2v.c",
"src/f32-f16-vcvt/gen/f32-f16-vcvt-rvvfp16arith-u8v.c",
]

# Every rvvfp16arith source: PROD entries followed by NON_PROD entries.
ALL_RVVFP16ARITH_MICROKERNEL_SRCS = PROD_RVVFP16ARITH_MICROKERNEL_SRCS + NON_PROD_RVVFP16ARITH_MICROKERNEL_SRCS
6 changes: 6 additions & 0 deletions scripts/generate-f32-f16-vcvt.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,12 @@ tools/xngen src/f32-f16-vcvt/neon.c.in -D BATCH_TILE=32 -o src/f32-f16-vcvt/gen/
tools/xngen src/f32-f16-vcvt/neonfp16.c.in -D BATCH_TILE=8 -o src/f32-f16-vcvt/gen/f32-f16-vcvt-neonfp16-u8.c &
tools/xngen src/f32-f16-vcvt/neonfp16.c.in -D BATCH_TILE=16 -o src/f32-f16-vcvt/gen/f32-f16-vcvt-neonfp16-u16.c &

################################ RISC-V Vector ################################
# One generated kernel per LMUL value; LMUL=N yields the "-uNv" output file.
# Each xngen run is started in the background, as for the other sections.
for lmul in 1 2 4 8; do
  tools/xngen src/f32-f16-vcvt/rvvfp16arith.c.in -D LMUL=${lmul} -o src/f32-f16-vcvt/gen/f32-f16-vcvt-rvvfp16arith-u${lmul}v.c &
done

################################# x86 128-bit #################################
tools/xngen src/f32-f16-vcvt/sse.c.in -D SSE=2 -D AVX=0 -D BATCH_TILE=8 -o src/f32-f16-vcvt/gen/f32-f16-vcvt-sse2-u8.c &
tools/xngen src/f32-f16-vcvt/sse.c.in -D SSE=2 -D AVX=0 -D BATCH_TILE=16 -o src/f32-f16-vcvt/gen/f32-f16-vcvt-sse2-u16.c &
Expand Down
7 changes: 7 additions & 0 deletions src/f32-f16-vcvt/f32-f16-vcvt.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,13 @@ XNN_CVT_UKERNEL_WITH_PARAMS(0, xnn_f32_f16_vcvt_ukernel__wasmrelaxedsimd_u24, 24
XNN_CVT_UKERNEL_WITH_PARAMS(0, xnn_f32_f16_vcvt_ukernel__wasmrelaxedsimd_u32, 32, false, float, xnn_float16, void, NULL)
#endif // XNN_ARCH_WASMRELAXEDSIMD

#if XNN_ARCH_RISCV && XNN_ENABLE_RISCV_FP16_VECTOR
// RISC-V Vector FP16 kernels. The names listed here must match the functions
// generated from src/f32-f16-vcvt/rvvfp16arith.c.in, which are called
// xnn_f32_f16_vcvt_ukernel__rvvfp16arith_u<LMUL>v.
// NOTE(review): the first argument is 0, as for the neighboring entries;
// confirm whether an RVV FP16 arch-flag should be passed here instead.
XNN_CVT_UKERNEL_WITH_PARAMS(0, xnn_f32_f16_vcvt_ukernel__rvvfp16arith_u1v, 1, true, float, xnn_float16, void, NULL)
XNN_CVT_UKERNEL_WITH_PARAMS(0, xnn_f32_f16_vcvt_ukernel__rvvfp16arith_u2v, 2, true, float, xnn_float16, void, NULL)
XNN_CVT_UKERNEL_WITH_PARAMS(0, xnn_f32_f16_vcvt_ukernel__rvvfp16arith_u4v, 4, true, float, xnn_float16, void, NULL)
XNN_CVT_UKERNEL_WITH_PARAMS(0, xnn_f32_f16_vcvt_ukernel__rvvfp16arith_u8v, 8, true, float, xnn_float16, void, NULL)
#endif  // XNN_ARCH_RISCV && XNN_ENABLE_RISCV_FP16_VECTOR

XNN_CVT_UKERNEL_WITH_PARAMS(0, xnn_f32_f16_vcvt_ukernel__scalar_bitcast_u1, 1, false, float, xnn_float16, void, NULL)
XNN_CVT_UKERNEL_WITH_PARAMS(0, xnn_f32_f16_vcvt_ukernel__scalar_bitcast_u2, 2, false, float, xnn_float16, void, NULL)
XNN_CVT_UKERNEL_WITH_PARAMS(0, xnn_f32_f16_vcvt_ukernel__scalar_bitcast_u3, 3, false, float, xnn_float16, void, NULL)
Expand Down
40 changes: 40 additions & 0 deletions src/f32-f16-vcvt/gen/f32-f16-vcvt-rvvfp16arith-u1v.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
// Auto-generated file. Do not edit!
// Template: src/f32-f16-vcvt/rvvfp16arith.c.in
// Generator: tools/xngen
//
// Copyright 2024 Imagination Technologies, Inc.
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include <assert.h>

#include <riscv_vector.h>

#include "xnnpack/vcvt.h"


// Converts a contiguous array of f32 values to f16 with RVV intrinsics,
// loading f32 at LMUL=1 and storing the narrowed f16 result at LMUL=1/2.
//
//   batch  - input size in bytes; must be a non-zero multiple of sizeof(float).
//   input  - source f32 elements.
//   output - destination buffer, written as _Float16 elements.
//   params - not used by this kernel.
void xnn_f32_f16_vcvt_ukernel__rvvfp16arith_u1v(
    size_t batch,
    const float* input,
    void* output,
    const void* params)
{
  assert(batch != 0);
  assert(batch % sizeof(float) == 0);
  assert(input != NULL);
  assert(output != NULL);

  // Turn the byte count into an element count (XNN_LOG2_SIZEOF_FLOAT is
  // presumably log2(sizeof(float)); it is defined outside this file).
  batch >>= XNN_LOG2_SIZEOF_FLOAT;

  _Float16* o = (_Float16*) output;
  while (batch != 0) {
    // vsetvl returns the number of elements handled this iteration as a
    // size_t; keep it unsigned so it is not narrowed or sign-converted when
    // used with `batch` and the pointers.
    const size_t n = __riscv_vsetvl_e32m1(batch);
    batch -= n;

    vfloat32m1_t x_f32v = __riscv_vle32_v_f32m1(input, n);
    input += n;

    // Narrowing f32 -> f16 conversion halves the register group.
    vfloat16mf2_t y_f16v = __riscv_vfncvt_f_f_w_f16mf2(x_f32v, n);

    __riscv_vse16_v_f16mf2(o, y_f16v, n);
    o += n;
  }
}
40 changes: 40 additions & 0 deletions src/f32-f16-vcvt/gen/f32-f16-vcvt-rvvfp16arith-u2v.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
// Auto-generated file. Do not edit!
// Template: src/f32-f16-vcvt/rvvfp16arith.c.in
// Generator: tools/xngen
//
// Copyright 2024 Imagination Technologies, Inc.
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include <assert.h>

#include <riscv_vector.h>

#include "xnnpack/vcvt.h"


// Converts a contiguous array of f32 values to f16 with RVV intrinsics,
// loading f32 at LMUL=2 and storing the narrowed f16 result at LMUL=1.
//
//   batch  - input size in bytes; must be a non-zero multiple of sizeof(float).
//   input  - source f32 elements.
//   output - destination buffer, written as _Float16 elements.
//   params - not used by this kernel.
void xnn_f32_f16_vcvt_ukernel__rvvfp16arith_u2v(
    size_t batch,
    const float* input,
    void* output,
    const void* params)
{
  assert(batch != 0);
  assert(batch % sizeof(float) == 0);
  assert(input != NULL);
  assert(output != NULL);

  // Turn the byte count into an element count (XNN_LOG2_SIZEOF_FLOAT is
  // presumably log2(sizeof(float)); it is defined outside this file).
  batch >>= XNN_LOG2_SIZEOF_FLOAT;

  _Float16* o = (_Float16*) output;
  while (batch != 0) {
    // vsetvl returns the number of elements handled this iteration as a
    // size_t; keep it unsigned so it is not narrowed or sign-converted when
    // used with `batch` and the pointers.
    const size_t n = __riscv_vsetvl_e32m2(batch);
    batch -= n;

    vfloat32m2_t x_f32v = __riscv_vle32_v_f32m2(input, n);
    input += n;

    // Narrowing f32 -> f16 conversion halves the register group.
    vfloat16m1_t y_f16v = __riscv_vfncvt_f_f_w_f16m1(x_f32v, n);

    __riscv_vse16_v_f16m1(o, y_f16v, n);
    o += n;
  }
}
40 changes: 40 additions & 0 deletions src/f32-f16-vcvt/gen/f32-f16-vcvt-rvvfp16arith-u4v.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
// Auto-generated file. Do not edit!
// Template: src/f32-f16-vcvt/rvvfp16arith.c.in
// Generator: tools/xngen
//
// Copyright 2024 Imagination Technologies, Inc.
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include <assert.h>

#include <riscv_vector.h>

#include "xnnpack/vcvt.h"


// Converts a contiguous array of f32 values to f16 with RVV intrinsics,
// loading f32 at LMUL=4 and storing the narrowed f16 result at LMUL=2.
//
//   batch  - input size in bytes; must be a non-zero multiple of sizeof(float).
//   input  - source f32 elements.
//   output - destination buffer, written as _Float16 elements.
//   params - not used by this kernel.
void xnn_f32_f16_vcvt_ukernel__rvvfp16arith_u4v(
    size_t batch,
    const float* input,
    void* output,
    const void* params)
{
  assert(batch != 0);
  assert(batch % sizeof(float) == 0);
  assert(input != NULL);
  assert(output != NULL);

  // Turn the byte count into an element count (XNN_LOG2_SIZEOF_FLOAT is
  // presumably log2(sizeof(float)); it is defined outside this file).
  batch >>= XNN_LOG2_SIZEOF_FLOAT;

  _Float16* o = (_Float16*) output;
  while (batch != 0) {
    // vsetvl returns the number of elements handled this iteration as a
    // size_t; keep it unsigned so it is not narrowed or sign-converted when
    // used with `batch` and the pointers.
    const size_t n = __riscv_vsetvl_e32m4(batch);
    batch -= n;

    vfloat32m4_t x_f32v = __riscv_vle32_v_f32m4(input, n);
    input += n;

    // Narrowing f32 -> f16 conversion halves the register group.
    vfloat16m2_t y_f16v = __riscv_vfncvt_f_f_w_f16m2(x_f32v, n);

    __riscv_vse16_v_f16m2(o, y_f16v, n);
    o += n;
  }
}
40 changes: 40 additions & 0 deletions src/f32-f16-vcvt/gen/f32-f16-vcvt-rvvfp16arith-u8v.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
// Auto-generated file. Do not edit!
// Template: src/f32-f16-vcvt/rvvfp16arith.c.in
// Generator: tools/xngen
//
// Copyright 2024 Imagination Technologies, Inc.
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include <assert.h>

#include <riscv_vector.h>

#include "xnnpack/vcvt.h"


// Converts a contiguous array of f32 values to f16 with RVV intrinsics,
// loading f32 at LMUL=8 and storing the narrowed f16 result at LMUL=4.
//
//   batch  - input size in bytes; must be a non-zero multiple of sizeof(float).
//   input  - source f32 elements.
//   output - destination buffer, written as _Float16 elements.
//   params - not used by this kernel.
void xnn_f32_f16_vcvt_ukernel__rvvfp16arith_u8v(
    size_t batch,
    const float* input,
    void* output,
    const void* params)
{
  assert(batch != 0);
  assert(batch % sizeof(float) == 0);
  assert(input != NULL);
  assert(output != NULL);

  // Turn the byte count into an element count (XNN_LOG2_SIZEOF_FLOAT is
  // presumably log2(sizeof(float)); it is defined outside this file).
  batch >>= XNN_LOG2_SIZEOF_FLOAT;

  _Float16* o = (_Float16*) output;
  while (batch != 0) {
    // vsetvl returns the number of elements handled this iteration as a
    // size_t; keep it unsigned so it is not narrowed or sign-converted when
    // used with `batch` and the pointers.
    const size_t n = __riscv_vsetvl_e32m8(batch);
    batch -= n;

    vfloat32m8_t x_f32v = __riscv_vle32_v_f32m8(input, n);
    input += n;

    // Narrowing f32 -> f16 conversion halves the register group.
    vfloat16m4_t y_f16v = __riscv_vfncvt_f_f_w_f16m4(x_f32v, n);

    __riscv_vse16_v_f16m4(o, y_f16v, n);
    o += n;
  }
}
38 changes: 38 additions & 0 deletions src/f32-f16-vcvt/rvvfp16arith.c.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
// Copyright 2024 Imagination Technologies, Inc.
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

$assert LMUL in [1, 2, 4, 8]
$LMUL_16 = {1: "f2", 2: "1", 4: "2", 8: "4"}[LMUL]
#include <assert.h>

#include <riscv_vector.h>

#include "xnnpack/vcvt.h"


// Converts a contiguous array of f32 values to f16 with RVV intrinsics.
// The f32 input is loaded with the LMUL chosen at generation time and the
// narrowed f16 result is stored with half that register group.
//
//   batch  - input size in bytes; must be a non-zero multiple of sizeof(float).
//   input  - source f32 elements.
//   output - destination buffer, written as _Float16 elements.
//   params - not used by this kernel.
void xnn_f32_f16_vcvt_ukernel__rvvfp16arith_u${LMUL}v(
    size_t batch,
    const float* input,
    void* output,
    const void* params)
{
  assert(batch != 0);
  assert(batch % sizeof(float) == 0);
  assert(input != NULL);
  assert(output != NULL);

  // Turn the byte count into an element count (XNN_LOG2_SIZEOF_FLOAT is
  // presumably log2(sizeof(float)); it is defined outside this file).
  batch >>= XNN_LOG2_SIZEOF_FLOAT;

  _Float16* o = (_Float16*) output;
  while (batch != 0) {
    // vsetvl returns the number of elements handled this iteration as a
    // size_t; keep it unsigned so it is not narrowed or sign-converted when
    // used with `batch` and the pointers.
    const size_t n = __riscv_vsetvl_e32m${LMUL}(batch);
    batch -= n;

    vfloat32m${LMUL}_t x_f32v = __riscv_vle32_v_f32m${LMUL}(input, n);
    input += n;

    // Narrowing f32 -> f16 conversion halves the register group.
    vfloat16m${LMUL_16}_t y_f16v = __riscv_vfncvt_f_f_w_f16m${LMUL_16}(x_f32v, n);

    __riscv_vse16_v_f16m${LMUL_16}(o, y_f16v, n);
    o += n;
  }
}

0 comments on commit cc5d8eb

Please sign in to comment.