Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft: [onert] Support RoPE operation #14090

Draft
wants to merge 7 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions compute/cker/include/cker/Types.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,12 @@ enum class ComparisonOpType
LessEqual
};

// Selects which RoPE (rotary position embedding) variant a kernel should apply.
enum class RoPEMode
{
// GPT-NeoX variant: the cker RoPE kernel pairs element i of the innermost dimension
// with element i + D/2 (D = size of the innermost dimension).
kGptNeox = 0,
// GPT-J variant: declared here, but the cker RoPE kernel currently rejects it with
// "Unsupported RoPE mode".
kGptJ = 1,
};

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

RoPEMode was added to support the two modes of RoPE (GPT-NeoX and GPT-J).
Currently, only GPT-NeoX is supported.

struct PaddingValues
{
int16_t width;
Expand Down
77 changes: 77 additions & 0 deletions compute/cker/include/cker/operation/RoPE.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
/*
* Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef __NNFW_CKER_ROPE_H__
#define __NNFW_CKER_ROPE_H__

#include "cker/Shape.h"
#include "cker/Types.h"
#include "cker/Utils.h"

namespace nnfw
{
namespace cker
{

/**
 * @brief Apply Rotary Position Embedding (RoPE) to a tensor addressed with four dimensions.
 *
 * Only RoPEMode::kGptNeox is implemented. With D = size of dimension 3, every row of the input
 * is split into two halves and element i of the first half is rotated together with element
 * i + D/2 of the second half:
 *
 *   out[i]       = x[i] * cos[i]       - x[i + D/2] * sin[i]
 *   out[i + D/2] = x[i] * sin[i + D/2] + x[i + D/2] * cos[i + D/2]
 *
 * The sin/cos tables are indexed by the innermost coordinate only, i.e. the same D table entries
 * are applied to every (i0, i1, i2) position.
 *
 * Measurements reported in the PR discussion (onert_run, x86_64-linux, BUILD_TYPE=Release):
 *   before fuse (Net_RoPE_000.circle): MODEL_LOAD 0.251 ms, PREPARE 3.557 ms, EXECUTE 0.036 ms
 *   after fuse  (RoPE_000.circle)    : MODEL_LOAD 0.169 ms, PREPARE 2.384 ms, EXECUTE 0.020 ms
 *
 * @tparam T              Element type of input, tables and output
 * @param mode            RoPE variant to apply; only RoPEMode::kGptNeox is supported
 * @param input_shape     Shape of the input; dimensions 0..3 are read
 * @param input_data      Input elements
 * @param sin_table_shape Shape of the sin table; its dimension 3 must equal the input's
 * @param sin_table_data  Sin table elements, at least D of them
 * @param cos_table_shape Shape of the cos table; its dimension 3 must equal the input's
 * @param cos_table_data  Cos table elements, at least D of them
 * @param output_shape    Shape of the output; matched against input_shape via MatchingDim
 * @param output_data     Destination buffer, written with the input's element offsets
 *
 * @throws std::runtime_error if dimension 3 of a table differs from the input's, if the mode is
 *                            not supported, or if dimension 3 of the input is odd
 */
template <typename T>
inline void RoPE(const RoPEMode mode, const Shape &input_shape, const T *input_data,
                 const Shape &sin_table_shape, const T *sin_table_data,
                 const Shape &cos_table_shape, const T *cos_table_data, const Shape &output_shape,
                 T *output_data)
{
  // The tables are indexed with the input's innermost coordinate, so a table whose dimension 3
  // differs from the input's would break the computation below; such input is rejected by
  // throwing (as noted in the PR discussion).
  if (input_shape.Dims(3) != sin_table_shape.Dims(3))
    throw std::runtime_error("the dimension(3) of input and sin_table do not match");

  if (input_shape.Dims(3) != cos_table_shape.Dims(3))
    throw std::runtime_error("the dimension(3) of input and cos_table do not match");

  const int32_t i0_n = MatchingDim(input_shape, 0, output_shape, 0);
  const int32_t i1_n = MatchingDim(input_shape, 1, output_shape, 1);
  const int32_t i2_n = MatchingDim(input_shape, 2, output_shape, 2);
  const int32_t i3_n = MatchingDim(input_shape, 3, output_shape, 3);

  if (mode != RoPEMode::kGptNeox)
  {
    throw std::runtime_error("Unsupported RoPE mode");
  }

  // Elements are rotated in pairs (i, i + D/2). With an odd D the last element of every row
  // would have no partner and would never be written, leaving the output partly uninitialized.
  if (i3_n % 2 != 0)
    throw std::runtime_error("the dimension(3) of input must be even");

  const int32_t half = i3_n / 2;

  for (int32_t i0 = 0; i0 < i0_n; ++i0)
  {
    for (int32_t i1 = 0; i1 < i1_n; ++i1)
    {
      for (int32_t i2 = 0; i2 < i2_n; ++i2)
      {
        for (int32_t i3 = 0; i3 < half; ++i3)
        {
          const int32_t offset = Offset(input_shape, i0, i1, i2, i3);
          const T x0 = input_data[offset];
          const T x1 = input_data[offset + half];

          output_data[offset] = x0 * cos_table_data[i3] - x1 * sin_table_data[i3];
          output_data[offset + half] =
            x0 * sin_table_data[i3 + half] + x1 * cos_table_data[i3 + half];
        }
      }
    }
  }
}

} // namespace cker
} // namespace nnfw

#endif // __NNFW_CKER_ROPE_H__
152 changes: 152 additions & 0 deletions compute/cker/src/RoPE.test.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
/*
* Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "cker/Shape.h"
#include <cker/operation/RoPE.h>

#include <gtest/gtest.h>
#include <vector>

using nnfw::cker::Shape;
using nnfw::cker::RoPEMode;

// Positive cases: GPT-NeoX RoPE on a single row of four elements, for float and int64_t.
// With half = 2 the kernel computes
//   out[i]     = x[i] * cos[i]     - x[i + 2] * sin[i]
//   out[i + 2] = x[i] * sin[i + 2] + x[i + 2] * cos[i + 2]
TEST(CKer_Operation, RoPE)
{
  // float
  {
    const RoPEMode mode = RoPEMode::kGptNeox;

    const Shape input_shape{1, 1, 1, 4};
    const std::vector<float> input{0, 1.0, 2.0, 3.0};

    const Shape sin_table_shape{1, 1, 1, 4};
    const std::vector<float> sin_table{0.5, 1.0, 1.0, 0.5};
    const Shape cos_table_shape{1, 1, 1, 4};
    const std::vector<float> cos_table{1.0, 0.5, 0.5, 1.0};

    // out[0] = 0 * 1.0 - 2 * 0.5 = -1.0, out[1] = 1 * 0.5 - 3 * 1.0 = -2.5
    // out[2] = 0 * 1.0 + 2 * 0.5 =  1.0, out[3] = 1 * 0.5 + 3 * 1.0 =  3.5
    const std::vector<float> ref_output_data{-1.0, -2.5, 1.0, 3.5};

    const Shape output_shape{1, 1, 1, 4};
    std::vector<float> output(ref_output_data.size());

    nnfw::cker::RoPE<float>(mode, input_shape, input.data(), sin_table_shape, sin_table.data(),
                            cos_table_shape, cos_table.data(), output_shape, output.data());

    for (size_t i = 0; i < ref_output_data.size(); ++i)
    {
      EXPECT_FLOAT_EQ(ref_output_data[i], output[i]);
    }
  }

  // int64_t
  {
    const RoPEMode mode = RoPEMode::kGptNeox;

    const Shape input_shape{1, 1, 1, 4};
    const std::vector<int64_t> input{0, 1, 2, 3};

    const Shape sin_table_shape{1, 1, 1, 4};
    const std::vector<int64_t> sin_table{0, 1, 1, 0};
    const Shape cos_table_shape{1, 1, 1, 4};
    const std::vector<int64_t> cos_table{1, 0, 0, 1};

    // out[0] = 0 * 1 - 2 * 0 = 0, out[1] = 1 * 0 - 3 * 1 = -3
    // out[2] = 0 * 1 + 2 * 0 = 0, out[3] = 1 * 0 + 3 * 1 =  3
    const std::vector<int64_t> ref_output_data{0, -3, 0, 3};

    const Shape output_shape{1, 1, 1, 4};
    std::vector<int64_t> output(ref_output_data.size());

    nnfw::cker::RoPE<int64_t>(mode, input_shape, input.data(), sin_table_shape, sin_table.data(),
                              cos_table_shape, cos_table.data(), output_shape, output.data());

    for (size_t i = 0; i < ref_output_data.size(); ++i)
    {
      EXPECT_EQ(ref_output_data[i], output[i]);
    }
  }
}

// Negative cases: every call below is expected to throw before producing any output.
TEST(CKer_Operation, neg_RoPE)
{
  // the dimension(3) of sin_table and input do not match
  {
    const RoPEMode mode = RoPEMode::kGptNeox;

    const Shape input_shape{1, 1, 1, 4};
    const std::vector<float> input{0, 1.0, 2.0, 3.0};

    // sin table has 3 entries in dimension 3 while the input has 4
    const Shape sin_table_shape{1, 1, 1, 3};
    const std::vector<float> sin_table{0.5, 1.0, 1.0};
    const Shape cos_table_shape{1, 1, 1, 4};
    const std::vector<float> cos_table{1.0, 0.5, 0.5, 1.0};

    const Shape output_shape{1, 1, 1, 4};
    std::vector<float> output(input.size());

    EXPECT_ANY_THROW(nnfw::cker::RoPE<float>(mode, input_shape, input.data(), sin_table_shape,
                                             sin_table.data(), cos_table_shape, cos_table.data(),
                                             output_shape, output.data()));
  }

  // the dimension(3) of cos_table and input do not match
  {
    const RoPEMode mode = RoPEMode::kGptNeox;

    const Shape input_shape{1, 1, 1, 4};
    const std::vector<float> input{0, 1.0, 2.0, 3.0};

    const Shape sin_table_shape{1, 1, 1, 4};
    const std::vector<float> sin_table{0.5, 1.0, 1.0, 0.5};
    // cos table has 3 entries in dimension 3 while the input has 4
    const Shape cos_table_shape{1, 1, 1, 3};
    const std::vector<float> cos_table{1.0, 0.5, 0.5};

    const Shape output_shape{1, 1, 1, 4};
    std::vector<float> output(input.size());

    EXPECT_ANY_THROW(nnfw::cker::RoPE<float>(mode, input_shape, input.data(), sin_table_shape,
                                             sin_table.data(), cos_table_shape, cos_table.data(),
                                             output_shape, output.data()));
  }

  // unsupported RoPE Mode
  {
    const RoPEMode mode = RoPEMode::kGptJ;

    const Shape input_shape{1, 1, 1, 4};
    const std::vector<float> input{0, 1.0, 2.0, 3.0};

    const Shape sin_table_shape{1, 1, 1, 4};
    const std::vector<float> sin_table{0.5, 1.0, 1.0, 0.5};
    const Shape cos_table_shape{1, 1, 1, 4};
    const std::vector<float> cos_table{1.0, 0.5, 0.5, 1.0};

    const Shape output_shape{1, 1, 1, 4};
    std::vector<float> output(input.size());

    EXPECT_ANY_THROW(nnfw::cker::RoPE<float>(mode, input_shape, input.data(), sin_table_shape,
                                             sin_table.data(), cos_table_shape, cos_table.data(),
                                             output_shape, output.data()));
  }
}
21 changes: 21 additions & 0 deletions runtime/onert/backend/cpu/KernelGenerator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
#include "ops/ReshapeLayer.h"
#include "ops/ResizeBilinearLayer.h"
#include "ops/ReverseLayer.h"
#include "ops/RoPELayer.h"
#include "ops/SelectLayer.h"
#include "ops/ShapeLayer.h"
#include "ops/SliceLayer.h"
Expand Down Expand Up @@ -1543,6 +1544,26 @@ void KernelGenerator::visit(const ir::operation::LSTM &node)
_return_fn = std::move(fn);
}

// Creates the CPU kernel function for a RoPE node: resolves the node's operands to portable
// tensors, converts the IR mode to the cker mode and stores a configured ops::RoPELayer in
// _return_fn.
void KernelGenerator::visit(const ir::operation::RoPE &node)
{
  const auto &inputs = node.getInputs();
  const auto &outputs = node.getOutputs();

  // The op takes three inputs (input, sin_table and cos_table). Per the PR discussion, the
  // sin/cos tables are not produced here; they are fed in from the upper layer (e.g. the app).
  const auto in_idx = inputs.at(ir::operation::RoPE::Input::INPUT);
  const auto sin_idx = inputs.at(ir::operation::RoPE::Input::SIN_TABLE);
  const auto cos_idx = inputs.at(ir::operation::RoPE::Input::COS_TABLE);
  const auto out_idx = outputs.at(ir::operation::RoPE::Output::OUTPUT);

  const auto rope_mode = ops::getRoPEMode(node.param().mode);

  auto in_tensor = _tensor_reg->getPortableTensor(in_idx);
  auto sin_table_tensor = _tensor_reg->getPortableTensor(sin_idx);
  auto cos_table_tensor = _tensor_reg->getPortableTensor(cos_idx);
  auto out_tensor = _tensor_reg->getPortableTensor(out_idx);

  auto layer = std::make_unique<ops::RoPELayer>();
  layer->configure(in_tensor, sin_table_tensor, cos_table_tensor, rope_mode, out_tensor);

  _return_fn = std::move(layer);
}

} // namespace cpu
} // namespace backend
} // namespace onert
1 change: 1 addition & 0 deletions runtime/onert/backend/cpu/KernelGenerator.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ class KernelGenerator : public basic::KernelGeneratorBase
void visit(const ir::operation::Reshape &) override;
void visit(const ir::operation::ResizeBilinear &node) override;
void visit(const ir::operation::Reverse &) override;
void visit(const ir::operation::RoPE &) override;
void visit(const ir::operation::Select &) override;
void visit(const ir::operation::Shape &) override;
void visit(const ir::operation::Slice &) override;
Expand Down
14 changes: 14 additions & 0 deletions runtime/onert/backend/cpu/ops/OperationUtils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,20 @@ std::vector<int32_t> getReducerAxes(const IPortableTensor *axes)
return ret;
}

/**
 * @brief Convert the IR-level RoPE mode into the matching cker RoPE mode.
 *
 * @param rope_mode Mode stored in the ir::operation::RoPE parameters
 * @return nnfw::cker::RoPEMode::kGptNeox for GPT_NEOX, nnfw::cker::RoPEMode::kGptJ for GPT_J
 * @throws std::runtime_error if rope_mode is neither of the two known enumerators
 */
nnfw::cker::RoPEMode getRoPEMode(ir::operation::RoPE::RoPEMode rope_mode)
{
  switch (rope_mode)
  {
    case ir::operation::RoPE::RoPEMode::GPT_NEOX:
      return nnfw::cker::RoPEMode::kGptNeox;
    case ir::operation::RoPE::RoPEMode::GPT_J:
      return nnfw::cker::RoPEMode::kGptJ;
    default:
      // Every path leaves the switch via return or throw, so no break is needed here.
      throw std::runtime_error("Wrong rope mode.");
  }
}

} // namespace ops
} // namespace cpu
} // namespace backend
Expand Down
3 changes: 3 additions & 0 deletions runtime/onert/backend/cpu/ops/OperationUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include <ir/DataType.h>
#include <ir/Operand.h>
#include <ir/Padding.h>
#include <ir/operation/RoPE.h>
#include <util/CalculateActivationRange.h>

#include <cker/Shape.h>
Expand Down Expand Up @@ -169,6 +170,8 @@ nnfw::cker::PaddingType getPaddingType(ir::PaddingType ir_padding_type);

std::vector<int32_t> getReducerAxes(const IPortableTensor *axes);

nnfw::cker::RoPEMode getRoPEMode(ir::operation::RoPE::RoPEMode rope_mode);

template <typename T> const T *getBuffer(const IPortableTensor *tensor)
{
return reinterpret_cast<const T *>(tensor->buffer());
Expand Down
Loading