Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SYCL. Add some basic functional for sycl implementation of partition builder. #10011

Merged
merged 3 commits into from
Jan 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
101 changes: 101 additions & 0 deletions plugin/sycl/common/partition_builder.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
/*!
* Copyright 2017-2024 XGBoost contributors
*/
#ifndef PLUGIN_SYCL_COMMON_PARTITION_BUILDER_H_
#define PLUGIN_SYCL_COMMON_PARTITION_BUILDER_H_

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wtautological-constant-compare"
#pragma GCC diagnostic ignored "-W#pragma-messages"
#include <xgboost/data.h>
#pragma GCC diagnostic pop
#include <xgboost/tree_model.h>

#include <algorithm>
#include <vector>
#include <utility>

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wtautological-constant-compare"
#include "../../../src/common/column_matrix.h"
#pragma GCC diagnostic pop

#include "../data.h"

#include <CL/sycl.hpp>

namespace xgboost {
namespace sycl {
namespace common {

// The builder is required for samples partition to left and rights children for set of nodes
class PartitionBuilder {
public:
template<typename Func>
void Init(::sycl::queue* qu, size_t n_nodes, Func funcNTaks) {
qu_ = qu;
nodes_offsets_.resize(n_nodes+1);
result_rows_.resize(2 * n_nodes);
n_nodes_ = n_nodes;


nodes_offsets_[0] = 0;
for (size_t i = 1; i < n_nodes+1; ++i) {
nodes_offsets_[i] = nodes_offsets_[i-1] + funcNTaks(i-1);
}

if (data_.Size() < nodes_offsets_[n_nodes]) {
data_.Resize(qu, nodes_offsets_[n_nodes]);
}
}

size_t GetNLeftElems(int nid) const {
return result_rows_[2 * nid];
}


size_t GetNRightElems(int nid) const {
return result_rows_[2 * nid + 1];
}

// For test purposes only
void SetNLeftElems(int nid, size_t val) {
result_rows_[2 * nid] = val;
}

// For test purposes only
void SetNRightElems(int nid, size_t val) {
result_rows_[2 * nid + 1] = val;
}

xgboost::common::Span<size_t> GetData(int nid) {
return { data_.Data() + nodes_offsets_[nid], nodes_offsets_[nid + 1] - nodes_offsets_[nid] };
}

void MergeToArray(size_t nid,
size_t* data_result,
::sycl::event event) {
size_t n_nodes_total = GetNLeftElems(nid) + GetNRightElems(nid);
if (n_nodes_total > 0) {
const size_t* data = data_.Data() + nodes_offsets_[nid];
qu_->memcpy(data_result, data, sizeof(size_t) * n_nodes_total, event);
}
}

protected:
std::vector<size_t> nodes_offsets_;
std::vector<size_t> result_rows_;
size_t n_nodes_;

USMVector<size_t, MemoryType::on_device> parts_size_;
USMVector<size_t, MemoryType::on_device> data_;

::sycl::queue* qu_;
};

} // namespace common
} // namespace sycl
} // namespace xgboost


#endif // PLUGIN_SYCL_COMMON_PARTITION_BUILDER_H_
34 changes: 32 additions & 2 deletions tests/cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,38 @@ if(USE_CUDA)
endif()

file(GLOB_RECURSE SYCL_TEST_SOURCES "plugin/test_sycl_*.cc")
if(NOT PLUGIN_SYCL)
list(REMOVE_ITEM TEST_SOURCES ${SYCL_TEST_SOURCES})
list(REMOVE_ITEM TEST_SOURCES ${SYCL_TEST_SOURCES})

if(PLUGIN_SYCL)
set(CMAKE_CXX_COMPILER "icpx")
file(GLOB_RECURSE SYCL_TEST_SOURCES "plugin/test_sycl_*.cc")
add_library(plugin_sycl_test OBJECT ${SYCL_TEST_SOURCES})

target_include_directories(plugin_sycl_test
PRIVATE
${gtest_SOURCE_DIR}/include
${xgboost_SOURCE_DIR}/include
${xgboost_SOURCE_DIR}/dmlc-core/include
${xgboost_SOURCE_DIR}/rabit/include)

target_compile_definitions(plugin_sycl_test PUBLIC -DXGBOOST_USE_SYCL=1)

target_link_libraries(plugin_sycl_test PUBLIC -fsycl)

set_target_properties(plugin_sycl_test PROPERTIES
COMPILE_FLAGS -fsycl
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should we also have "-fsycl-device-code-split=per_kernel" as options?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should we also have "-fsycl-device-code-split=per_kernel" as options?

I didn't observe any perf differences, at least for the last compiler.

CXX_STANDARD 17
CXX_STANDARD_REQUIRED ON
POSITION_INDEPENDENT_CODE ON)
if(USE_OPENMP)
find_package(OpenMP REQUIRED)
set_target_properties(plugin_sycl_test PROPERTIES
COMPILE_FLAGS "-fsycl -qopenmp")
endif()
# Get compilation and link flags of plugin_sycl and propagate to testxgboost
target_link_libraries(testxgboost PUBLIC plugin_sycl_test)
# Add all objects of plugin_sycl to testxgboost
target_sources(testxgboost INTERFACE $<TARGET_OBJECTS:plugin_sycl_test>)
endif()

if(PLUGIN_FEDERATED)
Expand Down
4 changes: 4 additions & 0 deletions tests/cpp/plugin/test_sycl_multiclass_obj.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,11 @@
* Copyright 2018-2023 XGBoost contributors
*/
#include <gtest/gtest.h>
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wtautological-constant-compare"
#pragma GCC diagnostic ignored "-W#pragma-messages"
#include <xgboost/context.h>
#pragma GCC diagnostic pop

#include "../objective/test_multiclass_obj.h"

Expand Down
91 changes: 91 additions & 0 deletions tests/cpp/plugin/test_sycl_partition_builder.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
/**
* Copyright 2020-2024 by XGBoost contributors
*/
#include <gtest/gtest.h>

#include <string>
#include <utility>
#include <vector>

#include "../../../plugin/sycl/common/partition_builder.h"
#include "../../../plugin/sycl/device_manager.h"
#include "../helpers.h"

namespace xgboost::sycl::common {

TEST(SyclPartitionBuilder, BasicTest) {
constexpr size_t kNodes = 5;
// Number of rows for each node
std::vector<size_t> rows = { 5, 5, 10, 1, 2 };

DeviceManager device_manager;
auto qu = device_manager.GetQueue(DeviceOrd::SyclDefault());
PartitionBuilder builder;
builder.Init(&qu, kNodes, [&](size_t i) {
return rows[i];
});

// We test here only the basics, thus syntetic partition builder is adopted
// Number of rows to go left for each node.
std::vector<size_t> rows_for_left_node = { 2, 0, 7, 1, 2 };

size_t first_row_id = 0;
for(size_t nid = 0; nid < kNodes; ++nid) {
size_t n_rows_nodes = rows[nid];

auto rid_buff = builder.GetData(nid);
size_t rid_buff_size = rid_buff.size();
auto* rid_buff_ptr = rid_buff.data();

size_t n_left = rows_for_left_node[nid];
size_t n_right = rows[nid] - n_left;

qu.submit([&](::sycl::handler& cgh) {
cgh.parallel_for<>(::sycl::range<1>(n_left), [=](::sycl::id<1> pid) {
int row_id = first_row_id + pid[0];
rid_buff_ptr[pid[0]] = row_id;
});
});
qu.wait();
first_row_id += n_left;

// We are storing indexes for the right side in the tail of the array to save some memory
qu.submit([&](::sycl::handler& cgh) {
cgh.parallel_for<>(::sycl::range<1>(n_right), [=](::sycl::id<1> pid) {
int row_id = first_row_id + pid[0];
rid_buff_ptr[rid_buff_size - pid[0] - 1] = row_id;
});
});
qu.wait();
first_row_id += n_right;

builder.SetNLeftElems(nid, n_left);
builder.SetNRightElems(nid, n_right);
}

::sycl::event event;
std::vector<size_t> v(*std::max_element(rows.begin(), rows.end()));
size_t row_id = 0;
for(size_t nid = 0; nid < kNodes; ++nid) {
builder.MergeToArray(nid, v.data(), event);
qu.wait();

// Check that row_id for left side are correct
for(size_t j = 0; j < rows_for_left_node[nid]; ++j) {
ASSERT_EQ(v[j], row_id++);
}

// Check that row_id for right side are correct
for(size_t j = 0; j < rows[nid] - rows_for_left_node[nid]; ++j) {
ASSERT_EQ(v[rows[nid] - j - 1], row_id++);
}

// Check that number of left/right rows are correct
size_t n_left = builder.GetNLeftElems(nid);
size_t n_right = builder.GetNRightElems(nid);
ASSERT_EQ(n_left, rows_for_left_node[nid]);
ASSERT_EQ(n_right, (rows[nid] - rows_for_left_node[nid]));
}
}

} // namespace xgboost::common
10 changes: 9 additions & 1 deletion tests/cpp/plugin/test_sycl_predictor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,19 @@
* Copyright 2017-2023 XGBoost contributors
*/
#include <gtest/gtest.h>
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wtautological-constant-compare"
#pragma GCC diagnostic ignored "-W#pragma-messages"
#include <xgboost/predictor.h>
#pragma GCC diagnostic pop

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wtautological-constant-compare"
#include "../../../src/data/adapter.h"
#include "../../../src/data/proxy_dmatrix.h"
#include "../../../src/gbm/gbtree.h"
#pragma GCC diagnostic pop

#include "../../../src/data/proxy_dmatrix.h"
#include "../../../src/gbm/gbtree_model.h"
#include "../filesystem.h" // dmlc::TemporaryDirectory
#include "../helpers.h"
Expand Down
4 changes: 4 additions & 0 deletions tests/cpp/plugin/test_sycl_regression_obj.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,11 @@
* Copyright 2017-2019 XGBoost contributors
*/
#include <gtest/gtest.h>
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wtautological-constant-compare"
#pragma GCC diagnostic ignored "-W#pragma-messages"
#include <xgboost/objective.h>
#pragma GCC diagnostic pop
#include <xgboost/context.h>

#include "../helpers.h"
Expand Down
Loading