Skip to content

Commit

Permalink
[CI] Update machine images (#9932)
Browse files Browse the repository at this point in the history
  • Loading branch information
hcho3 authored Dec 29, 2023
1 parent a7226c0 commit ef8bdaa
Show file tree
Hide file tree
Showing 26 changed files with 82 additions and 80 deletions.
5 changes: 4 additions & 1 deletion cmake/modules/FindLibR.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,10 @@ function(create_rlib_for_msvc)

execute_process(COMMAND ${DLLTOOL_EXE}
"--input-def" "${CMAKE_CURRENT_BINARY_DIR}/R.def"
"--output-lib" "${CMAKE_CURRENT_BINARY_DIR}/R.lib")
"--output-lib" "${CMAKE_CURRENT_BINARY_DIR}/R.lib"
"--temp-prefix" "Rlibtemp"
COMMAND_ECHO STDOUT
COMMAND_ERROR_IS_FATAL ANY)
endfunction()


Expand Down
7 changes: 3 additions & 4 deletions doc/contrib/ci.rst
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ For your convenience, we provide the wrapper script ``tests/ci_build/ci_build.sh

.. code-block:: bash
tests/ci_build/ci_build.sh <CONTAINER_TYPE> <DOCKER_BINARY> --build-arg <BUILD_ARG> \
tests/ci_build/ci_build.sh <CONTAINER_TYPE> --use-gpus --build-arg <BUILD_ARG> \
<COMMAND> ...
where:
Expand All @@ -68,8 +68,7 @@ where:
container definition (Dockerfile) located at ``tests/ci_build/Dockerfile.<CONTAINER_TYPE>``.
For example, setting the container type to ``gpu`` will cause the script to load the Dockerfile
``tests/ci_build/Dockerfile.gpu``.
* ``<DOCKER_BINARY>`` must be either ``docker`` or ``nvidia-docker``. Choose ``nvidia-docker``
as long as you need to run any GPU code.
* Specify ``--use-gpus`` to run any GPU code. This flag grants the container access to all NVIDIA GPUs on the host machine. Omit the flag if access to GPUs is not necessary.
* ``<BUILD_ARG>`` is a build argument to be passed to Docker. It must be of the form ``VAR=VALUE``.
Example: ``--build-arg CUDA_VERSION_ARG=11.0``. You can pass multiple ``--build-arg`` options.
* ``<COMMAND>`` is the command to run inside the Docker container. This can be more than one argument.
Expand All @@ -83,7 +82,7 @@ arguments to Docker. For example:
# Allocate extra space in /dev/shm to enable NCCL
export CI_DOCKER_EXTRA_PARAMS_INIT='--shm-size=4g'
# Run multi-GPU test suite
tests/ci_build/ci_build.sh gpu nvidia-docker --build-arg CUDA_VERSION_ARG=11.0 \
tests/ci_build/ci_build.sh gpu --use-gpus --build-arg CUDA_VERSION_ARG=11.0 \
tests/ci_build/test_python.sh mgpu
To pass multiple extra arguments:
Expand Down
3 changes: 2 additions & 1 deletion tests/buildkite/build-containers.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ case "${container}" in

gpu)
BUILD_ARGS="$BUILD_ARGS --build-arg CUDA_VERSION_ARG=$CUDA_VERSION"
BUILD_ARGS="$BUILD_ARGS --build-arg NCCL_VERSION_ARG=$NCCL_VERSION"
BUILD_ARGS="$BUILD_ARGS --build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION"
;;

Expand All @@ -43,4 +44,4 @@ case "${container}" in
esac

# Run a no-op command. This will simply build the container and push it to the private registry
tests/ci_build/ci_build.sh ${container} docker ${BUILD_ARGS} bash
tests/ci_build/ci_build.sh ${container} ${BUILD_ARGS} bash
2 changes: 1 addition & 1 deletion tests/buildkite/build-cpu-arm64.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ echo "--- Build CPU code targeting ARM64"

source tests/buildkite/conftest.sh

command_wrapper="tests/ci_build/ci_build.sh aarch64 docker"
command_wrapper="tests/ci_build/ci_build.sh aarch64"

echo "--- Build libxgboost from the source"
$command_wrapper tests/ci_build/build_via_cmake.sh --conda-env=aarch64_test \
Expand Down
2 changes: 1 addition & 1 deletion tests/buildkite/build-cpu.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ echo "--- Build CPU code"

source tests/buildkite/conftest.sh

command_wrapper="tests/ci_build/ci_build.sh cpu docker"
command_wrapper="tests/ci_build/ci_build.sh cpu"

$command_wrapper rm -fv dmlc-core/include/dmlc/build_config_default.h
# This step is not necessary, but here we include it, to ensure that
Expand Down
6 changes: 3 additions & 3 deletions tests/buildkite/build-cuda-with-rmm.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ else
arch_flag=""
fi

command_wrapper="tests/ci_build/ci_build.sh gpu_build_centos7 docker --build-arg "`
command_wrapper="tests/ci_build/ci_build.sh gpu_build_centos7 --build-arg "`
`"CUDA_VERSION_ARG=$CUDA_VERSION --build-arg "`
`"NCCL_VERSION_ARG=$NCCL_VERSION --build-arg "`
`"RAPIDS_VERSION_ARG=$RAPIDS_VERSION"
Expand All @@ -40,13 +40,13 @@ $command_wrapper python tests/ci_build/rename_whl.py python-package/dist/*.whl \
${BUILDKITE_COMMIT} ${WHEEL_TAG}

echo "--- Audit binary wheel to ensure it's compliant with manylinux2014 standard"
tests/ci_build/ci_build.sh auditwheel_x86_64 docker auditwheel repair \
tests/ci_build/ci_build.sh auditwheel_x86_64 auditwheel repair \
--plat ${WHEEL_TAG} python-package/dist/*.whl
$command_wrapper python tests/ci_build/rename_whl.py wheelhouse/*.whl \
${BUILDKITE_COMMIT} ${WHEEL_TAG}
mv -v wheelhouse/*.whl python-package/dist/
# Make sure that libgomp.so is vendored in the wheel
tests/ci_build/ci_build.sh auditwheel_x86_64 docker bash -c \
tests/ci_build/ci_build.sh auditwheel_x86_64 bash -c \
"unzip -l python-package/dist/*.whl | grep libgomp || exit -1"

echo "--- Upload Python wheel"
Expand Down
6 changes: 3 additions & 3 deletions tests/buildkite/build-cuda.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ else
arch_flag=""
fi

command_wrapper="tests/ci_build/ci_build.sh gpu_build_centos7 docker --build-arg "`
command_wrapper="tests/ci_build/ci_build.sh gpu_build_centos7 --build-arg "`
`"CUDA_VERSION_ARG=$CUDA_VERSION --build-arg "`
`"NCCL_VERSION_ARG=$NCCL_VERSION --build-arg "`
`"RAPIDS_VERSION_ARG=$RAPIDS_VERSION"
Expand All @@ -39,13 +39,13 @@ $command_wrapper python tests/ci_build/rename_whl.py python-package/dist/*.whl \
${BUILDKITE_COMMIT} ${WHEEL_TAG}

echo "--- Audit binary wheel to ensure it's compliant with manylinux2014 standard"
tests/ci_build/ci_build.sh auditwheel_x86_64 docker auditwheel repair \
tests/ci_build/ci_build.sh auditwheel_x86_64 auditwheel repair \
--plat ${WHEEL_TAG} python-package/dist/*.whl
$command_wrapper python tests/ci_build/rename_whl.py wheelhouse/*.whl \
${BUILDKITE_COMMIT} ${WHEEL_TAG}
mv -v wheelhouse/*.whl python-package/dist/
# Make sure that libgomp.so is vendored in the wheel
tests/ci_build/ci_build.sh auditwheel_x86_64 docker bash -c \
tests/ci_build/ci_build.sh auditwheel_x86_64 bash -c \
"unzip -l python-package/dist/*.whl | grep libgomp || exit -1"

echo "--- Upload Python wheel"
Expand Down
2 changes: 1 addition & 1 deletion tests/buildkite/build-gpu-rpkg.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ source tests/buildkite/conftest.sh

echo "--- Build XGBoost R package with CUDA"

tests/ci_build/ci_build.sh gpu_build_r_centos7 docker \
tests/ci_build/ci_build.sh gpu_build_r_centos7 \
--build-arg CUDA_VERSION_ARG=${CUDA_VERSION} \
--build-arg R_VERSION_ARG=${R_VERSION} \
tests/ci_build/build_r_pkg_with_cuda.sh \
Expand Down
2 changes: 1 addition & 1 deletion tests/buildkite/build-jvm-doc.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ set -euo pipefail
source tests/buildkite/conftest.sh

echo "--- Build JVM packages doc"
tests/ci_build/ci_build.sh jvm docker tests/ci_build/build_jvm_doc.sh ${BRANCH_NAME}
tests/ci_build/ci_build.sh jvm tests/ci_build/build_jvm_doc.sh ${BRANCH_NAME}
if [[ ($is_pull_request == 0) && ($is_release_branch == 1) ]]
then
echo "--- Upload JVM packages doc"
Expand Down
2 changes: 1 addition & 1 deletion tests/buildkite/build-jvm-packages-gpu.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ else
arch_flag=""
fi

tests/ci_build/ci_build.sh jvm_gpu_build nvidia-docker \
tests/ci_build/ci_build.sh jvm_gpu_build --use-gpus \
--build-arg CUDA_VERSION_ARG=${CUDA_VERSION} \
--build-arg NCCL_VERSION_ARG=${NCCL_VERSION} \
tests/ci_build/build_jvm_packages.sh \
Expand Down
4 changes: 2 additions & 2 deletions tests/buildkite/build-jvm-packages.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,13 @@ set -euo pipefail
source tests/buildkite/conftest.sh

echo "--- Build XGBoost JVM packages scala 2.12"
tests/ci_build/ci_build.sh jvm docker tests/ci_build/build_jvm_packages.sh \
tests/ci_build/ci_build.sh jvm tests/ci_build/build_jvm_packages.sh \
${SPARK_VERSION}


echo "--- Build XGBoost JVM packages scala 2.13"

tests/ci_build/ci_build.sh jvm docker tests/ci_build/build_jvm_packages.sh \
tests/ci_build/ci_build.sh jvm tests/ci_build/build_jvm_packages.sh \
${SPARK_VERSION} "" "" "true"

echo "--- Stash XGBoost4J JARs"
Expand Down
2 changes: 1 addition & 1 deletion tests/buildkite/deploy-jvm-packages.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ source tests/buildkite/conftest.sh
if [[ ($is_pull_request == 0) && ($is_release_branch == 1) ]]
then
echo "--- Deploy JVM packages to xgboost-maven-repo S3 repo"
tests/ci_build/ci_build.sh jvm_gpu_build docker \
tests/ci_build/ci_build.sh jvm_gpu_build \
--build-arg CUDA_VERSION_ARG=${CUDA_VERSION} \
--build-arg NCCL_VERSION_ARG=${NCCL_VERSION} \
tests/ci_build/deploy_jvm_packages.sh ${SPARK_VERSION}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def format_params(args, *, stack_id, agent_iam_policy):
params["BuildkiteAgentToken"] = args.agent_token
params["VpcId"] = default_vpc.id
params["Subnets"] = ",".join(subnets)
params["ManagedPolicyARN"] = agent_iam_policy
params["ManagedPolicyARNs"] = agent_iam_policy
params.update(COMMON_STACK_PARAMS)
return [{"ParameterKey": k, "ParameterValue": v} for k, v in params.items()]

Expand Down
28 changes: 14 additions & 14 deletions tests/buildkite/infrastructure/aws-stack-creator/metadata.py
Original file line number Diff line number Diff line change
@@ -1,34 +1,34 @@
AMI_ID = {
# Managed by XGBoost team
"linux-amd64-gpu": {
"us-west-2": "ami-094271bed4788ddb5",
"us-west-2": "ami-08c3bc1dd5ec8bc5c",
},
"linux-amd64-mgpu": {
"us-west-2": "ami-094271bed4788ddb5",
"us-west-2": "ami-08c3bc1dd5ec8bc5c",
},
"windows-gpu": {
"us-west-2": "ami-0839681594a1d7627",
"us-west-2": "ami-03c7f2156f93b22a7",
},
"windows-cpu": {
"us-west-2": "ami-0839681594a1d7627",
"us-west-2": "ami-03c7f2156f93b22a7",
},
# Managed by BuildKite
# from https://s3.amazonaws.com/buildkite-aws-stack/latest/aws-stack.yml
"linux-amd64-cpu": {
"us-west-2": "ami-00f2127550cf03658",
"us-west-2": "ami-015e64acb52b3e595",
},
"pipeline-loader": {
"us-west-2": "ami-00f2127550cf03658",
"us-west-2": "ami-015e64acb52b3e595",
},
"linux-arm64-cpu": {
"us-west-2": "ami-0c5789068f4a2d1b5",
"us-west-2": "ami-0884e9c23a2fa98d0",
},
}

STACK_PARAMS = {
"linux-amd64-gpu": {
"InstanceOperatingSystem": "linux",
"InstanceType": "g4dn.xlarge",
"InstanceTypes": "g4dn.xlarge",
"AgentsPerInstance": "1",
"MinSize": "0",
"MaxSize": "8",
Expand All @@ -38,7 +38,7 @@
},
"linux-amd64-mgpu": {
"InstanceOperatingSystem": "linux",
"InstanceType": "g4dn.12xlarge",
"InstanceTypes": "g4dn.12xlarge",
"AgentsPerInstance": "1",
"MinSize": "0",
"MaxSize": "1",
Expand All @@ -48,7 +48,7 @@
},
"windows-gpu": {
"InstanceOperatingSystem": "windows",
"InstanceType": "g4dn.2xlarge",
"InstanceTypes": "g4dn.2xlarge",
"AgentsPerInstance": "1",
"MinSize": "0",
"MaxSize": "2",
Expand All @@ -58,7 +58,7 @@
},
"windows-cpu": {
"InstanceOperatingSystem": "windows",
"InstanceType": "c5a.2xlarge",
"InstanceTypes": "c5a.2xlarge",
"AgentsPerInstance": "1",
"MinSize": "0",
"MaxSize": "2",
Expand All @@ -68,7 +68,7 @@
},
"linux-amd64-cpu": {
"InstanceOperatingSystem": "linux",
"InstanceType": "c5a.4xlarge",
"InstanceTypes": "c5a.4xlarge",
"AgentsPerInstance": "1",
"MinSize": "0",
"MaxSize": "16",
Expand All @@ -78,7 +78,7 @@
},
"pipeline-loader": {
"InstanceOperatingSystem": "linux",
"InstanceType": "t3a.micro",
"InstanceTypes": "t3a.micro",
"AgentsPerInstance": "1",
"MinSize": "2",
"MaxSize": "2",
Expand All @@ -88,7 +88,7 @@
},
"linux-arm64-cpu": {
"InstanceOperatingSystem": "linux",
"InstanceType": "c6g.4xlarge",
"InstanceTypes": "c6g.4xlarge",
"AgentsPerInstance": "1",
"MinSize": "0",
"MaxSize": "8",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,13 @@ phases:
- |
yum groupinstall -y "Development tools"
yum install -y kernel-devel-$(uname -r)
dnf install -y kernel-modules-extra
aws s3 cp --recursive s3://ec2-linux-nvidia-drivers/latest/ .
chmod +x NVIDIA-Linux-x86_64*.run
CC=/usr/bin/gcc10-cc ./NVIDIA-Linux-x86_64*.run --silent
./NVIDIA-Linux-x86_64*.run --silent
amazon-linux-extras install docker
systemctl --now enable docker
distribution=$(. /etc/os-release;echo $ID$VERSION_ID) \
&& curl -s -L https://nvidia.github.io/libnvidia-container/$distribution/libnvidia-container.repo \
| sudo tee /etc/yum.repos.d/nvidia-container-toolkit.repo
curl -s -L https://nvidia.github.io/libnvidia-container/stable/rpm/nvidia-container-toolkit.repo | tee /etc/yum.repos.d/nvidia-container-toolkit.repo
yum install -y nvidia-container-toolkit
yum clean expire-cache
yum install -y nvidia-docker2
nvidia-ctk runtime configure --runtime=docker
systemctl restart docker
Original file line number Diff line number Diff line change
Expand Up @@ -15,25 +15,24 @@ phases:
choco --version
choco feature enable -n=allowGlobalConfirmation
# CMake 3.25
Write-Host '>>> Installing CMake 3.25...'
choco install cmake --version 3.25.2 --installargs "ADD_CMAKE_TO_PATH=System"
# CMake 3.27
Write-Host '>>> Installing CMake 3.27...'
choco install cmake --version 3.27.9 --installargs "ADD_CMAKE_TO_PATH=System"
if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
# Notepad++
Write-Host '>>> Installing Notepad++...'
choco install notepadplusplus
if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
# Miniconda
Write-Host '>>> Installing Miniconda...'
choco install miniconda3 /RegisterPython:1 /D:C:\tools\miniconda3
C:\tools\miniconda3\Scripts\conda.exe init --user --system
# Mambaforge
Write-Host '>>> Installing Mambaforge...'
choco install mambaforge /RegisterPython:1 /D:C:\tools\mambaforge
C:\tools\mambaforge\Scripts\conda.exe init --user --system
if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
. "C:\Windows\System32\WindowsPowerShell\v1.0\profile.ps1"
if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
conda config --set auto_activate_base false
conda config --prepend channels conda-forge
# Install Java 11
Write-Host '>>> Installing Java 11...'
Expand All @@ -59,15 +58,9 @@ phases:
choco install cuda --version=11.8.0.52206
if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
# Install Python packages
Write-Host '>>> Installing Python packages...'
conda activate
conda install -y mamba
if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
# Install R
Write-Host '>>> Installing R...'
choco install r.project --version=3.6.3
choco install r.project --version=4.3.2
if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
choco install rtools --version=3.5.0.4
choco install rtools --version=4.3.5550
if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
2 changes: 1 addition & 1 deletion tests/buildkite/run-clang-tidy.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,6 @@ echo "--- Run clang-tidy"

source tests/buildkite/conftest.sh

tests/ci_build/ci_build.sh clang_tidy docker \
tests/ci_build/ci_build.sh clang_tidy \
--build-arg CUDA_VERSION_ARG=${CUDA_VERSION} \
python3 tests/ci_build/tidy.py --cuda-archs 75
4 changes: 2 additions & 2 deletions tests/buildkite/test-cpp-gpu.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ source tests/buildkite/conftest.sh
echo "--- Run Google Tests with CUDA, using a GPU"
buildkite-agent artifact download "build/testxgboost" . --step build-cuda
chmod +x build/testxgboost
tests/ci_build/ci_build.sh gpu nvidia-docker \
tests/ci_build/ci_build.sh gpu --use-gpus \
--build-arg CUDA_VERSION_ARG=$CUDA_VERSION \
--build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION \
--build-arg NCCL_VERSION_ARG=$NCCL_VERSION \
Expand All @@ -17,7 +17,7 @@ echo "--- Run Google Tests with CUDA, using a GPU, RMM enabled"
rm -rfv build/
buildkite-agent artifact download "build/testxgboost" . --step build-cuda-with-rmm
chmod +x build/testxgboost
tests/ci_build/ci_build.sh gpu nvidia-docker \
tests/ci_build/ci_build.sh gpu --use-gpus \
--build-arg CUDA_VERSION_ARG=$CUDA_VERSION \
--build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION \
--build-arg NCCL_VERSION_ARG=$NCCL_VERSION \
Expand Down
2 changes: 1 addition & 1 deletion tests/buildkite/test-cpp-mgpu.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ export CI_DOCKER_EXTRA_PARAMS_INIT='--shm-size=4g'
echo "--- Run Google Tests with CUDA, using multiple GPUs"
buildkite-agent artifact download "build/testxgboost" . --step build-cuda
chmod +x build/testxgboost
tests/ci_build/ci_build.sh gpu nvidia-docker \
tests/ci_build/ci_build.sh gpu --use-gpus \
--build-arg CUDA_VERSION_ARG=$CUDA_VERSION \
--build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION \
--build-arg NCCL_VERSION_ARG=$NCCL_VERSION \
Expand Down
2 changes: 1 addition & 1 deletion tests/buildkite/test-integration-jvm-packages.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,5 @@ buildkite-agent artifact download "jvm-packages/xgboost4j/target/*.jar" . --step
buildkite-agent artifact download "jvm-packages/xgboost4j-spark/target/*.jar" . --step build-jvm-packages
buildkite-agent artifact download "jvm-packages/xgboost4j-example/target/*.jar" . --step build-jvm-packages
export CI_DOCKER_EXTRA_PARAMS_INIT='-e RUN_INTEGRATION_TEST=1'
tests/ci_build/ci_build.sh jvm_cross docker --build-arg JDK_VERSION=${JDK_VERSION} \
tests/ci_build/ci_build.sh jvm_cross --build-arg JDK_VERSION=${JDK_VERSION} \
--build-arg SPARK_VERSION=${SPARK_VERSION} tests/ci_build/test_jvm_cross.sh
2 changes: 1 addition & 1 deletion tests/buildkite/test-python-cpu-arm64.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,4 @@ echo "--- Test Python CPU ARM64"
buildkite-agent artifact download "python-package/dist/*.whl" . --step build-cpu-arm64
buildkite-agent artifact download "xgboost" . --step build-cpu-arm64
chmod +x ./xgboost
tests/ci_build/ci_build.sh aarch64 docker tests/ci_build/test_python.sh cpu-arm64
tests/ci_build/ci_build.sh aarch64 tests/ci_build/test_python.sh cpu-arm64
Loading

0 comments on commit ef8bdaa

Please sign in to comment.