Skip to content

Add separate writer threads config for bucketed table write to avoid max open file exceeded error #10423

Add separate writer threads config for bucketed table write to avoid max open file exceeded error

Add separate writer threads config for bucketed table write to avoid max open file exceeded error #10423

Workflow file for this run

# Copyright (c) Facebook, Inc. and its affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
name: Linux Build
on:
push:
branches:
- "main"
paths:
- "velox/**"
- "!velox/docs/**"
- "CMakeLists.txt"
- "CMake/**"
- "third_party/**"
- "scripts/setup-ubuntu.sh"
- "scripts/setup-helper-functions.sh"
- ".github/workflows/linux-build.yml"
pull_request:
paths:
- "velox/**"
- "!velox/docs/**"
- "CMakeLists.txt"
- "CMake/**"
- "third_party/**"
- "scripts/setup-ubuntu.sh"
- "scripts/setup-helper-functions.sh"
- ".github/workflows/linux-build.yml"
permissions:
contents: read
concurrency:
group: ${{ github.workflow }}-${{ github.repository }}-${{ github.head_ref || github.sha }}
cancel-in-progress: true
jobs:
adapters:
name: Linux release with adapters
# prevent errors when forks ff their main branch
if: ${{ github.repository == 'facebookincubator/velox' }}
runs-on: 8-core-ubuntu
container: ghcr.io/facebookincubator/velox-dev:adapters
defaults:
run:
shell: bash
env:
CCACHE_DIR: "${{ github.workspace }}/.ccache"
VELOX_DEPENDENCY_SOURCE: SYSTEM
simdjson_SOURCE: BUNDLED
xsimd_SOURCE: BUNDLED
CUDA_VERSION: "12.4"
steps:
- uses: actions/checkout@v4
- name: Fix git permissions
# Usually actions/checkout does this but as we run in a container
# it doesn't work
run: git config --global --add safe.directory ${GITHUB_WORKSPACE}
- name: Install Dependencies
run: |
# Allows to install arbitrary cuda-version whithout needing to update
# docker container before. It simplifies testing new/different versions
if ! yum list installed cuda-nvcc-$(echo ${CUDA_VERSION} | tr '.' '-') 1>/dev/null; then
source scripts/setup-centos9.sh
install_cuda ${CUDA_VERSION}
fi
- name: Install Minio
run: |
MINIO_BINARY="minio-2022-05-26"
if [ ! -f /usr/local/bin/${MINIO_BINARY} ]; then
wget https://dl.min.io/server/minio/release/linux-amd64/archive/minio.RELEASE.2022-05-26T05-48-41Z -O ${MINIO_BINARY}
chmod +x ./${MINIO_BINARY}
mv ./${MINIO_BINARY} /usr/local/bin/
fi
- uses: assignUser/stash/restore@v1
with:
path: '${{ env.CCACHE_DIR }}'
key: ccache-linux-adapters
- name: "Zero Ccache Statistics"
run: |
ccache -sz
- name: Make Release Build
env:
MAKEFLAGS: 'NUM_THREADS=8 MAX_HIGH_MEM_JOBS=4 MAX_LINK_JOBS=4'
CUDA_ARCHITECTURES: 70
CUDA_COMPILER: /usr/local/cuda-${CUDA_VERSION}/bin/nvcc
# Set compiler to GCC 12
CUDA_FLAGS: "-ccbin /opt/rh/gcc-toolset-12/root/usr/bin"
run: |
EXTRA_CMAKE_FLAGS=(
"-DVELOX_ENABLE_BENCHMARKS=ON"
"-DVELOX_ENABLE_ARROW=ON"
"-DVELOX_ENABLE_PARQUET=ON"
"-DVELOX_ENABLE_HDFS=ON"
"-DVELOX_ENABLE_S3=ON"
"-DVELOX_ENABLE_GCS=ON"
"-DVELOX_ENABLE_ABFS=ON"
"-DVELOX_ENABLE_REMOTE_FUNCTIONS=ON"
"-DVELOX_ENABLE_GPU=ON"
"-DVELOX_MONO_LIBRARY=ON"
)
make release EXTRA_CMAKE_FLAGS="${EXTRA_CMAKE_FLAGS[*]}"
- name: Ccache after
run: ccache -s
- uses: assignUser/stash/save@v1
with:
path: '${{ env.CCACHE_DIR }}'
key: ccache-linux-adapters
- name: Run Tests
# Some of the adapters dependencies are in the 'adapters' conda env
shell: mamba run --no-capture-output -n adapters /usr/bin/bash -e {0}
env:
LIBHDFS3_CONF: "${{ github.workspace }}/scripts/hdfs-client.xml"
working-directory: _build/release
run: |
ctest -j 8 --output-on-failure --no-tests=error
ubuntu-debug:
runs-on: 8-core-ubuntu
# prevent errors when forks ff their main branch
if: ${{ github.repository == 'facebookincubator/velox' }}
name: "Ubuntu debug with resolve_dependency"
env:
CCACHE_DIR: "${{ github.workspace }}/.ccache"
defaults:
run:
shell: bash
working-directory: velox
steps:
- name: Get Ccache Stash
uses: assignUser/stash/restore@v1
with:
path: '${{ env.CCACHE_DIR }}'
key: ccache-ubuntu-debug-default
- name: Ensure Stash Dirs Exists
working-directory: ${{ github.workspace }}
run: |
mkdir -p '${{ env.CCACHE_DIR }}'
- uses: actions/checkout@v4
with:
path: velox
- name: Install Dependencies
run: |
source scripts/setup-ubuntu.sh && install_apt_deps
- name: Clear CCache Statistics
run: |
ccache -sz
- name: Make Debug Build
env:
VELOX_DEPENDENCY_SOURCE: BUNDLED
MAKEFLAGS: "NUM_THREADS=8 MAX_HIGH_MEM_JOBS=4 MAX_LINK_JOBS=3"
EXTRA_CMAKE_FLAGS: "-DVELOX_ENABLE_ARROW=ON -DVELOX_ENABLE_PARQUET=ON"
run: |
make debug
- name: CCache after
run: |
ccache -vs
- uses: assignUser/stash/save@v1
with:
path: '${{ env.CCACHE_DIR }}'
key: ccache-ubuntu-debug-default
- name: Run Tests
run: |
cd _build/debug && ctest -j 8 --output-on-failure --no-tests=error