From db3ce35779e05cdae0833226bc2c225afc667a62 Mon Sep 17 00:00:00 2001 From: Christian Zentgraf Date: Tue, 2 Jul 2024 11:23:21 -0700 Subject: [PATCH] Changes to compile with Clang17 --- CMakeLists.txt | 4 + Makefile | 11 ++ scripts/setup-centos9.sh | 2 + velox/dwio/dwrf/test/ColumnWriterTest.cpp | 131 ++++++++++++---------- velox/type/DecimalUtil.h | 14 ++- 5 files changed, 99 insertions(+), 63 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index bb7c49907980..fd54dc1201f0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -206,6 +206,10 @@ if(${VELOX_FORCE_COLORED_OUTPUT}) elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "AppleClang") add_compile_options(-fcolor-diagnostics) + if(CMAKE_SYSTEM_NAME STREQUAL "Linux" AND "${CMAKE_CXX_COMPILER_VERSION}" + VERSION_GREATER_EQUAL 17) + set(CMAKE_EXE_LINKER_FLAGS "-latomic") + endif() endif() endif() diff --git a/Makefile b/Makefile index b604d560adf6..3529e3012674 100644 --- a/Makefile +++ b/Makefile @@ -208,3 +208,14 @@ python-build: python-test: $(MAKE) python-build extras="[tests]" DEBUG=1 ${PYTHON_EXECUTABLE} -m unittest -v + +clang-debug: #: Build with debugging symbols using Clang + $(MAKE) debug EXTRA_CMAKE_FLAGS=" ${EXTRA_CMAKE_FLAGS} \ + -DCMAKE_C_COMPILER=clang \ + -DCMAKE_CXX_COMPILER=clang++" + + +clang-release: #: Build the release version using Clang + $(MAKE) release EXTRA_CMAKE_FLAGS=" ${EXTRA_CMAKE_FLAGS} \ + -DCMAKE_C_COMPILER=clang \ + -DCMAKE_CXX_COMPILER=clang++" diff --git a/scripts/setup-centos9.sh b/scripts/setup-centos9.sh index 487dadba8af9..54c1a1e6e39b 100755 --- a/scripts/setup-centos9.sh +++ b/scripts/setup-centos9.sh @@ -52,6 +52,8 @@ function install_build_prerequisites { dnf update -y dnf_install ninja-build cmake ccache gcc-toolset-12 git wget which dnf_install autoconf automake python3-devel pip libtool + # For clang + dnf_install clang gcc-toolset-13-libatomic-devel pip install cmake==3.28.3 } diff --git a/velox/dwio/dwrf/test/ColumnWriterTest.cpp b/velox/dwio/dwrf/test/ColumnWriterTest.cpp index 4af07b83238e..4268765f516b 100644 --- a/velox/dwio/dwrf/test/ColumnWriterTest.cpp +++ b/velox/dwio/dwrf/test/ColumnWriterTest.cpp @@ -841,15 +841,14 @@ void mapToStruct( } } -template +template void testMapWriter( MemoryPool& pool, const std::vector& batches, bool useFlatMap, bool disableDictionaryEncoding, bool testEncoded, - bool printMaps = true, - bool useStruct = false) { + bool printMaps = true) { const auto rowType = CppToType>>::create(); const auto dataType = rowType->childAt(0); const auto rowTypeWithId = TypeWithId::create(rowType); @@ -866,7 +865,7 @@ void testMapWriter( std::vector structs; std::unordered_map> structReaderContext; if (useFlatMap) { - if (useStruct) { + if constexpr (useStruct) { structs = batches; pBatches = &structs; std::vector uniqueKeys; @@ -1158,26 +1157,25 @@ TEST_F(ColumnWriterTest, TestMapWriterNestedRow) { testMapWriterRowImpl>(); } -template +template void testMapWriter( MemoryPool& pool, const VectorPtr& batch, bool useFlatMap, - bool printMaps = true, - bool useStruct = false) { + bool printMaps = true) { std::vector batches{batch, batch}; - testMapWriter( - pool, batches, useFlatMap, true, false, printMaps, useStruct); + testMapWriter( + pool, batches, useFlatMap, true, false, printMaps); if (useFlatMap) { - testMapWriter( - pool, batches, useFlatMap, false, false, printMaps, useStruct); - testMapWriter( - pool, batches, useFlatMap, true, true, printMaps, useStruct); + testMapWriter( + pool, batches, useFlatMap, false, false, printMaps); + testMapWriter( + pool, batches, useFlatMap, true, true, printMaps); } } -template -void testMapWriterNumericKey(bool useFlatMap, bool useStruct = false) { +template +void testMapWriterNumericKey(bool useFlatMap) { using b = MapBuilder; auto pool = memory::memoryManager()->addLeafPool(); @@ -1191,7 +1189,14 @@ void testMapWriterNumericKey(bool useFlatMap, bool useStruct = false) { typename b::pair{ std::numeric_limits::min(), std::numeric_limits::min()}}}); - testMapWriter(*pool, batch, useFlatMap, true, useStruct); + testMapWriter(*pool, batch, useFlatMap, true); +} + +// Workaround to avoid issues with two template arguments when wrapped in gtest +// EXPECT macros. +template +void testMapWriterNumericKeyUseStruct(bool useFlatMap) { + testMapWriterNumericKey(useFlatMap); } TEST_F(ColumnWriterTest, TestMapWriterFloatKey) { @@ -1203,8 +1208,8 @@ TEST_F(ColumnWriterTest, TestMapWriterFloatKey) { EXPECT_THROW( { - testMapWriterNumericKey( - /* useFlatMap */ true, /* useStruct */ true); + testMapWriterNumericKeyUseStruct( + /* useFlatMap */ true); }, exception::LoggedException); } @@ -1212,7 +1217,7 @@ TEST_F(ColumnWriterTest, TestMapWriterFloatKey) { TEST_F(ColumnWriterTest, TestMapWriterInt64Key) { testMapWriterNumericKey(/* useFlatMap */ false); testMapWriterNumericKey(/* useFlatMap */ true); - testMapWriterNumericKey(/* useFlatMap */ true, /* useStruct */ true); + testMapWriterNumericKey(/* useFlatMap */ true); } TEST_F(ColumnWriterTest, TestMapWriterDuplicatedInt64Key) { @@ -1234,22 +1239,22 @@ TEST_F(ColumnWriterTest, TestMapWriterDuplicatedInt64Key) { TEST_F(ColumnWriterTest, TestMapWriterInt32Key) { testMapWriterNumericKey(/* useFlatMap */ false); testMapWriterNumericKey(/* useFlatMap */ true); - testMapWriterNumericKey( - /* useFlatMap */ true, /* useStruct */ true); + testMapWriterNumericKey( + /* useFlatMap */ true); } TEST_F(ColumnWriterTest, TestMapWriterInt16Key) { testMapWriterNumericKey(/* useFlatMap */ false); testMapWriterNumericKey(/* useFlatMap */ true); - testMapWriterNumericKey( - /* useFlatMap */ true, /* useStruct */ true); + testMapWriterNumericKey( + /* useFlatMap */ true); } TEST_F(ColumnWriterTest, TestMapWriterInt8Key) { testMapWriterNumericKey(/* useFlatMap */ false); testMapWriterNumericKey(/* useFlatMap */ true); - testMapWriterNumericKey( - /* useFlatMap */ true, /* useStruct */ true); + testMapWriterNumericKey( + /* useFlatMap */ true); } TEST_F(ColumnWriterTest, TestMapWriterStringKey) { @@ -1265,8 +1270,8 @@ TEST_F(ColumnWriterTest, TestMapWriterStringKey) { testMapWriter(*pool_, batch, /* useFlatMap */ false); testMapWriter(*pool_, batch, /* useFlatMap */ true); - testMapWriter( - *pool_, batch, /* useFlatMap */ true, true, /* useStruct */ true); + testMapWriter( + *pool_, batch, /* useFlatMap */ true, true); } TEST_F(ColumnWriterTest, TestMapWriterDuplicatedStringKey) { @@ -1362,8 +1367,8 @@ TEST_F(ColumnWriterTest, TestMapWriterBinaryKey) { testMapWriter(*pool_, batch, /* useFlatMap */ false); testMapWriter(*pool_, batch, /* useFlatMap */ true); - testMapWriter( - *pool_, batch, /* useFlatMap */ true, true, /* useStruct */ true); + testMapWriter( + *pool_, batch, /* useFlatMap */ true, true); } template @@ -4300,7 +4305,7 @@ TEST_F(ColumnWriterTest, ColumnIdInStream) { ASSERT_NE(streams.getStream(si, {}, false), nullptr); } -template +template struct DictColumnWriterTestCase { DictColumnWriterTestCase(size_t size, bool writeDirect, const TypePtr& type) : size_(size), writeDirect_(writeDirect), type_(type) {} @@ -4369,6 +4374,7 @@ struct DictColumnWriterTestCase { * Map) * @return */ + template VectorPtr createDictionaryBatch( size_t size, std::function valueAt, @@ -4378,10 +4384,10 @@ struct DictColumnWriterTestCase { VectorPtr dictionaryVector; VectorPtr flatVector; - if (complexRowType == nullptr) { - flatVector = makeFlatVector(size, valueAt, isNullAt); - } else { + if constexpr (isComplexRowType) { flatVector = makeComplexVectors(complexRowType, size, isNullAt); + } else { + flatVector = makeFlatVector(size, valueAt, isNullAt); } auto wrappedVector = BaseVector::wrapInDictionary( @@ -4400,14 +4406,12 @@ struct DictColumnWriterTestCase { WriterContext context{config, memory::memoryManager()->addRootPool()}; context.initBuffer(); - // complexVectorType will be nullptr if the vector is not complex. - bool isComplexType = std::dynamic_pointer_cast(type_) || - std::dynamic_pointer_cast(type_) || - std::dynamic_pointer_cast(type_); - - auto complexVectorType = isComplexType ? rowType : nullptr; - auto batch = - createDictionaryBatch(size_, valueAt, isNullAt, complexVectorType); + VectorPtr batch; + if constexpr (isComplexTypeT) { + batch = createDictionaryBatch(size_, valueAt, isNullAt, rowType); + } else { + batch = createDictionaryBatch(size_, valueAt, isNullAt); + } const auto writer = BaseColumnWriter::create(context, *typeWithId); @@ -4457,7 +4461,7 @@ std::function randomNulls(int32_t n) { [n](vector_size_t /*index*/) { return folly::Random::rand32() % n == 0; }; } -template +template void testDictionary( const TypePtr& type, std::function isNullAt = nullptr, @@ -4465,18 +4469,17 @@ void testDictionary( constexpr int32_t vectorSize = 200; // Tests for null/non null data with direct or dict write - DictColumnWriterTestCase(vectorSize, true, type) + DictColumnWriterTestCase(vectorSize, true, type) .runTest(valueAt, isNullAt); - DictColumnWriterTestCase(vectorSize, false, type) + DictColumnWriterTestCase(vectorSize, false, type) .runTest(valueAt, isNullAt); // Tests for non null data with direct or dict write - DictColumnWriterTestCase(vectorSize, true, type).runTest(valueAt, [](int) { - return false; - }); + DictColumnWriterTestCase(vectorSize, true, type) + .runTest(valueAt, [](int) { return false; }); - DictColumnWriterTestCase(vectorSize, false, type) + DictColumnWriterTestCase(vectorSize, false, type) .runTest(valueAt, [](int) { return false; }); } @@ -4520,27 +4523,28 @@ TEST_F(ColumnWriterTest, rowDictionary) { // randomly // Row tests - testDictionary>(ROW({INTEGER()}), randomNulls(5)); + testDictionary, true>(ROW({INTEGER()}), randomNulls(5)); - testDictionary>( + testDictionary, true>( ROW({VARCHAR(), INTEGER()}), randomNulls(11)); - testDictionary>>( + testDictionary>, true>( ROW({ROW({VARCHAR(), INTEGER()})}), randomNulls(11)); - testDictionary>( + testDictionary, true>( ROW({INTEGER(), DOUBLE(), VARCHAR()}), randomNulls(5)); - testDictionary>( + testDictionary, true>( ROW({INTEGER(), VARCHAR(), DOUBLE(), VARCHAR()}), randomNulls(5)); - testDictionary, StringView>>( + testDictionary, StringView>, true>( ROW({ARRAY(VARCHAR()), VARCHAR()}), randomNulls(11)); testDictionary< Row, Array>>, - Row>>( + Row>, + true>( ROW( {MAP(INTEGER(), DOUBLE()), ARRAY(MAP(INTEGER(), ROW({INTEGER(), DOUBLE()}))), @@ -4550,17 +4554,19 @@ TEST_F(ColumnWriterTest, rowDictionary) { TEST_F(ColumnWriterTest, arrayDictionary) { // Array tests - testDictionary>(ARRAY(REAL()), randomNulls(7)); + testDictionary, true>(ARRAY(REAL()), randomNulls(7)); testDictionary< - Row, Row>>>>( + Row, Row>>>, + true>( ROW( {ARRAY(INTEGER()), ROW({VARCHAR(), ARRAY(MAP(VARCHAR(), VARCHAR()))})}), randomNulls(11)); testDictionary< - Array>>>>>>( + Array>>>>>, + true>( ARRAY(MAP( INTEGER(), ARRAY(MAP(TINYINT(), ROW({VARCHAR(), ARRAY(DOUBLE())}))))), randomNulls(7)); @@ -4568,20 +4574,21 @@ TEST_F(ColumnWriterTest, arrayDictionary) { TEST_F(ColumnWriterTest, mapDictionary) { // Map tests - testDictionary>( + testDictionary, true>( MAP(INTEGER(), DOUBLE()), randomNulls(7)); - testDictionary>( + testDictionary, true>( MAP(VARCHAR(), VARCHAR()), randomNulls(13)); testDictionary< Map>>>>>( + Map>>>>, + true>( MAP(VARCHAR(), MAP(INTEGER(), ARRAY(ROW({INTEGER(), INTEGER(), ARRAY(DOUBLE())})))), randomNulls(9)); - testDictionary>>>( + testDictionary>>, true>( MAP(INTEGER(), MAP(VARCHAR(), MAP(VARCHAR(), TINYINT()))), randomNulls(3)); } diff --git a/velox/type/DecimalUtil.h b/velox/type/DecimalUtil.h index 357030b2ea45..0168adc0b4d2 100644 --- a/velox/type/DecimalUtil.h +++ b/velox/type/DecimalUtil.h @@ -217,8 +217,20 @@ class DecimalUtil { if (!std::isfinite(value)) { return Status::UserError("The input value should be finite."); } + // Avoid casting during the comparison to the maxiumum integer value. + // The issue is that compiler reports + // - implicit conversion from 'long' to 'double' changes value from + // 9223372036854775807 to 9223372036854775808 + // - implicit conversion from '__int128' to 'float' changes value from + // 170141183460469231731687303715884105727 + // to 1.70141183460469231731687E+38 + // The idea is to cast the maximum value to the float or double and then + // compare the input value to the next lower float/double value to not + // exceed the integer maximum. + constexpr TInput maxAllowedValue = + static_cast(std::numeric_limits::max()); if (value <= std::numeric_limits::min() || - value >= std::numeric_limits::max()) { + value >= std::nextafter(maxAllowedValue, 0)) { return Status::UserError("Result overflows."); }