diff --git a/velox/functions/sparksql/DecimalArithmetic.cpp b/velox/functions/sparksql/DecimalArithmetic.cpp
index 519ff0d5cf58..61599bce10ea 100644
--- a/velox/functions/sparksql/DecimalArithmetic.cpp
+++ b/velox/functions/sparksql/DecimalArithmetic.cpp
@@ -14,405 +14,250 @@
  * limitations under the License.
  */
 
-#include "velox/common/base/CheckedArithmetic.h"
-#include "velox/expression/DecodedArgs.h"
-#include "velox/expression/VectorFunction.h"
+#include "velox/functions/Macros.h"
+#include "velox/functions/Registerer.h"
 #include "velox/functions/sparksql/DecimalUtil.h"
-#include "velox/type/DecimalUtil.h"
 
 namespace facebook::velox::functions::sparksql {
 namespace {
 
-std::string getResultScale(std::string precision, std::string scale) {
-  return fmt::format(
-      "({}) <= 38 ? ({}) : max(({}) - ({}) + 38, min(({}), 6))",
-      precision,
-      scale,
-      scale,
-      precision,
-      scale);
-}
-
-// Returns the whole and fraction parts of a decimal value.
-template <typename T>
-inline std::pair<T, T> getWholeAndFraction(T value, uint8_t scale) {
-  const auto scaleFactor = velox::DecimalUtil::kPowersOfTen[scale];
-  const T whole = value / scaleFactor;
-  return {whole, value - whole * scaleFactor};
-}
-
-// Increases the scale of input value by 'delta'. Returns the input value if
-// delta is not positive.
-inline int128_t increaseScale(int128_t in, int16_t delta) {
-  // No need to consider overflow as 'delta == higher scale - input scale', so
-  // the scaled value will not exceed the maximum of long decimal.
-  return delta <= 0 ? in : in * velox::DecimalUtil::kPowersOfTen[delta];
-}
-
-// Scales up the whole part to result scale, and combine it with fraction part
-// to produce a full result for decimal add. Checks whether the result
-// overflows.
-template <typename T>
-inline T
-decimalAddResult(T whole, T fraction, uint8_t resultScale, bool& overflow) {
-  T scaledWhole = DecimalUtil::multiply<T>(
-      whole, velox::DecimalUtil::kPowersOfTen[resultScale], overflow);
-  if (FOLLY_UNLIKELY(overflow)) {
-    return 0;
-  }
-  const auto result = scaledWhole + fraction;
-  if constexpr (std::is_same_v<T, int64_t>) {
-    overflow = (result > velox::DecimalUtil::kShortDecimalMax) ||
-        (result < velox::DecimalUtil::kShortDecimalMin);
-  } else {
-    overflow = (result > velox::DecimalUtil::kLongDecimalMax) ||
-        (result < velox::DecimalUtil::kLongDecimalMin);
+struct DecimalAddSubtractBase {
+ protected:
+  void initializeBase(const std::vector<TypePtr>& inputTypes) {
+    auto [aPrecision, aScale] = getDecimalPrecisionScale(*inputTypes[0]);
+    auto [bPrecision, bScale] = getDecimalPrecisionScale(*inputTypes[1]);
+    aScale_ = aScale; // Input scales are kept for the applyAdd paths.
+    bScale_ = bScale;
+    auto [rPrecision, rScale] =
+        computeResultPrecisionScale(aPrecision, aScale_, bPrecision, bScale_);
+    rPrecision_ = rPrecision; // Read by applyAdd's range check.
+    rScale_ = rScale;
+    aRescale_ = computeRescaleFactor(aScale_, bScale_);
+    bRescale_ = computeRescaleFactor(bScale_, aScale_); // Both are >= 0.
   }
-  return result;
-}
 
-// Reduces the scale of input value by 'delta'. Returns the input value if delta
-// is not positive.
-template <typename T>
-inline static T reduceScale(T in, int32_t delta) {
-  if (delta <= 0) {
-    return in;
-  }
-  T result;
-  bool overflow;
-  const auto scaleFactor = velox::DecimalUtil::kPowersOfTen[delta];
-  if constexpr (std::is_same_v<T, int64_t>) {
-    VELOX_DCHECK_LE(
-        scaleFactor,
-        std::numeric_limits<int64_t>::max(),
-        "Scale factor should not exceed the maximum of int64_t.");
+  // Adds 'a' and 'b' and stores the sum in 'r'. On the direct paths each
+  // input is multiplied by the power of ten indexed by 'aRescale_' or
+  // 'bRescale_', which brings both operands to the larger input scale.
+  // Returns true only if no overflow was flagged and 'r' passes
+  // valueInPrecisionRange for 'rPrecision_'.
+  template <typename TResult, typename A, typename B>
+  bool applyAdd(TResult& r, A a, B b) {
+    // Only the addLarge path below can set this flag; the other paths rely
+    // on the precision-range check in the return statement.
+    bool overflow = false;
+    if (rPrecision_ < LongDecimalType::kMaxPrecision) {
+      const int128_t aRescaled =
+          a * velox::DecimalUtil::kPowersOfTen[aRescale_];
+      const int128_t bRescaled =
+          b * velox::DecimalUtil::kPowersOfTen[bRescale_];
+      r = TResult(aRescaled + bRescaled);
+    } else {
+      const uint32_t minLeadingZeros =
+          sparksql::DecimalUtil::minLeadingZeros<A, B>(
+              a, b, aRescale_, bRescale_);
+      if (minLeadingZeros >= 3) {
+        // Fast path for no overflow. If both numbers contain at least 3 leading
+        // zeros, they can be added directly without the risk of overflow.
+        // The reason is if a number contains at least 2 leading zeros, it is
+        // ensured that the number fits in the maximum of decimal, because
+        // '2^126 - 1 < 10^38 - 1'. If both numbers contain at least 3 leading
+        // zeros, we are guaranteed that the result will have at least 2 leading
+        // zeros.
+        int128_t aRescaled = a * velox::DecimalUtil::kPowersOfTen[aRescale_];
+        int128_t bRescaled = b * velox::DecimalUtil::kPowersOfTen[bRescale_];
+        r = reduceScale(
+            TResult(aRescaled + bRescaled),
+            std::max(aScale_, bScale_) - rScale_);
+      } else {
+        // The risk of overflow should be considered. Add whole and fraction
+        // parts separately, and then combine.
+        r = addLarge<TResult, A, B>(a, b, aScale_, bScale_, rScale_, overflow);
+      }
+    }
+    return !overflow &&
+        velox::DecimalUtil::valueInPrecisionRange(r, rPrecision_);
   }
-  DecimalUtil::divideWithRoundUp<T, T, T>(
-      result, in, T(scaleFactor), 0, overflow);
-  VELOX_DCHECK(!overflow);
-  return result;
-}
 
-// Adds two non-negative values by adding the whole and fraction parts
-// separately.
-template <typename TResult, typename A, typename B>
-inline static TResult addLargeNonNegative(
-    A a,
-    B b,
-    uint8_t aScale,
-    uint8_t bScale,
-    uint8_t rScale,
-    bool& overflow) {
-  VELOX_DCHECK_GE(
-      a, 0, "Non-negative value is expected in addLargeNonNegative.");
-  VELOX_DCHECK_GE(
-      b, 0, "Non-negative value is expected in addLargeNonNegative.");
-
-  // Separate whole and fraction parts.
-  const auto [aWhole, aFraction] = getWholeAndFraction<A>(a, aScale);
-  const auto [bWhole, bFraction] = getWholeAndFraction<B>(b, bScale);
-
-  // Adjust fractional parts to higher scale.
-  const auto higherScale = std::max(aScale, bScale);
-  const auto aFractionScaled =
-      increaseScale((int128_t)aFraction, higherScale - aScale);
-  const auto bFractionScaled =
-      increaseScale((int128_t)bFraction, higherScale - bScale);
-
-  int128_t fraction;
-  bool carryToLeft = false;
-  const auto carrier = velox::DecimalUtil::kPowersOfTen[higherScale];
-  if (aFractionScaled >= carrier - bFractionScaled) {
-    fraction = aFractionScaled + bFractionScaled - carrier;
-    carryToLeft = true;
-  } else {
-    fraction = aFractionScaled + bFractionScaled;
+ private:
+  // Returns {whole, fraction}: quotient and remainder by kPowersOfTen[scale].
+  template <typename T>
+  static std::pair<T, T> getWholeAndFraction(T value, uint8_t scale) {
+    const auto scaleFactor = velox::DecimalUtil::kPowersOfTen[scale];
+    const T whole = value / scaleFactor;
+    return {whole, value - whole * scaleFactor};
   }
 
-  // Scale up the whole part and scale down the fraction part to combine them.
-  fraction = reduceScale(TResult(fraction), higherScale - rScale);
-  const auto whole = TResult(aWhole) + TResult(bWhole) + TResult(carryToLeft);
-  return decimalAddResult(whole, TResult(fraction), rScale, overflow);
-}
-
-// Adds two opposite values by adding the whole and fraction parts separately.
-template <typename TResult, typename A, typename B>
-inline static TResult addLargeOpposite(
-    A a,
-    B b,
-    uint8_t aScale,
-    uint8_t bScale,
-    int32_t rScale,
-    bool& overflow) {
-  VELOX_DCHECK(
-      (a < 0 && b > 0) || (a > 0 && b < 0),
-      "One positve and one negative value are expected in addLargeOpposite.");
-
-  // Separate whole and fraction parts.
-  const auto [aWhole, aFraction] = getWholeAndFraction<A>(a, aScale);
-  const auto [bWhole, bFraction] = getWholeAndFraction<B>(b, bScale);
-
-  // Adjust fractional parts to higher scale.
-  const auto higherScale = std::max(aScale, bScale);
-  const auto aFractionScaled =
-      increaseScale((int128_t)aFraction, higherScale - aScale);
-  const auto bFractionScaled =
-      increaseScale((int128_t)bFraction, higherScale - bScale);
-
-  // No need to consider overflow because two inputs are opposite.
-  int128_t whole = (int128_t)aWhole + (int128_t)bWhole;
-  int128_t fraction = aFractionScaled + bFractionScaled;
-
-  // If the whole and fractional parts have different signs, adjust them to the
-  // same sign.
-  const auto scaleFactor = velox::DecimalUtil::kPowersOfTen[higherScale];
-  if (whole < 0 && fraction > 0) {
-    whole += 1;
-    fraction -= scaleFactor;
-  } else if (whole > 0 && fraction < 0) {
-    whole -= 1;
-    fraction += scaleFactor;
+  // Multiplies 'in' by kPowersOfTen[delta] to raise its scale by 'delta'.
+  // Returns 'in' unchanged when 'delta' is zero or negative.
+  static int128_t increaseScale(int128_t in, int16_t delta) {
+    // Overflow is not checked: callers pass a fraction part and 'delta ==
+    // higher scale - input scale', so the product fits in a long decimal.
+    return delta <= 0 ? in : in * velox::DecimalUtil::kPowersOfTen[delta];
   }
 
-  // Scale up the whole part and scale down the fraction part to combine them.
-  fraction = reduceScale(TResult(fraction), higherScale - rScale);
-  return decimalAddResult(TResult(whole), TResult(fraction), rScale, overflow);
-}
+  // Multiplies 'whole' by kPowersOfTen[resultScale] and adds 'fraction' to form
+  // the final sum. Returns 0 if 'overflow' is set after the multiply;
+  // otherwise sets 'overflow' when the sum leaves the decimal range of T.
+  template <typename T>
+  static T
+  decimalAddResult(T whole, T fraction, uint8_t resultScale, bool& overflow) {
+    T scaledWhole = sparksql::DecimalUtil::multiply<T>(
+        whole, velox::DecimalUtil::kPowersOfTen[resultScale], overflow);
+    if (FOLLY_UNLIKELY(overflow)) {
+      return 0;
+    }
+    const auto result = scaledWhole + fraction;
+    if constexpr (std::is_same_v<T, int64_t>) {
+      overflow = (result > velox::DecimalUtil::kShortDecimalMax) ||
+          (result < velox::DecimalUtil::kShortDecimalMin);
+    } else {
+      overflow = (result > velox::DecimalUtil::kLongDecimalMax) ||
+          (result < velox::DecimalUtil::kLongDecimalMin);
+    }
+    return result;
+  }
 
-template <typename TResult, typename A, typename B>
-inline static TResult addLarge(
-    A a,
-    B b,
-    uint8_t aScale,
-    uint8_t bScale,
-    int32_t rScale,
-    bool& overflow) {
-  if (a >= 0 && b >= 0) {
-    // Both non-negative.
-    return addLargeNonNegative<TResult, A, B>(
-        a, b, aScale, bScale, rScale, overflow);
-  } else if (a <= 0 && b <= 0) {
-    // Both non-positive.
-    return TResult(-addLargeNonNegative<TResult, A, B>(
-        A(-a), B(-b), aScale, bScale, rScale, overflow));
-  } else {
-    // One positive and the other negative.
-    return addLargeOpposite<TResult, A, B>(
-        a, b, aScale, bScale, rScale, overflow);
+  // Divides 'in' by kPowersOfTen[delta] via divideWithRoundUp to lower its
+  // scale by 'delta'. Returns 'in' unchanged if 'delta' is not positive.
+  template <typename T>
+  static T reduceScale(T in, int32_t delta) {
+    if (delta <= 0) {
+      return in;
+    }
+    T result = 0; // Defined value even if the division leaves it untouched.
+    bool overflow = false; // Must be defined before the DCHECK below reads it.
+    const auto scaleFactor = velox::DecimalUtil::kPowersOfTen[delta];
+    if constexpr (std::is_same_v<T, int64_t>) {
+      VELOX_DCHECK_LE(
+          scaleFactor,
+          std::numeric_limits<int64_t>::max(),
+          "Scale factor should not exceed the maximum of int64_t.");
+    }
+    DecimalUtil::divideWithRoundUp<T, T, T>(
+        result, in, T(scaleFactor), 0, overflow);
+    VELOX_DCHECK(!overflow);
+    return result;
   }
-}
 
-template <
-    typename R /* Result Type */,
-    typename A /* Argument1 */,
-    typename B /* Argument2 */,
-    typename Operation /* Arithmetic operation */>
-class DecimalBaseFunction : public exec::VectorFunction {
- public:
-  DecimalBaseFunction(
-      uint8_t aRescale,
-      uint8_t bRescale,
-      uint8_t aPrecision,
+  // Adds two non-negative values by adding the whole and fraction parts
+  // separately.
+  template <typename TResult, typename A, typename B>
+  static TResult addLargeNonNegative(
+      A a,
+      B b,
       uint8_t aScale,
-      uint8_t bPrecision,
       uint8_t bScale,
-      uint8_t rPrecision,
-      uint8_t rScale)
-      : aRescale_(aRescale),
-        bRescale_(bRescale),
-        aPrecision_(aPrecision),
-        aScale_(aScale),
-        bPrecision_(bPrecision),
-        bScale_(bScale),
-        rPrecision_(rPrecision),
-        rScale_(rScale) {}
-
-  void apply(
-      const SelectivityVector& rows,
-      std::vector<VectorPtr>& args,
-      const TypePtr& resultType,
-      exec::EvalCtx& context,
-      VectorPtr& result) const override {
-    auto rawResults = prepareResults(rows, resultType, context, result);
-    if (args[0]->isConstantEncoding() && args[1]->isFlatEncoding()) {
-      // Fast path for (const, flat).
-      auto constant = args[0]->asUnchecked<SimpleVector<A>>()->valueAt(0);
-      auto flatValues = args[1]->asUnchecked<FlatVector<B>>();
-      auto rawValues = flatValues->mutableRawValues();
-      context.applyToSelectedNoThrow(rows, [&](auto row) {
-        bool overflow = false;
-        Operation::template apply<R, A, B>(
-            rawResults[row],
-            constant,
-            rawValues[row],
-            aRescale_,
-            bRescale_,
-            aPrecision_,
-            aScale_,
-            bPrecision_,
-            bScale_,
-            rPrecision_,
-            rScale_,
-            overflow);
-        if (overflow ||
-            !velox::DecimalUtil::valueInPrecisionRange(
-                rawResults[row], rPrecision_)) {
-          result->setNull(row, true);
-        }
-      });
-    } else if (args[0]->isFlatEncoding() && args[1]->isConstantEncoding()) {
-      // Fast path for (flat, const).
-      auto flatValues = args[0]->asUnchecked<FlatVector<A>>();
-      auto constant = args[1]->asUnchecked<SimpleVector<B>>()->valueAt(0);
-      auto rawValues = flatValues->mutableRawValues();
-      context.applyToSelectedNoThrow(rows, [&](auto row) {
-        bool overflow = false;
-        Operation::template apply<R, A, B>(
-            rawResults[row],
-            rawValues[row],
-            constant,
-            aRescale_,
-            bRescale_,
-            aPrecision_,
-            aScale_,
-            bPrecision_,
-            bScale_,
-            rPrecision_,
-            rScale_,
-            overflow);
-        if (overflow ||
-            !velox::DecimalUtil::valueInPrecisionRange(
-                rawResults[row], rPrecision_)) {
-          result->setNull(row, true);
-        }
-      });
-    } else if (args[0]->isFlatEncoding() && args[1]->isFlatEncoding()) {
-      // Fast path for (flat, flat).
-      auto flatA = args[0]->asUnchecked<FlatVector<A>>();
-      auto rawA = flatA->mutableRawValues();
-      auto flatB = args[1]->asUnchecked<FlatVector<B>>();
-      auto rawB = flatB->mutableRawValues();
-
-      context.applyToSelectedNoThrow(rows, [&](auto row) {
-        bool overflow = false;
-        Operation::template apply<R, A, B>(
-            rawResults[row],
-            rawA[row],
-            rawB[row],
-            aRescale_,
-            bRescale_,
-            aPrecision_,
-            aScale_,
-            bPrecision_,
-            bScale_,
-            rPrecision_,
-            rScale_,
-            overflow);
-        if (overflow ||
-            !velox::DecimalUtil::valueInPrecisionRange(
-                rawResults[row], rPrecision_)) {
-          result->setNull(row, true);
-        }
-      });
+      uint8_t rScale,
+      bool& overflow) {
+    VELOX_DCHECK_GE(
+        a, 0, "Non-negative value is expected in addLargeNonNegative.");
+    VELOX_DCHECK_GE(
+        b, 0, "Non-negative value is expected in addLargeNonNegative.");
+
+    // Separate whole and fraction parts.
+    const auto [aWhole, aFraction] = getWholeAndFraction<A>(a, aScale);
+    const auto [bWhole, bFraction] = getWholeAndFraction<B>(b, bScale);
+
+    // Adjust fractional parts to higher scale.
+    const auto higherScale = std::max(aScale, bScale);
+    const auto aFractionScaled =
+        increaseScale((int128_t)aFraction, higherScale - aScale);
+    const auto bFractionScaled =
+        increaseScale((int128_t)bFraction, higherScale - bScale);
+
+    int128_t fraction;
+    bool carryToLeft = false;
+    const auto carrier = velox::DecimalUtil::kPowersOfTen[higherScale];
+    if (aFractionScaled >= carrier - bFractionScaled) {
+      fraction = aFractionScaled + bFractionScaled - carrier;
+      carryToLeft = true;
     } else {
-      // Fast path if one or more arguments are encoded.
-      exec::DecodedArgs decodedArgs(rows, args, context);
-      auto a = decodedArgs.at(0);
-      auto b = decodedArgs.at(1);
-      context.applyToSelectedNoThrow(rows, [&](auto row) {
-        bool overflow = false;
-        Operation::template apply<R, A, B>(
-            rawResults[row],
-            a->valueAt<A>(row),
-            b->valueAt<B>(row),
-            aRescale_,
-            bRescale_,
-            aPrecision_,
-            aScale_,
-            bPrecision_,
-            bScale_,
-            rPrecision_,
-            rScale_,
-            overflow);
-        if (overflow ||
-            !velox::DecimalUtil::valueInPrecisionRange(
-                rawResults[row], rPrecision_)) {
-          result->setNull(row, true);
-        }
-      });
+      fraction = aFractionScaled + bFractionScaled;
     }
-  }
 
- private:
-  R* prepareResults(
-      const SelectivityVector& rows,
-      const TypePtr& resultType,
-      exec::EvalCtx& context,
-      VectorPtr& result) const {
-    context.ensureWritable(rows, resultType, result);
-    result->clearNulls(rows);
-    return result->asUnchecked<FlatVector<R>>()->mutableRawValues();
+    // Scale up the whole part and scale down the fraction part to combine them.
+    fraction = reduceScale(TResult(fraction), higherScale - rScale);
+    const auto whole = TResult(aWhole) + TResult(bWhole) + TResult(carryToLeft);
+    return decimalAddResult(whole, TResult(fraction), rScale, overflow);
   }
 
-  const uint8_t aRescale_;
-  const uint8_t bRescale_;
-  const uint8_t aPrecision_;
-  const uint8_t aScale_;
-  const uint8_t bPrecision_;
-  const uint8_t bScale_;
-  const uint8_t rPrecision_;
-  const uint8_t rScale_;
-};
-
-class Addition {
- public:
+  // Adds two opposite values by adding the whole and fraction parts separately.
   template <typename TResult, typename A, typename B>
-  inline static void apply(
-      TResult& r,
+  static TResult addLargeOpposite(
       A a,
       B b,
-      uint8_t aRescale,
-      uint8_t bRescale,
-      uint8_t /* aPrecision */,
       uint8_t aScale,
-      uint8_t /* bPrecision */,
       uint8_t bScale,
-      uint8_t rPrecision,
-      uint8_t rScale,
+      int32_t rScale,
       bool& overflow) {
-    if (rPrecision < LongDecimalType::kMaxPrecision) {
-      const int128_t aRescaled = a * velox::DecimalUtil::kPowersOfTen[aRescale];
-      const int128_t bRescaled = b * velox::DecimalUtil::kPowersOfTen[bRescale];
-      r = TResult(aRescaled + bRescaled);
-    } else {
-      const uint32_t minLeadingZeros =
-          DecimalUtil::minLeadingZeros<A, B>(a, b, aRescale, bRescale);
-      if (minLeadingZeros >= 3) {
-        // Fast path for no overflow. If both numbers contain at least 3 leading
-        // zeros, they can be added directly without the risk of overflow.
-        // The reason is if a number contains at least 2 leading zeros, it is
-        // ensured that the number fits in the maximum of decimal, because
-        // '2^126 - 1 < 10^38 - 1'. If both numbers contain at least 3 leading
-        // zeros, we are guaranteed that the result will have at least 2 leading
-        // zeros.
-        int128_t aRescaled = a * velox::DecimalUtil::kPowersOfTen[aRescale];
-        int128_t bRescaled = b * velox::DecimalUtil::kPowersOfTen[bRescale];
-        r = reduceScale(
-            TResult(aRescaled + bRescaled), std::max(aScale, bScale) - rScale);
-      } else {
-        // The risk of overflow should be considered. Add whole and fraction
-        // parts separately, and then combine.
-        r = addLarge<TResult, A, B>(a, b, aScale, bScale, rScale, overflow);
-      }
+    VELOX_DCHECK(
+        (a < 0 && b > 0) || (a > 0 && b < 0),
+        "One positve and one negative value are expected in addLargeOpposite.");
+
+    // Separate whole and fraction parts.
+    const auto [aWhole, aFraction] = getWholeAndFraction<A>(a, aScale);
+    const auto [bWhole, bFraction] = getWholeAndFraction<B>(b, bScale);
+
+    // Adjust fractional parts to higher scale.
+    const auto higherScale = std::max(aScale, bScale);
+    const auto aFractionScaled =
+        increaseScale((int128_t)aFraction, higherScale - aScale);
+    const auto bFractionScaled =
+        increaseScale((int128_t)bFraction, higherScale - bScale);
+
+    // No need to consider overflow because two inputs are opposite.
+    int128_t whole = (int128_t)aWhole + (int128_t)bWhole;
+    int128_t fraction = aFractionScaled + bFractionScaled;
+
+    // If the whole and fractional parts have different signs, adjust them to
+    // the same sign.
+    const auto scaleFactor = velox::DecimalUtil::kPowersOfTen[higherScale];
+    if (whole < 0 && fraction > 0) {
+      whole += 1;
+      fraction -= scaleFactor;
+    } else if (whole > 0 && fraction < 0) {
+      whole -= 1;
+      fraction += scaleFactor;
     }
+
+    // Scale up the whole part and scale down the fraction part to combine them.
+    fraction = reduceScale(TResult(fraction), higherScale - rScale);
+    return decimalAddResult(
+        TResult(whole), TResult(fraction), rScale, overflow);
   }
 
-  inline static uint8_t
-  computeRescaleFactor(uint8_t fromScale, uint8_t toScale, uint8_t rScale = 0) {
-    return std::max(0, toScale - fromScale);
+  // Dispatches on the signs of 'a' and 'b' to the helper that adds whole and
+  // fraction parts separately. 'overflow' is passed through to that helper.
+  template <typename TResult, typename A, typename B>
+  static TResult addLarge(
+      A a,
+      B b,
+      uint8_t aScale,
+      uint8_t bScale,
+      int32_t rScale,
+      bool& overflow) {
+    if (a >= 0 && b >= 0) {
+      // Both non-negative.
+      return addLargeNonNegative<TResult, A, B>(
+          a, b, aScale, bScale, rScale, overflow);
+    } else if (a <= 0 && b <= 0) {
+      // Both non-positive: add the negated inputs, then negate the sum.
+      return TResult(-addLargeNonNegative<TResult, A, B>(
+          A(-a), B(-b), aScale, bScale, rScale, overflow));
+    } else {
+      // One positive and the other negative.
+      return addLargeOpposite<TResult, A, B>(
+          a, b, aScale, bScale, rScale, overflow);
+    }
   }
 
-  inline static std::pair<uint8_t, uint8_t> computeResultPrecisionScale(
+  // Computes the result precision and scale for decimal add and subtract
+  // operations following Hive's formulas.
+  // If result is representable with long decimal, the result
+  // scale is the maximum of 'aScale' and 'bScale'. If not, reduces result scale
+  // and caps the result precision at 38.
+  static std::pair<uint8_t, uint8_t> computeResultPrecisionScale(
       uint8_t aPrecision,
       uint8_t aScale,
       uint8_t bPrecision,
@@ -420,92 +265,92 @@ class Addition {
     auto precision = std::max(aPrecision - aScale, bPrecision - bScale) +
         std::max(aScale, bScale) + 1;
     auto scale = std::max(aScale, bScale);
-    return DecimalUtil::adjustPrecisionScale(precision, scale);
+    return sparksql::DecimalUtil::adjustPrecisionScale(precision, scale);
+  }
+
+  static uint8_t computeRescaleFactor(uint8_t fromScale, uint8_t toScale) {
+    return std::max(0, toScale - fromScale); // Subtraction is done in int.
   }
+
+  uint8_t aScale_; // Scale of the first input type.
+  uint8_t bScale_; // Scale of the second input type.
+  uint8_t aRescale_; // max(0, bScale_ - aScale_), a kPowersOfTen index.
+  uint8_t bRescale_; // max(0, aScale_ - bScale_), a kPowersOfTen index.
+  uint8_t rPrecision_; // Result precision from computeResultPrecisionScale.
+  uint8_t rScale_; // Result scale from computeResultPrecisionScale.
 };
 
-class Subtraction {
- public:
-  template <typename TResult, typename A, typename B>
-  inline static void apply(
-      TResult& r,
-      A a,
-      B b,
-      uint8_t aRescale,
-      uint8_t bRescale,
-      uint8_t aPrecision,
-      uint8_t aScale,
-      uint8_t bPrecision,
-      uint8_t bScale,
-      uint8_t rPrecision,
-      uint8_t rScale,
-      bool& overflow) {
-    Addition::apply<TResult, A, B>(
-        r,
-        a,
-        B(-b),
-        aRescale,
-        bRescale,
-        aPrecision,
-        aScale,
-        bPrecision,
-        bScale,
-        rPrecision,
-        rScale,
-        overflow);
+template <typename TExec>
+struct DecimalAddFunction : DecimalAddSubtractBase { // Computes a + b.
+  VELOX_DEFINE_FUNCTION_TYPES(TExec);
+
+  template <typename A, typename B>
+  void initialize(
+      const std::vector<TypePtr>& inputTypes,
+      const core::QueryConfig& /*config*/,
+      A* /*a*/,
+      B* /*b*/) {
+    initializeBase(inputTypes); // Caches scales and result type.
   }
 
-  inline static uint8_t
-  computeRescaleFactor(uint8_t fromScale, uint8_t toScale, uint8_t rScale = 0) {
-    return std::max(0, toScale - fromScale);
+  template <typename R, typename A, typename B>
+  bool call(R& out, const A& a, const B& b) {
+    return applyAdd<R, A, B>(out, a, b); // False on overflow or range miss.
   }
+};
 
-  inline static std::pair<uint8_t, uint8_t> computeResultPrecisionScale(
-      uint8_t aPrecision,
-      uint8_t aScale,
-      uint8_t bPrecision,
-      uint8_t bScale) {
-    return Addition::computeResultPrecisionScale(
-        aPrecision, aScale, bPrecision, bScale);
+template <typename TExec>
+struct DecimalSubtractFunction : DecimalAddSubtractBase { // Computes a - b.
+  VELOX_DEFINE_FUNCTION_TYPES(TExec);
+
+  template <typename A, typename B>
+  void initialize(
+      const std::vector<TypePtr>& inputTypes,
+      const core::QueryConfig& /*config*/,
+      A* /*a*/,
+      B* /*b*/) {
+    initializeBase(inputTypes); // Caches scales and result type.
+  }
+
+  template <typename R, typename A, typename B>
+  bool call(R& out, const A& a, const B& b) {
+    return applyAdd<R, A, B>(out, a, B(-b)); // Adds the negated 'b'.
   }
 };
 
-class Multiply {
- public:
-  // Derive from Arrow.
-  // https://github.com/apache/arrow/blob/release-12.0.1-rc1/cpp/src/gandiva/precompiled/decimal_ops.cc#L331
+template <typename TExec>
+struct DecimalMultiplyFunction {
+  VELOX_DEFINE_FUNCTION_TYPES(TExec);
+
+  template <typename A, typename B>
+  void initialize(
+      const std::vector<TypePtr>& inputTypes,
+      const core::QueryConfig& /*config*/,
+      A* /*a*/,
+      B* /*b*/) {
+    auto [aPrecision, aScale] = getDecimalPrecisionScale(*inputTypes[0]);
+    auto [bPrecision, bScale] = getDecimalPrecisionScale(*inputTypes[1]);
+    auto [rPrecision, rScale] = DecimalUtil::adjustPrecisionScale(
+        aPrecision + bPrecision + 1, aScale + bScale); // Before adjustment.
+    rPrecision_ = rPrecision;
+    deltaScale_ = aScale + bScale - rScale; // 0 when the scale was kept as is.
+  }
+
   template <typename R, typename A, typename B>
-  inline static void apply(
-      R& r,
-      A a,
-      B b,
-      uint8_t aRescale,
-      uint8_t bRescale,
-      uint8_t aPrecision,
-      uint8_t aScale,
-      uint8_t bPrecision,
-      uint8_t bScale,
-      uint8_t rPrecision,
-      uint8_t rScale,
-      bool& overflow) {
-    if (rPrecision < 38) {
-      R result = DecimalUtil::multiply<R>(R(a), R(b), overflow);
-      VELOX_DCHECK(!overflow);
-      r = DecimalUtil::multiply<R>(
-          result,
-          R(velox::DecimalUtil::kPowersOfTen[aRescale + bRescale]),
-          overflow);
+  bool call(R& out, const A& a, const B& b) {
+    bool overflow = false;
+    if (rPrecision_ < 38) {
+      out = DecimalUtil::multiply<R>(R(a), R(b), overflow);
       VELOX_DCHECK(!overflow);
     } else if (a == 0 && b == 0) {
       // Handle this separately to avoid divide-by-zero errors.
-      r = R(0);
+      out = R(0);
     } else {
-      auto deltaScale = aScale + bScale - rScale;
-      if (deltaScale == 0) {
+      if (deltaScale_ == 0) {
         // No scale down.
         // Multiply when the out_precision is 38, and there is no trimming of
         // the scale i.e the intermediate value is the same as the final value.
-        r = DecimalUtil::multiply<R>(R(a), R(b), overflow);
+        out = DecimalUtil::multiply<R>(R(a), R(b), overflow);
       } else {
         // Scale down.
         // It's possible that the intermediate value does not fit in 128-bits,
@@ -520,10 +365,10 @@ class Multiply {
           // Needs int256.
           int256_t reslarge =
               static_cast<int256_t>(a) * static_cast<int256_t>(b);
-          reslarge = reduceScaleBy(reslarge, deltaScale);
-          r = DecimalUtil::convert<R>(reslarge, overflow);
+          reslarge = reduceScaleBy(reslarge, deltaScale_);
+          out = DecimalUtil::convert<R>(reslarge, overflow);
         } else {
-          if (LIKELY(deltaScale <= 38)) {
+          if (LIKELY(deltaScale_ <= 38)) {
             // The largest value that result can have here is (2^64 - 1) * (2^63
             // - 1) = 1.70141E+38,which is greater than
             // DecimalUtil::kLongDecimalMax.
@@ -533,9 +378,9 @@ class Multiply {
             // ((2^64 - 1) * (2^63 - 1)) / 10, which is less than
             // DecimalUtil::kLongDecimalMax, so there cannot be any overflow.
             DecimalUtil::divideWithRoundUp<R, R, R>(
-                r,
+                out,
                 result,
-                R(velox::DecimalUtil::kPowersOfTen[deltaScale]),
+                R(velox::DecimalUtil::kPowersOfTen[deltaScale_]),
                 0,
                 overflow);
             VELOX_DCHECK(!overflow);
@@ -550,29 +395,18 @@ class Multiply {
             // the right of the rightmost "visible" one. The reason why we have
             // to handle this case separately is because a scale multiplier with
             // a deltaScale 39 does not fit into 128 bit.
-            r = R(0);
+            out = R(0);
           }
         }
       }
     }
-  }
-
-  inline static uint8_t
-  computeRescaleFactor(uint8_t fromScale, uint8_t toScale, uint8_t rScale = 0) {
-    return 0;
-  }
 
-  inline static std::pair<uint8_t, uint8_t> computeResultPrecisionScale(
-      uint8_t aPrecision,
-      uint8_t aScale,
-      uint8_t bPrecision,
-      uint8_t bScale) {
-    return DecimalUtil::adjustPrecisionScale(
-        aPrecision + bPrecision + 1, aScale + bScale);
+    return !overflow &&
+        velox::DecimalUtil::valueInPrecisionRange(out, rPrecision_);
   }
 
  private:
-  inline static int256_t reduceScaleBy(int256_t in, int32_t reduceBy) {
+  static int256_t reduceScaleBy(int256_t in, int32_t reduceBy) {
     if (reduceBy == 0) {
       return in;
     }
@@ -586,33 +420,40 @@ class Multiply {
     }
     return result;
   }
+
+  uint8_t rPrecision_; // Result precision after adjustPrecisionScale.
+  // aScale + bScale - rScale: how much call() reduces the product's scale.
+  int32_t deltaScale_;
 };
 
-class Divide {
- public:
-  template <typename R, typename A, typename B>
-  inline static void apply(
-      R& r,
-      A a,
-      B b,
-      uint8_t aRescale,
-      uint8_t /* bRescale */,
-      uint8_t /* aPrecision */,
-      uint8_t /* aScale */,
-      uint8_t /* bPrecision */,
-      uint8_t /* bScale */,
-      uint8_t /* rPrecision */,
-      uint8_t /* rScale */,
-      bool& overflow) {
-    DecimalUtil::divideWithRoundUp<R, A, B>(r, a, b, aRescale, overflow);
+template <typename TExec>
+struct DecimalDivideFunction {
+  VELOX_DEFINE_FUNCTION_TYPES(TExec);
+
+  template <typename A, typename B>
+  void initialize(
+      const std::vector<TypePtr>& inputTypes,
+      const core::QueryConfig& /*config*/,
+      A* /*a*/,
+      B* /*b*/) {
+    auto [aPrecision, aScale] = getDecimalPrecisionScale(*inputTypes[0]);
+    auto [bPrecision, bScale] = getDecimalPrecisionScale(*inputTypes[1]);
+    auto [rPrecision, rScale] =
+        computeResultPrecisionScale(aPrecision, aScale, bPrecision, bScale);
+    rPrecision_ = rPrecision; // Used by call()'s range check.
+    aRescale_ = rScale - aScale + bScale; // Passed to divideWithRoundUp.
   }
 
-  inline static uint8_t
-  computeRescaleFactor(uint8_t fromScale, uint8_t toScale, uint8_t rScale) {
-    return rScale - fromScale + toScale;
+  template <typename R, typename A, typename B>
+  bool call(R& out, const A& a, const B& b) {
+    bool overflow = false; // Handed to divideWithRoundUp below.
+    DecimalUtil::divideWithRoundUp<R, A, B>(out, a, b, aRescale_, overflow);
+    return !overflow && // False when flagged or 'out' is out of range.
+        velox::DecimalUtil::valueInPrecisionRange(out, rPrecision_);
   }
 
-  inline static std::pair<uint8_t, uint8_t> computeResultPrecisionScale(
+ private:
+  static std::pair<uint8_t, uint8_t> computeResultPrecisionScale(
       uint8_t aPrecision,
       uint8_t aScale,
       uint8_t bPrecision,
@@ -621,182 +462,139 @@ class Divide {
     auto precision = aPrecision - aScale + bScale + scale;
     return DecimalUtil::adjustPrecisionScale(precision, scale);
   }
+
+  uint8_t aRescale_;
+  uint8_t rPrecision_;
 };
 
-std::vector<std::shared_ptr<exec::FunctionSignature>>
-decimalAddSubtractSignature() {
+template <template <class> typename Func> // Registers 'Func' as 'name'.
+void registerDecimalBinary(
+    const std::string& name,
+    const std::vector<exec::SignatureVariable>& constraints) {
+  // One registration per decimal width combo. (long, long) -> long
+  registerFunction<
+      Func,
+      LongDecimal<P3, S3>,
+      LongDecimal<P1, S1>,
+      LongDecimal<P2, S2>>({name}, constraints);
+
+  // (short, short) -> short
+  registerFunction<
+      Func,
+      ShortDecimal<P3, S3>,
+      ShortDecimal<P1, S1>,
+      ShortDecimal<P2, S2>>({name}, constraints);
+
+  // (short, short) -> long
+  registerFunction<
+      Func,
+      LongDecimal<P3, S3>,
+      ShortDecimal<P1, S1>,
+      ShortDecimal<P2, S2>>({name}, constraints);
+
+  // (short, long) -> long
+  registerFunction<
+      Func,
+      LongDecimal<P3, S3>,
+      ShortDecimal<P1, S1>,
+      LongDecimal<P2, S2>>({name}, constraints);
+
+  // (long, short) -> long
+  registerFunction<
+      Func,
+      LongDecimal<P3, S3>,
+      LongDecimal<P1, S1>,
+      ShortDecimal<P2, S2>>({name}, constraints);
+}
+
+std::vector<exec::SignatureVariable> makeConstraints( // Constrains P3 and S3.
+    const std::string& rPrecision, // Expression for the uncapped precision.
+    const std::string& rScale) { // Expression for the unadjusted scale.
+  std::string finalScale = fmt::format( // rScale unless precision > 38.
+      "({}) <= 38 ? ({}) : max(({}) - ({}) + 38, min(({}), 6))",
+      rPrecision,
+      rScale,
+      rScale,
+      rPrecision,
+      rScale);
   return {
-      exec::FunctionSignatureBuilder()
-          .integerVariable("a_precision")
-          .integerVariable("a_scale")
-          .integerVariable("b_precision")
-          .integerVariable("b_scale")
-          .integerVariable(
-              "r_precision",
-              "min(38, max(a_precision - a_scale, b_precision - b_scale) + max(a_scale, b_scale) + 1)")
-          .integerVariable(
-              "r_scale",
-              getResultScale(
-                  "max(a_precision - a_scale, b_precision - b_scale) + max(a_scale, b_scale) + 1",
-                  "max(a_scale, b_scale)"))
-          .returnType("DECIMAL(r_precision, r_scale)")
-          .argumentType("DECIMAL(a_precision, a_scale)")
-          .argumentType("DECIMAL(b_precision, b_scale)")
-          .build()};
+      exec::SignatureVariable(
+          P3::name(), // Constrained to min(38, rPrecision).
+          fmt::format(
+              "min(38, {r_precision})", fmt::arg("r_precision", rPrecision)),
+          exec::ParameterType::kIntegerParameter),
+      exec::SignatureVariable(
+          S3::name(), finalScale, exec::ParameterType::kIntegerParameter)};
 }
 
-std::vector<std::shared_ptr<exec::FunctionSignature>>
-decimalMultiplySignature() {
-  return {exec::FunctionSignatureBuilder()
-              .integerVariable("a_precision")
-              .integerVariable("a_scale")
-              .integerVariable("b_precision")
-              .integerVariable("b_scale")
-              .integerVariable(
-                  "r_precision", "min(38, a_precision + b_precision + 1)")
-              .integerVariable(
-                  "r_scale",
-                  getResultScale(
-                      "a_precision + b_precision + 1", "a_scale + b_scale"))
-              .returnType("DECIMAL(r_precision, r_scale)")
-              .argumentType("DECIMAL(a_precision, a_scale)")
-              .argumentType("DECIMAL(b_precision, b_scale)")
-              .build()};
+template <template <class> typename Func> // Shared by add and subtract.
+void registerDecimalAddSubtract(const std::string& name) {
+  const auto scaleExpr = fmt::format( // Unadjusted result scale.
+      "max({a_scale}, {b_scale})",
+      fmt::arg("a_scale", S1::name()),
+      fmt::arg("b_scale", S2::name()));
+  const auto precisionExpr = fmt::format( // Uncapped result precision.
+      "max({a_precision} - {a_scale}, {b_precision} - {b_scale}) + max({a_scale}, {b_scale}) + 1",
+      fmt::arg("a_precision", P1::name()),
+      fmt::arg("b_precision", P2::name()),
+      fmt::arg("a_scale", S1::name()),
+      fmt::arg("b_scale", S2::name()));
+  registerDecimalBinary<Func>(name, makeConstraints(precisionExpr, scaleExpr));
 }
 
-std::vector<std::shared_ptr<exec::FunctionSignature>> decimalDivideSignature() {
-  return {
-      exec::FunctionSignatureBuilder()
-          .integerVariable("a_precision")
-          .integerVariable("a_scale")
-          .integerVariable("b_precision")
-          .integerVariable("b_scale")
-          .integerVariable(
-              "r_precision",
-              "min(38, a_precision - a_scale + b_scale + max(6, a_scale + b_precision + 1))")
-          .integerVariable(
-              "r_scale",
-              getResultScale(
-                  "a_precision - a_scale + b_scale + max(6, a_scale + b_precision + 1)",
-                  "max(6, a_scale + b_precision + 1)"))
-          .returnType("DECIMAL(r_precision, r_scale)")
-          .argumentType("DECIMAL(a_precision, a_scale)")
-          .argumentType("DECIMAL(b_precision, b_scale)")
-          .build()};
+} // namespace
+
+void registerDecimalAdd(const std::string& prefix) {
+  registerDecimalAddSubtract<DecimalAddFunction>(prefix + "add");
 }
 
-template <typename Operation>
-std::shared_ptr<exec::VectorFunction> createDecimalFunction(
-    const std::string& name,
-    const std::vector<exec::VectorFunctionArg>& inputArgs,
-    const core::QueryConfig& /*config*/) {
-  const auto& aType = inputArgs[0].type;
-  const auto& bType = inputArgs[1].type;
-  const auto [aPrecision, aScale] = getDecimalPrecisionScale(*aType);
-  const auto [bPrecision, bScale] = getDecimalPrecisionScale(*bType);
-  const auto [rPrecision, rScale] = Operation::computeResultPrecisionScale(
-      aPrecision, aScale, bPrecision, bScale);
-  const uint8_t aRescale =
-      Operation::computeRescaleFactor(aScale, bScale, rScale);
-  const uint8_t bRescale =
-      Operation::computeRescaleFactor(bScale, aScale, rScale);
-  if (aType->isShortDecimal()) {
-    if (bType->isShortDecimal()) {
-      if (rPrecision > ShortDecimalType::kMaxPrecision) {
-        return std::make_shared<DecimalBaseFunction<
-            int128_t /*result*/,
-            int64_t,
-            int64_t,
-            Operation>>(
-            aRescale,
-            bRescale,
-            aPrecision,
-            aScale,
-            bPrecision,
-            bScale,
-            rPrecision,
-            rScale);
-      } else {
-        return std::make_shared<DecimalBaseFunction<
-            int64_t /*result*/,
-            int64_t,
-            int64_t,
-            Operation>>(
-            aRescale,
-            bRescale,
-            aPrecision,
-            aScale,
-            bPrecision,
-            bScale,
-            rPrecision,
-            rScale);
-      }
-    } else {
-      return std::make_shared<DecimalBaseFunction<
-          int128_t /*result*/,
-          int64_t,
-          int128_t,
-          Operation>>(
-          aRescale,
-          bRescale,
-          aPrecision,
-          aScale,
-          bPrecision,
-          bScale,
-          rPrecision,
-          rScale);
-    }
-  } else {
-    if (bType->isShortDecimal()) {
-      return std::make_shared<DecimalBaseFunction<
-          int128_t /*result*/,
-          int128_t,
-          int64_t,
-          Operation>>(
-          aRescale,
-          bRescale,
-          aPrecision,
-          aScale,
-          bPrecision,
-          bScale,
-          rPrecision,
-          rScale);
-    } else {
-      return std::make_shared<DecimalBaseFunction<
-          int128_t /*result*/,
-          int128_t,
-          int128_t,
-          Operation>>(
-          aRescale,
-          bRescale,
-          aPrecision,
-          aScale,
-          bPrecision,
-          bScale,
-          rPrecision,
-          rScale);
-    }
-  }
+void registerDecimalSubtract(const std::string& prefix) {
+  registerDecimalAddSubtract<DecimalSubtractFunction>(prefix + "subtract");
 }
-} // namespace
 
-VELOX_DECLARE_STATEFUL_VECTOR_FUNCTION(
-    udf_decimal_add,
-    decimalAddSubtractSignature(),
-    createDecimalFunction<Addition>);
-
-VELOX_DECLARE_STATEFUL_VECTOR_FUNCTION(
-    udf_decimal_sub,
-    decimalAddSubtractSignature(),
-    createDecimalFunction<Subtraction>);
-
-VELOX_DECLARE_STATEFUL_VECTOR_FUNCTION(
-    udf_decimal_mul,
-    decimalMultiplySignature(),
-    createDecimalFunction<Multiply>);
-
-VELOX_DECLARE_STATEFUL_VECTOR_FUNCTION(
-    udf_decimal_div,
-    decimalDivideSignature(),
-    createDecimalFunction<Divide>);
+void registerDecimalMultiply(const std::string& prefix) {
+  const auto scaleExpr = fmt::format( // Sum of the input scales.
+      "{a_scale} + {b_scale}",
+      fmt::arg("a_scale", S1::name()),
+      fmt::arg("b_scale", S2::name()));
+  const auto precisionExpr = fmt::format( // Sum of input precisions plus one.
+      "{a_precision} + {b_precision} + 1",
+      fmt::arg("a_precision", P1::name()),
+      fmt::arg("b_precision", P2::name()));
+  registerDecimalBinary<DecimalMultiplyFunction>(
+      prefix + "multiply", makeConstraints(precisionExpr, scaleExpr));
+}
+
+std::vector<exec::SignatureVariable> getDivideConstraints() {
+  return makeConstraints(
+      fmt::format( // Result precision before makeConstraints caps it.
+          "{a_precision} - {a_scale} + {b_scale} + max(6, {a_scale} + {b_precision} + 1)",
+          fmt::arg("a_precision", P1::name()),
+          fmt::arg("b_precision", P2::name()),
+          fmt::arg("a_scale", S1::name()),
+          fmt::arg("b_scale", S2::name())),
+      fmt::format( // Result scale before makeConstraints adjusts it.
+          "max(6, {a_scale} + {b_precision} + 1)",
+          fmt::arg("a_scale", S1::name()),
+          fmt::arg("b_precision", P2::name())));
+}
+
+void registerDecimalDivide(const std::string& prefix) {
+  const std::string name = prefix + "divide";
+  const auto constraints = getDivideConstraints();
+  registerDecimalBinary<DecimalDivideFunction>(name, constraints);
+  // Extra to registerDecimalBinary's combos: (short, long) -> short
+  registerFunction<
+      DecimalDivideFunction,
+      ShortDecimal<P3, S3>,
+      ShortDecimal<P1, S1>,
+      LongDecimal<P2, S2>>({name}, constraints);
+
+  // Also divide-only: (long, short) -> short
+  registerFunction<
+      DecimalDivideFunction,
+      ShortDecimal<P3, S3>,
+      LongDecimal<P1, S1>,
+      ShortDecimal<P2, S2>>({name}, constraints);
+}
 } // namespace facebook::velox::functions::sparksql
diff --git a/velox/functions/sparksql/DecimalArithmetic.h b/velox/functions/sparksql/DecimalArithmetic.h
new file mode 100644
index 000000000000..f1a94ce01a92
--- /dev/null
+++ b/velox/functions/sparksql/DecimalArithmetic.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include <string>
+
+namespace facebook::velox::functions::sparksql {
+/// Registers Spark decimal addition as prefix + "add".
+void registerDecimalAdd(const std::string& prefix);
+/// Registers Spark decimal subtraction as prefix + "subtract".
+void registerDecimalSubtract(const std::string& prefix);
+/// Registers Spark decimal multiplication as prefix + "multiply".
+void registerDecimalMultiply(const std::string& prefix);
+/// Registers Spark decimal division as prefix + "divide".
+void registerDecimalDivide(const std::string& prefix);
+
+} // namespace facebook::velox::functions::sparksql
diff --git a/velox/functions/sparksql/RegisterArithmetic.cpp b/velox/functions/sparksql/RegisterArithmetic.cpp
index a9f7ff740414..c2a820d36ee6 100644
--- a/velox/functions/sparksql/RegisterArithmetic.cpp
+++ b/velox/functions/sparksql/RegisterArithmetic.cpp
@@ -17,6 +17,7 @@
 #include "velox/functions/lib/RegistrationHelpers.h"
 #include "velox/functions/prestosql/Arithmetic.h"
 #include "velox/functions/sparksql/Arithmetic.h"
+#include "velox/functions/sparksql/DecimalArithmetic.h"
 #include "velox/functions/sparksql/Rand.h"
 
 namespace facebook::velox::functions::sparksql {
@@ -105,10 +106,10 @@ void registerArithmeticFunctions(const std::string& prefix) {
       int64_t>({prefix + "width_bucket"});
   registerRandFunctions(prefix);
 
-  VELOX_REGISTER_VECTOR_FUNCTION(udf_decimal_add, prefix + "add");
-  VELOX_REGISTER_VECTOR_FUNCTION(udf_decimal_sub, prefix + "subtract");
-  VELOX_REGISTER_VECTOR_FUNCTION(udf_decimal_mul, prefix + "multiply");
-  VELOX_REGISTER_VECTOR_FUNCTION(udf_decimal_div, prefix + "divide");
+  registerDecimalAdd(prefix);
+  registerDecimalSubtract(prefix);
+  registerDecimalMultiply(prefix);
+  registerDecimalDivide(prefix);
   registerFunction<sparksql::IsNanFunction, bool, float>({prefix + "isnan"});
   registerFunction<sparksql::IsNanFunction, bool, double>({prefix + "isnan"});
 
diff --git a/velox/functions/sparksql/tests/DecimalArithmeticTest.cpp b/velox/functions/sparksql/tests/DecimalArithmeticTest.cpp
index 2ecbd4f3da23..a80baacf78dc 100644
--- a/velox/functions/sparksql/tests/DecimalArithmeticTest.cpp
+++ b/velox/functions/sparksql/tests/DecimalArithmeticTest.cpp
@@ -14,6 +14,7 @@
  * limitations under the License.
  */
 
+#include "velox/functions/sparksql/DecimalArithmetic.h"
 #include "velox/common/base/tests/GTestUtils.h"
 #include "velox/functions/prestosql/tests/utils/FunctionBaseTest.h"
 #include "velox/functions/sparksql/tests/SparkFunctionBaseTest.h"
@@ -32,21 +33,10 @@ class DecimalArithmeticTest : public SparkFunctionBaseTest {
   }
 
  protected:
-  template <TypeKind KIND>
-  void testDecimalExpr(
-      const VectorPtr& expected,
-      const std::string& expression,
-      const std::vector<VectorPtr>& input) {
-    using EvalType = typename velox::TypeTraits<KIND>::NativeType;
-    auto result =
-        evaluate<SimpleVector<EvalType>>(expression, makeRowVector(input));
-    assertEqualVectors(expected, result);
-  }
-
   void testArithmeticFunction(
       const std::string& functionName,
-      const std::vector<VectorPtr>& inputs,
-      const VectorPtr& expected) {
+      const VectorPtr& expected,
+      const std::vector<VectorPtr>& inputs) {
     VELOX_USER_CHECK_EQ(
         inputs.size(),
         2,
@@ -56,7 +46,7 @@ class DecimalArithmeticTest : public SparkFunctionBaseTest {
         std::make_shared<core::FieldAccessTypedExpr>(inputs[1]->type(), "c1")};
     auto expr = std::make_shared<const core::CallTypedExpr>(
         expected->type(), std::move(inputExprs), functionName);
-    testEncodings(expr, inputs, expected);
+    assertEqualVectors(expected, evaluate(expr, makeRowVector(inputs)));
   }
 
   VectorPtr makeNullableLongDecimalVector(
@@ -82,30 +72,36 @@ TEST_F(DecimalArithmeticTest, add) {
   // Precision < 38.
   testArithmeticFunction(
       "add",
+      makeNullableLongDecimalVector(
+          {"502", "1502", "11232", "1999999999999999999999999999998"},
+          DECIMAL(31, 3)),
       {makeNullableLongDecimalVector(
            {"201", "601", "1366", "999999999999999999999999999999"},
            DECIMAL(30, 3)),
        makeNullableLongDecimalVector(
            {"301", "901", "9866", "999999999999999999999999999999"},
-           DECIMAL(30, 3))},
-      makeNullableLongDecimalVector(
-          {"502", "1502", "11232", "1999999999999999999999999999998"},
-          DECIMAL(31, 3)));
+           DECIMAL(30, 3))});
 
   // Min leading zero >= 3.
   testArithmeticFunction(
       "add",
+      makeFlatVector(
+          std::vector<int128_t>{2123210, 2999889, 4234568, 4213563},
+          DECIMAL(38, 6)),
       {makeFlatVector(
            std::vector<int128_t>{11232100, 9998888, 12345678, 2135632},
            DECIMAL(38, 7)),
-       makeFlatVector(std::vector<int64_t>{1, 2, 3, 4}, DECIMAL(10, 0))},
-      makeFlatVector(
-          std::vector<int128_t>{2123210, 2999889, 4234568, 4213563},
-          DECIMAL(38, 6)));
+       makeFlatVector(std::vector<int64_t>{1, 2, 3, 4}, DECIMAL(10, 0))});
 
   // No carry to left.
   testArithmeticFunction(
       "add",
+      makeNullableLongDecimalVector(
+          {"99999999999999999999999999999990000010",
+           "99999999999999999999999999999999010000",
+           "99999999999999999999999999999999900123",
+           "99999999999999999999999999999999990100"},
+          DECIMAL(38, 6)),
       {makeNullableLongDecimalVector(
            {"9999999999999999999999999999999000000",
             "9999999999999999999999999999999900000",
@@ -113,17 +109,17 @@ TEST_F(DecimalArithmeticTest, add) {
             "9999999999999999999999999999999999000"},
            DECIMAL(38, 5)),
        makeFlatVector(
-           std::vector<int128_t>{100, 99999, 1234, 999}, DECIMAL(38, 7))},
-      makeNullableLongDecimalVector(
-          {"99999999999999999999999999999990000010",
-           "99999999999999999999999999999999010000",
-           "99999999999999999999999999999999900123",
-           "99999999999999999999999999999999990100"},
-          DECIMAL(38, 6)));
+           std::vector<int128_t>{100, 99999, 1234, 999}, DECIMAL(38, 7))});
 
   // Carry to left.
   testArithmeticFunction(
       "add",
+      makeNullableLongDecimalVector(
+          {"99999999999999999999999999999991500000",
+           "99999999999999999999999999999991000000",
+           "99999999999999999999999999999999500000",
+           "99999999999999999999999999999999100000"},
+          DECIMAL(38, 6)),
       {makeNullableLongDecimalVector(
            {"9999999999999999999999999999999070000",
             "9999999999999999999999999999999050000",
@@ -132,30 +128,26 @@ TEST_F(DecimalArithmeticTest, add) {
            DECIMAL(38, 5)),
        makeFlatVector(
            std::vector<int128_t>{8000000, 5000000, 8000000, 1999999},
-           DECIMAL(38, 7))},
-      makeNullableLongDecimalVector(
-          {"99999999999999999999999999999991500000",
-           "99999999999999999999999999999991000000",
-           "99999999999999999999999999999999500000",
-           "99999999999999999999999999999999100000"},
-          DECIMAL(38, 6)));
+           DECIMAL(38, 7))});
 
   // Both -ve.
   testArithmeticFunction(
       "add",
+      makeNullableLongDecimalVector(
+          {"-502", "-1502", "-11232", "-1999999999999999999999999999998"},
+          DECIMAL(31, 3)),
       {makeNullableLongDecimalVector(
            {"-201", "-601", "-1366", "-999999999999999999999999999999"},
            DECIMAL(30, 3)),
        makeNullableLongDecimalVector(
            {"-301", "-901", "-9866", "-999999999999999999999999999999"},
-           DECIMAL(30, 3))},
-      makeNullableLongDecimalVector(
-          {"-502", "-1502", "-11232", "-1999999999999999999999999999998"},
-          DECIMAL(31, 3)));
+           DECIMAL(30, 3))});
 
   // Overflow when scaling up the whole part.
   testArithmeticFunction(
       "add",
+      makeNullableLongDecimalVector(
+          {"null", "null", "null", "null"}, DECIMAL(38, 6)),
       {makeNullableLongDecimalVector(
            {"-99999999999999999999999999999999990000",
             "99999999999999999999999999999999999000",
@@ -164,13 +156,13 @@ TEST_F(DecimalArithmeticTest, add) {
            DECIMAL(38, 3)),
        makeFlatVector(
            std::vector<int128_t>{-100, 9999999, -999900, 99999},
-           DECIMAL(38, 7))},
-      makeNullableLongDecimalVector(
-          {"null", "null", "null", "null"}, DECIMAL(38, 6)));
+           DECIMAL(38, 7))});
 
   // Ve and -ve.
   testArithmeticFunction(
       "add",
+      makeNullableLongDecimalVector(
+          {"999990", "-999990", "-10", "10"}, DECIMAL(38, 6)),
       {makeNullableLongDecimalVector(
            {"99999999999999999999999999999989999990",
             "-99999999999999999999999999999989999990",
@@ -182,38 +174,42 @@ TEST_F(DecimalArithmeticTest, add) {
             "9999999999999999999999999999998900000",
             "-9999999999999999999999999999999999999",
             "9999999999999999999999999999999999999"},
-           DECIMAL(38, 5))},
-      makeNullableLongDecimalVector(
-          {"999990", "-999990", "-10", "10"}, DECIMAL(38, 6)));
+           DECIMAL(38, 5))});
 }
 
 TEST_F(DecimalArithmeticTest, subtract) {
   testArithmeticFunction(
       "subtract",
+      makeNullableLongDecimalVector(
+          {"-100", "-300", "-8500", "1999999999999999999999999999998"},
+          DECIMAL(31, 3)),
       {makeNullableLongDecimalVector(
            {"201", "601", "1366", "999999999999999999999999999999"},
            DECIMAL(30, 3)),
        makeNullableLongDecimalVector(
            {"301", "901", "9866", "-999999999999999999999999999999"},
-           DECIMAL(30, 3))},
-      makeNullableLongDecimalVector(
-          {"-100", "-300", "-8500", "1999999999999999999999999999998"},
-          DECIMAL(31, 3)));
+           DECIMAL(30, 3))});
 
   // Min leading zero >= 3.
   testArithmeticFunction(
       "subtract",
+      makeFlatVector(
+          std::vector<int128_t>{123210, -1000111, -1765432, -3786437},
+          DECIMAL(38, 6)),
       {makeFlatVector(
            std::vector<int128_t>{11232100, 9998888, 12345678, 2135632},
            DECIMAL(38, 7)),
-       makeFlatVector(std::vector<int64_t>{1, 2, 3, 4}, DECIMAL(10, 0))},
-      makeFlatVector(
-          std::vector<int128_t>{123210, -1000111, -1765432, -3786437},
-          DECIMAL(38, 6)));
+       makeFlatVector(std::vector<int64_t>{1, 2, 3, 4}, DECIMAL(10, 0))});
 
   // No carry to left.
   testArithmeticFunction(
       "subtract",
+      makeNullableLongDecimalVector(
+          {"99999999999999999999999999999990000010",
+           "99999999999999999999999999999999010000",
+           "99999999999999999999999999999999900123",
+           "99999999999999999999999999999999990100"},
+          DECIMAL(38, 6)),
       {makeNullableLongDecimalVector(
            {"9999999999999999999999999999999000000",
             "9999999999999999999999999999999900000",
@@ -221,17 +217,17 @@ TEST_F(DecimalArithmeticTest, subtract) {
             "9999999999999999999999999999999999000"},
            DECIMAL(38, 5)),
        makeFlatVector(
-           std::vector<int128_t>{-100, -99999, -1234, -999}, DECIMAL(38, 7))},
-      makeNullableLongDecimalVector(
-          {"99999999999999999999999999999990000010",
-           "99999999999999999999999999999999010000",
-           "99999999999999999999999999999999900123",
-           "99999999999999999999999999999999990100"},
-          DECIMAL(38, 6)));
+           std::vector<int128_t>{-100, -99999, -1234, -999}, DECIMAL(38, 7))});
 
   // Carry to left.
   testArithmeticFunction(
       "subtract",
+      makeNullableLongDecimalVector(
+          {"99999999999999999999999999999991500000",
+           "99999999999999999999999999999991000000",
+           "99999999999999999999999999999999500000",
+           "99999999999999999999999999999999100000"},
+          DECIMAL(38, 6)),
       {makeNullableLongDecimalVector(
            {"9999999999999999999999999999999070000",
             "9999999999999999999999999999999050000",
@@ -240,29 +236,25 @@ TEST_F(DecimalArithmeticTest, subtract) {
            DECIMAL(38, 5)),
        makeFlatVector(
            std::vector<int128_t>{-8000000, -5000000, -8000000, -1999999},
-           DECIMAL(38, 7))},
-      makeNullableLongDecimalVector(
-          {"99999999999999999999999999999991500000",
-           "99999999999999999999999999999991000000",
-           "99999999999999999999999999999999500000",
-           "99999999999999999999999999999999100000"},
-          DECIMAL(38, 6)));
+           DECIMAL(38, 7))});
 
   // Both -ve.
   testArithmeticFunction(
       "subtract",
+      makeNullableLongDecimalVector(
+          {"100", "300", "8500", "0"}, DECIMAL(31, 3)),
       {makeNullableLongDecimalVector(
            {"-201", "-601", "-1366", "-999999999999999999999999999999"},
            DECIMAL(30, 3)),
        makeNullableLongDecimalVector(
            {"-301", "-901", "-9866", "-999999999999999999999999999999"},
-           DECIMAL(30, 3))},
-      makeNullableLongDecimalVector(
-          {"100", "300", "8500", "0"}, DECIMAL(31, 3)));
+           DECIMAL(30, 3))});
 
   // Overflow when scaling up the whole part.
   testArithmeticFunction(
       "subtract",
+      makeNullableLongDecimalVector(
+          {"null", "null", "null", "null"}, DECIMAL(38, 6)),
       {makeNullableLongDecimalVector(
            {"-99999999999999999999999999999999990000",
             "99999999999999999999999999999999999000",
@@ -271,13 +263,17 @@ TEST_F(DecimalArithmeticTest, subtract) {
            DECIMAL(38, 3)),
        makeFlatVector(
            std::vector<int128_t>{100, -9999999, 999900, -99999},
-           DECIMAL(38, 7))},
-      makeNullableLongDecimalVector(
-          {"null", "null", "null", "null"}, DECIMAL(38, 6)));
+           DECIMAL(38, 7))});
 
   // Ve and -ve.
   testArithmeticFunction(
       "subtract",
+      makeNullableLongDecimalVector(
+          {"99999999999999999999999999999999999990",
+           "-99999999999999999999999999999999999990",
+           "99999999999999999999999999999999999990",
+           "-99999999999999999999999999999999999990"},
+          DECIMAL(38, 6)),
       {makeNullableLongDecimalVector(
            {"99999999999999999999999999999989999990",
             "-99999999999999999999999999999989999990",
@@ -285,13 +281,7 @@ TEST_F(DecimalArithmeticTest, subtract) {
             "-99999999999999999999999999999999999980"},
            DECIMAL(38, 6)),
        makeFlatVector(
-           std::vector<int128_t>{-1000000, 1000000, -1, 1}, DECIMAL(38, 5))},
-      makeNullableLongDecimalVector(
-          {"99999999999999999999999999999999999990",
-           "-99999999999999999999999999999999999990",
-           "99999999999999999999999999999999999990",
-           "-99999999999999999999999999999999999990"},
-          DECIMAL(38, 6)));
+           std::vector<int128_t>{-1000000, 1000000, -1, 1}, DECIMAL(38, 5))});
 }
 
 TEST_F(DecimalArithmeticTest, multiply) {
@@ -307,74 +297,76 @@ TEST_F(DecimalArithmeticTest, multiply) {
   //   }
   auto shortFlat = makeFlatVector<int64_t>({1000, 2000}, DECIMAL(17, 3));
   // Multiply short and short, returning long.
-  testDecimalExpr<TypeKind::HUGEINT>(
+  testArithmeticFunction(
+      "multiply",
       makeFlatVector<int128_t>({1000000, 4000000}, DECIMAL(35, 6)),
-      "multiply(c0, c1)",
       {shortFlat, shortFlat});
   // Multiply short and long, returning long.
   auto longFlat = makeFlatVector<int128_t>({1000, 2000}, DECIMAL(20, 3));
   auto expectedLongFlat =
       makeFlatVector<int128_t>({1000000, 4000000}, DECIMAL(38, 6));
-  testDecimalExpr<TypeKind::HUGEINT>(
-      expectedLongFlat, "multiply(c0, c1)", {shortFlat, longFlat});
+  testArithmeticFunction("multiply", expectedLongFlat, {shortFlat, longFlat});
   // Multiply long and short, returning long.
-  testDecimalExpr<TypeKind::HUGEINT>(
-      expectedLongFlat, "multiply(c0, c1)", {longFlat, shortFlat});
+  testArithmeticFunction("multiply", expectedLongFlat, {longFlat, shortFlat});
 
   // Multiply long and long, returning long.
-  testDecimalExpr<TypeKind::HUGEINT>(
+  testArithmeticFunction(
+      "multiply",
       makeFlatVector<int128_t>({1000000, 4000000}, DECIMAL(38, 6)),
-      "multiply(c0, c1)",
       {longFlat, longFlat});
 
   auto leftFlat0 = makeFlatVector<int128_t>({0, 1, 0}, DECIMAL(20, 3));
   auto rightFlat0 = makeFlatVector<int128_t>({1, 0, 0}, DECIMAL(20, 2));
-  testDecimalExpr<TypeKind::HUGEINT>(
+  testArithmeticFunction(
+      "multiply",
       makeFlatVector<int128_t>({0, 0, 0}, DECIMAL(38, 5)),
-      "multiply(c0, c1)",
       {leftFlat0, rightFlat0});
 
   // Multiply short and short, returning short.
   shortFlat = makeFlatVector<int64_t>({1000, 2000}, DECIMAL(6, 3));
-  testDecimalExpr<TypeKind::BIGINT>(
+  testArithmeticFunction(
+      "multiply",
       makeFlatVector<int64_t>({1000000, 4000000}, DECIMAL(13, 6)),
-      "c0 * c1",
       {shortFlat, shortFlat});
 
   auto expectedConstantFlat =
       makeFlatVector<int64_t>({100000, 200000}, DECIMAL(10, 5));
   // Constant and Flat arguments.
-  testDecimalExpr<TypeKind::BIGINT>(
-      expectedConstantFlat, "1.00 * c0", {shortFlat});
+  testArithmeticFunction(
+      "multiply",
+      expectedConstantFlat,
+      {makeConstant<int64_t>(100, 2, DECIMAL(3, 2)), shortFlat});
 
   // Flat and Constant arguments.
-  testDecimalExpr<TypeKind::BIGINT>(
-      expectedConstantFlat, "c0 * 1.00", {shortFlat});
+  testArithmeticFunction(
+      "multiply",
+      expectedConstantFlat,
+      {shortFlat, makeConstant<int64_t>(100, 2, DECIMAL(3, 2))});
 
   // out_precision == 38, small input values, trimming of scale.
-  testDecimalExpr<TypeKind::HUGEINT>(
+  testArithmeticFunction(
+      "multiply",
       makeConstant<int128_t>(61, 1, DECIMAL(38, 7)),
-      "c0 * c1",
       {makeConstant<int128_t>(201, 1, DECIMAL(20, 5)),
        makeConstant<int128_t>(301, 1, DECIMAL(20, 5))});
 
   // out_precision == 38, large values, trimming of scale.
-  testDecimalExpr<TypeKind::HUGEINT>(
+  testArithmeticFunction(
+      "multiply",
       makeConstant<int128_t>(
           HugeInt::parse("201" + std::string(31, '0')), 1, DECIMAL(38, 6)),
-      "c0 * c1",
       {makeConstant<int128_t>(201, 1, DECIMAL(20, 5)),
        makeConstant<int128_t>(
            HugeInt::parse(std::string(35, '9')), 1, DECIMAL(35, 5))});
 
   // out_precision == 38, very large values, trimming of scale (requires convert
   // to 256).
-  testDecimalExpr<TypeKind::HUGEINT>(
+  testArithmeticFunction(
+      "multiply",
       makeConstant<int128_t>(
           HugeInt::parse("9999999999999999999999999999999999890"),
           1,
           DECIMAL(38, 6)),
-      "c0 * c1",
       {makeConstant<int128_t>(
            HugeInt::parse(std::string(35, '9')), 1, DECIMAL(38, 20)),
        makeConstant<int128_t>(
@@ -382,77 +374,79 @@ TEST_F(DecimalArithmeticTest, multiply) {
 
   // out_precision == 38, very large values, trimming of scale (requires convert
   // to 256). should cause overflow.
-  testDecimalExpr<TypeKind::HUGEINT>(
+  testArithmeticFunction(
+      "multiply",
       makeConstant<int128_t>(std::nullopt, 1, DECIMAL(38, 6)),
-      "c0 * c1",
       {makeConstant<int128_t>(
            HugeInt::parse(std::string(35, '9')), 1, DECIMAL(38, 4)),
        makeConstant<int128_t>(
            HugeInt::parse(std::string(36, '9')), 1, DECIMAL(38, 4))});
 
   // Big scale * big scale.
-  testDecimalExpr<TypeKind::HUGEINT>(
+  testArithmeticFunction(
+      "multiply",
       makeConstant<int128_t>(0, 1, DECIMAL(38, 37)),
-      "c0 * c1",
       {makeConstant<int128_t>(201, 1, DECIMAL(38, 38)),
        makeConstant<int128_t>(301, 1, DECIMAL(38, 38))});
 
   // Long decimal limits.
-  testDecimalExpr<TypeKind::HUGEINT>(
+  testArithmeticFunction(
+      "multiply",
       makeConstant<int128_t>(std::nullopt, 1, DECIMAL(38, 0)),
-      "c0 * cast(10.00 as decimal(2,0))",
       {makeConstant<int128_t>(
-          HugeInt::build(0x08FFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF),
-          1,
-          DECIMAL(38, 0))});
+           HugeInt::build(0x08FFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF),
+           1,
+           DECIMAL(38, 0)),
+       makeConstant<int64_t>(10, 1, DECIMAL(2, 0))});
 
   // Rescaling the final result overflows.
-  testDecimalExpr<TypeKind::HUGEINT>(
+  testArithmeticFunction(
+      "multiply",
       makeConstant<int128_t>(std::nullopt, 1, DECIMAL(38, 1)),
-      "c0 * cast(1.00 as decimal(2,1))",
       {makeConstant<int128_t>(
-          HugeInt::build(0x08FFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF),
-          1,
-          DECIMAL(38, 0))});
+           HugeInt::build(0x08FFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF),
+           1,
+           DECIMAL(38, 0)),
+       makeConstant<int64_t>(100, 1, DECIMAL(2, 1))});
 }
 
-TEST_F(DecimalArithmeticTest, decimalDivTest) {
+TEST_F(DecimalArithmeticTest, divide) {
   auto shortFlat = makeFlatVector<int64_t>({1000, 2000}, DECIMAL(17, 3));
   // Divide short and short, returning long.
-  testDecimalExpr<TypeKind::HUGEINT>(
+  testArithmeticFunction(
+      "divide",
       makeNullableLongDecimalVector(
           {"500000000000000000000", "2000000000000000000000"}, DECIMAL(38, 21)),
-      "divide(c0, c1)",
       {makeFlatVector<int64_t>({500, 4000}, DECIMAL(17, 3)), shortFlat});
 
   // Divide short and long, returning long.
   auto longFlat = makeFlatVector<int128_t>({500, 4000}, DECIMAL(20, 2));
-  testDecimalExpr<TypeKind::HUGEINT>(
+  testArithmeticFunction(
+      "divide",
       makeFlatVector<int128_t>(
           {500000000000000000, 2000000000000000000}, DECIMAL(38, 17)),
-      "divide(c0, c1)",
       {longFlat, shortFlat});
 
   // Divide long and short, returning long.
-  testDecimalExpr<TypeKind::HUGEINT>(
+  testArithmeticFunction(
+      "divide",
       makeNullableLongDecimalVector(
           {"20" + std::string(20, '0'), "5" + std::string(20, '0')},
           DECIMAL(38, 22)),
-      "divide(c0, c1)",
       {shortFlat, longFlat});
 
   // Divide long and long, returning long.
-  testDecimalExpr<TypeKind::HUGEINT>(
+  testArithmeticFunction(
+      "divide",
       makeNullableLongDecimalVector(
           {"5" + std::string(18, '0'), "3" + std::string(18, '0')},
           DECIMAL(38, 18)),
-      "divide(c0, c1)",
       {makeFlatVector<int128_t>({2500, 12000}, DECIMAL(20, 2)), longFlat});
 
   // Divide short and short, returning short.
-  testDecimalExpr<TypeKind::BIGINT>(
+  testArithmeticFunction(
+      "divide",
       makeFlatVector<int64_t>({500000000, 300000000}, DECIMAL(13, 11)),
-      "divide(c0, c1)",
       {makeFlatVector<int64_t>({2500, 12000}, DECIMAL(5, 5)),
        makeFlatVector<int64_t>({500, 4000}, DECIMAL(5, 2))});
   // This result can be obtained by Spark unit test
@@ -472,27 +466,27 @@ TEST_F(DecimalArithmeticTest, decimalDivTest) {
   //       DecimalType(s(2), s(3)))
   //     checkEvaluation(Divide(l1, l2), null)
   //   }
-  testDecimalExpr<TypeKind::HUGEINT>(
+  testArithmeticFunction(
+      "divide",
       makeNullableLongDecimalVector(
           {"497512437810945273631840796019900493"}, DECIMAL(38, 6)),
-      "c0 / c1",
       {makeNullableLongDecimalVector({std::string(35, '9')}, DECIMAL(35, 6)),
        makeConstant<int128_t>(201, 1, DECIMAL(20, 3))});
 
-  testDecimalExpr<TypeKind::HUGEINT>(
+  testArithmeticFunction(
+      "divide",
       makeNullableLongDecimalVector(
           {"1000" + std::string(17, '0'), "500" + std::string(17, '0')},
           DECIMAL(24, 20)),
-      "1.00 / c0",
-      {shortFlat});
+      {makeConstant<int64_t>(100, 2, DECIMAL(3, 2)), shortFlat}); // 1.00
 
   // Flat and Constant arguments.
-  testDecimalExpr<TypeKind::HUGEINT>(
+  testArithmeticFunction(
+      "divide",
       makeNullableLongDecimalVector(
           {"500" + std::string(4, '0'), "1000" + std::string(4, '0')},
           DECIMAL(23, 7)),
-      "c0 / 2.00",
-      {shortFlat});
+      {shortFlat, makeConstant<int64_t>(200, 2, DECIMAL(3, 2))}); // 2.00
 
   // Divide and round-up.
   // The result can be obtained by Spark unit test
@@ -504,23 +498,24 @@ TEST_F(DecimalArithmeticTest, decimalDivTest) {
   //     df.show(truncate = false)
   //     spark.sql("drop table decimals_test;")
   //   }
-  testDecimalExpr<TypeKind::BIGINT>(
-      {makeFlatVector<int64_t>(
-          {566667, -83333, -1083333, -1500000, -33333, 816667}, DECIMAL(8, 6))},
-      "c0 / -6.0",
-      {makeFlatVector<int64_t>({-34, 5, 65, 90, 2, -49}, DECIMAL(2, 1))});
+  testArithmeticFunction(
+      "divide",
+      makeFlatVector<int64_t>(
+          {566667, -83333, -1083333, -1500000, -33333, 816667}, DECIMAL(8, 6)),
+      {makeFlatVector<int64_t>({-34, 5, 65, 90, 2, -49}, DECIMAL(2, 1)),
+       makeConstant<int64_t>(-60, 6, DECIMAL(2, 1))}); // -6.0
   // Divide by zero.
-  testDecimalExpr<TypeKind::HUGEINT>(
+  testArithmeticFunction(
+      "divide",
       makeConstant<int128_t>(std::nullopt, 2, DECIMAL(21, 6)),
-      "c0 / 0.0",
-      {shortFlat});
+      {shortFlat, makeConstant<int64_t>(0, 2, DECIMAL(2, 1))}); // 0.0
 
   // Long decimal limits.
-  testDecimalExpr<TypeKind::HUGEINT>(
+  testArithmeticFunction(
+      "divide",
       makeConstant<int128_t>(std::nullopt, 1, DECIMAL(38, 6)),
-      "c0 / 0.01",
-      {makeConstant<int128_t>(
-          DecimalUtil::kLongDecimalMax, 1, DECIMAL(38, 0))});
+      {makeConstant<int128_t>(DecimalUtil::kLongDecimalMax, 1, DECIMAL(38, 0)),
+       makeConstant<int64_t>(1, 1, DECIMAL(3, 2))}); // 0.01
 }
 } // namespace
 } // namespace facebook::velox::functions::sparksql::test
diff --git a/velox/type/Type.cpp b/velox/type/Type.cpp
index 84db02dd4abe..17b88074f172 100644
--- a/velox/type/Type.cpp
+++ b/velox/type/Type.cpp
@@ -119,7 +119,7 @@ std::string mapTypeKindToName(const TypeKind& typeKind) {
   return found->second;
 }
 
-std::pair<int, int> getDecimalPrecisionScale(const Type& type) {
+std::pair<uint8_t, uint8_t> getDecimalPrecisionScale(const Type& type) {
   if (type.isShortDecimal()) {
     const auto& decimalType = static_cast<const ShortDecimalType&>(type);
     return {decimalType.precision(), decimalType.scale()};
diff --git a/velox/type/Type.h b/velox/type/Type.h
index b7bc6bbdbd3d..f33bfaf51d80 100644
--- a/velox/type/Type.h
+++ b/velox/type/Type.h
@@ -765,7 +765,7 @@ FOLLY_ALWAYS_INLINE bool isDecimalName(const std::string& name) {
   return (name == "DECIMAL");
 }
 
-std::pair<int, int> getDecimalPrecisionScale(const Type& type);
+std::pair<uint8_t, uint8_t> getDecimalPrecisionScale(const Type& type);
 
 class UnknownType : public TypeBase<TypeKind::UNKNOWN> {
  public: