Add CAST(real as decimal) (8575)

rui-mo · JkSelf · commit 4f0b86345457 · 2024-02-06T17:40:57.000+08:00
diff --git a/velox/docs/functions/presto/conversion.rst b/velox/docs/functions/presto/conversion.rst
@@ -123,7 +123,7 @@ supported conversions to/from JSON are listed in :doc:`json`.
      -
      -
      -
-     -
+     - Y
    * - double
      - Y
      - Y
@@ -724,14 +724,15 @@ Invalid examples
   SELECT cast(123 as decimal(6, 4)); -- Out of range
   SELECT cast(123 as decimal(4, 2)); -- Out of range
 
-From double type
-^^^^^^^^^^^^^^^^
+From floating-point types
+^^^^^^^^^^^^^^^^^^^^^^^^^
 
-Casting a double number to a decimal of given precision and scale is allowed
-if the input value can be represented by the precision and scale. When the
-given scale is less than the number of decimal places, the double value is
-rounded. The conversion precision is up to 15 as double provides 16(±1)
-significant decimal digits precision. Casting from invalid input values throws.
+Casting a floating-point number to a decimal of given precision and scale is allowed
+if the input value can be represented by the precision and scale. When the given
+scale is less than the number of decimal places, the floating-point value is rounded.
+The conversion keeps at most 15 significant decimal digits for double and 6 for real,
+which is the number of decimal digits each type is guaranteed to represent precisely.
+Casting from invalid input values throws.
 
 Valid example
 
@@ -741,6 +742,7 @@ Valid example
   SELECT cast(0.12 as decimal(4, 1)); -- decimal '0.1'
   SELECT cast(0.19 as decimal(4, 1)); -- decimal '0.2'
   SELECT cast(0.123456789123123 as decimal(38, 18)); -- decimal '0.123456789123123000'
+  SELECT cast(cast(0.123456 as real) as decimal(38, 18)); -- decimal '0.123456000000000000'
 
 Invalid example
 
diff --git a/velox/expression/CastExpr-inl.h b/velox/expression/CastExpr-inl.h
@@ -453,22 +453,22 @@ void CastExpr::applyIntToDecimalCastKernel(
       });
 }
 
-template <typename TOutput>
-void CastExpr::applyDoubleToDecimalCastKernel(
+template <typename TInput, typename TOutput>
+void CastExpr::applyFloatingPointToDecimalCastKernel(
     const SelectivityVector& rows,
     const BaseVector& input,
     exec::EvalCtx& context,
     const TypePtr& toType,
     VectorPtr& result) {
-  const auto doubleInput = input.as<SimpleVector<double>>();
+  const auto floatingInput = input.as<SimpleVector<TInput>>();
   auto rawResults =
       result->asUnchecked<FlatVector<TOutput>>()->mutableRawValues();
   const auto toPrecisionScale = getDecimalPrecisionScale(*toType);
 
   applyToSelectedNoThrowLocal(context, rows, result, [&](vector_size_t row) {
     TOutput output;
-    const auto status = DecimalUtil::rescaleDouble<TOutput>(
-        doubleInput->valueAt(row),
+    const auto status = DecimalUtil::rescaleFloatingPoint<TInput, TOutput>(
+        floatingInput->valueAt(row),
         toPrecisionScale.first,
         toPrecisionScale.second,
         output);
diff --git a/velox/expression/CastExpr.cpp b/velox/expression/CastExpr.cpp
@@ -576,8 +576,12 @@ VectorPtr CastExpr::applyDecimal(
       applyIntToDecimalCastKernel<int32_t, toDecimalType>(
           rows, input, context, toType, castResult);
       break;
+    case TypeKind::REAL:
+      applyFloatingPointToDecimalCastKernel<float, toDecimalType>(
+          rows, input, context, toType, castResult);
+      break;
     case TypeKind::DOUBLE:
-      applyDoubleToDecimalCastKernel<toDecimalType>(
+      applyFloatingPointToDecimalCastKernel<double, toDecimalType>(
           rows, input, context, toType, castResult);
       break;
     case TypeKind::BIGINT: {
diff --git a/velox/expression/CastExpr.h b/velox/expression/CastExpr.h
@@ -206,8 +206,8 @@ class CastExpr : public SpecialForm {
       const TypePtr& toType,
       VectorPtr& castResult);
 
-  template <typename TOutput>
-  void applyDoubleToDecimalCastKernel(
+  template <typename TInput, typename TOutput>
+  void applyFloatingPointToDecimalCastKernel(
       const SelectivityVector& rows,
       const BaseVector& input,
       exec::EvalCtx& context,
diff --git a/velox/expression/tests/CastExprTest.cpp b/velox/expression/tests/CastExprTest.cpp
@@ -1969,29 +1969,52 @@ TEST_F(CastExprTest, castInTry) {
 
 TEST_F(CastExprTest, doubleToDecimal) {
   // Double to short decimal.
-  const auto input =
-      makeFlatVector<double>({-3333.03, -2222.02, -1.0, 0.00, 100, 99999.99});
+  const auto input = makeFlatVector<double>(
+      {-3333.03,
+       -2222.02,
+       -1.0,
+       0.00,
+       100,
+       99999.99,
+       10.03,
+       10.05,
+       9.95,
+       -2.123456789});
   testCast(
       input,
       makeFlatVector<int64_t>(
-          {-33'330'300, -22'220'200, -10'000, 0, 1'000'000, 999'999'900},
+          {-33'330'300,
+           -22'220'200,
+           -10'000,
+           0,
+           1'000'000,
+           999'999'900,
+           100'300,
+           100'500,
+           99'500,
+           -21'235},
           DECIMAL(10, 4)));
 
   // Double to long decimal.
   testCast(
       input,
       makeFlatVector<int128_t>(
-          {-33'330'300'000'000,
-           -22'220'200'000'000,
-           -10'000'000'000,
+          {HugeInt::build(0xFFFFFFFFFFFFFF4B, 0x50EABA2657C90000),
+           HugeInt::build(0xFFFFFFFFFFFFFF87, 0x8B4726C43A860000),
+           -1'000'000'000'000'000'000,
            0,
-           1'000'000'000'000,
-           999'999'900'000'000},
-          DECIMAL(20, 10)));
+           HugeInt::build(0x5, 0x6BC75E2D63100000),
+           HugeInt::build(0x152D, 0x02A45A5886BF0000),
+           HugeInt::build(0, 0x8B31B7DBD92B0000),
+           HugeInt::build(0, 0x8B78C5C0B8AD0000),
+           HugeInt::build(0, 0x8A1580485B230000),
+           -2'123'456'789'000'000'000},
+          DECIMAL(38, 18)));
   testCast(
       input,
       makeFlatVector<int128_t>(
-          {-33'330, -22'220, -10, 0, 1'000, 1'000'000}, DECIMAL(20, 1)));
+          {-33'330, -22'220, -10, 0, 1'000, 1'000'000, 100, 101, 100, -21},
+          DECIMAL(20, 1)));
   testCast(
       makeNullableFlatVector<double>(
           {0.13456789,
@@ -2062,6 +2085,119 @@ TEST_F(CastExprTest, doubleToDecimal) {
       "Cannot cast DOUBLE 'NaN' to DECIMAL(38, 2). The input value should be finite.");
 }
 
+TEST_F(CastExprTest, realToDecimal) {
+  // Real to short decimal. Only 6 significant digits of a real are kept.
+  const auto input = makeFlatVector<float>(
+      {-3333.03,
+       -2222.02,
+       -1.0,
+       0.00,
+       100,
+       99999.9,
+       10.03,
+       10.05,
+       9.95,
+       -2.12345});
+  testCast(
+      input,
+      makeFlatVector<int64_t>(
+          {-33'330'300,
+           -22'220'200,
+           -10'000,
+           0,
+           1'000'000,
+           999'999'000,
+           100'300,
+           100'500,
+           99'500,
+           -21'235},
+          DECIMAL(10, 4)));
+
+  // Real to long decimal.
+  testCast(
+      input,
+      makeFlatVector<int128_t>(
+          {HugeInt::build(0xFFFFFFFFFFFFFF4B, 0x50EABA2657C90000),
+           HugeInt::build(0xFFFFFFFFFFFFFF87, 0x8B4726C43A860000),
+           -1'000'000'000'000'000'000,
+           0,
+           HugeInt::build(0x5, 0x6BC75E2D63100000),
+           HugeInt::build(0x152D, 0x01649BD298F60000),
+           HugeInt::build(0, 0x8B31B7DBD92B0000),
+           HugeInt::build(0, 0x8B78C5C0B8AD0000),
+           HugeInt::build(0, 0x8A1580485B230000),
+           -2'123'450'000'000'000'000},
+          DECIMAL(38, 18)));
+  testCast(
+      input,
+      makeFlatVector<int128_t>(
+          {-33'330, -22'220, -10, 0, 1'000, 999'999, 100, 101, 100, -21},
+          DECIMAL(20, 1)));
+  testCast(
+      makeNullableFlatVector<float>(
+          {0.134567, 0.000015, 0.000001, 0.999999, 0.123456, std::nullopt}),
+      makeNullableFlatVector<int128_t>(
+          {134'567'000'000'000'000,
+           15'000'000'000'000,
+           1'000'000'000'000,
+           999'999'000'000'000'000,
+           123'456'000'000'000'000,
+           std::nullopt},
+          DECIMAL(38, 18)));
+
+  testThrow<float>(
+      REAL(),
+      DECIMAL(10, 2),
+      {9999999999999999999999.99},
+      "Cannot cast REAL '9.999999778196308E21' to DECIMAL(10, 2). Result overflows.");
+  testThrow<float>(
+      REAL(),
+      DECIMAL(10, 2),
+      {static_cast<float>(
+          static_cast<int128_t>(std::numeric_limits<int64_t>::max()) + 1)},
+      "Cannot cast REAL '9223372036854776000' to DECIMAL(10, 2). Result overflows.");
+  testThrow<float>(
+      REAL(),
+      DECIMAL(10, 2),
+      {static_cast<float>(
+          static_cast<int128_t>(std::numeric_limits<int64_t>::min()) - 1)},
+      "Cannot cast REAL '-9223372036854776000' to DECIMAL(10, 2). Result overflows.");
+  testThrow<float>(
+      REAL(),
+      DECIMAL(20, 2),
+      {static_cast<float>(DecimalUtil::kLongDecimalMax)},
+      "Cannot cast REAL '9.999999680285692E37' to DECIMAL(20, 2). Result overflows.");
+  testThrow<float>(
+      REAL(),
+      DECIMAL(20, 2),
+      {static_cast<float>(DecimalUtil::kLongDecimalMin)},
+      "Cannot cast REAL '-9.999999680285692E37' to DECIMAL(20, 2). Result overflows.");
+  testThrow<float>(
+      REAL(),
+      DECIMAL(38, 2),
+      {std::numeric_limits<float>::max()},
+      "Cannot cast REAL '3.4028234663852886E38' to DECIMAL(38, 2). Result overflows.");
+  testThrow<float>(
+      REAL(),
+      DECIMAL(38, 2),
+      {std::numeric_limits<float>::lowest()},
+      "Cannot cast REAL '-3.4028234663852886E38' to DECIMAL(38, 2). Result overflows.");
+  testCast(
+      makeConstant<float>(std::numeric_limits<float>::min(), 1),
+      makeConstant<int128_t>(0, 1, DECIMAL(38, 2)));
+
+  testThrow<float>(
+      REAL(),
+      DECIMAL(38, 2),
+      {INFINITY},
+      "Cannot cast REAL 'Infinity' to DECIMAL(38, 2). The input value should be finite.");
+  testThrow<float>(
+      REAL(),
+      DECIMAL(38, 2),
+      {NAN},
+      "Cannot cast REAL 'NaN' to DECIMAL(38, 2). The input value should be finite.");
+}
+
 TEST_F(CastExprTest, primitiveNullConstant) {
   // Evaluate cast(NULL::double as bigint).
   auto cast =
diff --git a/velox/type/DecimalUtil.h b/velox/type/DecimalUtil.h
@@ -18,6 +18,7 @@
 
 #include <string>
 #include "velox/common/base/CheckedArithmetic.h"
+#include "velox/common/base/CountBits.h"
 #include "velox/common/base/Exceptions.h"
 #include "velox/common/base/Nulls.h"
 #include "velox/common/base/Status.h"
@@ -203,29 +204,55 @@ class DecimalUtil {
     return static_cast<TOutput>(rescaledValue);
   }
 
-  /// Rescales a double value to decimal value of given precision and scale. The
-  /// output is rescaled value of int128_t or int64_t type. Returns error status
-  /// if fails.
-  template <typename TOutput>
-  inline static Status
-  rescaleDouble(double value, int precision, int scale, TOutput& output) {
+  /// Rescales a floating point value to decimal value of given precision and
+  /// scale. The output is rescaled value of int128_t or int64_t type. Returns
+  /// error status if fails.
+  template <typename TIntput, typename TOutput>
+  inline static Status rescaleFloatingPoint(
+      TIntput value,
+      int precision,
+      int scale,
+      TOutput& output) {
     if (!std::isfinite(value)) {
       return Status::UserError("The input value should be finite.");
     }
 
+    uint8_t digits;
+    if constexpr (std::is_same_v<TIntput, float>) {
+      // A float provides between 6 and 7 decimal digits, so at least 6 digits
+      // are precise.
+      digits = 6;
+    } else {
+      // A double provides from 15 to 17 decimal digits, so at least 15 digits
+      // are precise.
+      digits = 15;
+      if (value <= std::numeric_limits<int128_t>::min() ||
+          value >= std::numeric_limits<int128_t>::max()) {
+        return Status::UserError("Result overflows.");
+      }
+    }
+
+    // Calculate the precise fractional digits.
+    const auto integralValue =
+        static_cast<uint128_t>(value > 0 ? value : -value);
+    const auto integralDigits =
+        integralValue == 0 ? 0 : countDigits(integralValue);
+    const auto fractionDigits = digits - integralDigits;
+    // Scales up the input value to keep all the precise fractional digits
+    // before rounding. Convert value to long double type, as double * int128_t
+    // returns int128_t and fractional digits are lost. No need to consider
+    // 'scaledValue' becoming infinite as DOUBLE_MAX * 10^38 < LONG_DOUBLE_MAX.
+    const auto scaledValue =
+        (long double)value * DecimalUtil::kPowersOfTen[fractionDigits];
+
     long double rounded;
-    // A double provides 16(±1) decimal digits, so at least 15 digits are
-    // precise.
-    if (scale > 15) {
-      // Convert value to long double type, as double * int128_t returns
-      // int128_t and fractional digits are lost. No need to consider 'toValue'
-      // becoming infinite as DOUBLE_MAX * 10^38 < LONG_DOUBLE_MAX.
-      const auto toValue = (long double)value * DecimalUtil::kPowersOfTen[15];
-      rounded = std::round(toValue) * DecimalUtil::kPowersOfTen[scale - 15];
+    if (scale > fractionDigits) {
+      rounded = std::round(scaledValue) *
+          DecimalUtil::kPowersOfTen[scale - fractionDigits];
     } else {
-      const auto toValue =
-          (long double)value * DecimalUtil::kPowersOfTen[scale];
-      rounded = std::round(toValue);
+      rounded = std::round(
+          std::round(scaledValue) /
+          DecimalUtil::kPowersOfTen[fractionDigits - scale]);
     }
 
     const auto result = folly::tryTo<TOutput>(rounded);
diff --git a/velox/type/tests/DecimalTest.cpp b/velox/type/tests/DecimalTest.cpp