Skip to content

Commit

Permalink
Cast timestamp to json (#11013)
Browse files Browse the repository at this point in the history
Summary:
Casting a timestamp to JSON differed from Presto in a few ways. For example, cast(timestamp '1970-01-01 00:00:00.123' as json) would give 1970-01-01T08:00:00.123000000, whereas Presto gives "1970-01-01 00:00:00.123".

Fixed:
- precision/formatting
- difference in hour (08 vs 00)
- missing quotation marks (also in cast date to json)

Pull Request resolved: #11013

Reviewed By: kevinwilfong, amitkdutta

Differential Revision: D62773255

Pulled By: HeidiHan0000

fbshipit-source-id: 68aeea98a634fa5dc3086ee78e917a319e326dc4
  • Loading branch information
HeidiHan0000 authored and facebook-github-bot committed Sep 19, 2024
1 parent fad7750 commit 5127cfe
Show file tree
Hide file tree
Showing 4 changed files with 127 additions and 33 deletions.
2 changes: 1 addition & 1 deletion velox/expression/CastExpr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -718,7 +718,7 @@ void CastExpr::applyPeeled(

auto applyCustomCast = [&]() {
if (castToOperator) {
castToOperator->castTo(input, context, rows, toType, result);
castToOperator->castTo(input, context, rows, toType, result, hooks_);
} else {
castFromOperator->castFrom(input, context, rows, toType, result);
}
Expand Down
10 changes: 10 additions & 0 deletions velox/expression/CastExpr.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,16 @@ class CastOperator {
const TypePtr& resultType,
VectorPtr& result) const = 0;

/// Overload of castTo() that additionally receives the cast hooks supplied by
/// the caller. The default implementation ignores the hooks and forwards to
/// the five-argument castTo(), so operators that do not override this overload
/// keep their existing behavior.
virtual void castTo(
const BaseVector& input,
exec::EvalCtx& context,
const SelectivityVector& rows,
const TypePtr& resultType,
VectorPtr& result,
const std::shared_ptr<CastHooks>& /* hooks */) const {
castTo(input, context, rows, resultType, result);
}

/// Casts a vector of the custom type to another type. This function should
/// not throw when processing input rows, but report errors via
/// context.setError().
Expand Down
33 changes: 29 additions & 4 deletions velox/functions/prestosql/tests/JsonCastTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -359,7 +359,10 @@ TEST_F(JsonCastTest, fromDate) {
testCastToJson<int32_t>(
DATE(),
{0, 1000, -10000, std::nullopt},
{"1970-01-01"_sv, "1972-09-27"_sv, "1942-08-16"_sv, std::nullopt});
{"\"1970-01-01\""_sv,
"\"1972-09-27\""_sv,
"\"1942-08-16\""_sv,
std::nullopt});
testCastToJson<int32_t>(
DATE(),
{std::nullopt, std::nullopt, std::nullopt, std::nullopt},
Expand Down Expand Up @@ -402,9 +405,9 @@ TEST_F(JsonCastTest, fromTimestamp) {
Timestamp{10000000, 0},
Timestamp{-1, 9000},
std::nullopt},
{"1970-01-01T00:00:00.000000000"_sv,
"1970-04-26T17:46:40.000000000"_sv,
"1969-12-31T23:59:59.000009000"_sv,
{"\"1970-01-01 00:00:00.000\""_sv,
"\"1970-04-26 17:46:40.000\""_sv,
"\"1969-12-31 23:59:59.000\""_sv,
std::nullopt});
testCastToJson<Timestamp>(
TIMESTAMP(),
Expand Down Expand Up @@ -450,6 +453,13 @@ TEST_F(JsonCastTest, fromArray) {
"[red,blue]", "[null,null,purple]", "[]"};
testCastFromArray(ARRAY(JSON()), array, expectedJsonArray);

// Tests array of Timestamp elements.
TwoDimVector<Timestamp> arrayTimestamps{
{Timestamp{0, 0}, Timestamp{10000000, 0}}};
std::vector<std::optional<JsonNativeType>> expectedTimestamp{
"[\"1970-01-01 00:00:00.000\",\"1970-04-26 17:46:40.000\"]"};
testCastFromArray(ARRAY(TIMESTAMP()), arrayTimestamps, expectedTimestamp);

// Tests array whose elements are of unknown type.
auto arrayOfUnknownElements = makeArrayWithDictionaryElements<UnknownValue>(
{std::nullopt, std::nullopt, std::nullopt, std::nullopt},
Expand Down Expand Up @@ -538,6 +548,14 @@ TEST_F(JsonCastTest, fromMap) {
R"({"false":2,"true":null})", "{}"};
testCastFromMap(MAP(BOOLEAN(), BIGINT()), mapBoolKey, expectedBoolKey);

// Tests map with Timestamp values.
std::vector<std::vector<Pair<int16_t, Timestamp>>> mapTimestamp{
{{3, Timestamp{0, 0}}, {4, Timestamp{0, 0}}}, {}};
std::vector<std::optional<JsonNativeType>> expectedTimestamp{
R"({"3":"1970-01-01 00:00:00.000","4":"1970-01-01 00:00:00.000"})", "{}"};
testCastFromMap(
MAP(SMALLINT(), TIMESTAMP()), mapTimestamp, expectedTimestamp);

// Tests map whose values are of unknown type.
std::vector<std::optional<StringView>> keys{
"a"_sv, "b"_sv, "c"_sv, "d"_sv, "e"_sv, "f"_sv, "g"_sv};
Expand Down Expand Up @@ -624,6 +642,13 @@ TEST_F(JsonCastTest, fromRow) {
child3,
expectedJsonChild);

// Tests row whose children are Timestamps.
auto rowOfTimestampElements = makeRowWithDictionaryElements<Timestamp>(
{{Timestamp{0, 0}, Timestamp{10000000, 0}}}, ROW({TIMESTAMP()}));
auto rowOfTimestampElementsExpected = makeNullableFlatVector<JsonNativeType>(
{"[null]", "[\"1970-01-01 00:00:00.000\"]"}, JSON());
testCast(rowOfTimestampElements, rowOfTimestampElementsExpected);

// Tests row whose children are of unknown type.
auto rowOfUnknownChildren = makeRowWithDictionaryElements<UnknownValue>(
{{std::nullopt, std::nullopt}, {std::nullopt, std::nullopt}},
Expand Down
115 changes: 87 additions & 28 deletions velox/functions/prestosql/types/JsonType.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,8 @@ void generateJsonTyped(
const SimpleVector<T>& input,
int row,
std::string& result,
const TypePtr& type) {
const TypePtr& type,
const std::shared_ptr<exec::CastHooks>& hooks) {
auto value = input.valueAt(row);

if constexpr (std::is_same_v<T, StringView>) {
Expand Down Expand Up @@ -80,9 +81,30 @@ void generateJsonTyped(
folly::toAppend<std::string, T>(value, &result);
}
} else if constexpr (std::is_same_v<T, Timestamp>) {
result.append(std::to_string(value));
std::string buffer;
if (hooks) {
Timestamp inputValue = value;
const auto& options = hooks->timestampToStringOptions();
if (options.timeZone) {
inputValue.toTimezone(*(options.timeZone));
}
buffer.resize(getMaxStringLength(options));
const auto stringView =
Timestamp::tsToStringView(inputValue, options, buffer.data());
buffer.resize(stringView.size());
} else {
buffer = std::to_string(value);
}
result.reserve(buffer.size() + 2);
result.append("\"");
result.append(buffer);
result.append("\"");
} else if (type->isDate()) {
result.append(DATE()->toString(value));
std::string stringValue = DATE()->toString(value);
result.reserve(stringValue.size() + 2);
result.append("\"");
result.append(stringValue);
result.append("\"");
} else if (type->isDecimal()) {
result.append(DecimalUtil::toString(value, type));
} else {
Expand All @@ -96,15 +118,16 @@ void generateJsonNonKeyTyped(
const SimpleVector<T>& inputVector,
exec::EvalCtx& context,
const SelectivityVector& rows,
FlatVector<StringView>& flatResult) {
FlatVector<StringView>& flatResult,
const std::shared_ptr<exec::CastHooks>& hooks) {
std::string result;
context.applyToSelectedNoThrow(rows, [&](auto row) {
if (inputVector.isNullAt(row)) {
flatResult.set(row, "null");
} else {
result.clear();
generateJsonTyped<T, legacyCast>(
inputVector, row, result, inputVector.type());
inputVector, row, result, inputVector.type(), hooks);

flatResult.set(row, StringView{result});
}
Expand All @@ -116,7 +139,8 @@ void generateJsonKeyTyped(
const SimpleVector<T>& inputVector,
exec::EvalCtx& context,
const SelectivityVector& rows,
FlatVector<StringView>& flatResult) {
FlatVector<StringView>& flatResult,
const std::shared_ptr<exec::CastHooks>& hooks) {
std::string result;
context.applyToSelectedNoThrow(rows, [&](auto row) {
if (inputVector.isNullAt(row)) {
Expand All @@ -129,7 +153,7 @@ void generateJsonKeyTyped(
}

generateJsonTyped<T, legacyCast>(
inputVector, row, result, inputVector.type());
inputVector, row, result, inputVector.type(), hooks);

if constexpr (!std::is_same_v<T, StringView>) {
result.append("\"");
Expand All @@ -149,6 +173,7 @@ void castToJson(
exec::EvalCtx& context,
const SelectivityVector& rows,
FlatVector<StringView>& flatResult,
const std::shared_ptr<exec::CastHooks>& hooks,
bool isMapKey = false) {
using T = typename TypeTraits<kind>::NativeType;

Expand All @@ -160,15 +185,18 @@ void castToJson(
if (FOLLY_LIKELY(!legacyCast)) {
if (!isMapKey) {
generateJsonNonKeyTyped<T, false>(
*inputVector, context, rows, flatResult);
*inputVector, context, rows, flatResult, hooks);
} else {
generateJsonKeyTyped<T, false>(*inputVector, context, rows, flatResult);
generateJsonKeyTyped<T, false>(
*inputVector, context, rows, flatResult, hooks);
}
} else {
if (!isMapKey) {
generateJsonNonKeyTyped<T, true>(*inputVector, context, rows, flatResult);
generateJsonNonKeyTyped<T, true>(
*inputVector, context, rows, flatResult, hooks);
} else {
generateJsonKeyTyped<T, true>(*inputVector, context, rows, flatResult);
generateJsonKeyTyped<T, true>(
*inputVector, context, rows, flatResult, hooks);
}
}
}
Expand All @@ -178,19 +206,22 @@ void castToJsonFromArray(
const BaseVector& input,
exec::EvalCtx& context,
const SelectivityVector& rows,
FlatVector<StringView>& flatResult);
FlatVector<StringView>& flatResult,
const std::shared_ptr<exec::CastHooks>& hooks);

void castToJsonFromMap(
const BaseVector& input,
exec::EvalCtx& context,
const SelectivityVector& rows,
FlatVector<StringView>& flatResult);
FlatVector<StringView>& flatResult,
const std::shared_ptr<exec::CastHooks>& hooks);

void castToJsonFromRow(
const BaseVector& input,
exec::EvalCtx& context,
const SelectivityVector& rows,
FlatVector<StringView>& flatResult);
FlatVector<StringView>& flatResult,
const std::shared_ptr<exec::CastHooks>& hooks);

// Casts complex-type input vectors to Json type.
template <
Expand All @@ -201,16 +232,17 @@ void castToJson(
exec::EvalCtx& context,
const SelectivityVector& rows,
FlatVector<StringView>& flatResult,
const std::shared_ptr<exec::CastHooks>& hooks,
bool isMapKey = false) {
VELOX_CHECK(
!isMapKey, "Casting map with complex key type to JSON is not supported");

if constexpr (kind == TypeKind::ARRAY) {
castToJsonFromArray(input, context, rows, flatResult);
castToJsonFromArray(input, context, rows, flatResult, hooks);
} else if constexpr (kind == TypeKind::MAP) {
castToJsonFromMap(input, context, rows, flatResult);
castToJsonFromMap(input, context, rows, flatResult, hooks);
} else if constexpr (kind == TypeKind::ROW) {
castToJsonFromRow(input, context, rows, flatResult);
castToJsonFromRow(input, context, rows, flatResult, hooks);
} else {
VELOX_FAIL(
"Casting {} to JSON is not supported.", input.type()->toString());
Expand All @@ -224,6 +256,7 @@ struct AsJson {
const VectorPtr& input,
const SelectivityVector& rows,
const BufferPtr& elementToTopLevelRows,
const std::shared_ptr<exec::CastHooks>& hooks,
bool isMapKey = false)
: decoded_(context) {
VELOX_CHECK(rows.hasSelections());
Expand All @@ -234,7 +267,7 @@ struct AsJson {
json_ = input;
} else {
if (!exec::PeeledEncoding::isPeelable(input->encoding())) {
doCast(context, input, rows, isMapKey, json_);
doCast(context, input, rows, isMapKey, json_, hooks);
} else {
exec::withContextSaver([&](exec::ContextSaver& saver) {
exec::LocalSelectivityVector newRowsHolder(*context.execCtx());
Expand All @@ -250,7 +283,7 @@ struct AsJson {
context.saveAndReset(saver, rows);
context.setPeeledEncoding(peeledEncoding);

doCast(context, peeledVectors[0], *newRows, isMapKey, json_);
doCast(context, peeledVectors[0], *newRows, isMapKey, json_, hooks);
json_ = context.getPeeledEncoding()->wrap(
json_->type(), context.pool(), json_, rows);
});
Expand Down Expand Up @@ -300,7 +333,8 @@ struct AsJson {
const VectorPtr& input,
const SelectivityVector& baseRows,
bool isMapKey,
VectorPtr& result) {
VectorPtr& result,
const std::shared_ptr<exec::CastHooks>& hooks) {
context.ensureWritable(baseRows, JSON(), result);
auto flatJsonStrings = result->as<FlatVector<StringView>>();

Expand All @@ -311,6 +345,7 @@ struct AsJson {
context,
baseRows,
*flatJsonStrings,
hooks,
isMapKey);
}

Expand Down Expand Up @@ -344,7 +379,8 @@ void castToJsonFromArray(
const BaseVector& input,
exec::EvalCtx& context,
const SelectivityVector& rows,
FlatVector<StringView>& flatResult) {
FlatVector<StringView>& flatResult,
const std::shared_ptr<exec::CastHooks>& hooks) {
// input is guaranteed to be in flat encoding when passed in.
auto inputArray = input.as<ArrayVector>();

Expand All @@ -369,7 +405,8 @@ void castToJsonFromArray(

auto elementToTopLevelRows = functions::getElementToTopLevelRows(
elements->size(), rows, inputArray, context.pool());
AsJson elementsAsJson{context, elements, elementsRows, elementToTopLevelRows};
AsJson elementsAsJson{
context, elements, elementsRows, elementToTopLevelRows, hooks};

// Estimates an upperbound of the total length of all Json strings for the
// input according to the length of all elements Json strings and the
Expand Down Expand Up @@ -422,7 +459,8 @@ void castToJsonFromMap(
const BaseVector& input,
exec::EvalCtx& context,
const SelectivityVector& rows,
FlatVector<StringView>& flatResult) {
FlatVector<StringView>& flatResult,
const std::shared_ptr<exec::CastHooks>& hooks) {
// input is guaranteed to be in flat encoding when passed in.
auto inputMap = input.as<MapVector>();

Expand Down Expand Up @@ -450,8 +488,9 @@ void castToJsonFromMap(
// Maps with unsupported key types should have already been rejected by
// JsonCastOperator::isSupportedType() beforehand.
AsJson keysAsJson{
context, mapKeys, elementsRows, elementToTopLevelRows, true};
AsJson valuesAsJson{context, mapValues, elementsRows, elementToTopLevelRows};
context, mapKeys, elementsRows, elementToTopLevelRows, hooks, true};
AsJson valuesAsJson{
context, mapValues, elementsRows, elementToTopLevelRows, hooks};

// Estimates an upperbound of the total length of all Json strings for the
// input according to the length of all elements Json strings and the
Expand Down Expand Up @@ -516,7 +555,8 @@ void castToJsonFromRow(
const BaseVector& input,
exec::EvalCtx& context,
const SelectivityVector& rows,
FlatVector<StringView>& flatResult) {
FlatVector<StringView>& flatResult,
const std::shared_ptr<exec::CastHooks>& hooks) {
// input is guaranteed to be in flat encoding when passed in.
VELOX_CHECK_EQ(input.encoding(), VectorEncoding::Simple::ROW);
auto inputRow = input.as<RowVector>();
Expand All @@ -528,7 +568,8 @@ void castToJsonFromRow(
size_t childrenStringSize = 0;
std::vector<AsJson> childrenAsJson;
for (int i = 0; i < childrenSize; ++i) {
childrenAsJson.emplace_back(context, inputRow->childAt(i), rows, nullptr);
childrenAsJson.emplace_back(
context, inputRow->childAt(i), rows, nullptr, hooks);

context.applyToSelectedNoThrow(rows, [&](auto row) {
if (inputRow->isNullAt(row)) {
Expand Down Expand Up @@ -1073,6 +1114,14 @@ class JsonCastOperator : public exec::CastOperator {
const TypePtr& resultType,
VectorPtr& result) const override;

/// Overrides the hooks-aware castTo() overload. `hooks` may be null; the
/// five-argument overload calls this one with nullptr.
void castTo(
const BaseVector& input,
exec::EvalCtx& context,
const SelectivityVector& rows,
const TypePtr& resultType,
VectorPtr& result,
const std::shared_ptr<exec::CastHooks>& hooks) const override;

void castFrom(
const BaseVector& input,
exec::EvalCtx& context,
Expand Down Expand Up @@ -1197,13 +1246,23 @@ void JsonCastOperator::castTo(
const SelectivityVector& rows,
const TypePtr& resultType,
VectorPtr& result) const {
castTo(input, context, rows, resultType, result, nullptr);
}

/// Casts `input` to JSON for the selected `rows`, writing into `result`.
/// `hooks` is passed through to the type-dispatched castToJson helpers; it may
/// be null, as it is when this is called from the five-argument overload.
void JsonCastOperator::castTo(
const BaseVector& input,
exec::EvalCtx& context,
const SelectivityVector& rows,
const TypePtr& resultType,
VectorPtr& result,
const std::shared_ptr<exec::CastHooks>& hooks) const {
context.ensureWritable(rows, resultType, result);
auto* flatResult = result->as<FlatVector<StringView>>();

// Casting from VARBINARY and OPAQUE are not supported and should have been
// rejected by isSupportedType() in the caller.
VELOX_DYNAMIC_TYPE_DISPATCH_ALL(
castToJson, input.typeKind(), input, context, rows, *flatResult);
castToJson, input.typeKind(), input, context, rows, *flatResult, hooks);
}

/// Converts an input vector from Json type to the type of result vector.
Expand Down

0 comments on commit 5127cfe

Please sign in to comment.