Skip to content

Commit

Permalink
address comments
Browse files Browse the repository at this point in the history
  • Loading branch information
zhli1142015 committed Dec 10, 2024
1 parent 65d2648 commit 2762885
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 23 deletions.
22 changes: 12 additions & 10 deletions velox/docs/functions/spark/json.rst
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,17 @@ JSON Functions

.. spark:function:: from_json(jsonString) -> [json object]
Casting a JSON text to a supported type returns the value represented by this
JSON text. The JSON text must represent a valid value of the type it is cast
to, or null will be returned. Casting to ARRAY and MAP is supported when the
element type of the array is one of the supported types, or when the key type of
the map is VARCHAR and value type of the map is one of the supported types. When
casting from JSON to ROW, only JSON objects are supported. Casting from a JSON
object to ROW uses a case-sensitive match for the JSON keys.
Casting a JSON text to a supported type returns the value represented by
the JSON text if it matches the target type; otherwise, NULL is returned.
The function supports ARRAY, MAP, and ROW as root types. Primitive values are
supported only when nested inside one of these root types; the supported
primitive types are BOOLEAN, TINYINT, SMALLINT, INTEGER, BIGINT, REAL, DOUBLE,
and VARCHAR. Casting to ARRAY and MAP is supported when the
element type of the array or the value type of the map is one of these
supported types. For maps, the key type must be VARCHAR. When casting to
ROW, only JSON objects are supported, and the keys in the JSON object must
match the field names of the ROW exactly (case-sensitive).
Behaviors of the casts are shown with the examples below::

SELECT from_json('{"a": 1}', 'ROW(a INT)'); -- {a=1}
SELECT from_json('["name", "age", "id"]', 'array<string>'); -- ['name', 'age', 'id']
SELECT from_json('{"a": 1, "b": 2}', 'map<string,int>'); -- {a=1, b=2}
SELECT from_json('{"a": 1}'); -- {'a'=1} // Output type: ROW(a INTEGER)
SELECT from_json('["name", "age", "id"]'); -- ['name', 'age', 'id'] // Output type: ARRAY(VARCHAR)
SELECT from_json('{"a": 1, "b": 2}'); -- {'a'=1, 'b'=2} // Output type: MAP(VARCHAR,INTEGER)
33 changes: 20 additions & 13 deletions velox/functions/sparksql/specialforms/FromJson.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -418,7 +418,7 @@ class FromJsonFunction final : public exec::VectorFunction {
break;
}
default:
VELOX_UNSUPPORTED("INVALID_JSON_SCHEMA");
VELOX_UNSUPPORTED("Unsupported type {}.", result->type()->toString());
}
}

Expand Down Expand Up @@ -518,32 +518,39 @@ class FromJsonFunction final : public exec::VectorFunction {
mutable std::string paddedInput_;
};

bool isSupportedType(const TypePtr& other, bool isRootType = true) {
switch (other->kind()) {
bool isSupportedType(const TypePtr& type, bool isRootType) {
switch (type->kind()) {
case TypeKind::ARRAY:
return isSupportedType(other->childAt(0), false);
return isSupportedType(type->childAt(0), false);
case TypeKind::ROW:
for (const auto& child : other->as<TypeKind::ROW>().children()) {
for (const auto& child : type->as<TypeKind::ROW>().children()) {
if (!isSupportedType(child, false)) {
return false;
}
}
return true;
case TypeKind::MAP:
return (
other->childAt(0)->kind() == TypeKind::VARCHAR &&
isSupportedType(other->childAt(1), false));
type->childAt(0)->kind() == TypeKind::VARCHAR &&
isSupportedType(type->childAt(1), false));
case TypeKind::BIGINT: {
if (type->isDecimal()) {
return false;
}
return !isRootType;
}
case TypeKind::INTEGER: {
if (type->isDate()) {
return false;
}
return !isRootType;
}
case TypeKind::BOOLEAN:
case TypeKind::BIGINT:
case TypeKind::INTEGER:
case TypeKind::SMALLINT:
case TypeKind::TINYINT:
case TypeKind::DOUBLE:
case TypeKind::REAL:
case TypeKind::VARCHAR: {
if (other->isDate() || other->isDecimal()) {
return false;
}
return !isRootType;
}
default:
Expand All @@ -569,7 +576,7 @@ exec::ExprPtr FromJsonCallToSpecialForm::constructSpecialForm(
TypeKind::VARCHAR,
"The first argument of from_json should be of varchar type.");

if (!isSupportedType(type)) {
if (!isSupportedType(type, true)) {
VELOX_UNSUPPORTED("Unsupported type {}.", type->toString());
}

Expand Down

0 comments on commit 2762885

Please sign in to comment.