diff --git a/core/build.gradle b/core/build.gradle index c596251342..d16d1b31fb 100644 --- a/core/build.gradle +++ b/core/build.gradle @@ -54,6 +54,7 @@ dependencies { api "com.fasterxml.jackson.core:jackson-core:${versions.jackson}" api "com.fasterxml.jackson.core:jackson-databind:${versions.jackson_databind}" api "com.fasterxml.jackson.core:jackson-annotations:${versions.jackson}" + api group: 'com.jayway.jsonpath', name: 'json-path', version: '2.9.0' api group: 'com.google.code.gson', name: 'gson', version: '2.8.9' api group: 'com.tdunning', name: 't-digest', version: '3.3' api project(':common') diff --git a/core/src/main/java/org/opensearch/sql/ast/expression/Cast.java b/core/src/main/java/org/opensearch/sql/ast/expression/Cast.java index 541dbedead..854ba0ed69 100644 --- a/core/src/main/java/org/opensearch/sql/ast/expression/Cast.java +++ b/core/src/main/java/org/opensearch/sql/ast/expression/Cast.java @@ -13,6 +13,7 @@ import static org.opensearch.sql.expression.function.BuiltinFunctionName.CAST_TO_FLOAT; import static org.opensearch.sql.expression.function.BuiltinFunctionName.CAST_TO_INT; import static org.opensearch.sql.expression.function.BuiltinFunctionName.CAST_TO_IP; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.CAST_TO_JSON; import static org.opensearch.sql.expression.function.BuiltinFunctionName.CAST_TO_LONG; import static org.opensearch.sql.expression.function.BuiltinFunctionName.CAST_TO_SHORT; import static org.opensearch.sql.expression.function.BuiltinFunctionName.CAST_TO_STRING; @@ -56,6 +57,7 @@ public class Cast extends UnresolvedExpression { .put("timestamp", CAST_TO_TIMESTAMP.getName()) .put("datetime", CAST_TO_DATETIME.getName()) .put("ip", CAST_TO_IP.getName()) + .put("json", CAST_TO_JSON.getName()) .build(); /** The source expression cast from. 
*/ diff --git a/core/src/main/java/org/opensearch/sql/expression/DSL.java b/core/src/main/java/org/opensearch/sql/expression/DSL.java index 2e3bdf7e22..f284820639 100644 --- a/core/src/main/java/org/opensearch/sql/expression/DSL.java +++ b/core/src/main/java/org/opensearch/sql/expression/DSL.java @@ -687,6 +687,14 @@ public static FunctionExpression jsonValid(Expression... expressions) { return compile(FunctionProperties.None, BuiltinFunctionName.JSON_VALID, expressions); } + public static FunctionExpression jsonExtract(Expression... expressions) { + return compile(FunctionProperties.None, BuiltinFunctionName.JSON_EXTRACT, expressions); + } + + public static FunctionExpression stringToJson(Expression value) { + return compile(FunctionProperties.None, BuiltinFunctionName.JSON, value); + } + public static Aggregator avg(Expression... expressions) { return aggregate(BuiltinFunctionName.AVG, expressions); } @@ -843,6 +851,10 @@ public static FunctionExpression castIp(Expression value) { return compile(FunctionProperties.None, BuiltinFunctionName.CAST_TO_IP, value); } + public static FunctionExpression castJson(Expression value) { + return compile(FunctionProperties.None, BuiltinFunctionName.CAST_TO_JSON, value); + } + public static FunctionExpression typeof(Expression value) { return compile(FunctionProperties.None, BuiltinFunctionName.TYPEOF, value); } diff --git a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java index 3cc9569a1e..4fccb95ded 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java @@ -206,6 +206,8 @@ public enum BuiltinFunctionName { /** Json Functions. */ JSON_VALID(FunctionName.of("json_valid")), + JSON(FunctionName.of("json")), + JSON_EXTRACT(FunctionName.of("json_extract")), /** GEOSPATIAL Functions. 
*/ GEOIP(FunctionName.of("geoip")), @@ -238,6 +240,7 @@ public enum BuiltinFunctionName { CAST_TO_TIMESTAMP(FunctionName.of("cast_to_timestamp")), CAST_TO_DATETIME(FunctionName.of("cast_to_datetime")), CAST_TO_IP(FunctionName.of("cast_to_ip")), + CAST_TO_JSON(FunctionName.of("cast_to_json")), TYPEOF(FunctionName.of("typeof")), /** Relevance Function. */ diff --git a/core/src/main/java/org/opensearch/sql/expression/json/JsonFunctions.java b/core/src/main/java/org/opensearch/sql/expression/json/JsonFunctions.java index acc0c4c064..30c5cde162 100644 --- a/core/src/main/java/org/opensearch/sql/expression/json/JsonFunctions.java +++ b/core/src/main/java/org/opensearch/sql/expression/json/JsonFunctions.java @@ -7,8 +7,10 @@ import static org.opensearch.sql.data.type.ExprCoreType.BOOLEAN; import static org.opensearch.sql.data.type.ExprCoreType.STRING; +import static org.opensearch.sql.data.type.ExprCoreType.UNDEFINED; import static org.opensearch.sql.expression.function.FunctionDSL.define; import static org.opensearch.sql.expression.function.FunctionDSL.impl; +import static org.opensearch.sql.expression.function.FunctionDSL.nullMissingHandling; import lombok.experimental.UtilityClass; import org.opensearch.sql.expression.function.BuiltinFunctionName; @@ -20,10 +22,24 @@ public class JsonFunctions { public void register(BuiltinFunctionRepository repository) { repository.register(jsonValid()); + repository.register(jsonFunction()); + repository.register(jsonExtract()); } private DefaultFunctionResolver jsonValid() { return define( BuiltinFunctionName.JSON_VALID.getName(), impl(JsonUtils::isValidJson, BOOLEAN, STRING)); } + + private DefaultFunctionResolver jsonFunction() { + return define( + BuiltinFunctionName.JSON.getName(), + impl(nullMissingHandling(JsonUtils::castJson), UNDEFINED, STRING)); + } + + private DefaultFunctionResolver jsonExtract() { + return define( + BuiltinFunctionName.JSON_EXTRACT.getName(), + impl(JsonUtils::extractJson, UNDEFINED, STRING, STRING)); + 
} } diff --git a/core/src/main/java/org/opensearch/sql/expression/operator/convert/TypeCastOperators.java b/core/src/main/java/org/opensearch/sql/expression/operator/convert/TypeCastOperators.java index b388f7d89a..c1391ac9ab 100644 --- a/core/src/main/java/org/opensearch/sql/expression/operator/convert/TypeCastOperators.java +++ b/core/src/main/java/org/opensearch/sql/expression/operator/convert/TypeCastOperators.java @@ -17,6 +17,7 @@ import static org.opensearch.sql.data.type.ExprCoreType.STRING; import static org.opensearch.sql.data.type.ExprCoreType.TIME; import static org.opensearch.sql.data.type.ExprCoreType.TIMESTAMP; +import static org.opensearch.sql.data.type.ExprCoreType.UNDEFINED; import static org.opensearch.sql.expression.function.FunctionDSL.impl; import static org.opensearch.sql.expression.function.FunctionDSL.implWithProperties; import static org.opensearch.sql.expression.function.FunctionDSL.nullMissingHandling; @@ -42,6 +43,7 @@ import org.opensearch.sql.expression.function.BuiltinFunctionRepository; import org.opensearch.sql.expression.function.DefaultFunctionResolver; import org.opensearch.sql.expression.function.FunctionDSL; +import org.opensearch.sql.utils.JsonUtils; @UtilityClass public class TypeCastOperators { @@ -57,6 +59,7 @@ public static void register(BuiltinFunctionRepository repository) { repository.register(castToDouble()); repository.register(castToBoolean()); repository.register(castToIp()); + repository.register(castToJson()); repository.register(castToDate()); repository.register(castToTime()); repository.register(castToTimestamp()); @@ -105,7 +108,8 @@ private static DefaultFunctionResolver castToShort() { impl( nullMissingHandling((v) -> new ExprShortValue(v.booleanValue() ? 
1 : 0)), SHORT, - BOOLEAN)); + BOOLEAN), + impl(nullMissingHandling((v) -> v), SHORT, UNDEFINED)); } private static DefaultFunctionResolver castToInt() { @@ -119,7 +123,8 @@ private static DefaultFunctionResolver castToInt() { impl( nullMissingHandling((v) -> new ExprIntegerValue(v.booleanValue() ? 1 : 0)), INTEGER, - BOOLEAN)); + BOOLEAN), + impl(nullMissingHandling((v) -> v), INTEGER, UNDEFINED)); } private static DefaultFunctionResolver castToLong() { @@ -133,7 +138,8 @@ private static DefaultFunctionResolver castToLong() { impl( nullMissingHandling((v) -> new ExprLongValue(v.booleanValue() ? 1L : 0L)), LONG, - BOOLEAN)); + BOOLEAN), + impl(nullMissingHandling((v) -> v), LONG, UNDEFINED)); } private static DefaultFunctionResolver castToFloat() { @@ -147,7 +153,8 @@ private static DefaultFunctionResolver castToFloat() { impl( nullMissingHandling((v) -> new ExprFloatValue(v.booleanValue() ? 1f : 0f)), FLOAT, - BOOLEAN)); + BOOLEAN), + impl(nullMissingHandling((v) -> v), FLOAT, UNDEFINED)); } private static DefaultFunctionResolver castToDouble() { @@ -161,7 +168,8 @@ private static DefaultFunctionResolver castToDouble() { impl( nullMissingHandling((v) -> new ExprDoubleValue(v.booleanValue() ? 
1D : 0D)), DOUBLE, - BOOLEAN)); + BOOLEAN), + impl(nullMissingHandling((v) -> v), DOUBLE, UNDEFINED)); } private static DefaultFunctionResolver castToBoolean() { @@ -173,7 +181,8 @@ private static DefaultFunctionResolver castToBoolean() { STRING), impl( nullMissingHandling((v) -> ExprBooleanValue.of(v.doubleValue() != 0)), BOOLEAN, DOUBLE), - impl(nullMissingHandling((v) -> v), BOOLEAN, BOOLEAN)); + impl(nullMissingHandling((v) -> v), BOOLEAN, BOOLEAN), + impl(nullMissingHandling((v) -> v), BOOLEAN, UNDEFINED)); } private static DefaultFunctionResolver castToIp() { @@ -183,6 +192,12 @@ private static DefaultFunctionResolver castToIp() { impl(nullMissingHandling((v) -> v), IP, IP)); } + private static DefaultFunctionResolver castToJson() { + return FunctionDSL.define( + BuiltinFunctionName.CAST_TO_JSON.getName(), + impl(nullMissingHandling(JsonUtils::castJson), UNDEFINED, STRING)); + } + private static DefaultFunctionResolver castToDate() { return FunctionDSL.define( BuiltinFunctionName.CAST_TO_DATE.getName(), diff --git a/core/src/main/java/org/opensearch/sql/utils/JsonUtils.java b/core/src/main/java/org/opensearch/sql/utils/JsonUtils.java index 37c374286e..433abe7673 100644 --- a/core/src/main/java/org/opensearch/sql/utils/JsonUtils.java +++ b/core/src/main/java/org/opensearch/sql/utils/JsonUtils.java @@ -1,10 +1,34 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + package org.opensearch.sql.utils; +import static org.opensearch.sql.data.model.ExprValueUtils.*; + import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; +import com.jayway.jsonpath.InvalidJsonException; +import com.jayway.jsonpath.InvalidPathException; +import com.jayway.jsonpath.JsonPath; +import com.jayway.jsonpath.PathNotFoundException; +import java.util.LinkedHashMap; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; import 
lombok.experimental.UtilityClass; +import org.opensearch.sql.data.model.ExprBooleanValue; +import org.opensearch.sql.data.model.ExprCollectionValue; +import org.opensearch.sql.data.model.ExprDoubleValue; +import org.opensearch.sql.data.model.ExprIntegerValue; +import org.opensearch.sql.data.model.ExprNullValue; +import org.opensearch.sql.data.model.ExprStringValue; +import org.opensearch.sql.data.model.ExprTupleValue; import org.opensearch.sql.data.model.ExprValue; import org.opensearch.sql.data.model.ExprValueUtils; +import org.opensearch.sql.exception.SemanticCheckException; @UtilityClass public class JsonUtils { @@ -12,7 +36,7 @@ public class JsonUtils { * Checks if given JSON string can be parsed as valid JSON. * * @param jsonExprValue JSON string (e.g. "{\"hello\": \"world\"}"). - * @return true if the string can be parsed as valid JSON, else false. + * @return true if the string can be parsed as valid JSON, else false (including null or missing). */ public static ExprValue isValidJson(ExprValue jsonExprValue) { ObjectMapper objectMapper = new ObjectMapper(); @@ -23,9 +47,101 @@ public static ExprValue isValidJson(ExprValue jsonExprValue) { try { objectMapper.readTree(jsonExprValue.stringValue()); - return ExprValueUtils.LITERAL_TRUE; + return LITERAL_TRUE; } catch (JsonProcessingException e) { - return ExprValueUtils.LITERAL_FALSE; + return LITERAL_FALSE; + } + } + + /** + * Converts a JSON encoded string to a {@link ExprValue}. Expression type will be UNDEFINED. + * + * @param json JSON string (e.g. "{\"hello\": \"world\"}"). + * @return ExprValue returns an expression that best represents the provided JSON-encoded string. + *
    + *
  1. {@link ExprTupleValue} if the JSON is an object + *
  2. {@link ExprCollectionValue} if the JSON is an array + *
  3. {@link ExprDoubleValue} if the JSON is a floating-point number scalar + *
  4. {@link ExprIntegerValue} if the JSON is an integral number scalar + *
  5. {@link ExprStringValue} if the JSON is a string scalar + *
  6. {@link ExprBooleanValue} if the JSON is a boolean scalar + *
  7. {@link ExprNullValue} if the JSON is null or empty (invalid JSON throws instead) + *
+ */ + public static ExprValue castJson(ExprValue json) { + ObjectMapper objectMapper = new ObjectMapper(); + JsonNode jsonNode; + try { + jsonNode = objectMapper.readTree(json.stringValue()); + } catch (JsonProcessingException e) { + final String errorFormat = "JSON string '%s' is not valid. Error details: %s"; + throw new SemanticCheckException(String.format(errorFormat, json, e.getMessage()), e); + } + + return processJsonNode(jsonNode); + } + + /** + * Extract value of JSON string at given JSON path. + * + * @param json JSON string (e.g. "{\"hello\": \"world\"}"). + * @param path JSON path (e.g. "$.hello") + * @return ExprValue of value at given path of json string. + */ + public static ExprValue extractJson(ExprValue json, ExprValue path) { + if (json == LITERAL_NULL || json == LITERAL_MISSING) { + return json; + } + + String jsonString = json.stringValue(); + String jsonPath = path.stringValue(); + + if (jsonString.isEmpty()) { + return LITERAL_NULL; + } + + try { + Object results = JsonPath.parse(jsonString).read(jsonPath); + return ExprValueUtils.fromObjectValue(results); + } catch (PathNotFoundException e) { + return LITERAL_NULL; + } catch (InvalidPathException e) { + final String errorFormat = "JSON path '%s' is not valid. Error details: %s"; + throw new SemanticCheckException(String.format(errorFormat, path, e.getMessage()), e); + } catch (InvalidJsonException e) { + final String errorFormat = "JSON string '%s' is not valid. 
Error details: %s"; + throw new SemanticCheckException(String.format(errorFormat, json, e.getMessage()), e); + } + } + + private static ExprValue processJsonNode(JsonNode jsonNode) { + switch (jsonNode.getNodeType()) { + case ARRAY: + List elements = new LinkedList<>(); + for (var iter = jsonNode.iterator(); iter.hasNext(); ) { + jsonNode = iter.next(); + elements.add(processJsonNode(jsonNode)); + } + return new ExprCollectionValue(elements); + case OBJECT: + Map values = new LinkedHashMap<>(); + for (var iter = jsonNode.fields(); iter.hasNext(); ) { + Map.Entry entry = iter.next(); + values.put(entry.getKey(), processJsonNode(entry.getValue())); + } + return ExprTupleValue.fromExprValueMap(values); + case STRING: + return new ExprStringValue(jsonNode.asText()); + case NUMBER: + if (jsonNode.isFloatingPointNumber()) { + return new ExprDoubleValue(jsonNode.asDouble()); + } + return new ExprIntegerValue(jsonNode.asLong()); + case BOOLEAN: + return jsonNode.asBoolean() ? LITERAL_TRUE : LITERAL_FALSE; + default: + // in all other cases, return null + return LITERAL_NULL; } } } diff --git a/core/src/test/java/org/opensearch/sql/expression/json/JsonFunctionsTest.java b/core/src/test/java/org/opensearch/sql/expression/json/JsonFunctionsTest.java index 3228a565c2..1f37e1f0db 100644 --- a/core/src/test/java/org/opensearch/sql/expression/json/JsonFunctionsTest.java +++ b/core/src/test/java/org/opensearch/sql/expression/json/JsonFunctionsTest.java @@ -7,58 +7,320 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; import static org.opensearch.sql.data.model.ExprValueUtils.LITERAL_FALSE; import static org.opensearch.sql.data.model.ExprValueUtils.LITERAL_MISSING; import static org.opensearch.sql.data.model.ExprValueUtils.LITERAL_NULL; import static org.opensearch.sql.data.model.ExprValueUtils.LITERAL_TRUE; +import java.util.LinkedHashMap; +import 
java.util.List; +import java.util.Map; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.junit.jupiter.MockitoExtension; +import org.opensearch.sql.data.model.ExprBooleanValue; +import org.opensearch.sql.data.model.ExprCollectionValue; +import org.opensearch.sql.data.model.ExprDoubleValue; +import org.opensearch.sql.data.model.ExprFloatValue; +import org.opensearch.sql.data.model.ExprIntegerValue; +import org.opensearch.sql.data.model.ExprLongValue; +import org.opensearch.sql.data.model.ExprNullValue; +import org.opensearch.sql.data.model.ExprStringValue; +import org.opensearch.sql.data.model.ExprTupleValue; import org.opensearch.sql.data.model.ExprValue; import org.opensearch.sql.data.model.ExprValueUtils; import org.opensearch.sql.exception.ExpressionEvaluationException; +import org.opensearch.sql.exception.SemanticCheckException; import org.opensearch.sql.expression.DSL; +import org.opensearch.sql.expression.Expression; import org.opensearch.sql.expression.FunctionExpression; @ExtendWith(MockitoExtension.class) public class JsonFunctionsTest { - private static final ExprValue JsonNestedObject = - ExprValueUtils.stringValue("{\"a\":\"1\",\"b\":{\"c\":\"2\",\"d\":\"3\"}}"); - private static final ExprValue JsonObject = - ExprValueUtils.stringValue("{\"a\":\"1\",\"b\":\"2\"}"); - private static final ExprValue JsonArray = ExprValueUtils.stringValue("[1, 2, 3, 4]"); - private static final ExprValue JsonScalarString = ExprValueUtils.stringValue("\"abc\""); - private static final ExprValue JsonEmptyString = ExprValueUtils.stringValue(""); - private static final ExprValue JsonInvalidObject = - ExprValueUtils.stringValue("{\"invalid\":\"json\", \"string\"}"); - private static final ExprValue JsonInvalidScalar = ExprValueUtils.stringValue("abc"); - @Test public void json_valid_returns_false() { - assertEquals(LITERAL_FALSE, execute(JsonInvalidObject)); - assertEquals(LITERAL_FALSE, execute(JsonInvalidScalar)); - 
assertEquals(LITERAL_FALSE, execute(LITERAL_NULL)); - assertEquals(LITERAL_FALSE, execute(LITERAL_MISSING)); + assertEquals( + LITERAL_FALSE, + DSL.jsonValid(DSL.literal(ExprValueUtils.stringValue("{\"invalid\":\"json\", \"string\"}"))) + .valueOf()); + assertEquals( + LITERAL_FALSE, DSL.jsonValid(DSL.literal((ExprValueUtils.stringValue("abc")))).valueOf()); + assertEquals(LITERAL_FALSE, DSL.jsonValid(DSL.literal((LITERAL_NULL))).valueOf()); + assertEquals(LITERAL_FALSE, DSL.jsonValid(DSL.literal((LITERAL_MISSING))).valueOf()); } @Test public void json_valid_throws_ExpressionEvaluationException() { assertThrows( - ExpressionEvaluationException.class, () -> execute(ExprValueUtils.booleanValue(true))); + ExpressionEvaluationException.class, + () -> DSL.jsonValid(DSL.literal((ExprValueUtils.booleanValue(true)))).valueOf()); } @Test public void json_valid_returns_true() { - assertEquals(LITERAL_TRUE, execute(JsonNestedObject)); - assertEquals(LITERAL_TRUE, execute(JsonObject)); - assertEquals(LITERAL_TRUE, execute(JsonArray)); - assertEquals(LITERAL_TRUE, execute(JsonScalarString)); - assertEquals(LITERAL_TRUE, execute(JsonEmptyString)); + + List validJsonStrings = + List.of( + // test json objects are valid + "{\"a\":\"1\",\"b\":\"2\"}", + "{\"a\":1,\"b\":{\"c\":2,\"d\":3}}", + "{\"arr1\": [1,2,3], \"arr2\": [4,5,6]}", + + // test json arrays are valid + "[1, 2, 3, 4]", + "[{\"a\":1,\"b\":2}, {\"c\":3,\"d\":2}]", + + // test json scalars are valid + "\"abc\"", + "1234", + "12.34", + "true", + "false", + "null", + + // test empty string is valid + ""); + + validJsonStrings.stream() + .forEach( + str -> + assertEquals( + LITERAL_TRUE, + DSL.jsonValid(DSL.literal((ExprValueUtils.stringValue(str)))).valueOf(), + String.format("String %s must be valid json", str))); + } + + @Test + void json_returnsJsonObject() { + FunctionExpression exp; + + // Setup + final String objectJson = + "{\"foo\": \"foo\", \"fuzz\": true, \"bar\": 1234, \"bar2\": 12.34, \"baz\": null, " + + 
"\"obj\": {\"internal\": \"value\"}, \"arr\": [\"string\", true, null]}"; + + LinkedHashMap objectMap = new LinkedHashMap<>(); + objectMap.put("foo", new ExprStringValue("foo")); + objectMap.put("fuzz", ExprBooleanValue.of(true)); + objectMap.put("bar", new ExprLongValue(1234)); + objectMap.put("bar2", new ExprDoubleValue(12.34)); + objectMap.put("baz", ExprNullValue.of()); + objectMap.put( + "obj", ExprTupleValue.fromExprValueMap(Map.of("internal", new ExprStringValue("value")))); + objectMap.put( + "arr", + new ExprCollectionValue( + List.of(new ExprStringValue("string"), ExprBooleanValue.of(true), ExprNullValue.of()))); + ExprValue expectedTupleExpr = ExprTupleValue.fromExprValueMap(objectMap); + + // exercise + exp = DSL.stringToJson(DSL.literal(objectJson)); + + // Verify + var value = exp.valueOf(); + assertTrue(value instanceof ExprTupleValue); + assertEquals(expectedTupleExpr, value); + + // also test the empty object case + assertEquals( + ExprTupleValue.fromExprValueMap(Map.of()), DSL.stringToJson(DSL.literal("{}")).valueOf()); + } + + @Test + void json_returnsJsonArray() { + FunctionExpression exp; + + // Setup + final String arrayJson = "[\"foo\", \"fuzz\", true, \"bar\", 1234, 12.34, null]"; + ExprValue expectedArrayExpr = + new ExprCollectionValue( + List.of( + new ExprStringValue("foo"), + new ExprStringValue("fuzz"), + LITERAL_TRUE, + new ExprStringValue("bar"), + new ExprIntegerValue(1234), + new ExprDoubleValue(12.34), + LITERAL_NULL)); + + // exercise + exp = DSL.stringToJson(DSL.literal(arrayJson)); + + // Verify + var value = exp.valueOf(); + assertTrue(value instanceof ExprCollectionValue); + assertEquals(expectedArrayExpr, value); + + // also test the empty-array case + assertEquals(new ExprCollectionValue(List.of()), DSL.stringToJson(DSL.literal("[]")).valueOf()); + } + + @Test + void json_returnsScalar() { + assertEquals( + new ExprStringValue("foobar"), DSL.stringToJson(DSL.literal("\"foobar\"")).valueOf()); + + assertEquals(new 
ExprIntegerValue(1234), DSL.stringToJson(DSL.literal("1234")).valueOf()); + + assertEquals(new ExprDoubleValue(12.34), DSL.stringToJson(DSL.literal("12.34")).valueOf()); + + assertEquals(LITERAL_TRUE, DSL.stringToJson(DSL.literal("true")).valueOf()); + assertEquals(LITERAL_FALSE, DSL.stringToJson(DSL.literal("false")).valueOf()); + + assertEquals(LITERAL_NULL, DSL.stringToJson(DSL.literal("null")).valueOf()); + + assertEquals(LITERAL_NULL, DSL.stringToJson(DSL.literal(LITERAL_NULL)).valueOf()); + + assertEquals(LITERAL_MISSING, DSL.stringToJson(DSL.literal(LITERAL_MISSING)).valueOf()); + + assertEquals(LITERAL_NULL, DSL.stringToJson(DSL.literal("")).valueOf()); + } + + @Test + void json_returnsSemanticCheckException() { + // invalid type + assertThrows( + SemanticCheckException.class, () -> DSL.castJson(DSL.literal("invalid")).valueOf()); + + // missing bracket + assertThrows(SemanticCheckException.class, () -> DSL.castJson(DSL.literal("{{[}}")).valueOf()); + + // missing quote + assertThrows( + SemanticCheckException.class, () -> DSL.castJson(DSL.literal("\"missing quote")).valueOf()); + } + + @Test + void json_extract_search() { + ExprValue expected = new ExprIntegerValue(1); + execute_extract_json(expected, "{\"a\":1}", "$.a"); + } + + @Test + void json_extract_search_arrays_out_of_bound() { + execute_extract_json(LITERAL_NULL, "{\"a\":[1,2,3]}", "$.a[4]"); + } + + @Test + void json_extract_search_arrays() { + String jsonArray = "{\"a\":[1,2.3,\"abc\",true,null,{\"c\":{\"d\":1}},[1,2,3]]}"; + List expectedExprValues = + List.of( + new ExprIntegerValue(1), + new ExprFloatValue(2.3), + new ExprStringValue("abc"), + LITERAL_TRUE, + LITERAL_NULL, + ExprTupleValue.fromExprValueMap( + Map.of("c", ExprTupleValue.fromExprValueMap(Map.of("d", new ExprIntegerValue(1))))), + new ExprCollectionValue( + List.of( + new ExprIntegerValue(1), new ExprIntegerValue(2), new ExprIntegerValue(3)))); + + // extract specific index from JSON list + for (int i = 0; i < 
expectedExprValues.size(); i++) { + String path = String.format("$.a[%d]", i); + execute_extract_json(expectedExprValues.get(i), jsonArray, path); + } + + // extract nested object + ExprValue nestedExpected = + ExprTupleValue.fromExprValueMap(Map.of("d", new ExprIntegerValue(1))); + execute_extract_json(nestedExpected, jsonArray, "$.a[5].c"); + + // extract * from JSON list + ExprValue starExpected = new ExprCollectionValue(expectedExprValues); + execute_extract_json(starExpected, jsonArray, "$.a[*]"); + } + + @Test + void json_extract_returns_null() { + List jsonStrings = + List.of( + "{\"a\":\"1\",\"b\":\"2\"}", + "{\"a\":1,\"b\":{\"c\":2,\"d\":3}}", + "{\"arr1\": [1,2,3], \"arr2\": [4,5,6]}", + "[1, 2, 3, 4]", + "[{\"a\":1,\"b\":2}, {\"c\":3,\"d\":2}]", + "\"abc\"", + "1234", + "12.34", + "true", + "false", + ""); + + jsonStrings.forEach(str -> execute_extract_json(LITERAL_NULL, str, "$.a.path_not_found_key")); + + // null json + assertEquals( + LITERAL_NULL, + DSL.jsonExtract(DSL.literal(LITERAL_NULL), DSL.literal(new ExprStringValue("$.a"))) + .valueOf()); + + // missing json + assertEquals( + LITERAL_MISSING, + DSL.jsonExtract(DSL.literal(LITERAL_MISSING), DSL.literal(new ExprStringValue("$.a"))) + .valueOf()); + } + + @Test + void json_extract_throws_SemanticCheckException() { + // invalid path + SemanticCheckException invalidPathError = + assertThrows( + SemanticCheckException.class, + () -> + DSL.jsonExtract( + DSL.literal(new ExprStringValue("{\"a\":1}")), + DSL.literal(new ExprStringValue("$a"))) + .valueOf()); + assertEquals( + "JSON path '\"$a\"' is not valid. Error details: Illegal character at position 1 expected" + + " '.' 
or '['", + invalidPathError.getMessage()); + + // invalid json + SemanticCheckException invalidJsonError = + assertThrows( + SemanticCheckException.class, + () -> + DSL.jsonExtract( + DSL.literal(new ExprStringValue("{\"invalid\":\"json\", \"string\"}")), + DSL.literal(new ExprStringValue("$.a"))) + .valueOf()); + assertTrue( + invalidJsonError + .getMessage() + .startsWith( + "JSON string '\"{\"invalid\":\"json\", \"string\"}\"' is not valid. Error" + + " details:")); + } + + @Test + void json_extract_throws_ExpressionEvaluationException() { + // null path + assertThrows( + ExpressionEvaluationException.class, + () -> + DSL.jsonExtract( + DSL.literal(new ExprStringValue("{\"a\":1}")), DSL.literal(LITERAL_NULL)) + .valueOf()); + + // missing path + assertThrows( + ExpressionEvaluationException.class, + () -> + DSL.jsonExtract( + DSL.literal(new ExprStringValue("{\"a\":1}")), DSL.literal(LITERAL_MISSING)) + .valueOf()); } - private ExprValue execute(ExprValue jsonString) { - FunctionExpression exp = DSL.jsonValid(DSL.literal(jsonString)); - return exp.valueOf(); + private static void execute_extract_json(ExprValue expected, String json, String path) { + Expression pathExpr = DSL.literal(ExprValueUtils.stringValue(path)); + Expression jsonExpr = DSL.literal(ExprValueUtils.stringValue(json)); + ExprValue actual = DSL.jsonExtract(jsonExpr, pathExpr).valueOf(); + assertEquals(expected, actual); } } diff --git a/core/src/test/java/org/opensearch/sql/expression/operator/convert/TypeCastOperatorTest.java b/core/src/test/java/org/opensearch/sql/expression/operator/convert/TypeCastOperatorTest.java index fd579dfb47..ff0c8bcc01 100644 --- a/core/src/test/java/org/opensearch/sql/expression/operator/convert/TypeCastOperatorTest.java +++ b/core/src/test/java/org/opensearch/sql/expression/operator/convert/TypeCastOperatorTest.java @@ -8,6 +8,8 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertThrows; import static 
org.junit.jupiter.api.Assertions.assertTrue; +import static org.opensearch.sql.data.model.ExprValueUtils.LITERAL_NULL; +import static org.opensearch.sql.data.model.ExprValueUtils.LITERAL_TRUE; import static org.opensearch.sql.data.type.ExprCoreType.BOOLEAN; import static org.opensearch.sql.data.type.ExprCoreType.BYTE; import static org.opensearch.sql.data.type.ExprCoreType.DATE; @@ -21,12 +23,16 @@ import static org.opensearch.sql.data.type.ExprCoreType.TIME; import static org.opensearch.sql.data.type.ExprCoreType.TIMESTAMP; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; import java.util.stream.Stream; import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.MethodSource; import org.opensearch.sql.data.model.ExprBooleanValue; import org.opensearch.sql.data.model.ExprByteValue; +import org.opensearch.sql.data.model.ExprCollectionValue; import org.opensearch.sql.data.model.ExprDateValue; import org.opensearch.sql.data.model.ExprDoubleValue; import org.opensearch.sql.data.model.ExprFloatValue; @@ -39,6 +45,7 @@ import org.opensearch.sql.data.model.ExprStringValue; import org.opensearch.sql.data.model.ExprTimeValue; import org.opensearch.sql.data.model.ExprTimestampValue; +import org.opensearch.sql.data.model.ExprTupleValue; import org.opensearch.sql.data.model.ExprValue; import org.opensearch.sql.exception.ExpressionEvaluationException; import org.opensearch.sql.exception.SemanticCheckException; @@ -158,6 +165,15 @@ void castBooleanToShort() { assertEquals(new ExprShortValue(0), expression.valueOf()); } + @Test + void castUndefinedToShort() { + Short value = 42; + // json cast is an UNDEFINED type expression + FunctionExpression expression = DSL.castShort(DSL.castJson(DSL.literal(value.toString()))); + assertEquals(SHORT, expression.type()); + assertEquals(new ExprShortValue(value), expression.valueOf()); + } + @Test void castBooleanToInt() { FunctionExpression 
expression = DSL.castInt(DSL.literal(true)); @@ -169,6 +185,15 @@ void castBooleanToInt() { assertEquals(new ExprIntegerValue(0), expression.valueOf()); } + @Test + void castUndefinedToInt() { + Integer value = 42; + // json cast is an UNDEFINED type expression + FunctionExpression expression = DSL.castInt(DSL.castJson(DSL.literal(value.toString()))); + assertEquals(INTEGER, expression.type()); + assertEquals(new ExprIntegerValue(value), expression.valueOf()); + } + @ParameterizedTest(name = "castToLong({0})") @MethodSource({"numberData"}) void castToLong(ExprValue value) { @@ -201,6 +226,15 @@ void castBooleanToLong() { assertEquals(new ExprLongValue(0), expression.valueOf()); } + @Test + void castUndefinedToLong() { + Long value = 42l; + // json cast is an UNDEFINED type expression + FunctionExpression expression = DSL.castLong(DSL.castJson(DSL.literal(value.toString()))); + assertEquals(LONG, expression.type()); + assertEquals(new ExprLongValue(value), expression.valueOf()); + } + @ParameterizedTest(name = "castToFloat({0})") @MethodSource({"numberData"}) void castToFloat(ExprValue value) { @@ -233,6 +267,15 @@ void castBooleanToFloat() { assertEquals(new ExprFloatValue(0), expression.valueOf()); } + @Test + void castUndefinedToFloat() { + Float value = 23.45f; + // json cast is an UNDEFINED type expression + FunctionExpression expression = DSL.castFloat(DSL.castJson(DSL.literal(value.toString()))); + assertEquals(FLOAT, expression.type()); + assertEquals(new ExprFloatValue(value), expression.valueOf()); + } + @ParameterizedTest(name = "castToDouble({0})") @MethodSource({"numberData"}) void castToDouble(ExprValue value) { @@ -265,6 +308,15 @@ void castBooleanToDouble() { assertEquals(new ExprDoubleValue(0), expression.valueOf()); } + @Test + void castUndefinedToDouble() { + Double value = 23.45e5; + // json cast is an UNDEFINED type expression + FunctionExpression expression = DSL.castDouble(DSL.castJson(DSL.literal(value.toString()))); + assertEquals(DOUBLE, 
expression.type()); + assertEquals(new ExprDoubleValue(value), expression.valueOf()); + } + @ParameterizedTest(name = "castToBoolean({0})") @MethodSource({"numberData"}) void castToBoolean(ExprValue value) { @@ -294,6 +346,14 @@ void castBooleanToBoolean() { assertEquals(ExprBooleanValue.of(true), expression.valueOf()); } + @Test + void castUndefinedToBoolean() { + // json cast is an UNDEFINED type expression + FunctionExpression expression = DSL.castBoolean(DSL.castJson(DSL.literal("true"))); + assertEquals(BOOLEAN, expression.type()); + assertEquals(ExprBooleanValue.of(true), expression.valueOf()); + } + @Test void castToDate() { FunctionExpression expression = DSL.castDate(DSL.literal("2012-08-07")); @@ -389,4 +449,102 @@ void castToIp() { assertEquals(IP, exp.type()); assertTrue(exp.valueOf().isMissing()); } + + @Test + void castJson_returnsJsonObject() { + FunctionExpression exp; + + // Setup + String objectJson = + "{\"foo\": \"foo\", \"fuzz\": true, \"bar\": 1234, \"bar2\": 12.34, \"baz\": null, " + + "\"obj\": {\"internal\": \"value\"}, \"arr\": [\"string\", true, null]}"; + + LinkedHashMap objectMap = new LinkedHashMap<>(); + objectMap.put("foo", new ExprStringValue("foo")); + objectMap.put("fuzz", ExprBooleanValue.of(true)); + objectMap.put("bar", new ExprLongValue(1234)); + objectMap.put("bar2", new ExprDoubleValue(12.34)); + objectMap.put("baz", ExprNullValue.of()); + objectMap.put( + "obj", ExprTupleValue.fromExprValueMap(Map.of("internal", new ExprStringValue("value")))); + objectMap.put( + "arr", + new ExprCollectionValue( + List.of(new ExprStringValue("string"), ExprBooleanValue.of(true), ExprNullValue.of()))); + ExprValue expectedTupleExpr = ExprTupleValue.fromExprValueMap(objectMap); + + // exercise + exp = DSL.castJson(DSL.literal(objectJson)); + + // Verify + var value = exp.valueOf(); + assertTrue(value instanceof ExprTupleValue); + assertEquals(expectedTupleExpr, value); + + // also test the empty-object case + assertEquals( + 
ExprTupleValue.fromExprValueMap(Map.of()), DSL.castJson(DSL.literal("{}")).valueOf()); + } + + @Test + void castJson_returnsJsonArray() { + FunctionExpression exp; + + // Setup + String arrayJson = "[\"foo\", \"fuzz\", true, \"bar\", 1234, 12.34, null]"; + ExprValue expectedArrayExpr = + new ExprCollectionValue( + List.of( + new ExprStringValue("foo"), + new ExprStringValue("fuzz"), + LITERAL_TRUE, + new ExprStringValue("bar"), + new ExprIntegerValue(1234), + new ExprDoubleValue(12.34), + LITERAL_NULL)); + + // exercise + exp = DSL.castJson(DSL.literal(arrayJson)); + + // Verify + var value = exp.valueOf(); + assertTrue(value instanceof ExprCollectionValue); + assertEquals(expectedArrayExpr, value); + + // also test the empty-array case + assertEquals(new ExprCollectionValue(List.of()), DSL.castJson(DSL.literal("[]")).valueOf()); + } + + @Test + void castJson_returnsScalar() { + String scalarStringJson = "\"foobar\""; + assertEquals( + new ExprStringValue("foobar"), DSL.castJson(DSL.literal(scalarStringJson)).valueOf()); + + String scalarNumberJson = "1234"; + assertEquals(new ExprIntegerValue(1234), DSL.castJson(DSL.literal(scalarNumberJson)).valueOf()); + + String scalarBooleanJson = "true"; + assertEquals(LITERAL_TRUE, DSL.castJson(DSL.literal(scalarBooleanJson)).valueOf()); + + String scalarNullJson = "null"; + assertEquals(LITERAL_NULL, DSL.castJson(DSL.literal(scalarNullJson)).valueOf()); + + String empty = ""; + assertEquals(LITERAL_NULL, DSL.castJson(DSL.literal(empty)).valueOf()); + } + + @Test + void castJson_returnsSemanticCheckException() { + // invalid type + assertThrows( + SemanticCheckException.class, () -> DSL.castJson(DSL.literal("invalid")).valueOf()); + + // missing bracket + assertThrows(SemanticCheckException.class, () -> DSL.castJson(DSL.literal("{{[}}")).valueOf()); + + // missing quote + assertThrows( + SemanticCheckException.class, () -> DSL.castJson(DSL.literal("\"missing quote")).valueOf()); + } } diff --git
a/docs/user/ppl/functions/json.rst b/docs/user/ppl/functions/json.rst index fa704b6c65..3e8c21a9e4 100644 --- a/docs/user/ppl/functions/json.rst +++ b/docs/user/ppl/functions/json.rst @@ -23,14 +23,90 @@ Return type: BOOLEAN Example:: > source=json_test | eval is_valid = json_valid(json_string) | fields test_name, json_string, is_valid + fetched rows / total rows = 7/7 + +---------------------+-------------------------------------+----------+ + | test_name | json_string | is_valid | + |---------------------|-------------------------------------|----------| + | json nested object | {"a":"1","b":{"c":"2","d":"3"}} | True | + | json nested list | {"a":"1","b":[{"c":"2"},{"c":"3"}]} | True | + | json object | {"a":"1","b":"2"} | True | + | json array | [1, 2, 3, 4] | True | + | json scalar string | "abc" | True | + | json empty string | | True | + | json invalid object | {"invalid":"json", "string"} | False | + +---------------------+-------------------------------------+----------+ + +JSON +---------- + +Description +>>>>>>>>>>> + +Usage: `json(value)` Evaluates whether a string can be parsed as a JSON-encoded string and cast as an expression. Returns the JSON value if valid, null otherwise.
+ +Argument type: STRING + +Return type: STRING/BOOLEAN/DOUBLE/INTEGER/NULL/STRUCT/ARRAY + +Example:: + + > source=json_test | where json_valid(json_string) | eval json=json(json_string) | fields test_name, json_string, json + fetched rows / total rows = 6/6 + +---------------------+-------------------------------------+-----------------------------+ + | test_name | json_string | json | + |---------------------|-------------------------------------|-----------------------------| + | json nested object | {"a":"1","b":{"c":"2","d":"3"}} | {a:"1",b:{c:"2",d:"3"}} | + | json nested list | {"a":"1","b":[{"c":"2"},{"c":"3"}]} | {a:"1",b:[{c:"2"},{c:"3"}]} | + | json object | {"a":"1","b":"2"} | {a:"1",b:"2"} | + | json array | [1, 2, 3, 4] | [1,2,3,4] | + | json scalar string | "abc" | "abc" | + | json empty string | | null | + +---------------------+-------------------------------------+-----------------------------+ + +JSON_EXTRACT +------------ + +Description +>>>>>>>>>>> + +Usage: `json_extract(doc, path)` Extracts a JSON value from a JSON document based on the path specified. + +Argument type: STRING, STRING + +Return type: STRING/BOOLEAN/DOUBLE/INTEGER/NULL/STRUCT/ARRAY + +- Returns a JSON array if `path` points to multiple results (e.g. $.a[*]) or if the `path` points to an array. +- Returns null if `path` is not valid (e.g. it does not match anything in `doc`), or if JSON `doc` is MISSING or NULL. +- Throws SemanticCheckException if `doc` or `path` is malformed. +- Throws ExpressionEvaluationException if `path` is missing.
+ +Example:: + + > source=json_test | where json_valid(json_string) | eval json_extract=json_extract(json_string, '$.b') | fields test_name, json_string, json_extract fetched rows / total rows = 6/6 - +---------------------+---------------------------------+----------+ - | test_name | json_string | is_valid | - |---------------------|---------------------------------|----------| - | json nested object | {"a":"1","b":{"c":"2","d":"3"}} | True | - | json object | {"a":"1","b":"2"} | True | - | json array | [1, 2, 3, 4] | True | - | json scalar string | "abc" | True | - | json empty string | | True | - | json invalid object | {"invalid":"json", "string"} | False | - +---------------------+---------------------------------+----------+ + +---------------------+-------------------------------------+-------------------+ + | test_name | json_string | json_extract | + |---------------------|-------------------------------------|-------------------| + | json nested object | {"a":"1","b":{"c":"2","d":"3"}} | {c:"2",d:"3"} | + | json nested list | {"a":"1","b":[{"c":"2"},{"c":"3"}]} | [{c:"2"},{c:"3"}] | + | json object | {"a":"1","b":"2"} | 2 | + | json array | [1, 2, 3, 4] | null | + | json scalar string | "abc" | null | + | json empty string | | null | + +---------------------+-------------------------------------+-------------------+ + + > source=json_test | where test_name="json nested list" | eval json_extract=json_extract(json_string, '$.b[1].c') + fetched rows / total rows = 1/1 + +---------------------+-------------------------------------+--------------+ + | test_name | json_string | json_extract | + |---------------------|-------------------------------------|--------------| + | json nested list | {"a":"1","b":[{"c":"2"},{"c":"3"}]} | 3 | + +---------------------+-------------------------------------+--------------+ + + > source=json_test | where test_name="json nested list" | eval json_extract=json_extract(json_string, '$.b[*].c') +
fetched rows / total rows = 1/1 + +---------------------+-------------------------------------+--------------+ + | test_name | json_string | json_extract | + |---------------------|-------------------------------------|--------------| + | json nested list | {"a":"1","b":[{"c":"2"},{"c":"3"}]} | [2,3] | + +---------------------+-------------------------------------+--------------+ diff --git a/doctest/test_data/json_test.json b/doctest/test_data/json_test.json index 7494fc4aa9..63e7f15011 100644 --- a/doctest/test_data/json_test.json +++ b/doctest/test_data/json_test.json @@ -1,4 +1,5 @@ {"test_name":"json nested object", "json_string":"{\"a\":\"1\",\"b\":{\"c\":\"2\",\"d\":\"3\"}}"} +{"test_name":"json nested list", "json_string":"{\"a\":\"1\",\"b\":[{\"c\":\"2\"}, {\"c\":\"3\"}]}"} {"test_name":"json object", "json_string":"{\"a\":\"1\",\"b\":\"2\"}"} {"test_name":"json array", "json_string":"[1, 2, 3, 4]"} {"test_name":"json scalar string", "json_string":"\"abc\""} diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/JsonFunctionsIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/JsonFunctionsIT.java index 9e5ac041fb..27741d6c74 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/JsonFunctionsIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/JsonFunctionsIT.java @@ -12,6 +12,9 @@ import static org.opensearch.sql.util.MatcherUtils.verifySchema; import java.io.IOException; +import java.util.List; +import java.util.Map; +import org.json.JSONArray; import org.json.JSONObject; import org.junit.jupiter.api.Test; @@ -37,7 +40,13 @@ public void test_json_valid() throws IOException { rows("json object"), rows("json array"), rows("json scalar string"), - rows("json empty string")); + rows("json scalar int"), + rows("json scalar float"), + rows("json scalar double"), + rows("json scalar boolean true"), + rows("json scalar boolean false"), + rows("json empty string"), + rows("json nested list")); } @Test @@ -52,4 +61,150 @@ public void 
test_not_json_valid() throws IOException { verifySchema(result, schema("test_name", null, "string")); verifyDataRows(result, rows("json invalid object"), rows("json null")); } + + @Test + public void test_cast_json() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source=%s | where json_valid(json_string) | eval casted=cast(json_string as json)" + + " | fields test_name, casted", + TEST_INDEX_JSON_TEST)); + verifySchema(result, schema("test_name", null, "string"), schema("casted", null, "undefined")); + verifyDataRows( + result, + rows( + "json nested object", + new JSONObject(Map.of("a", "1", "b", Map.of("c", "3"), "d", List.of(1, 2, 3)))), + rows("json object", new JSONObject(Map.of("a", "1", "b", "2"))), + rows("json array", new JSONArray(List.of(1, 2, 3, 4))), + rows("json scalar string", "abc"), + rows("json scalar int", 1234), + rows("json scalar float", 12.34f), + rows("json scalar double", 2.99792458e8), + rows("json scalar boolean true", true), + rows("json scalar boolean false", false), + rows("json empty string", null), + rows( + "json nested list", + new JSONObject(Map.of("a", "1", "b", List.of(Map.of("c", "2"), Map.of("c", "3")))))); + } + + @Test + public void test_json() throws IOException { + JSONObject result; + + result = + executeQuery( + String.format( + "source=%s | where json_valid(json_string) | eval casted=json(json_string) | fields" + + " test_name, casted", + TEST_INDEX_JSON_TEST)); + verifySchema(result, schema("test_name", null, "string"), schema("casted", null, "undefined")); + JSONObject firstRow = new JSONObject(Map.of("c", 2)); + verifyDataRows( + result, + rows( + "json nested object", + new JSONObject(Map.of("a", "1", "b", Map.of("c", "3"), "d", List.of(1, 2, 3)))), + rows("json object", new JSONObject(Map.of("a", "1", "b", "2"))), + rows("json array", new JSONArray(List.of(1, 2, 3, 4))), + rows("json scalar string", "abc"), + rows("json scalar int", 1234), + rows("json scalar float", 12.34), + 
rows("json scalar double", 2.99792458e8), + rows("json scalar boolean true", true), + rows("json scalar boolean false", false), + rows("json empty string", null), + rows( + "json nested list", + new JSONObject(Map.of("a", "1", "b", List.of(Map.of("c", "2"), Map.of("c", "3")))))); + } + + @Test + public void test_cast_json_scalar_to_type() throws IOException { + // cast to integer + JSONObject result; + + result = + executeQuery( + String.format( + "source=%s | " + + "where test_name='json scalar int' | " + + "eval casted=cast(json(json_string) as int) | " + + "fields test_name, casted", + TEST_INDEX_JSON_TEST)); + verifySchema(result, schema("test_name", null, "string"), schema("casted", null, "integer")); + verifyDataRows(result, rows("json scalar int", 1234)); + + result = + executeQuery( + String.format( + "source=%s | " + + "where test_name='json scalar int' | " + + "eval casted=cast(json(json_string) as long) | " + + "fields test_name, casted", + TEST_INDEX_JSON_TEST)); + verifySchema(result, schema("test_name", null, "string"), schema("casted", null, "long")); + verifyDataRows(result, rows("json scalar int", 1234l)); + + result = + executeQuery( + String.format( + "source=%s | " + + "where test_name='json scalar float' | " + + "eval casted=cast(json(json_string) as float) | " + + "fields test_name, casted", + TEST_INDEX_JSON_TEST)); + verifySchema(result, schema("test_name", null, "string"), schema("casted", null, "float")); + verifyDataRows(result, rows("json scalar float", 12.34f)); + + result = + executeQuery( + String.format( + "source=%s | " + + "where test_name='json scalar double' | " + + "eval casted=cast(json(json_string) as double) | " + + "fields test_name, casted", + TEST_INDEX_JSON_TEST)); + verifySchema(result, schema("test_name", null, "string"), schema("casted", null, "double")); + verifyDataRows(result, rows("json scalar double", 2.99792458e8)); + + result = + executeQuery( + String.format( + "source=%s | where test_name='json scalar boolean 
true' OR test_name='json scalar" + + " boolean false' | eval casted=cast(json(json_string) as boolean) | fields" + + " test_name, casted", + TEST_INDEX_JSON_TEST)); + verifySchema(result, schema("test_name", null, "string"), schema("casted", null, "boolean")); + verifyDataRows( + result, rows("json scalar boolean true", true), rows("json scalar boolean false", false)); + } + + @Test + public void test_json_extract() throws IOException { + JSONObject result; + result = + executeQuery( + String.format( + "source=%s | where json_valid(json_string) | eval" + + " extracted=json_extract(json_string, '$.b') | fields test_name, extracted", + TEST_INDEX_JSON_TEST)); + verifySchema( + result, schema("test_name", null, "string"), schema("extracted", null, "undefined")); + verifyDataRows( + result, + rows("json nested object", new JSONObject(Map.of("c", "3"))), + rows("json object", "2"), + rows("json array", null), + rows("json scalar string", null), + rows("json scalar int", null), + rows("json scalar float", null), + rows("json scalar double", null), + rows("json scalar boolean true", null), + rows("json scalar boolean false", null), + rows("json empty string", null), + rows("json nested list", new JSONArray(List.of(Map.of("c", "2"), Map.of("c", "3"))))); + } } diff --git a/integ-test/src/test/resources/json_test.json b/integ-test/src/test/resources/json_test.json index e393bfeb8e..afbd49cfc3 100644 --- a/integ-test/src/test/resources/json_test.json +++ b/integ-test/src/test/resources/json_test.json @@ -1,5 +1,5 @@ {"index":{"_id":"0"}} -{"test_name":"json nested object", "json_string":"{\"a\":\"1\",\"b\":{\"c\":\"2\",\"d\":\"3\"}}"} +{"test_name":"json nested object", "json_string":"{\"a\":\"1\", \"b\": {\"c\": \"3\"}, \"d\": [1, 2, 3]}"} {"index":{"_id":"1"}} {"test_name":"json object", "json_string":"{\"a\":\"1\",\"b\":\"2\"}"} {"index":{"_id":"2"}} @@ -7,8 +7,20 @@ {"index":{"_id":"3"}} {"test_name":"json scalar string", "json_string":"\"abc\""} {"index":{"_id":"4"}} 
-{"test_name":"json empty string","json_string":""} +{"test_name":"json scalar int", "json_string":"1234"} {"index":{"_id":"5"}} -{"test_name":"json invalid object", "json_string":"{\"invalid\":\"json\", \"string\"}"} +{"test_name":"json scalar float", "json_string":"12.34"} {"index":{"_id":"6"}} +{"test_name":"json scalar double", "json_string":"2.99792458e8"} +{"index":{"_id":"7"}} +{"test_name":"json scalar boolean true", "json_string":"true"} +{"index":{"_id":"8"}} +{"test_name":"json scalar boolean false", "json_string":"false"} +{"index":{"_id":"9"}} +{"test_name":"json empty string", "json_string":""} +{"index":{"_id":"10"}} +{"test_name":"json invalid object", "json_string":"{\"invalid\":\"json\", \"string\"}"} +{"index":{"_id":"11"}} {"test_name":"json null", "json_string":null} +{"index":{"_id":"12"}} +{"test_name":"json nested list", "json_string":"{\"a\":\"1\",\"b\":[{\"c\":\"2\"}, {\"c\":\"3\"}]}"} diff --git a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 index f9d9e1b43c..a338e4fa3b 100644 --- a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 @@ -333,6 +333,8 @@ CIDRMATCH: 'CIDRMATCH'; // JSON FUNCTIONS JSON_VALID: 'JSON_VALID'; +JSON: 'JSON'; +JSON_EXTRACT: 'JSON_EXTRACT'; // FLOWCONTROL FUNCTIONS IFNULL: 'IFNULL'; diff --git a/ppl/src/main/antlr/OpenSearchPPLParser.g4 b/ppl/src/main/antlr/OpenSearchPPLParser.g4 index c70f8af2cc..54a5e7e57c 100644 --- a/ppl/src/main/antlr/OpenSearchPPLParser.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4 @@ -409,6 +409,7 @@ convertedDataType | typeName = STRING | typeName = BOOLEAN | typeName = IP + | typeName = JSON ; evalFunctionName @@ -419,6 +420,7 @@ evalFunctionName | flowControlFunctionName | systemFunctionName | positionFunctionName + | jsonFunctionName | geoipFunctionName ; @@ -705,6 +707,11 @@ positionFunctionName : POSITION ; +jsonFunctionName + : JSON + | JSON_EXTRACT + ; + // operators comparisonOperator : EQUAL @@ -869,6 
+876,7 @@ keywordsCanBeId | mathematicalFunctionName | positionFunctionName | conditionFunctionName + | jsonFunctionName | geoipFunctionName // commands | SEARCH