From 27299f2a63382bbe1a50654889f9b627dabe0d73 Mon Sep 17 00:00:00 2001
From: Andrew Carbonetto
Date: Fri, 10 Jan 2025 16:07:31 -0800
Subject: [PATCH] PPL: Add JSON_OBJECT function

Signed-off-by: Andrew Carbonetto
---
 .../org/opensearch/sql/expression/DSL.java |  4 +
 .../function/BuiltinFunctionName.java      |  1 +
 .../sql/expression/json/JsonFunctions.java | 82 ++++++++++++++++++
 .../datetime/DateTimeFunctionTest.java     |  1 -
 .../expression/json/JsonFunctionsTest.java | 85 +++++++++++++++++++
 docs/user/ppl/functions/json.rst           | 40 +++++++++
 .../opensearch/sql/ppl/JsonFunctionIT.java | 24 ++++++
 ppl/src/main/antlr/OpenSearchPPLLexer.g4   |  1 +
 ppl/src/main/antlr/OpenSearchPPLParser.g4  | 10 +++
 9 files changed, 247 insertions(+), 1 deletion(-)

diff --git a/core/src/main/java/org/opensearch/sql/expression/DSL.java b/core/src/main/java/org/opensearch/sql/expression/DSL.java
index dc819c8163..1fb1e4e150 100644
--- a/core/src/main/java/org/opensearch/sql/expression/DSL.java
+++ b/core/src/main/java/org/opensearch/sql/expression/DSL.java
@@ -687,6 +687,10 @@ public static FunctionExpression jsonValid(Expression... expressions) {
     return compile(FunctionProperties.None, BuiltinFunctionName.JSON_VALID, expressions);
   }
 
+  public static FunctionExpression jsonObject(Expression... expressions) {
+    return compile(FunctionProperties.None, BuiltinFunctionName.JSON_OBJECT, expressions);
+  }
+
   public static Aggregator avg(Expression... expressions) {
     return aggregate(BuiltinFunctionName.AVG, expressions);
   }
diff --git a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java
index 43fdbf2eb7..08687722fa 100644
--- a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java
+++ b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java
@@ -206,6 +206,7 @@ public enum BuiltinFunctionName {
 
   /** Json Functions. */
   JSON_VALID(FunctionName.of("json_valid")),
+  JSON_OBJECT(FunctionName.of("json_object")),
 
   /** NULL Test. */
   IS_NULL(FunctionName.of("is null")),
diff --git a/core/src/main/java/org/opensearch/sql/expression/json/JsonFunctions.java b/core/src/main/java/org/opensearch/sql/expression/json/JsonFunctions.java
index 49541e5d59..c214210797 100644
--- a/core/src/main/java/org/opensearch/sql/expression/json/JsonFunctions.java
+++ b/core/src/main/java/org/opensearch/sql/expression/json/JsonFunctions.java
@@ -7,20 +7,38 @@
 
 import static org.opensearch.sql.data.type.ExprCoreType.BOOLEAN;
 import static org.opensearch.sql.data.type.ExprCoreType.STRING;
+import static org.opensearch.sql.data.type.ExprCoreType.STRUCT;
 import static org.opensearch.sql.expression.function.FunctionDSL.define;
 import static org.opensearch.sql.expression.function.FunctionDSL.impl;
 import static org.opensearch.sql.expression.function.FunctionDSL.nullMissingHandling;
 
+import java.util.Iterator;
+import java.util.LinkedHashMap;
+import java.util.List;
 import lombok.experimental.UtilityClass;
+import org.apache.commons.lang3.tuple.Pair;
+import org.opensearch.sql.data.model.ExprTupleValue;
+import org.opensearch.sql.data.model.ExprValue;
+import org.opensearch.sql.data.type.ExprCoreType;
+import org.opensearch.sql.data.type.ExprType;
+import org.opensearch.sql.exception.SemanticCheckException;
+import org.opensearch.sql.expression.Expression;
+import org.opensearch.sql.expression.FunctionExpression;
+import org.opensearch.sql.expression.env.Environment;
 import org.opensearch.sql.expression.function.BuiltinFunctionName;
 import org.opensearch.sql.expression.function.BuiltinFunctionRepository;
 import org.opensearch.sql.expression.function.DefaultFunctionResolver;
+import org.opensearch.sql.expression.function.FunctionBuilder;
+import org.opensearch.sql.expression.function.FunctionName;
+import org.opensearch.sql.expression.function.FunctionResolver;
+import org.opensearch.sql.expression.function.FunctionSignature;
 import org.opensearch.sql.utils.JsonUtils;
 
 @UtilityClass
 public class JsonFunctions {
   public void register(BuiltinFunctionRepository repository) {
     repository.register(jsonValid());
+    repository.register(jsonObject());
   }
 
   private DefaultFunctionResolver jsonValid() {
@@ -28,4 +46,68 @@ private DefaultFunctionResolver jsonValid() {
         BuiltinFunctionName.JSON_VALID.getName(),
         impl(nullMissingHandling(JsonUtils::isValidJson), BOOLEAN, STRING));
   }
+
+  /**
+   * Creates the resolver for {@code json_object(key1, value1, key2, value2, ...)}.
+   *
+   * <p>Resolution throws {@link SemanticCheckException} when the argument count is odd or when a
+   * key argument (every odd-numbered parameter) is not of type STRING. The resolved expression
+   * evaluates each key/value pair in argument order and returns them as a STRUCT tuple.
+   */
+  private static FunctionResolver jsonObject() {
+    return new FunctionResolver() {
+      @Override
+      public FunctionName getFunctionName() {
+        return BuiltinFunctionName.JSON_OBJECT.getName();
+      }
+
+      @Override
+      public Pair<FunctionSignature, FunctionBuilder> resolve(
+          FunctionSignature unresolvedSignature) {
+        List<ExprType> paramList = unresolvedSignature.getParamTypeList();
+        // check that we got an even number of arguments
+        if (paramList.size() % 2 != 0) {
+          throw new SemanticCheckException(
+              String.format(
+                  "Expected an even number of arguments but instead got #%d arguments",
+                  paramList.size()));
+        }
+
+        // check that each "key" argument (of key-value pair) is a string
+        for (int i = 0; i < paramList.size(); i = i + 2) {
+          ExprType paramType = paramList.get(i);
+          if (!ExprCoreType.STRING.equals(paramType)) {
+            throw new SemanticCheckException(
+                String.format(
+                    "Expected type %s instead of %s for parameter #%d",
+                    ExprCoreType.STRING, paramType.typeName(), i + 1));
+          }
+        }
+
+        // return the unresolved signature and function builder
+        return Pair.of(
+            unresolvedSignature,
+            (functionProperties, arguments) ->
+                new FunctionExpression(getFunctionName(), arguments) {
+                  @Override
+                  public ExprValue valueOf(Environment<Expression, ExprValue> valueEnv) {
+                    LinkedHashMap<String, ExprValue> tupleValues = new LinkedHashMap<>();
+                    Iterator<Expression> iter = getArguments().iterator();
+                    // arguments alternate key, value; resolve() rejected odd argument counts
+                    while (iter.hasNext()) {
+                      tupleValues.put(
+                          iter.next().valueOf(valueEnv).stringValue(),
+                          iter.next().valueOf(valueEnv));
+                    }
+                    return ExprTupleValue.fromExprValueMap(tupleValues);
+                  }
+
+                  @Override
+                  public ExprType type() {
+                    return STRUCT;
+                  }
+                });
+      }
+    };
+  }
 }
diff --git a/core/src/test/java/org/opensearch/sql/expression/datetime/DateTimeFunctionTest.java b/core/src/test/java/org/opensearch/sql/expression/datetime/DateTimeFunctionTest.java
index 4b287319ba..c820c97196 100644
--- a/core/src/test/java/org/opensearch/sql/expression/datetime/DateTimeFunctionTest.java
+++ b/core/src/test/java/org/opensearch/sql/expression/datetime/DateTimeFunctionTest.java
@@ -27,7 +27,6 @@
 import java.util.List;
 import java.util.stream.Stream;
 import lombok.AllArgsConstructor;
-import org.junit.jupiter.api.Disabled;
 import org.junit.jupiter.api.Test;
 import org.junit.jupiter.params.ParameterizedTest;
 import org.junit.jupiter.params.provider.Arguments;
diff --git a/core/src/test/java/org/opensearch/sql/expression/json/JsonFunctionsTest.java b/core/src/test/java/org/opensearch/sql/expression/json/JsonFunctionsTest.java
index 2e8ece2817..36e391ad08 100644
--- a/core/src/test/java/org/opensearch/sql/expression/json/JsonFunctionsTest.java
+++ b/core/src/test/java/org/opensearch/sql/expression/json/JsonFunctionsTest.java
@@ -6,16 +6,31 @@
 package org.opensearch.sql.expression.json;
 
 import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
 import static org.opensearch.sql.data.model.ExprValueUtils.LITERAL_FALSE;
+import static org.opensearch.sql.data.model.ExprValueUtils.LITERAL_NULL;
 import static org.opensearch.sql.data.model.ExprValueUtils.LITERAL_TRUE;
 
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
 import org.junit.jupiter.api.Test;
 import org.junit.jupiter.api.extension.ExtendWith;
 import org.mockito.junit.jupiter.MockitoExtension;
+import org.opensearch.sql.data.model.ExprBooleanValue;
+import org.opensearch.sql.data.model.ExprCollectionValue;
+import org.opensearch.sql.data.model.ExprDoubleValue;
+import org.opensearch.sql.data.model.ExprLongValue;
+import org.opensearch.sql.data.model.ExprNullValue;
+import org.opensearch.sql.data.model.ExprStringValue;
+import org.opensearch.sql.data.model.ExprTupleValue;
 import org.opensearch.sql.data.model.ExprValue;
 import org.opensearch.sql.data.model.ExprValueUtils;
+import org.opensearch.sql.exception.SemanticCheckException;
 import org.opensearch.sql.expression.DSL;
 import org.opensearch.sql.expression.FunctionExpression;
+import org.opensearch.sql.expression.LiteralExpression;
 
 @ExtendWith(MockitoExtension.class)
 public class JsonFunctionsTest {
@@ -46,4 +61,74 @@ private ExprValue execute(ExprValue jsonString) {
     FunctionExpression exp = DSL.jsonValid(DSL.literal(jsonString));
     return exp.valueOf();
   }
+
+  @Test
+  public void json_object_returns_tuple() {
+    FunctionExpression exp;
+
+    // Setup
+    LinkedHashMap<String, ExprValue> objectMap = new LinkedHashMap<>();
+    objectMap.put("foo", new ExprStringValue("foo"));
+    objectMap.put("fuzz", ExprBooleanValue.of(true));
+    objectMap.put("bar", new ExprLongValue(1234));
+    objectMap.put("bar2", new ExprDoubleValue(12.34));
+    objectMap.put("baz", ExprNullValue.of());
+    objectMap.put(
+        "obj", ExprTupleValue.fromExprValueMap(Map.of("internal", new ExprStringValue("value"))));
+    // TODO: requires json_array()
+    // objectMap.put(
+    //     "arr",
+    //     new ExprCollectionValue(
+    //         List.of(new ExprStringValue("string"), ExprBooleanValue.of(true), ExprNullValue.of())));
+    ExprValue expectedTupleExpr = ExprTupleValue.fromExprValueMap(objectMap);
+
+    // exercise
+    exp = DSL.jsonObject(
+        DSL.literal("foo"), DSL.literal("foo"),
+        DSL.literal("fuzz"), DSL.literal(true),
+        DSL.literal("bar"), DSL.literal(1234),
+        DSL.literal("bar2"), DSL.literal(12.34),
+        DSL.literal("baz"), new LiteralExpression(ExprValueUtils.nullValue()),
+        DSL.literal("obj"), DSL.jsonObject(
+            DSL.literal("internal"), DSL.literal("value")
+        )
+    );
+
+    // Verify
+    var value = exp.valueOf();
+    assertTrue(value instanceof ExprTupleValue);
+    assertEquals(expectedTupleExpr, value);
+  }
+
+  @Test
+  public void json_object_returns_empty_tuple() {
+    FunctionExpression exp;
+
+    // Setup
+    LinkedHashMap<String, ExprValue> objectMap = new LinkedHashMap<>();
+    ExprValue expectedTupleExpr = ExprTupleValue.fromExprValueMap(objectMap);
+
+    // exercise
+    exp = DSL.jsonObject();
+
+    // Verify
+    var value = exp.valueOf();
+    assertTrue(value instanceof ExprTupleValue);
+    assertEquals(expectedTupleExpr, value);
+  }
+
+  @Test
+  public void json_object_throws_SemanticCheckException() {
+    // wrong number of arguments
+    assertThrows(
+        SemanticCheckException.class, () -> DSL.jsonObject(DSL.literal("only one")).valueOf());
+    assertThrows(
+        SemanticCheckException.class, () -> DSL.jsonObject(DSL.literal("one"), DSL.literal("two"), DSL.literal("three")).valueOf());
+
+    // key argument is not a string
+    assertThrows(
+        SemanticCheckException.class, () -> DSL.jsonObject(DSL.literal(1234), DSL.literal("two")).valueOf());
+    assertThrows(
+        SemanticCheckException.class, () -> DSL.jsonObject(DSL.literal("one"), DSL.literal(true), DSL.literal(true), DSL.literal("four")).valueOf());
+  }
 }
diff --git a/docs/user/ppl/functions/json.rst b/docs/user/ppl/functions/json.rst
index bf5bd46b7a..3be84bcb23 100644
--- a/docs/user/ppl/functions/json.rst
+++ b/docs/user/ppl/functions/json.rst
@@ -33,3 +33,43 @@ Example::
     | json empty string   |                              | True     |
     | json invalid object | {"invalid":"json", "string"} | True     |
     +---------------------+------------------------------+----------+
+
+JSON_OBJECT
+-----------
+
+Description
+>>>>>>>>>>>
+
+Usage: `json_object(<key>, <value>[, <key>, <value>]...)` returns a JSON object from key-value pairs.
+
+Argument type:
+- A \<key\> must be STRING.
+- A \<value\> can be a scalar, another json object, or json array type. Note: scalar fields will be treated as single-value. Use `json_array` to construct an array value from a multi-value.
+
+Return type: STRUCT
+
+Example::
+
+    os> source=people | eval result = json_object('key', 123.45) | fields result
+    fetched rows / total rows = 1/1
+    +------------------+
+    | result           |
+    +------------------+
+    | {"key":123.45}   |
+    +------------------+
+
+    os> source=people | eval result = json_object('outer', json_object('inner', 123.45)) | fields result
+    fetched rows / total rows = 1/1
+    +------------------------------+
+    | result                       |
+    +------------------------------+
+    | {"outer":{"inner":123.45}}   |
+    +------------------------------+
+
+    os> source=people | eval result = json_object('array_doc', json_array(123.45, "string", true, null)) | fields result
+    fetched rows / total rows = 1/1
+    +----------------------------------------------+
+    | result                                       |
+    +----------------------------------------------+
+    | {"array_doc":[123.45, "string", true, null]} |
+    +----------------------------------------------+
diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/JsonFunctionIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/JsonFunctionIT.java
index f02750147d..5e5ac69c96 100644
--- a/integ-test/src/test/java/org/opensearch/sql/ppl/JsonFunctionIT.java
+++ b/integ-test/src/test/java/org/opensearch/sql/ppl/JsonFunctionIT.java
@@ -12,6 +12,9 @@
 import static org.opensearch.sql.util.MatcherUtils.verifySchema;
 
 import java.io.IOException;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
 import org.json.JSONObject;
 import org.junit.jupiter.api.Test;
 
@@ -51,4 +54,25 @@ public void test_not_json_valid() throws IOException {
     verifySchema(result, schema("test_name", null, "string"));
     verifyDataRows(result, rows("json invalid object"));
   }
+
+  @Test
+  public void test_json_object() throws IOException {
+    JSONObject result;
+
+    // NOTE(review): json() is not added by this patch; confirm it exists on the target branch.
+    result =
+        executeQuery(
+            String.format(
+                "source=%s | eval obj=json_object(\"key\", json(json_string))"
+                    + " | fields test_name, obj",
+                TEST_INDEX_JSON_TEST));
+    verifySchema(result, schema("test_name", null, "string"), schema("obj", null, "struct"));
+    verifyDataRows(
+        result,
+        rows("json object", Map.of("key", Map.of("a", "1", "b", "2"))),
+        rows("json array", Map.of("key", List.of(1, 2, 3, 4))),
+        rows("json scalar string", Map.of("key", "abc")),
+        // Map.of rejects null values, so the null entry needs a null-tolerant map
+        rows("json empty string", Collections.singletonMap("key", null)));
+  }
 }
diff --git a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 b/ppl/src/main/antlr/OpenSearchPPLLexer.g4
index 5b6b9e41b8..e8e1c3d726 100644
--- a/ppl/src/main/antlr/OpenSearchPPLLexer.g4
+++ b/ppl/src/main/antlr/OpenSearchPPLLexer.g4
@@ -334,6 +334,7 @@ CIDRMATCH: 'CIDRMATCH';
 
 // JSON FUNCTIONS
 JSON_VALID: 'JSON_VALID';
+JSON_OBJECT: 'JSON_OBJECT';
 
 // FLOWCONTROL FUNCTIONS
 IFNULL: 'IFNULL';
diff --git a/ppl/src/main/antlr/OpenSearchPPLParser.g4 b/ppl/src/main/antlr/OpenSearchPPLParser.g4
index 999c5d9c87..675141ea3a 100644
--- a/ppl/src/main/antlr/OpenSearchPPLParser.g4
+++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4
@@ -310,6 +310,7 @@ valueExpression
    | extractFunction # extractFunctionCall
    | getFormatFunction # getFormatFunctionCall
    | timestampFunction # timestampFunctionCall
+   | jsonObjectFunction # jsonObjectFunctionCall
    | LT_PRTHS valueExpression RT_PRTHS # parentheticValueExpr
    ;
 
@@ -324,6 +325,10 @@ positionFunction
    : positionFunctionName LT_PRTHS functionArg IN functionArg RT_PRTHS
    ;
 
+jsonObjectFunction
+   : jsonObjectFunctionName LT_PRTHS functionArg COMMA functionArg (COMMA functionArg COMMA functionArg)* RT_PRTHS
+   ;
+
 booleanExpression
    : booleanFunctionCall
    ;
@@ -419,6 +424,7 @@ evalFunctionName
    | flowControlFunctionName
    | systemFunctionName
    | positionFunctionName
+   | jsonObjectFunctionName
    ;
 
 functionArgs
@@ -700,6 +706,10 @@ positionFunctionName
    : POSITION
    ;
 
+jsonObjectFunctionName
+   : JSON_OBJECT
+   ;
+
 // operators
 comparisonOperator
    : EQUAL