diff --git a/core/src/main/java/org/opensearch/sql/expression/DSL.java b/core/src/main/java/org/opensearch/sql/expression/DSL.java index 44ecc2bc86..dc819c8163 100644 --- a/core/src/main/java/org/opensearch/sql/expression/DSL.java +++ b/core/src/main/java/org/opensearch/sql/expression/DSL.java @@ -683,6 +683,10 @@ public static FunctionExpression notLike(Expression... expressions) { return compile(FunctionProperties.None, BuiltinFunctionName.NOT_LIKE, expressions); } + public static FunctionExpression jsonValid(Expression... expressions) { + return compile(FunctionProperties.None, BuiltinFunctionName.JSON_VALID, expressions); + } + public static Aggregator avg(Expression... expressions) { return aggregate(BuiltinFunctionName.AVG, expressions); } diff --git a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java index f8e9cf7c5f..43fdbf2eb7 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java @@ -204,6 +204,9 @@ public enum BuiltinFunctionName { TRIM(FunctionName.of("trim")), UPPER(FunctionName.of("upper")), + /** Json Functions. */ + JSON_VALID(FunctionName.of("json_valid")), + /** NULL Test. 
*/ IS_NULL(FunctionName.of("is null")), IS_NOT_NULL(FunctionName.of("is not null")), diff --git a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionRepository.java b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionRepository.java index 79ea58b860..72d637fd2b 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionRepository.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionRepository.java @@ -28,6 +28,7 @@ import org.opensearch.sql.expression.datetime.DateTimeFunctions; import org.opensearch.sql.expression.datetime.IntervalClause; import org.opensearch.sql.expression.ip.IPFunctions; +import org.opensearch.sql.expression.json.JsonFunctions; import org.opensearch.sql.expression.operator.arthmetic.ArithmeticFunctions; import org.opensearch.sql.expression.operator.arthmetic.MathematicalFunctions; import org.opensearch.sql.expression.operator.convert.TypeCastOperators; @@ -83,6 +84,7 @@ public static synchronized BuiltinFunctionRepository getInstance() { SystemFunctions.register(instance); OpenSearchFunctions.register(instance); IPFunctions.register(instance); + JsonFunctions.register(instance); } return instance; } diff --git a/core/src/main/java/org/opensearch/sql/expression/json/JsonFunctions.java b/core/src/main/java/org/opensearch/sql/expression/json/JsonFunctions.java new file mode 100644 index 0000000000..49541e5d59 --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/expression/json/JsonFunctions.java @@ -0,0 +1,31 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.expression.json; + +import static org.opensearch.sql.data.type.ExprCoreType.BOOLEAN; +import static org.opensearch.sql.data.type.ExprCoreType.STRING; +import static org.opensearch.sql.expression.function.FunctionDSL.define; +import static org.opensearch.sql.expression.function.FunctionDSL.impl; +import static 
org.opensearch.sql.expression.function.FunctionDSL.nullMissingHandling; + +import lombok.experimental.UtilityClass; +import org.opensearch.sql.expression.function.BuiltinFunctionName; +import org.opensearch.sql.expression.function.BuiltinFunctionRepository; +import org.opensearch.sql.expression.function.DefaultFunctionResolver; +import org.opensearch.sql.utils.JsonUtils; + +@UtilityClass +public class JsonFunctions { + public void register(BuiltinFunctionRepository repository) { + repository.register(jsonValid()); + } + + private DefaultFunctionResolver jsonValid() { + return define( + BuiltinFunctionName.JSON_VALID.getName(), + impl(nullMissingHandling(JsonUtils::isValidJson), BOOLEAN, STRING)); + } +} diff --git a/core/src/main/java/org/opensearch/sql/utils/JsonUtils.java b/core/src/main/java/org/opensearch/sql/utils/JsonUtils.java new file mode 100644 index 0000000000..d7f37b4197 --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/utils/JsonUtils.java @@ -0,0 +1,26 @@ +package org.opensearch.sql.utils; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; +import lombok.experimental.UtilityClass; +import org.opensearch.sql.data.model.ExprValue; +import org.opensearch.sql.data.model.ExprValueUtils; + +@UtilityClass +public class JsonUtils { + /** + * Checks if given JSON string can be parsed as valid JSON. + * + * @param jsonExprValue JSON string (e.g. "{\"hello\": \"world\"}"). + * @return true if the string can be parsed as valid JSON, else false. 
+ */ + public static ExprValue isValidJson(ExprValue jsonExprValue) { + ObjectMapper objectMapper = new ObjectMapper(); + try { + objectMapper.readTree(jsonExprValue.stringValue()); + return ExprValueUtils.LITERAL_TRUE; + } catch (JsonProcessingException e) { + return ExprValueUtils.LITERAL_FALSE; + } + } +} diff --git a/core/src/test/java/org/opensearch/sql/expression/json/JsonFunctionsTest.java b/core/src/test/java/org/opensearch/sql/expression/json/JsonFunctionsTest.java new file mode 100644 index 0000000000..e374841e7f --- /dev/null +++ b/core/src/test/java/org/opensearch/sql/expression/json/JsonFunctionsTest.java @@ -0,0 +1,52 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.expression.json; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.opensearch.sql.data.model.ExprValueUtils.LITERAL_FALSE; +import static org.opensearch.sql.data.model.ExprValueUtils.LITERAL_TRUE; + +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.junit.jupiter.MockitoExtension; +import org.opensearch.sql.data.model.ExprValue; +import org.opensearch.sql.data.model.ExprValueUtils; +import org.opensearch.sql.expression.DSL; +import org.opensearch.sql.expression.FunctionExpression; + +@ExtendWith(MockitoExtension.class) +public class JsonFunctionsTest { + private static final ExprValue JsonNestedObject = + ExprValueUtils.stringValue("{\"a\":\"1\",\"b\":{\"c\":\"2\",\"d\":\"3\"}}"); + private static final ExprValue JsonObject = + ExprValueUtils.stringValue("{\"a\":\"1\",\"b\":\"2\"}"); + private static final ExprValue JsonArray = ExprValueUtils.stringValue("[1, 2, 3, 4]"); + private static final ExprValue JsonScalarString = ExprValueUtils.stringValue("\"abc\""); + private static final ExprValue JsonEmptyString = ExprValueUtils.stringValue(""); + private static final ExprValue JsonInvalidObject = + 
ExprValueUtils.stringValue("{\"invalid\":\"json\", \"string\"}"); + private static final ExprValue JsonInvalidScalar = ExprValueUtils.stringValue("abc"); + + @Test + public void json_valid_returns_false() { + assertEquals(LITERAL_FALSE, execute(JsonInvalidObject)); + assertEquals(LITERAL_FALSE, execute(JsonInvalidScalar)); + } + + @Test + public void json_valid_returns_true() { + assertEquals(LITERAL_TRUE, execute(JsonNestedObject)); + assertEquals(LITERAL_TRUE, execute(JsonObject)); + assertEquals(LITERAL_TRUE, execute(JsonArray)); + assertEquals(LITERAL_TRUE, execute(JsonScalarString)); + assertEquals(LITERAL_TRUE, execute(JsonEmptyString)); + } + + private ExprValue execute(ExprValue jsonString) { + FunctionExpression exp = DSL.jsonValid(DSL.literal(jsonString)); + return exp.valueOf(); + } +} diff --git a/docs/category.json b/docs/category.json index 32f56cfb46..efbb57d6e6 100644 --- a/docs/category.json +++ b/docs/category.json @@ -34,6 +34,7 @@ "user/ppl/functions/datetime.rst", "user/ppl/functions/expressions.rst", "user/ppl/functions/ip.rst", + "user/ppl/functions/json.rst", "user/ppl/functions/math.rst", "user/ppl/functions/relevance.rst", "user/ppl/functions/string.rst" diff --git a/docs/user/dql/metadata.rst b/docs/user/dql/metadata.rst index aba4eb0c75..b059c0cded 100644 --- a/docs/user/dql/metadata.rst +++ b/docs/user/dql/metadata.rst @@ -35,7 +35,7 @@ Example 1: Show All Indices Information SQL query:: os> SHOW TABLES LIKE '%' - fetched rows / total rows = 10/10 + fetched rows / total rows = 11/11 +----------------+-------------+-----------------+------------+---------+----------+------------+-----------+---------------------------+----------------+ | TABLE_CAT | TABLE_SCHEM | TABLE_NAME | TABLE_TYPE | REMARKS | TYPE_CAT | TYPE_SCHEM | TYPE_NAME | SELF_REFERENCING_COL_NAME | REF_GENERATION | |----------------+-------------+-----------------+------------+---------+----------+------------+-----------+---------------------------+----------------| @@ 
-44,6 +44,7 @@ SQL query:: | docTestCluster | null | accounts | BASE TABLE | null | null | null | null | null | null | | docTestCluster | null | apache | BASE TABLE | null | null | null | null | null | null | | docTestCluster | null | books | BASE TABLE | null | null | null | null | null | null | + | docTestCluster | null | json_test | BASE TABLE | null | null | null | null | null | null | | docTestCluster | null | nested | BASE TABLE | null | null | null | null | null | null | | docTestCluster | null | nyc_taxi | BASE TABLE | null | null | null | null | null | null | | docTestCluster | null | people | BASE TABLE | null | null | null | null | null | null | diff --git a/docs/user/ppl/functions/json.rst b/docs/user/ppl/functions/json.rst new file mode 100644 index 0000000000..74c173be13 --- /dev/null +++ b/docs/user/ppl/functions/json.rst @@ -0,0 +1,36 @@ +==================== +JSON Functions +==================== + +.. rubric:: Table of contents + +.. contents:: + :local: + :depth: 1 + +JSON_VALID +---------- + +Description +>>>>>>>>>>> + +Usage: `json_valid(json_string)` checks if `json_string` is a valid JSON string. 
+ +Argument type: STRING + +Return type: BOOLEAN + +Example:: + + > source=json_test | eval is_valid = json_valid(json_string) | fields test_name, json_string, is_valid + fetched rows / total rows = 6/6 + +---------------------+---------------------------------+----------+ + | test_name | json_string | is_valid | + |---------------------|---------------------------------|----------| + | json nested object | {"a":"1","b":{"c":"2","d":"3"}} | True | + | json object | {"a":"1","b":"2"} | True | + | json array | [1, 2, 3, 4] | True | + | json scalar string | "abc" | True | + | json empty string | | True | + | json invalid object | {"invalid":"json", "string"} | False | + +---------------------+---------------------------------+----------+ diff --git a/doctest/test_data/json_test.json b/doctest/test_data/json_test.json new file mode 100644 index 0000000000..7494fc4aa9 --- /dev/null +++ b/doctest/test_data/json_test.json @@ -0,0 +1,6 @@ +{"test_name":"json nested object", "json_string":"{\"a\":\"1\",\"b\":{\"c\":\"2\",\"d\":\"3\"}}"} +{"test_name":"json object", "json_string":"{\"a\":\"1\",\"b\":\"2\"}"} +{"test_name":"json array", "json_string":"[1, 2, 3, 4]"} +{"test_name":"json scalar string", "json_string":"\"abc\""} +{"test_name":"json empty string","json_string":""} +{"test_name":"json invalid object", "json_string":"{\"invalid\":\"json\", \"string\"}"} diff --git a/doctest/test_docs.py b/doctest/test_docs.py index 1d46766c6d..906bbd65b5 100644 --- a/doctest/test_docs.py +++ b/doctest/test_docs.py @@ -30,6 +30,7 @@ NESTED = "nested" DATASOURCES = ".ql-datasources" WEBLOGS = "weblogs" +JSON_TEST = "json_test" class DocTestConnection(OpenSearchConnection): @@ -123,6 +124,7 @@ def set_up_test_indices(test): load_file("nested_objects.json", index_name=NESTED) load_file("datasources.json", index_name=DATASOURCES) load_file("weblogs.json", index_name=WEBLOGS) + load_file("json_test.json", index_name=JSON_TEST) def load_file(filename, index_name): @@ -151,7 +153,7 @@ def 
set_up(test): def tear_down(test): # drop leftover tables after each test - test_data_client.indices.delete(index=[ACCOUNTS, EMPLOYEES, PEOPLE, ACCOUNT2, NYC_TAXI, BOOKS, APACHE, WILDCARD, NESTED, WEBLOGS], ignore_unavailable=True) + test_data_client.indices.delete(index=[ACCOUNTS, EMPLOYEES, PEOPLE, ACCOUNT2, NYC_TAXI, BOOKS, APACHE, WILDCARD, NESTED, WEBLOGS, JSON_TEST], ignore_unavailable=True) docsuite = partial(doctest.DocFileSuite, diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java b/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java index 1728be74e6..d4f7213736 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java @@ -22,6 +22,7 @@ import static org.opensearch.sql.legacy.TestUtils.getGameOfThronesIndexMapping; import static org.opensearch.sql.legacy.TestUtils.getGeopointIndexMapping; import static org.opensearch.sql.legacy.TestUtils.getJoinTypeIndexMapping; +import static org.opensearch.sql.legacy.TestUtils.getJsonTestIndexMapping; import static org.opensearch.sql.legacy.TestUtils.getLocationIndexMapping; import static org.opensearch.sql.legacy.TestUtils.getMappingFile; import static org.opensearch.sql.legacy.TestUtils.getNestedSimpleIndexMapping; @@ -745,7 +746,12 @@ public enum Index { TestsConstants.TEST_INDEX_GEOPOINT, "dates", getGeopointIndexMapping(), - "src/test/resources/geopoints.json"); + "src/test/resources/geopoints.json"), + JSON_TEST( + TestsConstants.TEST_INDEX_JSON_TEST, + "json", + getJsonTestIndexMapping(), + "src/test/resources/json_test.json"); private final String name; private final String type; diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/TestUtils.java b/integ-test/src/test/java/org/opensearch/sql/legacy/TestUtils.java index 195dda0cbd..610ad1366a 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/TestUtils.java +++ 
b/integ-test/src/test/java/org/opensearch/sql/legacy/TestUtils.java @@ -250,6 +250,11 @@ public static String getGeopointIndexMapping() { return getMappingFile(mappingFile); } + public static String getJsonTestIndexMapping() { + String mappingFile = "json_test_index_mapping.json"; + return getMappingFile(mappingFile); + } + public static void loadBulk(Client client, String jsonPath, String defaultIndex) throws Exception { System.out.println(String.format("Loading file %s into opensearch cluster", jsonPath)); diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/TestsConstants.java b/integ-test/src/test/java/org/opensearch/sql/legacy/TestsConstants.java index 1e336f544e..387054ac7e 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/TestsConstants.java +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/TestsConstants.java @@ -58,6 +58,7 @@ public class TestsConstants { public static final String TEST_INDEX_MULTI_NESTED_TYPE = TEST_INDEX + "_multi_nested"; public static final String TEST_INDEX_NESTED_WITH_NULLS = TEST_INDEX + "_nested_with_nulls"; public static final String TEST_INDEX_GEOPOINT = TEST_INDEX + "_geopoint"; + public static final String TEST_INDEX_JSON_TEST = TEST_INDEX + "_json_test"; public static final String DATASOURCES = ".ql-datasources"; public static final String DATE_FORMAT = "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'"; diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/JsonFunctionsIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/JsonFunctionsIT.java new file mode 100644 index 0000000000..f852a97d48 --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/JsonFunctionsIT.java @@ -0,0 +1,55 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ppl; + +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_JSON_TEST; +import static org.opensearch.sql.util.MatcherUtils.rows; +import static 
org.opensearch.sql.util.MatcherUtils.schema; +import static org.opensearch.sql.util.MatcherUtils.verifyDataRows; +import static org.opensearch.sql.util.MatcherUtils.verifySchema; + +import java.io.IOException; +import org.json.JSONObject; +import org.junit.jupiter.api.Test; + +public class JsonFunctionsIT extends PPLIntegTestCase { + @Override + public void init() throws IOException { + loadIndex(Index.JSON_TEST); + } + + @Test + public void test_json_valid() throws IOException { + JSONObject result; + + result = + executeQuery( + String.format( + "source=%s | where json_valid(json_string) | fields test_name", + TEST_INDEX_JSON_TEST)); + verifySchema(result, schema("test_name", null, "string")); + verifyDataRows( + result, + rows("json nested object"), + rows("json object"), + rows("json array"), + rows("json scalar string"), + rows("json empty string")); + } + + @Test + public void test_not_json_valid() throws IOException { + JSONObject result; + + result = + executeQuery( + String.format( + "source=%s | where not json_valid(json_string) | fields test_name", + TEST_INDEX_JSON_TEST)); + verifySchema(result, schema("test_name", null, "string")); + verifyDataRows(result, rows("json invalid object")); + } +} diff --git a/integ-test/src/test/resources/indexDefinitions/json_test_index_mapping.json b/integ-test/src/test/resources/indexDefinitions/json_test_index_mapping.json new file mode 100644 index 0000000000..86bd0c6e94 --- /dev/null +++ b/integ-test/src/test/resources/indexDefinitions/json_test_index_mapping.json @@ -0,0 +1,12 @@ +{ + "mappings": { + "properties": { + "test_name": { + "type": "keyword" + }, + "json_string": { + "type": "keyword" + } + } + } +} diff --git a/integ-test/src/test/resources/json_test.json b/integ-test/src/test/resources/json_test.json new file mode 100644 index 0000000000..badb4f4f6e --- /dev/null +++ b/integ-test/src/test/resources/json_test.json @@ -0,0 +1,12 @@ +{"index":{"_id":"0"}} +{"test_name":"json nested object", 
"json_string":"{\"a\":\"1\",\"b\":{\"c\":\"2\",\"d\":\"3\"}}"} +{"index":{"_id":"1"}} +{"test_name":"json object", "json_string":"{\"a\":\"1\",\"b\":\"2\"}"} +{"index":{"_id":"2"}} +{"test_name":"json array", "json_string":"[1, 2, 3, 4]"} +{"index":{"_id":"3"}} +{"test_name":"json scalar string", "json_string":"\"abc\""} +{"index":{"_id":"4"}} +{"test_name":"json empty string","json_string":""} +{"index":{"_id":"5"}} +{"test_name":"json invalid object", "json_string":"{\"invalid\":\"json\", \"string\"}"} diff --git a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 index 053ec530db..5b6b9e41b8 100644 --- a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 @@ -332,6 +332,9 @@ ISNULL: 'ISNULL'; ISNOTNULL: 'ISNOTNULL'; CIDRMATCH: 'CIDRMATCH'; +// JSON FUNCTIONS +JSON_VALID: 'JSON_VALID'; + // FLOWCONTROL FUNCTIONS IFNULL: 'IFNULL'; NULLIF: 'NULLIF'; diff --git a/ppl/src/main/antlr/OpenSearchPPLParser.g4 b/ppl/src/main/antlr/OpenSearchPPLParser.g4 index 27f7e4014b..999c5d9c87 100644 --- a/ppl/src/main/antlr/OpenSearchPPLParser.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4 @@ -662,6 +662,7 @@ conditionFunctionName | ISNULL | ISNOTNULL | CIDRMATCH + | JSON_VALID ; // flow control function return non-boolean value