From 23db6989e2e26535d497b302cd4eed7dde2bf3a5 Mon Sep 17 00:00:00 2001 From: Priyanshi Gupta Date: Tue, 7 Jan 2025 17:07:46 +0530 Subject: [PATCH] Reporting jsonb subscripting issue in assessment and analyze (#2129) Reporting the JSONB subscription in the DMLs, DDLs, and PLPGSQL objects. The approach to detecting these is unclear, so trying to detect as many as possible in the source schema. There are still edge cases that can't be detected easily. https://yugabyte.atlassian.net/browse/DB-14545 --- .../expectedAssessmentReport.json | 81 ++++++++++-- .../pg_assessment_report_uqc.sql | 38 +++++- .../unsupported_query_constructs.sql | 13 +- yb-voyager/cmd/assessMigrationCommand.go | 1 + yb-voyager/src/query/queryissue/constants.go | 8 +- yb-voyager/src/query/queryissue/detectors.go | 62 ++++++++++ .../src/query/queryissue/detectors_test.go | 34 +++++ yb-voyager/src/query/queryissue/helpers.go | 23 ++++ yb-voyager/src/query/queryissue/issues_dml.go | 13 ++ .../src/query/queryissue/issues_dml_test.go | 17 ++- .../query/queryissue/parser_issue_detector.go | 41 ++++++ .../queryissue/parser_issue_detector_test.go | 117 +++++++++++++++++- .../src/query/queryparser/ddl_processor.go | 40 ++++++ .../src/query/queryparser/helpers_protomsg.go | 6 + .../src/query/queryparser/helpers_struct.go | 77 ++++++++++++ .../src/query/queryparser/traversal_proto.go | 1 + 16 files changed, 557 insertions(+), 15 deletions(-) diff --git a/migtests/tests/pg/assessment-report-test-uqc/expectedAssessmentReport.json b/migtests/tests/pg/assessment-report-test-uqc/expectedAssessmentReport.json index 276d6f01e..b15b10839 100644 --- a/migtests/tests/pg/assessment-report-test-uqc/expectedAssessmentReport.json +++ b/migtests/tests/pg/assessment-report-test-uqc/expectedAssessmentReport.json @@ -25,15 +25,21 @@ }, { "ObjectType": "TABLE", - "TotalCount": 4, - "InvalidCount": 1, - "ObjectNames": "sales.json_data, analytics.metrics, sales.events, sales.orders" + "TotalCount": 5, + "InvalidCount": 2, + "ObjectNames": "analytics.metrics, sales.orders, sales.test_json_chk, sales.events, sales.json_data" }, { "ObjectType": "VIEW", "TotalCount": 3, "InvalidCount": 3, - "ObjectNames": "sales.event_analysis_view, sales.event_analysis_view2, sales.employ_depart_view" + "ObjectNames": "sales.employ_depart_view, sales.event_analysis_view, sales.event_analysis_view2" + }, + { + "ObjectType": "FUNCTION", + "TotalCount": 1, + "InvalidCount": 1, + "ObjectNames": "sales.get_user_info" } ] }, @@ -43,9 +49,10 @@ "sales.orders", "analytics.metrics", "sales.events", - "sales.json_data" + "sales.json_data", + "sales.test_json_chk" ], - "ColocatedReasoning": "Recommended instance type with 4 vCPU and 16 GiB memory could fit 4 objects (4 tables/materialized views and 0 explicit/implicit indexes) with 0.00 MB size and throughput requirement of 0 reads/sec and 0 writes/sec as colocated. Non leaf partition tables/indexes and unsupported tables/indexes were not considered.", + "ColocatedReasoning": "Recommended instance type with 4 vCPU and 16 GiB memory could fit 5 objects (5 tables/materialized views and 0 explicit/implicit indexes) with 0.00 MB size and throughput requirement of 0 reads/sec and 0 writes/sec as colocated. Non leaf partition tables/indexes and unsupported tables/indexes were not considered.", "ShardedTables": null, "NumNodes": 3, "VCPUsPerInstance": 4, @@ -86,6 +93,16 @@ "MinimumVersionsFixedIn": null }, { + "FeatureName": "Jsonb Subscripting", + "Objects": [ + { + "ObjectName": "sales.test_json_chk", + "SqlStatement": "CREATE TABLE sales.test_json_chk (\n id integer,\n name text,\n email text,\n active text,\n data jsonb,\n CONSTRAINT test_json_chk_data_check CHECK ((data['key'::text] \u003c\u003e '{}'::jsonb))\n);" + } + ], + "MinimumVersionsFixedIn": null + }, + { "FeatureName": "Json Type Predicate", "Objects": [ { @@ -98,6 +115,20 @@ ], "UnsupportedFeaturesDesc": "Features of the source database that are not supported on the target YugabyteDB.", "TableIndexStats": [ + { + "SchemaName": "sales", + "ObjectName": "test_json_chk", + "RowCount": 2, + "ColumnCount": 5, + "Reads": 6, + "Writes": 2, + "ReadsPerSecond": 0, + "WritesPerSecond": 0, + "IsIndex": false, + "ObjectType": "", + "ParentTableName": null, + "SizeInBytes": 8192 + }, { "SchemaName": "sales", "ObjectName": "orders", @@ -182,6 +213,30 @@ "DocsLink": "", "MinimumVersionsFixedIn": null }, + { + "ConstructTypeName": "Jsonb Subscripting", + "Query": "SELECT \n data,\n data[$1] AS name, \n (data[$2]) as active\nFROM sales.test_json_chk", + "DocsLink": "", + "MinimumVersionsFixedIn": null + }, + { + "ConstructTypeName": "Jsonb Subscripting", + "Query": "SELECT (sales.get_user_info($1))[$2] AS user_info", + "DocsLink": "", + "MinimumVersionsFixedIn": null + }, + { + "ConstructTypeName": "Jsonb Subscripting", + "Query": "SELECT (jsonb_build_object($1, $2, $3, $4, $5, $6) || $7)[$8] AS json_obj", + "DocsLink": "", + "MinimumVersionsFixedIn": null + }, + { + "ConstructTypeName": "Jsonb Subscripting", + "Query": "SELECT ($1 :: jsonb)[$2][$3] as b", + "DocsLink": "", + "MinimumVersionsFixedIn": null + }, { "ConstructTypeName": "Json Type Predicate", "Query": "SELECT * \nFROM sales.json_data\nWHERE array_column IS JSON ARRAY", @@ -189,5 +244,17 @@ "MinimumVersionsFixedIn": null } ], - "UnsupportedPlPgSqlObjects": null + "UnsupportedPlPgSqlObjects": [ + { + "FeatureName": "Jsonb Subscripting", + "Objects": [ + { + "ObjectType": "FUNCTION", + "ObjectName": "sales.get_user_info", + "SqlStatement": "SELECT\n data,\n data['name'] AS name,\n (data['active']) as active\n FROM sales.test_json_chk;" + } + ], + "MinimumVersionsFixedIn": null + } + ] } \ No newline at end of file diff --git a/migtests/tests/pg/assessment-report-test-uqc/pg_assessment_report_uqc.sql b/migtests/tests/pg/assessment-report-test-uqc/pg_assessment_report_uqc.sql index b4fe35874..987f4f05b 100644 --- a/migtests/tests/pg/assessment-report-test-uqc/pg_assessment_report_uqc.sql +++ b/migtests/tests/pg/assessment-report-test-uqc/pg_assessment_report_uqc.sql @@ -47,6 +47,42 @@ create view sales.employ_depart_view AS SELECT any_value(name) AS any_employee FROM employees; +CREATE TABLE sales.test_json_chk ( + id int, + name text, + email text, + active text, + data jsonb, + CHECK (data['key']<>'{}') +); + +INSERT INTO sales.test_json_chk (id, name, email, active, data) +VALUES (1, 'John Doe', 'john@example.com', 'Y', jsonb_build_object('key', 'value', 'name', 'John Doe', 'active', 'Y')); + +INSERT INTO sales.test_json_chk (id, name, email, active, data) +VALUES (2, 'Jane Smith', 'jane@example.com', 'N', jsonb_build_object('key', 'value', 'name', 'Jane Smith', 'active', 'N')); + +CREATE OR REPLACE FUNCTION sales.get_user_info(user_id INT) +RETURNS JSONB AS $$ +BEGIN + PERFORM + data, + data['name'] AS name, + (data['active']) as active + FROM sales.test_json_chk; + + RETURN ( + SELECT jsonb_build_object( + 'id', id, + 'name', name, + 'email', email, + 'active', active + ) + FROM sales.test_json_chk + WHERE id = user_id + ); +END; +$$ LANGUAGE plpgsql; CREATE TABLE sales.events ( id int PRIMARY KEY, event_range daterange @@ -85,4 +121,4 @@ INSERT INTO public.json_data ( 3, '[1, 2, 3, 4]', 4, '"hello"', 5, '{"uniqueKey1": "value1", "uniqueKey2": "value2"}' -); \ No newline at end of file +); diff --git a/migtests/tests/pg/assessment-report-test-uqc/unsupported_query_constructs.sql b/migtests/tests/pg/assessment-report-test-uqc/unsupported_query_constructs.sql index 43a68b8e3..d2f4f089e 100644 --- a/migtests/tests/pg/assessment-report-test-uqc/unsupported_query_constructs.sql +++ b/migtests/tests/pg/assessment-report-test-uqc/unsupported_query_constructs.sql @@ -27,6 +27,17 @@ SELECT any_value(name) AS any_employee FROM employees; +SELECT (sales.get_user_info(2))['name'] AS user_info; + +SELECT (jsonb_build_object('name', 'PostgreSQL', 'version', 17, 'open_source', TRUE) || '{"key": "value2"}')['name'] AS json_obj; + +SELECT + data, + data['name'] AS name, + (data['active']) as active +FROM sales.test_json_chk; + +SELECT ('{"a": { "b": {"c": "1"}}}' :: jsonb)['a']['b'] as b; --PG15 SELECT range_agg(event_range) AS union_of_ranges FROM sales.events; @@ -37,4 +48,4 @@ FROM sales.events; -- -- PG 16 and above feature SELECT * FROM sales.json_data -WHERE array_column IS JSON ARRAY; \ No newline at end of file +WHERE array_column IS JSON ARRAY; diff --git a/yb-voyager/cmd/assessMigrationCommand.go b/yb-voyager/cmd/assessMigrationCommand.go index f6c17cbb4..f22eba98e 100644 --- a/yb-voyager/cmd/assessMigrationCommand.go +++ b/yb-voyager/cmd/assessMigrationCommand.go @@ -1043,6 +1043,7 @@ func fetchUnsupportedPGFeaturesFromSchemaReport(schemaAnalysisReport utils.Schem unsupportedFeatures = append(unsupportedFeatures, getUnsupportedFeaturesFromSchemaAnalysisReport(queryissue.JSON_CONSTRUCTOR_FUNCTION_NAME, "", queryissue.JSON_CONSTRUCTOR_FUNCTION, schemaAnalysisReport, false, "")) unsupportedFeatures = append(unsupportedFeatures, getUnsupportedFeaturesFromSchemaAnalysisReport(queryissue.AGGREGATION_FUNCTIONS_NAME, "", queryissue.AGGREGATE_FUNCTION, schemaAnalysisReport, false, "")) unsupportedFeatures = append(unsupportedFeatures, getUnsupportedFeaturesFromSchemaAnalysisReport(queryissue.SECURITY_INVOKER_VIEWS_NAME, "", queryissue.SECURITY_INVOKER_VIEWS, schemaAnalysisReport, false, "")) + unsupportedFeatures = append(unsupportedFeatures, getUnsupportedFeaturesFromSchemaAnalysisReport(queryissue.JSONB_SUBSCRIPTING_NAME, "", queryissue.JSONB_SUBSCRIPTING, schemaAnalysisReport, false, "")) unsupportedFeatures = append(unsupportedFeatures, getUnsupportedFeaturesFromSchemaAnalysisReport(queryissue.FOREIGN_KEY_REFERENCES_PARTITIONED_TABLE_NAME, "", queryissue.FOREIGN_KEY_REFERENCES_PARTITIONED_TABLE, schemaAnalysisReport, false, "")) unsupportedFeatures = append(unsupportedFeatures, getUnsupportedFeaturesFromSchemaAnalysisReport(queryissue.JSON_TYPE_PREDICATE_NAME, "", queryissue.JSON_TYPE_PREDICATE, schemaAnalysisReport, false, "")) diff --git a/yb-voyager/src/query/queryissue/constants.go b/yb-voyager/src/query/queryissue/constants.go index e6ab413d2..edb8edf27 100644 --- a/yb-voyager/src/query/queryissue/constants.go +++ b/yb-voyager/src/query/queryissue/constants.go @@ -73,9 +73,11 @@ const ( FETCH_WITH_TIES = "FETCH_WITH_TIES" REGEX_FUNCTIONS = "REGEX_FUNCTIONS" - MULTI_RANGE_DATATYPE = "MULTI_RANGE_DATATYPE" - COPY_FROM_WHERE = "COPY FROM ... WHERE" - COPY_ON_ERROR = "COPY ... ON_ERROR" + JSONB_SUBSCRIPTING = "JSONB_SUBSCRIPTING" + JSONB_SUBSCRIPTING_NAME = "Jsonb Subscripting" + MULTI_RANGE_DATATYPE = "MULTI_RANGE_DATATYPE" + COPY_FROM_WHERE = "COPY FROM ... WHERE" + COPY_ON_ERROR = "COPY ... ON_ERROR" FOREIGN_KEY_REFERENCES_PARTITIONED_TABLE = "FOREIGN_KEY_REFERENCED_PARTITIONED_TABLE" FOREIGN_KEY_REFERENCES_PARTITIONED_TABLE_NAME = "Foreign key constraint references partitioned table" diff --git a/yb-voyager/src/query/queryissue/detectors.go b/yb-voyager/src/query/queryissue/detectors.go index d1ec43c5e..1bc45b099 100644 --- a/yb-voyager/src/query/queryissue/detectors.go +++ b/yb-voyager/src/query/queryissue/detectors.go @@ -206,6 +206,68 @@ func (d *RangeTableFuncDetector) GetIssues() []QueryIssue { return issues } +type JsonbSubscriptingDetector struct { + query string + jsonbColumns []string + detected bool + jsonbFunctions []string +} + +func NewJsonbSubscriptingDetector(query string, jsonbColumns []string, jsonbFunctions []string) *JsonbSubscriptingDetector { + return &JsonbSubscriptingDetector{ + query: query, + jsonbColumns: jsonbColumns, + jsonbFunctions: jsonbFunctions, + } +} + +func (j *JsonbSubscriptingDetector) Detect(msg protoreflect.Message) error { + + if queryparser.GetMsgFullName(msg) != queryparser.PG_QUERY_A_INDIRECTION_NODE { + return nil + } + aIndirectionNode, ok := queryparser.GetAIndirectionNode(msg) + if !ok { + return nil + } + + /* + Indirection node is to determine if subscripting is happening in the query e.g. data['name'] - jsonb, numbers[1] - array type, and ('{"a": {"b": {"c": 1}}}'::jsonb)['a']['b']['c']; + Arg is the data on which subscripting is happening e.g data, numbers (columns) and constant data type casted to jsonb ('{"a": {"b": {"c": 1}}}'::jsonb) + Indices are the actual fields that are being accessed while subscripting or the index in case of array type e.g. name, 1, a, b etc. + So we are checking the arg is of jsonb type here + */ + arg := aIndirectionNode.GetArg() + if arg == nil { + return nil + } + /* + Caveats - + + Still with this approach we won't be able to cover all cases e.g. + + select ab_data['name'] from (select Data as ab_data from test_jsonb);`, + + parseTree - stmts:{stmt:{select_stmt:{target_list:{res_target:{val:{a_indirection:{arg:{column_ref:{fields:{string:{sval:"ab_data"}} location:9}} + indirection:{a_indices:{uidx:{a_const:{sval:{sval:"name"} location:17}}}}}} location:9}} from_clause:{range_subselect:{subquery:{select_stmt:{ + target_list:{res_target:{name:"ab_data" val:{column_ref:{fields:{string:{sval:"data"}} location:38}} location:38}} + from_clause:{range_var:{relname:"test_jsonb" inh:true relpersistence:"p" location:59}} limit_option:LIMIT_OPTION_DEFAULT op:SETOP_NONE}}}} + limit_option:LIMIT_OPTION_DEFAULT op:SETOP_NONE}} + */ + if queryparser.DoesNodeHandleJsonbData(arg, j.jsonbColumns, j.jsonbFunctions) { + j.detected = true + } + return nil +} + +func (j *JsonbSubscriptingDetector) GetIssues() []QueryIssue { + var issues []QueryIssue + if j.detected { + issues = append(issues, NewJsonbSubscriptingIssue(DML_QUERY_OBJECT_TYPE, "", j.query)) + } + return issues +} + type SelectStmtDetector struct { query string limitOptionWithTiesDetected bool diff --git a/yb-voyager/src/query/queryissue/detectors_test.go b/yb-voyager/src/query/queryissue/detectors_test.go index e1aa7a32a..08aaf1b4a 100644 --- a/yb-voyager/src/query/queryissue/detectors_test.go +++ b/yb-voyager/src/query/queryissue/detectors_test.go @@ -680,6 +680,40 @@ func TestCombinationOfDetectors1WithObjectCollector(t *testing.T) { } } +func TestJsonbSubscriptingDetector(t *testing.T) { + withoutIssueSqls := []string{ + `SELECT numbers[1] AS first_number + FROM array_data;`, + `select ab_data['name'] from (select Data as ab_data from test_jsonb);`, // NOT REPORTED AS OF NOW because of caveat + } + issuesSqls := []string{ + `SELECT ('{"a": {"b": {"c": 1}}}'::jsonb)['a']['b']['c'];`, + `UPDATE json_data +SET data = jsonb_set(data, '{user,details,city}', '"San Francisco"') +WHERE data['user']['name'] = '"Alice"';`, + `SELECT + data->>$1 AS name, + data[$2][$3] AS second_score +FROM test_jsonb1`, + `SELECT (jsonb_build_object('name', 'PostgreSQL', 'version', 14, 'open_source', TRUE) || '{"key": "value2"}')['name'] AS json_obj;`, + `SELECT (data || '{"new_key": "new_value"}' )['name'] FROM test_jsonb;`, + `SELECT ('{"key": "value1"}'::jsonb || '{"key": "value2"}'::jsonb)['key'] AS object_in_array;`, + } + + for _, sql := range withoutIssueSqls { + issues := getDetectorIssues(t, NewJsonbSubscriptingDetector(sql, []string{}, []string{}), sql) + + assert.Equal(t, 0, len(issues), "Expected 1 issue for SQL: %s", sql) + } + + for _, sql := range issuesSqls { + issues := getDetectorIssues(t, NewJsonbSubscriptingDetector(sql, []string{"data"}, []string{"jsonb_build_object"}), sql) + + assert.Equal(t, 1, len(issues), "Expected 1 issue for SQL: %s", sql) + assert.Equal(t, JSONB_SUBSCRIPTING, issues[0].Type, "Expected System Columns issue for SQL: %s", sql) + } +} + func TestJsonConstructorDetector(t *testing.T) { sql := `SELECT JSON_ARRAY('PostgreSQL', 12, TRUE, NULL) AS json_array;` diff --git a/yb-voyager/src/query/queryissue/helpers.go b/yb-voyager/src/query/queryissue/helpers.go index 12f2a2e24..dfc89f119 100644 --- a/yb-voyager/src/query/queryissue/helpers.go +++ b/yb-voyager/src/query/queryissue/helpers.go @@ -134,3 +134,26 @@ var unsupportedLargeObjectFunctions = mapset.NewThreadUnsafeSet([]string{ //functions provided by lo extension, refer - https://www.postgresql.org/docs/current/lo.html#LO-RATIONALE "lo_manage", "lo_oid", }...) + +// catalog functions return type jsonb +var catalogFunctionsReturningJsonb = mapset.NewThreadUnsafeSet([]string{ + /* + SELECT + DISTINCT p.proname AS Function_Name + FROM + pg_catalog.pg_proc p + LEFT JOIN pg_catalog.pg_language l ON p.prolang = l.oid + LEFT JOIN pg_catalog.pg_namespace n ON p.pronamespace = n.oid + WHERE + pg_catalog.pg_function_is_visible(p.oid) AND pg_catalog.pg_get_function_result(p.oid) = 'jsonb' + + ORDER BY Function_Name; + */ + "jsonb_agg", "jsonb_agg_finalfn", "jsonb_agg_strict", "jsonb_array_element", + "jsonb_build_array", "jsonb_build_object", "jsonb_concat", "jsonb_delete", + "jsonb_delete_path", "jsonb_extract_path", "jsonb_in", "jsonb_insert", + "jsonb_object", "jsonb_object_agg", "jsonb_object_agg_finalfn", "jsonb_object_agg_strict", + "jsonb_object_agg_unique", "jsonb_object_agg_unique_strict", "jsonb_object_field", "jsonb_path_query_array", + "jsonb_path_query_array_tz", "jsonb_path_query_first", "jsonb_path_query_first_tz", "jsonb_recv", + "jsonb_set", "jsonb_set_lax", "jsonb_strip_nulls", "to_jsonb", "ts_headline", +}...) diff --git a/yb-voyager/src/query/queryissue/issues_dml.go b/yb-voyager/src/query/queryissue/issues_dml.go index 0752051de..e9b8f2592 100644 --- a/yb-voyager/src/query/queryissue/issues_dml.go +++ b/yb-voyager/src/query/queryissue/issues_dml.go @@ -142,6 +142,19 @@ func NewLOFuntionsIssue(objectType string, objectName string, sqlStatement strin return newQueryIssue(loFunctionsIssue, objectType, objectName, sqlStatement, details) } +var jsonbSubscriptingIssue = issue.Issue{ + Type: JSONB_SUBSCRIPTING, + TypeName: JSONB_SUBSCRIPTING_NAME, + TypeDescription: "Jsonb subscripting is not supported in YugabyteDB yet", + Suggestion: "Use Arrow operators (-> / ->>) to access the jsonb fields.", + GH: "", + DocsLink: "", //TODO +} + +func NewJsonbSubscriptingIssue(objectType string, objectName string, sqlStatement string) QueryIssue { + return newQueryIssue(jsonbSubscriptingIssue, objectType, objectName, sqlStatement, map[string]interface{}{}) +} + var jsonPredicateIssue = issue.Issue{ Type: JSON_TYPE_PREDICATE, TypeName: JSON_TYPE_PREDICATE_NAME, diff --git a/yb-voyager/src/query/queryissue/issues_dml_test.go b/yb-voyager/src/query/queryissue/issues_dml_test.go index 6825eb1af..d9a0b8209 100644 --- a/yb-voyager/src/query/queryissue/issues_dml_test.go +++ b/yb-voyager/src/query/queryissue/issues_dml_test.go @@ -25,9 +25,9 @@ import ( "github.com/stretchr/testify/assert" "github.com/testcontainers/testcontainers-go/modules/yugabytedb" - testutils "github.com/yugabyte/yb-voyager/yb-voyager/test/utils" "github.com/yugabyte/yb-voyager/yb-voyager/src/ybversion" + testutils "github.com/yugabyte/yb-voyager/yb-voyager/test/utils" ) func testLOFunctionsIssue(t *testing.T) { @@ -43,6 +43,17 @@ func testLOFunctionsIssue(t *testing.T) { assertErrorCorrectlyThrownForIssueForYBVersion(t, err, "Transaction for catalog table write operation 'pg_largeobject_metadata' not found", loDatatypeIssue) } +func testJsonbSubscriptingIssue(t *testing.T) { + ctx := context.Background() + conn, err := getConn() + assert.NoError(t, err) + + defer conn.Close(context.Background()) + _, err = conn.Exec(ctx, `SELECT ('{"a": {"b": {"c": 1}}}'::jsonb)['a']['b']['c'];`) + + assertErrorCorrectlyThrownForIssueForYBVersion(t, err, "cannot subscript type jsonb because it is not an array", loDatatypeIssue) +} + func testRegexFunctionsIssue(t *testing.T) { ctx := context.Background() conn, err := getConn() @@ -190,7 +201,7 @@ SELECT FROM any_value_ex GROUP BY department;`, -`CREATE TABLE events ( + `CREATE TABLE events ( id SERIAL PRIMARY KEY, event_range daterange ); @@ -264,6 +275,8 @@ func TestDMLIssuesInYBVersion(t *testing.T) { success = t.Run(fmt.Sprintf("%s-%s", "json query functions", ybVersion), testJsonQueryFunctions) assert.True(t, success) + success = t.Run(fmt.Sprintf("%s-%s", "json subscripting", ybVersion), testJsonbSubscriptingIssue) + assert.True(t, success) success = t.Run(fmt.Sprintf("%s-%s", "aggregate functions", ybVersion), testAggFunctions) assert.True(t, success) diff --git a/yb-voyager/src/query/queryissue/parser_issue_detector.go b/yb-voyager/src/query/queryissue/parser_issue_detector.go index e4db26c0f..defde4987 100644 --- a/yb-voyager/src/query/queryissue/parser_issue_detector.go +++ b/yb-voyager/src/query/queryissue/parser_issue_detector.go @@ -63,6 +63,12 @@ type ParserIssueDetector struct { // Boolean to check if there are any unlogged tables that were filtered // out because they are fixed as per the target db version IsUnloggedTablesIssueFiltered bool + + //Functions in exported schema + functionObjects []*queryparser.Function + + //columns names with jsonb type + jsonbColumns []string } func NewParserIssueDetector() *ParserIssueDetector { @@ -221,6 +227,11 @@ func (p *ParserIssueDetector) ParseRequiredDDLs(query string) error { p.columnsWithUnsupportedIndexDatatypes[table.GetObjectName()][col.ColumnName] = "user_defined_type" } } + + if col.TypeName == "jsonb" { + // used to detect the jsonb subscripting happening on these columns + p.jsonbColumns = append(p.jsonbColumns, col.ColumnName) + } } case *queryparser.CreateType: @@ -235,6 +246,9 @@ func (p *ParserIssueDetector) ParseRequiredDDLs(query string) error { if index.AccessMethod == GIN_ACCESS_METHOD { p.IsGinIndexPresentInSchema = true } + case *queryparser.Function: + fn, _ := ddlObj.(*queryparser.Function) + p.functionObjects = append(p.functionObjects, fn) } return nil } @@ -379,6 +393,7 @@ func (p *ParserIssueDetector) genericIssues(query string) ([]QueryIssue, error) NewCopyCommandUnsupportedConstructsDetector(query), NewJsonConstructorFuncDetector(query), NewJsonQueryFunctionDetector(query), + NewJsonbSubscriptingDetector(query, p.jsonbColumns, p.getJsonbReturnTypeFunctions()), NewJsonPredicateExprDetector(query), } @@ -423,3 +438,29 @@ func (p *ParserIssueDetector) genericIssues(query string) ([]QueryIssue, error) return result, nil } + +func (p *ParserIssueDetector) getJsonbReturnTypeFunctions() []string { + var jsonbFunctions []string + jsonbColumns := p.jsonbColumns + for _, function := range p.functionObjects { + returnType := function.ReturnType + if strings.HasSuffix(returnType, "%TYPE") { + // e.g. public.table_name.column%TYPE + qualifiedColumn := strings.TrimSuffix(returnType, "%TYPE") + parts := strings.Split(qualifiedColumn, ".") + column := parts[len(parts)-1] + if slices.Contains(jsonbColumns, column) { + jsonbFunctions = append(jsonbFunctions, function.FuncName) + } + } else { + // e.g. public.udt_type, text, trigger, jsonb + parts := strings.Split(returnType, ".") + typeName := parts[len(parts)-1] + if typeName == "jsonb" { + jsonbFunctions = append(jsonbFunctions, function.FuncName) + } + } + } + jsonbFunctions = append(jsonbFunctions, catalogFunctionsReturningJsonb.ToSlice()...) + return jsonbFunctions +} diff --git a/yb-voyager/src/query/queryissue/parser_issue_detector_test.go b/yb-voyager/src/query/queryissue/parser_issue_detector_test.go index 4ad6d7b7b..e13597625 100644 --- a/yb-voyager/src/query/queryissue/parser_issue_detector_test.go +++ b/yb-voyager/src/query/queryissue/parser_issue_detector_test.go @@ -573,7 +573,7 @@ JSON_TABLE(data, '$.skills[*]' ) ) AS jt;`, `SELECT JSON_ARRAY($1, 12, TRUE, $2) AS json_array;`, -`CREATE TABLE sales.json_data ( + `CREATE TABLE sales.json_data ( id int PRIMARY KEY, array_column TEXT CHECK (array_column IS JSON ARRAY), unique_keys_column TEXT CHECK (unique_keys_column IS JSON WITH UNIQUE KEYS) @@ -654,6 +654,120 @@ JSON_TABLE(data, '$.skills[*]' } } +func TestJsonbSubscriptingIssue(t *testing.T) { + ddlSqls := []string{ + `CREATE TABLE test_jsonb1 ( + id SERIAL PRIMARY KEY, + data JSONB +);`, + `CREATE TABLE users ( + id SERIAL PRIMARY KEY, + name TEXT, + email TEXT, + active BOOLEAN +);`, + `CREATE TABLE test_json_chk ( + id int, + data1 jsonb, + CHECK (data1['key']<>'') +);`, + `CREATE OR REPLACE FUNCTION get_user_info(user_id INT) +RETURNS JSONB AS $$ +BEGIN + RETURN ( + SELECT jsonb_build_object( + 'id', id, + 'name', name, + 'email', email, + 'active', active + ) + FROM users + WHERE id = user_id + ); +END; +$$ LANGUAGE plpgsql;`, + } + sqls := []string{ + + `CREATE TABLE test_json_chk ( + id int, + data1 jsonb, + CHECK (data1['key']<>'') +);`, + `SELECT + data->>'name' AS name, + data['scores'][1] AS second_score +FROM test_jsonb1;`, + `SELECT ('[{"key": "value1"}, {"key": "value2"}]'::jsonb)[1]['key'] AS object_in_array; `, + `SELECT (JSON_OBJECT( + 'movie' VALUE JSON_OBJECT('code' VALUE 'P123', 'title' VALUE 'Jaws'), + 'director' VALUE 'Steven Spielberg' +)::JSONB)['movie'] AS nested_json_object;`, + `SELECT (jsonb_build_object('name', 'PostgreSQL', 'version', 14, 'open_source', TRUE))['name'] AS json_obj;`, + `SELECT ('{"key": "value1"}'::jsonb || '{"key": "value2"}'::jsonb)['key'] AS object_in_array;`, + `SELECT ('{"key": "value1"}'::jsonb || '{"key": "value2"}')['key'] AS object_in_array;`, + `SELECT (data || '{"new_key": "new_value"}' )['name'] FROM test_jsonb;`, + `SELECT (jsonb_build_object('name', 'PostgreSQL', 'version', 14, 'open_source', TRUE))['name'] AS json_obj;`, + `SELECT (jsonb_build_object('name', 'PostgreSQL', 'version', 14, 'open_source', TRUE) || '{"key": "value2"}')['name'] AS json_obj;`, + `SELECT (ROW('Alice', 'Smith', 25))['0'] ;`, + `SELECT (get_user_info(2))['name'] AS user_info;`, + } + + stmtsWithExpectedIssues := map[string][]QueryIssue{ + sqls[0]: []QueryIssue{ + NewJsonbSubscriptingIssue(TABLE_OBJECT_TYPE, "test_json_chk", sqls[0]), + }, + sqls[1]: []QueryIssue{ + NewJsonbSubscriptingIssue(DML_QUERY_OBJECT_TYPE, "", sqls[1]), + }, + sqls[2]: []QueryIssue{ + NewJsonbSubscriptingIssue(DML_QUERY_OBJECT_TYPE, "", sqls[2]), + }, + sqls[3]: []QueryIssue{ + NewJsonbSubscriptingIssue(DML_QUERY_OBJECT_TYPE, "", sqls[3]), + NewJsonConstructorFunctionIssue(DML_QUERY_OBJECT_TYPE, "", sqls[3], []string{JSON_OBJECT}), + }, + sqls[4]: []QueryIssue{ + NewJsonbSubscriptingIssue(DML_QUERY_OBJECT_TYPE, "", sqls[4]), + }, + sqls[5]: []QueryIssue{ + NewJsonbSubscriptingIssue(DML_QUERY_OBJECT_TYPE, "", sqls[5]), + }, + sqls[6]: []QueryIssue{ + NewJsonbSubscriptingIssue(DML_QUERY_OBJECT_TYPE, "", sqls[6]), + }, + sqls[7]: []QueryIssue{ + NewJsonbSubscriptingIssue(DML_QUERY_OBJECT_TYPE, "", sqls[7]), + }, + sqls[8]: []QueryIssue{ + NewJsonbSubscriptingIssue(DML_QUERY_OBJECT_TYPE, "", sqls[8]), + }, + sqls[9]: []QueryIssue{ + NewJsonbSubscriptingIssue(DML_QUERY_OBJECT_TYPE, "", sqls[9]), + }, + sqls[10]: []QueryIssue{}, + sqls[11]: []QueryIssue{ + NewJsonbSubscriptingIssue(DML_QUERY_OBJECT_TYPE, "", sqls[11]), + }, + } + + parserIssueDetector := NewParserIssueDetector() + for _, stmt := range ddlSqls { + err := parserIssueDetector.ParseRequiredDDLs(stmt) + assert.NoError(t, err, "Error parsing required ddl: %s", stmt) + } + for stmt, expectedIssues := range stmtsWithExpectedIssues { + issues, err := parserIssueDetector.GetAllIssues(stmt, ybversion.LatestStable) + assert.NoError(t, err, "Error detecting issues for statement: %s", stmt) + assert.Equal(t, len(expectedIssues), len(issues), "Mismatch in issue count for statement: %s", stmt) + for _, expectedIssue := range expectedIssues { + found := slices.ContainsFunc(issues, func(queryIssue QueryIssue) bool { + return cmp.Equal(expectedIssue, queryIssue) + }) + assert.True(t, found, "Expected issue not found: %v in statement: %s", expectedIssue, stmt) + } + } +} func TestAggregateFunctions(t *testing.T) { sqls := []string{ `SELECT @@ -710,6 +824,7 @@ $$ LANGUAGE plpgsql;`, } } } + func TestRegexFunctionsIssue(t *testing.T) { dmlStmts := []string{ `SELECT regexp_count('This is an example. Another example. Example is a common word.', 'example')`, diff --git a/yb-voyager/src/query/queryparser/ddl_processor.go b/yb-voyager/src/query/queryparser/ddl_processor.go index 4bc488f79..412dc4c8c 100644 --- a/yb-voyager/src/query/queryparser/ddl_processor.go +++ b/yb-voyager/src/query/queryparser/ddl_processor.go @@ -962,6 +962,43 @@ func (mv *MView) GetSchemaName() string { return mv.SchemaName } func (mv *MView) GetObjectType() string { return MVIEW_OBJECT_TYPE } +// ============================Function Processor ================= + +type FunctionProcessor struct{} + +func NewFunctionProcessor() *FunctionProcessor { + return &FunctionProcessor{} +} + +func (mv *FunctionProcessor) Process(parseTree *pg_query.ParseResult) (DDLObject, error) { + funcNode, ok := getCreateFuncStmtNode(parseTree) + if !ok { + return nil, fmt.Errorf("not a CREATE FUNCTION statement") + } + + funcNameList := funcNode.CreateFunctionStmt.GetFuncname() + funcSchemaName, funcName := getFunctionObjectName(funcNameList) + function := Function{ + SchemaName: funcSchemaName, + FuncName: funcName, + ReturnType: GetReturnTypeOfFunc(parseTree), + } + return &function, nil +} + +type Function struct { + SchemaName string + FuncName string + ReturnType string +} + +func (f *Function) GetObjectName() string { + return lo.Ternary(f.SchemaName != "", fmt.Sprintf("%s.%s", f.SchemaName, f.FuncName), f.FuncName) +} +func (f *Function) GetSchemaName() string { return f.SchemaName } + +func (f *Function) GetObjectType() string { return FUNCTION_OBJECT_TYPE } + //=============================No-Op PROCESSOR ================== //No op Processor for objects we don't have Processor yet @@ -1010,6 +1047,8 @@ func GetDDLProcessor(parseTree *pg_query.ParseResult) (DDLProcessor, error) { } else { return NewNoOpProcessor(), nil } + case PG_QUERY_CREATE_FUNCTION_STMT: + return NewFunctionProcessor(), nil default: return NewNoOpProcessor(), nil } @@ -1046,6 +1085,7 @@ const ( PG_QUERY_FOREIGN_TABLE_STMT = "pg_query.CreateForeignTableStmt" PG_QUERY_VIEW_STMT = "pg_query.ViewStmt" PG_QUERY_CREATE_TABLE_AS_STMT = "pg_query.CreateTableAsStmt" + PG_QUERY_CREATE_FUNCTION_STMT = "pg_query.CreateFunctionStmt" ) var deferrableConstraintsList = []pg_query.ConstrType{ diff --git a/yb-voyager/src/query/queryparser/helpers_protomsg.go b/yb-voyager/src/query/queryparser/helpers_protomsg.go index de3204f3c..73d76b7e1 100644 --- a/yb-voyager/src/query/queryparser/helpers_protomsg.go +++ b/yb-voyager/src/query/queryparser/helpers_protomsg.go @@ -449,3 +449,9 @@ func TraverseAndExtractDefNamesFromDefElem(msg protoreflect.Message) ([]string, return defNames, nil } + +func GetAIndirectionNode(msg protoreflect.Message) (*pg_query.A_Indirection, bool) { + protoMsg := msg.Interface().(protoreflect.ProtoMessage) + aIndirection, ok := protoMsg.(*pg_query.A_Indirection) + return aIndirection, ok +} diff --git a/yb-voyager/src/query/queryparser/helpers_struct.go b/yb-voyager/src/query/queryparser/helpers_struct.go index c3128de6b..1efbe99da 100644 --- a/yb-voyager/src/query/queryparser/helpers_struct.go +++ b/yb-voyager/src/query/queryparser/helpers_struct.go @@ -17,6 +17,7 @@ package queryparser import ( "fmt" + "slices" "strings" pg_query "github.com/pganalyze/pg_query_go/v6" @@ -216,6 +217,9 @@ func getQualifiedTypeName(typeNames []*pg_query.Node) string { } func convertParserTypeNameToString(typeVar *pg_query.TypeName) string { + if typeVar == nil { + return "" + } typeNames := typeVar.GetNames() finalTypeName := getQualifiedTypeName(typeNames) // type name can qualified table_name.column in case of %TYPE if typeVar.PctType { // %TYPE declaration, so adding %TYPE for using it further @@ -265,3 +269,76 @@ func IsDDL(parseTree *pg_query.ParseResult) (bool, error) { //Not Full-proof as we don't have all DDL types but atleast we will skip all the types we know currently return !ok, nil } + +/* +this function checks whether the current node handles the jsonb data or not by evaluating all different type of nodes - +column ref - column of jsonb type +type cast - constant data with type casting to jsonb type +func call - function call returning the jsonb data +Expression - if any of left and right operands are of node type handling jsonb data +*/ +func DoesNodeHandleJsonbData(node *pg_query.Node, jsonbColumns []string, jsonbFunctions []string) bool { + switch { + case node.GetColumnRef() != nil: + /* + SELECT numbers[1] AS first_number + FROM array_data; + {a_indirection:{arg:{column_ref:{fields:{string:{sval:"numbers"}} location:69}} + indirection:{a_indices:{uidx:{a_const:{ival:{ival:1} location:77}}}}}} location:69}} + */ + _, col := GetColNameFromColumnRef(node.GetColumnRef().ProtoReflect()) + if slices.Contains(jsonbColumns, col) { + return true + } + + case node.GetTypeCast() != nil: + /* + SELECT ('{"a": {"b": {"c": 1}}}'::jsonb)['a']['b']['c']; + {a_indirection:{arg:{type_cast:{arg:{a_const:{sval:{sval:"{\"a\": {\"b\": {\"c\": 1}}}"} location:280}} + type_name:{names:{string:{sval:"jsonb"}} typemod:-1 location:306} location:304}} + */ + typeCast := node.GetTypeCast() + typeName, _ := getTypeNameAndSchema(typeCast.GetTypeName().GetNames()) + if typeName == "jsonb" { + return true + } + case node.GetFuncCall() != nil: + /* + SELECT (jsonb_build_object('name', 'PostgreSQL', 'version', 14, 'open_source', TRUE))['name'] AS json_obj; + val:{a_indirection:{arg:{func_call:{funcname:{string:{sval:"jsonb_build_object"}} args:{a_const:{sval:{sval:"name"} + location:194}} args:{a_const:{sval:{sval:"PostgreSQL"} location:202}} args:{a_const:{sval:{sval:"version"} location:216}} + args:{a_const:{ival:{ival:14} location:227}} + */ + funcCall := node.GetFuncCall() + _, funcName := getFunctionObjectName(funcCall.Funcname) + if slices.Contains(jsonbFunctions, funcName) { + return true + } + case node.GetAExpr() != nil: + /* + SELECT ('{"key": "value1"}'::jsonb || '{"key1": "value2"}'::jsonb)['key'] AS object_in_array; + val:{a_indirection:{arg:{a_expr:{kind:AEXPR_OP name:{string:{sval:"||"}} lexpr:{type_cast:{arg:{a_const:{sval:{sval:"{\"key\": \"value1\"}"} + location:81}} type_name:{names:{string:{sval:"jsonb"}} typemod:-1 location:102} location:100}} rexpr:{type_cast:{arg:{a_const:{sval:{sval:"{\"key1\": \"value2\"}"} + location:111}} type_name:{names:{string:{sval:"jsonb"}} typemod:-1 location:132} location:130}} location:108}} indirection:{a_indices:{uidx:{a_const:{sval:{sval:"key"} + location:139}}}}}} + + SELECT (data || '{"new_key": "new_value"}' )['name'] FROM test_jsonb; + {val:{a_indirection:{arg:{a_expr:{kind:AEXPR_OP name:{string:{sval:"||"}} lexpr:{column_ref:{fields:{string:{sval:"data"}} location:10}} rexpr:{a_const:{sval:{sval:"{\"new_key\": \"new_value\"}"} + location:18}} location:15}} indirection:{a_indices:{uidx:{a_const:{sval:{sval:"name"} + + SELECT (jsonb_build_object('name', 'PostgreSQL', 'version', 14, 'open_source', TRUE) || '{"key": "value2"}')['name'] AS json_obj; + {val:{a_indirection:{arg:{a_expr:{kind:AEXPR_OP name:{string:{sval:"||"}} lexpr:{column_ref:{fields:{string:{sval:"data"}} location:10}} rexpr:{a_const:{sval:{sval:"{\"new_key\": \"new_value\"}"} + location:18}} location:15}} indirection:{a_indices:{uidx:{a_const:{sval:{sval:"name"} location:47}}}}}} location:9}} + */ + expr := node.GetAExpr() + lExpr := expr.GetLexpr() + rExpr := expr.GetRexpr() + if lExpr != nil && DoesNodeHandleJsonbData(lExpr, jsonbColumns, jsonbFunctions) { + return true + } + if rExpr != nil && DoesNodeHandleJsonbData(rExpr, jsonbColumns, jsonbFunctions) { + return true + } + } + return false +} diff --git a/yb-voyager/src/query/queryparser/traversal_proto.go b/yb-voyager/src/query/queryparser/traversal_proto.go index 08703dd5b..9177afc10 100644 --- a/yb-voyager/src/query/queryparser/traversal_proto.go +++ b/yb-voyager/src/query/queryparser/traversal_proto.go @@ -43,6 +43,7 @@ const ( PG_QUERY_DELETESTMT_NODE = "pg_query.DeleteStmt" PG_QUERY_SELECTSTMT_NODE = "pg_query.SelectStmt" + PG_QUERY_A_INDIRECTION_NODE = "pg_query.A_Indirection" PG_QUERY_JSON_OBJECT_AGG_NODE = "pg_query.JsonObjectAgg" PG_QUERY_JSON_ARRAY_AGG_NODE = "pg_query.JsonArrayAgg" PG_QUERY_JSON_ARRAY_CONSTRUCTOR_AGG_NODE = "pg_query.JsonArrayConstructor"