Skip to content

Commit

Permalink
Reporting jsonb subscripting issue in assessment and analyze (#2129)
Browse files Browse the repository at this point in the history
Report JSONB subscripting in DMLs, DDLs, and PL/pgSQL objects.
There is no fully reliable way to detect these, so this change tries to detect as many cases as possible in the source schema. Some edge cases still cannot be detected easily. https://yugabyte.atlassian.net/browse/DB-14545
  • Loading branch information
priyanshi-yb authored Jan 7, 2025
1 parent 5a50ec5 commit 23db698
Show file tree
Hide file tree
Showing 16 changed files with 557 additions and 15 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -25,15 +25,21 @@
},
{
"ObjectType": "TABLE",
"TotalCount": 4,
"InvalidCount": 1,
"ObjectNames": "sales.json_data, analytics.metrics, sales.events, sales.orders"
"TotalCount": 5,
"InvalidCount": 2,
"ObjectNames": "analytics.metrics, sales.orders, sales.test_json_chk, sales.events, sales.json_data"
},
{
"ObjectType": "VIEW",
"TotalCount": 3,
"InvalidCount": 3,
"ObjectNames": "sales.event_analysis_view, sales.event_analysis_view2, sales.employ_depart_view"
"ObjectNames": "sales.employ_depart_view, sales.event_analysis_view, sales.event_analysis_view2"
},
{
"ObjectType": "FUNCTION",
"TotalCount": 1,
"InvalidCount": 1,
"ObjectNames": "sales.get_user_info"
}
]
},
Expand All @@ -43,9 +49,10 @@
"sales.orders",
"analytics.metrics",
"sales.events",
"sales.json_data"
"sales.json_data",
"sales.test_json_chk"
],
"ColocatedReasoning": "Recommended instance type with 4 vCPU and 16 GiB memory could fit 4 objects (4 tables/materialized views and 0 explicit/implicit indexes) with 0.00 MB size and throughput requirement of 0 reads/sec and 0 writes/sec as colocated. Non leaf partition tables/indexes and unsupported tables/indexes were not considered.",
"ColocatedReasoning": "Recommended instance type with 4 vCPU and 16 GiB memory could fit 5 objects (5 tables/materialized views and 0 explicit/implicit indexes) with 0.00 MB size and throughput requirement of 0 reads/sec and 0 writes/sec as colocated. Non leaf partition tables/indexes and unsupported tables/indexes were not considered.",
"ShardedTables": null,
"NumNodes": 3,
"VCPUsPerInstance": 4,
Expand Down Expand Up @@ -86,6 +93,16 @@
"MinimumVersionsFixedIn": null
},
{
"FeatureName": "Jsonb Subscripting",
"Objects": [
{
"ObjectName": "sales.test_json_chk",
"SqlStatement": "CREATE TABLE sales.test_json_chk (\n id integer,\n name text,\n email text,\n active text,\n data jsonb,\n CONSTRAINT test_json_chk_data_check CHECK ((data['key'::text] \u003c\u003e '{}'::jsonb))\n);"
}
],
"MinimumVersionsFixedIn": null
},
{
"FeatureName": "Json Type Predicate",
"Objects": [
{
Expand All @@ -98,6 +115,20 @@
],
"UnsupportedFeaturesDesc": "Features of the source database that are not supported on the target YugabyteDB.",
"TableIndexStats": [
{
"SchemaName": "sales",
"ObjectName": "test_json_chk",
"RowCount": 2,
"ColumnCount": 5,
"Reads": 6,
"Writes": 2,
"ReadsPerSecond": 0,
"WritesPerSecond": 0,
"IsIndex": false,
"ObjectType": "",
"ParentTableName": null,
"SizeInBytes": 8192
},
{
"SchemaName": "sales",
"ObjectName": "orders",
Expand Down Expand Up @@ -182,12 +213,48 @@
"DocsLink": "",
"MinimumVersionsFixedIn": null
},
{
"ConstructTypeName": "Jsonb Subscripting",
"Query": "SELECT \n data,\n data[$1] AS name, \n (data[$2]) as active\nFROM sales.test_json_chk",
"DocsLink": "",
"MinimumVersionsFixedIn": null
},
{
"ConstructTypeName": "Jsonb Subscripting",
"Query": "SELECT (sales.get_user_info($1))[$2] AS user_info",
"DocsLink": "",
"MinimumVersionsFixedIn": null
},
{
"ConstructTypeName": "Jsonb Subscripting",
"Query": "SELECT (jsonb_build_object($1, $2, $3, $4, $5, $6) || $7)[$8] AS json_obj",
"DocsLink": "",
"MinimumVersionsFixedIn": null
},
{
"ConstructTypeName": "Jsonb Subscripting",
"Query": "SELECT ($1 :: jsonb)[$2][$3] as b",
"DocsLink": "",
"MinimumVersionsFixedIn": null
},
{
"ConstructTypeName": "Json Type Predicate",
"Query": "SELECT * \nFROM sales.json_data\nWHERE array_column IS JSON ARRAY",
"DocsLink": "",
"MinimumVersionsFixedIn": null
}
],
"UnsupportedPlPgSqlObjects": null
"UnsupportedPlPgSqlObjects": [
{
"FeatureName": "Jsonb Subscripting",
"Objects": [
{
"ObjectType": "FUNCTION",
"ObjectName": "sales.get_user_info",
"SqlStatement": "SELECT\n data,\n data['name'] AS name,\n (data['active']) as active\n FROM sales.test_json_chk;"
}
],
"MinimumVersionsFixedIn": null
}
]
}
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,42 @@ create view sales.employ_depart_view AS SELECT
any_value(name) AS any_employee
FROM employees;

-- Fixture table for the jsonb subscripting checks: the unnamed CHECK
-- constraint below subscripts the jsonb column "data" (data['key']).
CREATE TABLE sales.test_json_chk (
id int,
name text,
email text,
active text,
data jsonb,
CHECK (data['key']<>'{}')
);

INSERT INTO sales.test_json_chk (id, name, email, active, data)
VALUES (1, 'John Doe', '[email protected]', 'Y', jsonb_build_object('key', 'value', 'name', 'John Doe', 'active', 'Y'));

INSERT INTO sales.test_json_chk (id, name, email, active, data)
VALUES (2, 'Jane Smith', '[email protected]', 'N', jsonb_build_object('key', 'value', 'name', 'Jane Smith', 'active', 'N'));

-- sales.get_user_info(user_id) returns a jsonb object with the keys id, name,
-- email and active, built from the row of sales.test_json_chk whose id equals
-- user_id.
-- The PERFORM statement subscripts the jsonb column "data" (data['name'],
-- data['active']) and discards the result; it is there so the function body
-- contains a jsonb subscripting expression.
CREATE OR REPLACE FUNCTION sales.get_user_info(user_id INT)
RETURNS JSONB AS $$
BEGIN
PERFORM
data,
data['name'] AS name,
(data['active']) as active
FROM sales.test_json_chk;

RETURN (
SELECT jsonb_build_object(
'id', id,
'name', name,
'email', email,
'active', active
)
FROM sales.test_json_chk
WHERE id = user_id
);
END;
$$ LANGUAGE plpgsql;
CREATE TABLE sales.events (
id int PRIMARY KEY,
event_range daterange
Expand Down Expand Up @@ -85,4 +121,4 @@ INSERT INTO public.json_data (
3, '[1, 2, 3, 4]',
4, '"hello"',
5, '{"uniqueKey1": "value1", "uniqueKey2": "value2"}'
);
);
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,17 @@ SELECT
any_value(name) AS any_employee
FROM employees;

SELECT (sales.get_user_info(2))['name'] AS user_info;

SELECT (jsonb_build_object('name', 'PostgreSQL', 'version', 17, 'open_source', TRUE) || '{"key": "value2"}')['name'] AS json_obj;

SELECT
data,
data['name'] AS name,
(data['active']) as active
FROM sales.test_json_chk;

SELECT ('{"a": { "b": {"c": "1"}}}' :: jsonb)['a']['b'] as b;
--PG15
SELECT range_agg(event_range) AS union_of_ranges
FROM sales.events;
Expand All @@ -37,4 +48,4 @@ FROM sales.events;
-- -- PG 16 and above feature
SELECT *
FROM sales.json_data
WHERE array_column IS JSON ARRAY;
WHERE array_column IS JSON ARRAY;
1 change: 1 addition & 0 deletions yb-voyager/cmd/assessMigrationCommand.go
Original file line number Diff line number Diff line change
Expand Up @@ -1043,6 +1043,7 @@ func fetchUnsupportedPGFeaturesFromSchemaReport(schemaAnalysisReport utils.Schem
unsupportedFeatures = append(unsupportedFeatures, getUnsupportedFeaturesFromSchemaAnalysisReport(queryissue.JSON_CONSTRUCTOR_FUNCTION_NAME, "", queryissue.JSON_CONSTRUCTOR_FUNCTION, schemaAnalysisReport, false, ""))
unsupportedFeatures = append(unsupportedFeatures, getUnsupportedFeaturesFromSchemaAnalysisReport(queryissue.AGGREGATION_FUNCTIONS_NAME, "", queryissue.AGGREGATE_FUNCTION, schemaAnalysisReport, false, ""))
unsupportedFeatures = append(unsupportedFeatures, getUnsupportedFeaturesFromSchemaAnalysisReport(queryissue.SECURITY_INVOKER_VIEWS_NAME, "", queryissue.SECURITY_INVOKER_VIEWS, schemaAnalysisReport, false, ""))
unsupportedFeatures = append(unsupportedFeatures, getUnsupportedFeaturesFromSchemaAnalysisReport(queryissue.JSONB_SUBSCRIPTING_NAME, "", queryissue.JSONB_SUBSCRIPTING, schemaAnalysisReport, false, ""))
unsupportedFeatures = append(unsupportedFeatures, getUnsupportedFeaturesFromSchemaAnalysisReport(queryissue.FOREIGN_KEY_REFERENCES_PARTITIONED_TABLE_NAME, "", queryissue.FOREIGN_KEY_REFERENCES_PARTITIONED_TABLE, schemaAnalysisReport, false, ""))
unsupportedFeatures = append(unsupportedFeatures, getUnsupportedFeaturesFromSchemaAnalysisReport(queryissue.JSON_TYPE_PREDICATE_NAME, "", queryissue.JSON_TYPE_PREDICATE, schemaAnalysisReport, false, ""))

Expand Down
8 changes: 5 additions & 3 deletions yb-voyager/src/query/queryissue/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,9 +73,11 @@ const (
FETCH_WITH_TIES = "FETCH_WITH_TIES"
REGEX_FUNCTIONS = "REGEX_FUNCTIONS"

MULTI_RANGE_DATATYPE = "MULTI_RANGE_DATATYPE"
COPY_FROM_WHERE = "COPY FROM ... WHERE"
COPY_ON_ERROR = "COPY ... ON_ERROR"
JSONB_SUBSCRIPTING = "JSONB_SUBSCRIPTING"
JSONB_SUBSCRIPTING_NAME = "Jsonb Subscripting"
MULTI_RANGE_DATATYPE = "MULTI_RANGE_DATATYPE"
COPY_FROM_WHERE = "COPY FROM ... WHERE"
COPY_ON_ERROR = "COPY ... ON_ERROR"

FOREIGN_KEY_REFERENCES_PARTITIONED_TABLE = "FOREIGN_KEY_REFERENCED_PARTITIONED_TABLE"
FOREIGN_KEY_REFERENCES_PARTITIONED_TABLE_NAME = "Foreign key constraint references partitioned table"
Expand Down
62 changes: 62 additions & 0 deletions yb-voyager/src/query/queryissue/detectors.go
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,68 @@ func (d *RangeTableFuncDetector) GetIssues() []QueryIssue {
return issues
}

// JsonbSubscriptingDetector records whether a query applies subscripting
// (e.g. data['name']) to a jsonb value.
type JsonbSubscriptingDetector struct {
	// query is the statement text attached to any issue that gets reported.
	query string

	// jsonbColumns and jsonbFunctions are the column and function names that
	// are handed to the jsonb check performed in Detect.
	jsonbColumns   []string
	jsonbFunctions []string

	// detected is set once Detect sees subscripting on jsonb data.
	detected bool
}

// NewJsonbSubscriptingDetector builds a detector for query. The given slices
// are stored as-is (not copied); detection starts out as "nothing found".
func NewJsonbSubscriptingDetector(query string, jsonbColumns []string, jsonbFunctions []string) *JsonbSubscriptingDetector {
	detector := new(JsonbSubscriptingDetector)
	detector.query = query
	detector.jsonbColumns = jsonbColumns
	detector.jsonbFunctions = jsonbFunctions
	return detector
}

// Detect inspects one parse-tree message and marks the detector when the
// message is a subscripting expression whose operand handles jsonb data.
// It always returns nil.
//
// Subscripting shows up in the parse tree as an A_Indirection node, e.g.
// data['name'] (jsonb), numbers[1] (array) or
// ('{"a": {"b": {"c": 1}}}'::jsonb)['a']['b']['c'].
//   - Arg is the value being subscripted: a column such as data / numbers, or
//     a constant cast to jsonb.
//   - The indices are the accessed keys or array positions (name, 1, a, b, ...).
//
// Since arrays use the same node, only Arg decides whether this is jsonb
// subscripting.
//
// Caveat: not every case can be recognised this way. For
//
//	select ab_data['name'] from (select Data as ab_data from test_jsonb);
//
// Arg is a plain column_ref "ab_data"; that it aliases the jsonb column
// "data" is only expressed in the range_subselect target list, so the query
// is not reported.
func (j *JsonbSubscriptingDetector) Detect(msg protoreflect.Message) error {
	// Anything that is not an A_Indirection node cannot be a subscript.
	if queryparser.GetMsgFullName(msg) != queryparser.PG_QUERY_A_INDIRECTION_NODE {
		return nil
	}

	indirection, ok := queryparser.GetAIndirectionNode(msg)
	if !ok {
		return nil
	}

	// Flag the query only when there is an operand and it carries jsonb data.
	subscripted := indirection.GetArg()
	if subscripted != nil && queryparser.DoesNodeHandleJsonbData(subscripted, j.jsonbColumns, j.jsonbFunctions) {
		j.detected = true
	}
	return nil
}

// GetIssues returns a single jsonb subscripting issue for the detector's query
// when Detect flagged it, and a nil slice otherwise.
func (j *JsonbSubscriptingDetector) GetIssues() []QueryIssue {
	if !j.detected {
		return nil
	}
	return []QueryIssue{NewJsonbSubscriptingIssue(DML_QUERY_OBJECT_TYPE, "", j.query)}
}

type SelectStmtDetector struct {
query string
limitOptionWithTiesDetected bool
Expand Down
34 changes: 34 additions & 0 deletions yb-voyager/src/query/queryissue/detectors_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -680,6 +680,40 @@ func TestCombinationOfDetectors1WithObjectCollector(t *testing.T) {
}
}

// TestJsonbSubscriptingDetector checks that JsonbSubscriptingDetector reports
// exactly one JSONB_SUBSCRIPTING issue for queries that subscript jsonb data,
// and nothing for array subscripting or for the known undetectable
// subquery-alias case.
func TestJsonbSubscriptingDetector(t *testing.T) {
	withoutIssueSqls := []string{
		`SELECT numbers[1] AS first_number
FROM array_data;`,
		`select ab_data['name'] from (select Data as ab_data from test_jsonb);`, // NOT REPORTED AS OF NOW because of caveat
	}
	issuesSqls := []string{
		`SELECT ('{"a": {"b": {"c": 1}}}'::jsonb)['a']['b']['c'];`,
		`UPDATE json_data
SET data = jsonb_set(data, '{user,details,city}', '"San Francisco"')
WHERE data['user']['name'] = '"Alice"';`,
		`SELECT
data->>$1 AS name,
data[$2][$3] AS second_score
FROM test_jsonb1`,
		`SELECT (jsonb_build_object('name', 'PostgreSQL', 'version', 14, 'open_source', TRUE) || '{"key": "value2"}')['name'] AS json_obj;`,
		`SELECT (data || '{"new_key": "new_value"}' )['name'] FROM test_jsonb;`,
		`SELECT ('{"key": "value1"}'::jsonb || '{"key": "value2"}'::jsonb)['key'] AS object_in_array;`,
	}

	for _, sql := range withoutIssueSqls {
		issues := getDetectorIssues(t, NewJsonbSubscriptingDetector(sql, []string{}, []string{}), sql)

		assert.Equal(t, 0, len(issues), "Expected no issues for SQL: %s", sql)
	}

	for _, sql := range issuesSqls {
		issues := getDetectorIssues(t, NewJsonbSubscriptingDetector(sql, []string{"data"}, []string{"jsonb_build_object"}), sql)

		// assert.Equal is non-fatal: skip the element check when the count is
		// wrong so a missed detection fails the test instead of panicking on
		// issues[0] and hiding the remaining cases.
		if !assert.Equal(t, 1, len(issues), "Expected 1 issue for SQL: %s", sql) {
			continue
		}
		assert.Equal(t, JSONB_SUBSCRIPTING, issues[0].Type, "Expected Jsonb Subscripting issue for SQL: %s", sql)
	}
}

func TestJsonConstructorDetector(t *testing.T) {
sql := `SELECT JSON_ARRAY('PostgreSQL', 12, TRUE, NULL) AS json_array;`

Expand Down
23 changes: 23 additions & 0 deletions yb-voyager/src/query/queryissue/helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -134,3 +134,26 @@ var unsupportedLargeObjectFunctions = mapset.NewThreadUnsafeSet([]string{
//functions provided by lo extension, refer - https://www.postgresql.org/docs/current/lo.html#LO-RATIONALE
"lo_manage", "lo_oid",
}...)

// catalogFunctionsReturningJsonb is the set of PostgreSQL catalog function
// names whose declared result type is jsonb.
// NOTE(review): presumably this feeds the jsonbFunctions list used by the
// jsonb subscripting detection - confirm against the caller.
// NOTE(review): the set holds bare names only; a name with overloads that do
// not return jsonb (ts_headline looks like one) would still match - confirm
// this is acceptable.
var catalogFunctionsReturningJsonb = mapset.NewThreadUnsafeSet([]string{
/*
Query used to generate the list below. It keeps only functions visible in the
search path whose result type is exactly 'jsonb':

SELECT
DISTINCT p.proname AS Function_Name
FROM
pg_catalog.pg_proc p
LEFT JOIN pg_catalog.pg_language l ON p.prolang = l.oid
LEFT JOIN pg_catalog.pg_namespace n ON p.pronamespace = n.oid
WHERE
pg_catalog.pg_function_is_visible(p.oid) AND pg_catalog.pg_get_function_result(p.oid) = 'jsonb'
ORDER BY Function_Name;
*/
"jsonb_agg", "jsonb_agg_finalfn", "jsonb_agg_strict", "jsonb_array_element",
"jsonb_build_array", "jsonb_build_object", "jsonb_concat", "jsonb_delete",
"jsonb_delete_path", "jsonb_extract_path", "jsonb_in", "jsonb_insert",
"jsonb_object", "jsonb_object_agg", "jsonb_object_agg_finalfn", "jsonb_object_agg_strict",
"jsonb_object_agg_unique", "jsonb_object_agg_unique_strict", "jsonb_object_field", "jsonb_path_query_array",
"jsonb_path_query_array_tz", "jsonb_path_query_first", "jsonb_path_query_first_tz", "jsonb_recv",
"jsonb_set", "jsonb_set_lax", "jsonb_strip_nulls", "to_jsonb", "ts_headline",
}...)
13 changes: 13 additions & 0 deletions yb-voyager/src/query/queryissue/issues_dml.go
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,19 @@ func NewLOFuntionsIssue(objectType string, objectName string, sqlStatement strin
return newQueryIssue(loFunctionsIssue, objectType, objectName, sqlStatement, details)
}

// jsonbSubscriptingIssue is the issue template for jsonb subscripting
// (e.g. data['key']); NewJsonbSubscriptingIssue attaches it to a concrete
// object and SQL statement. No GitHub issue or docs link is set yet.
var jsonbSubscriptingIssue = issue.Issue{
Type: JSONB_SUBSCRIPTING,
TypeName: JSONB_SUBSCRIPTING_NAME,
TypeDescription: "Jsonb subscripting is not supported in YugabyteDB yet",
Suggestion: "Use Arrow operators (-> / ->>) to access the jsonb fields.",
GH: "",
DocsLink: "", // TODO: fill in the docs link
}

// NewJsonbSubscriptingIssue wraps jsonbSubscriptingIssue into a QueryIssue for
// the given object type, object name and SQL statement. The issue carries an
// empty (non-nil) details map.
func NewJsonbSubscriptingIssue(objectType string, objectName string, sqlStatement string) QueryIssue {
	details := make(map[string]interface{})
	return newQueryIssue(jsonbSubscriptingIssue, objectType, objectName, sqlStatement, details)
}

var jsonPredicateIssue = issue.Issue{
Type: JSON_TYPE_PREDICATE,
TypeName: JSON_TYPE_PREDICATE_NAME,
Expand Down
Loading

0 comments on commit 23db698

Please sign in to comment.