Skip to content

Commit

Permalink
feat: preserve unknown fields from the REST API representation in `Sc…
Browse files Browse the repository at this point in the history
…hemaField` (#2097)

* feat: preserve unknown fields from the REST API representaton in `SchemaField`

* 🦉 Updates from OwlBot post-processor

See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

* remove unnecessary variable

* remove unused private method

* fix pytype

---------

Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
  • Loading branch information
tswast and gcf-owl-bot[bot] authored Dec 27, 2024
1 parent 887e126 commit aaf1eb8
Show file tree
Hide file tree
Showing 4 changed files with 105 additions and 75 deletions.
82 changes: 25 additions & 57 deletions google/cloud/bigquery/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,9 @@

import collections
import enum
from typing import Any, Dict, Iterable, Optional, Union, cast
from typing import Any, cast, Dict, Iterable, Optional, Union

from google.cloud.bigquery import _helpers
from google.cloud.bigquery import standard_sql
from google.cloud.bigquery.enums import StandardSqlTypeNames

Expand Down Expand Up @@ -203,15 +204,8 @@ def __init__(
self._properties["rangeElementType"] = {"type": range_element_type}
if isinstance(range_element_type, FieldElementType):
self._properties["rangeElementType"] = range_element_type.to_api_repr()

self._fields = tuple(fields)

@staticmethod
def __get_int(api_repr, name):
v = api_repr.get(name, _DEFAULT_VALUE)
if v is not _DEFAULT_VALUE:
v = int(v)
return v
if fields: # Don't set the property if it's not set.
self._properties["fields"] = [field.to_api_repr() for field in fields]

@classmethod
def from_api_repr(cls, api_repr: dict) -> "SchemaField":
Expand All @@ -225,43 +219,19 @@ def from_api_repr(cls, api_repr: dict) -> "SchemaField":
Returns:
google.cloud.bigquery.schema.SchemaField: The ``SchemaField`` object.
"""
field_type = api_repr["type"].upper()

# Handle optional properties with default values
mode = api_repr.get("mode", "NULLABLE")
description = api_repr.get("description", _DEFAULT_VALUE)
fields = api_repr.get("fields", ())
policy_tags = api_repr.get("policyTags", _DEFAULT_VALUE)
placeholder = cls("this_will_be_replaced", "PLACEHOLDER")

default_value_expression = api_repr.get("defaultValueExpression", None)
# Note: we don't make a copy of api_repr because this can cause
# unnecessary slowdowns, especially on deeply nested STRUCT / RECORD
# fields. See https://github.com/googleapis/python-bigquery/issues/6
placeholder._properties = api_repr

if policy_tags is not None and policy_tags is not _DEFAULT_VALUE:
policy_tags = PolicyTagList.from_api_repr(policy_tags)

if api_repr.get("rangeElementType"):
range_element_type = cast(dict, api_repr.get("rangeElementType"))
element_type = range_element_type.get("type")
else:
element_type = None

return cls(
field_type=field_type,
fields=[cls.from_api_repr(f) for f in fields],
mode=mode.upper(),
default_value_expression=default_value_expression,
description=description,
name=api_repr["name"],
policy_tags=policy_tags,
precision=cls.__get_int(api_repr, "precision"),
scale=cls.__get_int(api_repr, "scale"),
max_length=cls.__get_int(api_repr, "maxLength"),
range_element_type=element_type,
)
return placeholder

@property
def name(self):
"""str: The name of the field."""
return self._properties["name"]
return self._properties.get("name", "")

@property
def field_type(self):
Expand All @@ -270,7 +240,10 @@ def field_type(self):
See:
https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableFieldSchema.FIELDS.type
"""
return self._properties["type"]
type_ = self._properties.get("type")
if type_ is None: # Shouldn't happen, but some unit tests do this.
return None
return cast(str, type_).upper()

@property
def mode(self):
Expand All @@ -279,7 +252,7 @@ def mode(self):
See:
https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableFieldSchema.FIELDS.mode
"""
return self._properties.get("mode")
return cast(str, self._properties.get("mode", "NULLABLE")).upper()

@property
def is_nullable(self):
Expand All @@ -299,17 +272,17 @@ def description(self):
@property
def precision(self):
"""Optional[int]: Precision (number of digits) for the NUMERIC field."""
return self._properties.get("precision")
return _helpers._int_or_none(self._properties.get("precision"))

@property
def scale(self):
"""Optional[int]: Scale (digits after decimal) for the NUMERIC field."""
return self._properties.get("scale")
return _helpers._int_or_none(self._properties.get("scale"))

@property
def max_length(self):
"""Optional[int]: Maximum length for the STRING or BYTES field."""
return self._properties.get("maxLength")
return _helpers._int_or_none(self._properties.get("maxLength"))

@property
def range_element_type(self):
Expand All @@ -329,7 +302,7 @@ def fields(self):
Must be empty unset if ``field_type`` is not 'RECORD'.
"""
return self._fields
return tuple(_to_schema_fields(self._properties.get("fields", [])))

@property
def policy_tags(self):
Expand All @@ -345,15 +318,10 @@ def to_api_repr(self) -> dict:
Returns:
Dict: A dictionary representing the SchemaField in a serialized form.
"""
answer = self._properties.copy()

# If this is a RECORD type, then sub-fields are also included,
# add this to the serialized representation.
if self.field_type.upper() in _STRUCT_TYPES:
answer["fields"] = [f.to_api_repr() for f in self.fields]

# Done; return the serialized dictionary.
return answer
# Note: we don't make a copy of _properties because this can cause
# unnecessary slowdowns, especially on deeply nested STRUCT / RECORD
# fields. See https://github.com/googleapis/python-bigquery/issues/6
return self._properties

def _key(self):
"""A tuple key that uniquely describes this field.
Expand Down Expand Up @@ -389,7 +357,7 @@ def _key(self):
self.mode.upper(), # pytype: disable=attribute-error
self.default_value_expression,
self.description,
self._fields,
self.fields,
policy_tags,
)

Expand Down
29 changes: 24 additions & 5 deletions tests/unit/job/test_load_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import copy
import warnings

import pytest
Expand Down Expand Up @@ -571,16 +572,34 @@ def test_schema_setter_valid_mappings_list(self):
config._properties["load"]["schema"], {"fields": [full_name_repr, age_repr]}
)

def test_schema_setter_invalid_mappings_list(self):
def test_schema_setter_allows_unknown_properties(self):
config = self._get_target_class()()

schema = [
{"name": "full_name", "type": "STRING", "mode": "REQUIRED"},
{"name": "age", "typeoo": "INTEGER", "mode": "REQUIRED"},
{
"name": "full_name",
"type": "STRING",
"mode": "REQUIRED",
"someNewProperty": "test-value",
},
{
"name": "age",
# Note: This type should be included, too. Avoid client-side
# validation, as it could prevent backwards-compatible
# evolution of the server-side behavior.
"typo": "INTEGER",
"mode": "REQUIRED",
"anotherNewProperty": "another-test",
},
]

with self.assertRaises(Exception):
config.schema = schema
# Make sure the setter doesn't mutate schema.
expected_schema = copy.deepcopy(schema)

config.schema = schema

# _properties should include all fields, including unknown ones.
assert config._properties["load"]["schema"]["fields"] == expected_schema

def test_schema_setter_unsetting_schema(self):
from google.cloud.bigquery.schema import SchemaField
Expand Down
37 changes: 29 additions & 8 deletions tests/unit/test_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,16 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from google.cloud import bigquery
from google.cloud.bigquery.standard_sql import StandardSqlStructType
from google.cloud.bigquery.schema import PolicyTagList
import copy
import unittest
from unittest import mock

import pytest

from google.cloud import bigquery
from google.cloud.bigquery.standard_sql import StandardSqlStructType
from google.cloud.bigquery.schema import PolicyTagList


class TestSchemaField(unittest.TestCase):
@staticmethod
Expand Down Expand Up @@ -821,13 +823,32 @@ def test_schema_fields_sequence(self):
result = self._call_fut(schema)
self.assertEqual(result, schema)

def test_invalid_mapping_representation(self):
def test_unknown_properties(self):
schema = [
{"name": "full_name", "type": "STRING", "mode": "REQUIRED"},
{"name": "address", "typeooo": "STRING", "mode": "REQUIRED"},
{
"name": "full_name",
"type": "STRING",
"mode": "REQUIRED",
"someNewProperty": "test-value",
},
{
"name": "age",
# Note: This type should be included, too. Avoid client-side
# validation, as it could prevent backwards-compatible
# evolution of the server-side behavior.
"typo": "INTEGER",
"mode": "REQUIRED",
"anotherNewProperty": "another-test",
},
]
with self.assertRaises(Exception):
self._call_fut(schema)

# Make sure the setter doesn't mutate schema.
expected_schema = copy.deepcopy(schema)

result = self._call_fut(schema)

for api_repr, field in zip(expected_schema, result):
assert field.to_api_repr() == api_repr

def test_valid_mapping_representation(self):
from google.cloud.bigquery.schema import SchemaField
Expand Down
32 changes: 27 additions & 5 deletions tests/unit/test_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import copy
import datetime
import logging
import re
Expand Down Expand Up @@ -711,14 +712,35 @@ def test_schema_setter_valid_fields(self):
table.schema = [full_name, age]
self.assertEqual(table.schema, [full_name, age])

def test_schema_setter_invalid_mapping_representation(self):
def test_schema_setter_allows_unknown_properties(self):
dataset = DatasetReference(self.PROJECT, self.DS_ID)
table_ref = dataset.table(self.TABLE_NAME)
table = self._make_one(table_ref)
full_name = {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}
invalid_field = {"name": "full_name", "typeooo": "STRING", "mode": "REQUIRED"}
with self.assertRaises(Exception):
table.schema = [full_name, invalid_field]
schema = [
{
"name": "full_name",
"type": "STRING",
"mode": "REQUIRED",
"someNewProperty": "test-value",
},
{
"name": "age",
# Note: This type should be included, too. Avoid client-side
# validation, as it could prevent backwards-compatible
# evolution of the server-side behavior.
"typo": "INTEGER",
"mode": "REQUIRED",
"anotherNewProperty": "another-test",
},
]

# Make sure the setter doesn't mutate schema.
expected_schema = copy.deepcopy(schema)

table.schema = schema

# _properties should include all fields, including unknown ones.
assert table._properties["schema"]["fields"] == expected_schema

def test_schema_setter_valid_mapping_representation(self):
from google.cloud.bigquery.schema import SchemaField
Expand Down

0 comments on commit aaf1eb8

Please sign in to comment.