diff --git a/src/zenml/constants.py b/src/zenml/constants.py index 183b1acce1..c303775617 100644 --- a/src/zenml/constants.py +++ b/src/zenml/constants.py @@ -174,6 +174,11 @@ def handle_int_env_var(var: str, default: int = 0) -> int: "ZENML_PIPELINE_API_TOKEN_EXPIRATION" ) +# Materializers environment variables +ENV_ZENML_MATERIALIZERS_ALLOW_NON_ASCII_JSON_DUMPS = ( + "ZENML_MATERIALIZERS_ALLOW_NON_ASCII_JSON_DUMPS" +) + # ZenML Server environment variables ENV_ZENML_SERVER_PREFIX = "ZENML_SERVER_" ENV_ZENML_SERVER_DEPLOYMENT_TYPE = f"{ENV_ZENML_SERVER_PREFIX}DEPLOYMENT_TYPE" diff --git a/src/zenml/materializers/built_in_materializer.py b/src/zenml/materializers/built_in_materializer.py index 06e28291c7..df16b9ef4c 100644 --- a/src/zenml/materializers/built_in_materializer.py +++ b/src/zenml/materializers/built_in_materializer.py @@ -28,6 +28,10 @@ ) from zenml.artifact_stores.base_artifact_store import BaseArtifactStore +from zenml.constants import ( + ENV_ZENML_MATERIALIZERS_ALLOW_NON_ASCII_JSON_DUMPS, + handle_bool_env_var, +) from zenml.enums import ArtifactType, VisualizationType from zenml.logger import get_logger from zenml.materializers.base_materializer import BaseMaterializer @@ -48,7 +52,9 @@ str, type(None), ) # complex/bytes are not JSON serializable - +ZENML_MATERIALIZERS_ALLOW_NON_ASCII_JSON_DUMPS = handle_bool_env_var( + ENV_ZENML_MATERIALIZERS_ALLOW_NON_ASCII_JSON_DUMPS, False +) class BuiltInMaterializer(BaseMaterializer): """Handle JSON-serializable basic types (`bool`, `float`, `int`, `str`).""" @@ -94,7 +100,10 @@ def save(self, data: Union[bool, float, int, str]) -> None: Args: data: The data to store. 
""" - yaml_utils.write_json(self.data_path, data) + yaml_utils.write_json( + self.data_path, data, + ensure_ascii=not ZENML_MATERIALIZERS_ALLOW_NON_ASCII_JSON_DUMPS, + ) def extract_metadata( self, data: Union[bool, float, int, str] @@ -371,7 +380,10 @@ def save(self, data: Any) -> None: # If the data is serializable, just write it into a single JSON file. if _is_serializable(data): - yaml_utils.write_json(self.data_path, data) + yaml_utils.write_json( + self.data_path, data, + ensure_ascii=not ZENML_MATERIALIZERS_ALLOW_NON_ASCII_JSON_DUMPS, + ) return # non-serializable dict: Handle as non-serializable list of lists. diff --git a/src/zenml/utils/yaml_utils.py b/src/zenml/utils/yaml_utils.py index 6f84832526..e221f14677 100644 --- a/src/zenml/utils/yaml_utils.py +++ b/src/zenml/utils/yaml_utils.py @@ -122,6 +122,7 @@ def write_json( file_path: str, contents: Any, encoder: Optional[Type[json.JSONEncoder]] = None, + **json_dump_args: Any, ) -> None: """Write contents as JSON format to file_path. @@ -143,6 +144,7 @@ def write_json( json.dumps( contents, cls=encoder, + **json_dump_args, ), )