def merge_objects(target: Union[dict, List[Any]], source: Union[dict, List[Any]],
                  full: bool = False,  # deprecated
                  expand_lists: Optional[bool] = None,
                  primitive_lists: bool = False,
                  copy: bool = False, _recursing: bool = False) -> Union[dict, List[Any]]:
    """
    Merges the given source dictionary or list into the target dictionary or list and returns the
    result. This MAY well change the given target (dictionary or list) IN PLACE ... UNLESS the copy
    argument is True, in which case the given target will not change as a local (deep) copy is made
    (and returned). Values taken from the source are placed into the result by reference (not copied).

    If the target is None then the source itself is returned. A source of None, {}, or [] never
    overrides an existing target value; any other source value (including 0, False, and the empty
    string) replaces a target value it cannot be structurally merged with.

    If the expand_lists argument is True then any target lists longer than the
    source will be filled out with the last element of the source; the full
    argument (is deprecated and) is a synonym for this. The default is False.

    If the primitive_lists argument is True then lists of primitives (i.e. lists in which
    NONE of its elements are dictionaries, lists, or tuples) which are dictionary values will
    themselves be treated like primitives, meaning the whole of a source list will replace the
    corresponding target; otherwise they will be merged normally, meaning each element of a source
    list will be merged, recursively, into the corresponding target list. This applies at every
    level of nesting, not just to the keys of the top-level dictionary. The default is False.

    The _recursing argument is for internal use only; it marks recursive calls so that the
    copy of the target is made just once, at the top level.
    """
    def is_primitive_list(value: Any) -> bool:
        # True only for a list with no dict/list/tuple elements (an empty list qualifies).
        return isinstance(value, list) and not any(isinstance(item, (dict, list, tuple)) for item in value)

    def merge_nested(nested_target: Any, nested_source: Any) -> Any:
        # Single place for recursion so that EVERY option is forwarded consistently; previously
        # primitive_lists was dropped here, so it only took effect on top-level dictionary keys.
        return merge_objects(nested_target, nested_source,
                             expand_lists=expand_lists, primitive_lists=primitive_lists, _recursing=True)

    if target is None:
        return source
    if expand_lists not in (True, False):
        # Not explicitly specified; fall back to the deprecated full argument.
        expand_lists = full is True
    if (copy is True) and (_recursing is not True):
        # Imported locally so this function does not depend on a separate module-level import.
        from copy import deepcopy
        target = deepcopy(target)

    if isinstance(target, dict) and isinstance(source, dict) and source:
        for key, value in source.items():
            if key not in target:
                target[key] = value
            elif (primitive_lists is True) and is_primitive_list(target[key]) and is_primitive_list(value):
                # Primitive lists are atomic in this mode: replace rather than merge element-wise.
                target[key] = value
            else:
                target[key] = merge_nested(target[key], value)
    elif isinstance(target, list) and isinstance(source, list) and source:
        target_length, source_length = len(target), len(source)
        # Merge position-by-position where both lists have an element.
        for i in range(min(target_length, source_length)):
            target[i] = merge_nested(target[i], source[i])
        if target_length > source_length:
            if expand_lists is True:
                # Fill out the rest of the (longer) target using the last source element.
                last_source_item = source[-1]
                for i in range(source_length, target_length):
                    target[i] = merge_nested(target[i], last_source_item)
        else:
            # Source is at least as long as target; append whatever the target lacks.
            target.extend(source[target_length:])
    elif source not in (None, {}, []):
        # Not structurally mergeable; a non-empty source simply wins (0/False/"" included).
        target = source
    return target
b/dcicutils/structured_data.py @@ -74,7 +74,7 @@ def __init__(self, file: Optional[str] = None, portal: Optional[Union[VirtualApp self._nrows = 0 self._autoadd_properties = autoadd if isinstance(autoadd, dict) and autoadd else None self._norefs = True if norefs is True else False - self._merge = True if merge is True else False + self._merge = True if merge is True else False # New merge functionality (2024-05-25) self._debug_sleep = None if debug_sleep: try: @@ -347,7 +347,7 @@ def _load_json_file(self, file: str) -> None: (self._portal.get_schema(schema_name_inferred_from_file_name) is not None)): # noqa # If the JSON file name looks like a schema name then assume it # contains an object or an array of object of that schema type. - if self._merge: + if self._merge: # New merge functionality (2024-05-25) data = self._merge_with_existing_portal_object(data, schema_name_inferred_from_file_name) self._add(Schema.type_name(file), data) elif isinstance(data, dict): @@ -356,7 +356,7 @@ def _load_json_file(self, file: str) -> None: # which (each property) contains a list of object of that schema type. for schema_name in data: item = data[schema_name] - if self._merge: + if self._merge: # New merge functionality (2024-05-25) item = self._merge_with_existing_portal_object(item, schema_name) self._add(schema_name, item) @@ -380,8 +380,7 @@ def _load_reader(self, reader: RowReader, type_name: str) -> None: structured_row_template.set_value(structured_row, column_name, value, reader.file, reader.row_number) if self._autoadd_properties: self._add_properties(structured_row, self._autoadd_properties, schema) - # New merge functionality (2024-05-25). 
- if self._merge: + if self._merge: # New merge functionality (2024-05-25) structured_row = self._merge_with_existing_portal_object(structured_row, schema_name) if (prune_error := self._prune_structured_row(structured_row)) is not None: self._note_error({"src": create_dict(type=schema_name, row=reader.row_number), @@ -437,7 +436,7 @@ def _merge_with_existing_portal_object(self, portal_object: dict, portal_type: s """ for identifying_path in self._portal.get_identifying_paths(portal_object, portal_type): if existing_portal_object := self._portal.get_metadata(identifying_path, raw=True, raise_exception=False): - return merge_objects(existing_portal_object, portal_object) + return merge_objects(existing_portal_object, portal_object, primitive_lists=True) return portal_object def _is_ref_lookup_specified_type(ref_lookup_flags: int) -> bool: diff --git a/pyproject.toml b/pyproject.toml index 1c0bd9c84..de7f45574 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicutils" -version = "8.9.0.1b2" # TODO: To become 8.10.0 +version = "8.9.0.1b3" # TODO: To become 8.10.0 description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" authors = ["4DN-DCIC Team "] license = "MIT"