Skip to content

Commit

Permalink
merge functionality for submitr
Browse files Browse the repository at this point in the history
  • Loading branch information
dmichaels-harvard committed May 30, 2024
1 parent 6b77767 commit d27b68e
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 17 deletions.
51 changes: 41 additions & 10 deletions dcicutils/misc_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

from collections import namedtuple
import appdirs
from copy import deepcopy
import contextlib
import datetime
import functools
Expand Down Expand Up @@ -2199,28 +2200,58 @@ def merge_key_value_dict_lists(x, y):
return [key_value_dict(k, v) for k, v in merged.items()]


def merge_objects(target: Union[dict, List[Any]], source: Union[dict, List[Any]], full: bool = False) -> dict:
def merge_objects(target: Union[dict, List[Any]], source: Union[dict, List[Any]],
full: bool = False, # deprecated
expand_lists: Optional[bool] = None,
primitive_lists: bool = False,
copy: bool = False, _recursing: bool = False) -> Union[dict, List[Any]]:
"""
Merges the given source dictionary or list into the target dictionary or list.
This MAY well change the given target (dictionary or list) IN PLACE.
If the full argument is True then any target lists longer than the
source will be filled out with the last element(s) of the source.
Merges the given source dictionary or list into the target dictionary or list and returns the
result. This MAY well change the given target (dictionary or list) IN PLACE ... UNLESS the copy
argument is True, then the given target will not change as a local copy is made (and returned).
If the expand_lists argument is True then any target lists longer than the
source will be filled out with the last element(s) of the source; the full
argument (is deprecated and) is a synonym for this. The default is False.
If the primitive_lists argument is True then lists of primitives (i.e. lists in which
NONE of its elements are dictionaries, lists, or tuples) will themselves be treated
like primitives, meaning the whole of a source list will replace the corresponding
target; otherwise they will be merged normally, meaning each element of a source list
will be merged, recursively, into the corresponding target list. The default is False.
"""
def is_primitive_list(value: Any) -> bool: # noqa
if not isinstance(value, list):
return False
for item in value:
if isinstance(item, (dict, list, tuple)):
return False
return True

if target is None:
return source
if expand_lists not in (True, False):
expand_lists = full is True
if (copy is True) and (_recursing is not True):
target = deepcopy(target)
if isinstance(target, dict) and isinstance(source, dict) and source:
for key, value in source.items():
target[key] = merge_objects(target[key], value, full) if key in target else value
if ((primitive_lists is True) and
(key in target) and is_primitive_list(target[key]) and is_primitive_list(value)): # noqa
target[key] = value
else:
target[key] = merge_objects(target[key], value,
expand_lists=expand_lists, _recursing=True) if key in target else value
elif isinstance(target, list) and isinstance(source, list) and source:
for i in range(max(len(source), len(target))):
if i < len(target):
if i < len(source):
target[i] = merge_objects(target[i], source[i], full)
elif full:
target[i] = merge_objects(target[i], source[len(source) - 1], full)
target[i] = merge_objects(target[i], source[i], expand_lists=expand_lists, _recursing=True)
elif expand_lists is True:
target[i] = merge_objects(target[i], source[len(source) - 1], expand_lists=expand_lists)
else:
target.append(source[i])
elif source:
elif source not in (None, {}, []):
target = source
return target

Expand Down
11 changes: 5 additions & 6 deletions dcicutils/structured_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ def __init__(self, file: Optional[str] = None, portal: Optional[Union[VirtualApp
self._nrows = 0
self._autoadd_properties = autoadd if isinstance(autoadd, dict) and autoadd else None
self._norefs = True if norefs is True else False
self._merge = True if merge is True else False
self._merge = True if merge is True else False # New merge functionality (2024-05-25)
self._debug_sleep = None
if debug_sleep:
try:
Expand Down Expand Up @@ -347,7 +347,7 @@ def _load_json_file(self, file: str) -> None:
(self._portal.get_schema(schema_name_inferred_from_file_name) is not None)): # noqa
# If the JSON file name looks like a schema name then assume it
# contains an object or an array of objects of that schema type.
if self._merge:
if self._merge: # New merge functionality (2024-05-25)
data = self._merge_with_existing_portal_object(data, schema_name_inferred_from_file_name)
self._add(Schema.type_name(file), data)
elif isinstance(data, dict):
Expand All @@ -356,7 +356,7 @@ def _load_json_file(self, file: str) -> None:
# which (each property) contains a list of objects of that schema type.
for schema_name in data:
item = data[schema_name]
if self._merge:
if self._merge: # New merge functionality (2024-05-25)
item = self._merge_with_existing_portal_object(item, schema_name)
self._add(schema_name, item)

Expand All @@ -380,8 +380,7 @@ def _load_reader(self, reader: RowReader, type_name: str) -> None:
structured_row_template.set_value(structured_row, column_name, value, reader.file, reader.row_number)
if self._autoadd_properties:
self._add_properties(structured_row, self._autoadd_properties, schema)
# New merge functionality (2024-05-25).
if self._merge:
if self._merge: # New merge functionality (2024-05-25)
structured_row = self._merge_with_existing_portal_object(structured_row, schema_name)
if (prune_error := self._prune_structured_row(structured_row)) is not None:
self._note_error({"src": create_dict(type=schema_name, row=reader.row_number),
Expand Down Expand Up @@ -437,7 +436,7 @@ def _merge_with_existing_portal_object(self, portal_object: dict, portal_type: s
"""
for identifying_path in self._portal.get_identifying_paths(portal_object, portal_type):
if existing_portal_object := self._portal.get_metadata(identifying_path, raw=True, raise_exception=False):
return merge_objects(existing_portal_object, portal_object)
return merge_objects(existing_portal_object, portal_object, primitive_lists=True)
return portal_object

def _is_ref_lookup_specified_type(ref_lookup_flags: int) -> bool:
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "dcicutils"
version = "8.9.0.1b2" # TODO: To become 8.10.0
version = "8.9.0.1b3" # TODO: To become 8.10.0
description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources"
authors = ["4DN-DCIC Team <[email protected]>"]
license = "MIT"
Expand Down

0 comments on commit d27b68e

Please sign in to comment.