Skip to content

Commit

Permalink
Merge pull request #1102 from uc-cdis/feat/PXP-10776-parent-child-stu…
Browse files Browse the repository at this point in the history
…dy-authz

feat(PXP-10776): Cascading Authorization
  • Loading branch information
k-burt-uch authored Jul 31, 2023
2 parents 7d6f1b0 + 6e2abea commit ead31d2
Show file tree
Hide file tree
Showing 8 changed files with 587 additions and 553 deletions.
19 changes: 19 additions & 0 deletions fence/config-default.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -532,6 +532,25 @@ dbGaP:
#
# NOTE: when this is "false" the above would become "phs000123"
parse_consent_code: true
# When a dbGaP study authorizes access to child studies through a parent study ID,
# you can use this mapping. Each key is a parent study ID and each value is the list of
# its child study IDs: when a user gets access to the parent study, they automatically
# get access to every child study in that list.
#
# There's usually a note in the "Authorized Access" section of the dbGaP study page
# (https://www.ncbi.nlm.nih.gov/projects/gap/cgi-bin/study.cgi?study_id=phs001843.v1.p2)
# along the lines of:
# Note: The data for this study is collected as a substudy of
# phs001194.v3.p2. dbGaP Authorized Access requests for
# this data should be made for study phs001194.v3.p2 and
# not phs001843.v1.p2
#
# There are also other dbGaP APIs that expose this parent/child mapping.
# Example: https://dbgap.ncbi.nlm.nih.gov/ss/dbgapssws.cgi?request=Study&phs=000571&v=6
#
# If `parse_consent_code` is true, then a user will be given access to the exact
# same consent codes in the child studies
parent_to_child_studies_mapping:
# 'phs001194': ['phs000571', 'phs001843']
# A consent of "c999" can indicate access to that study's "exchange area data"
# and when a user has access to one study's exchange area data, they
# have access to the parent study's "common exchange area data" that is not study
Expand Down
26 changes: 24 additions & 2 deletions fence/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,18 +130,40 @@ def post_process(self):
"BILLING_PROJECT_FOR_SA_CREDS or BILLING_PROJECT_FOR_SIGNED_URLS is set to a non-None value. "
"SESSION_ALLOWED_SCOPES includes `google_credentials`. Removing "
"`google_credentials` from USER_ALLOWED_SCOPES as this could allow "
"end-users to indescriminently bill our default project. Clients are inheritently "
"end-users to indiscriminately bill our default project. Clients are inherently "
"trusted, so we do not restrict this scope for clients."
)
self._configs["SESSION_ALLOWED_SCOPES"].remove("google_credentials")

if (
not self._configs["ENABLE_VISA_UPDATE_CRON"]
and self._configs["GLOBAL_PARSE_VISAS_ON_LOGIN"] != False
and self._configs["GLOBAL_PARSE_VISAS_ON_LOGIN"] is not False
):
raise Exception(
"Visa parsing on login is enabled but `ENABLE_VISA_UPDATE_CRON` is disabled!"
)

self._validate_parent_child_studies(self._configs["dbGaP"])

@staticmethod
def _validate_parent_child_studies(dbgap_configs):
if isinstance(dbgap_configs, list):
configs = dbgap_configs
else:
configs = [dbgap_configs]

all_parent_studies = set()
for dbgap_config in configs:
parent_studies = dbgap_config.get(
"parent_to_child_studies_mapping", {}
).keys()
conflicts = parent_studies & all_parent_studies
if len(conflicts) > 0:
raise Exception(
f"{conflicts} are duplicate parent study ids found in parent_to_child_studies_mapping for "
f"multiple dbGaP configurations."
)
all_parent_studies.update(parent_studies)


config = FenceConfig(DEFAULT_CFG_PATH)
152 changes: 82 additions & 70 deletions fence/sync/sync_users.py
Original file line number Diff line number Diff line change
Expand Up @@ -335,7 +335,6 @@ def __init__(
self.sync_from_local_yaml_file = sync_from_local_yaml_file
self.is_sync_from_dbgap_server = is_sync_from_dbgap_server
self.dbGaP = dbGaP
self.parse_consent_code = dbGaP[0].get("parse_consent_code", True)
self.session = db_session
self.driver = get_SQLAlchemyDriver(DB)
self.project_mapping = project_mapping or {}
Expand All @@ -352,7 +351,11 @@ def __init__(
self.auth_source = defaultdict(set)
# auth_source used for logging. username : [source1, source2]
self.visa_types = config.get("USERSYNC", {}).get("visa_types", {})

self.parent_to_child_studies_mapping = {}
for dbgap_config in dbGaP:
self.parent_to_child_studies_mapping.update(
dbgap_config.get("parent_to_child_studies_mapping", {})
)
if storage_credentials:
self.storage_manager = StorageManager(
storage_credentials, logger=self.logger
Expand Down Expand Up @@ -455,6 +458,11 @@ def _get_from_ftp_with_proxy(self, server, path):
)
os.system(execstr)

def _get_parse_consent_code(self, dbgap_config={}):
return dbgap_config.get(
"parse_consent_code", True
) # Should this really be true?

def _parse_csv(self, file_dict, sess, dbgap_config={}, encrypted=True):
"""
parse csv files to python dict
Expand Down Expand Up @@ -514,8 +522,9 @@ def _parse_csv(self, file_dict, sess, dbgap_config={}, encrypted=True):
study_common_exchange_areas = dbgap_config.get(
"study_common_exchange_areas", {}
)
parse_consent_code = self._get_parse_consent_code(dbgap_config)

if self.parse_consent_code and enable_common_exchange_area_access:
if parse_consent_code and enable_common_exchange_area_access:
self.logger.info(
f"using study to common exchange area mapping: {study_common_exchange_areas}"
)
Expand Down Expand Up @@ -585,7 +594,7 @@ def _parse_csv(self, file_dict, sess, dbgap_config={}, encrypted=True):
)
)
continue
if len(phsid) > 1 and self.parse_consent_code:
if len(phsid) > 1 and parse_consent_code:
consent_code = phsid[-1]

# c999 indicates full access to all consents and access
Expand Down Expand Up @@ -622,6 +631,15 @@ def _parse_csv(self, file_dict, sess, dbgap_config={}, encrypted=True):

dbgap_project += "." + consent_code

self._add_children_for_dbgap_project(
dbgap_project,
privileges,
username,
sess,
user_projects,
dbgap_config,
)

display_name = row.get("user name") or ""
tags = {"dbgap_role": row.get("role") or ""}

Expand Down Expand Up @@ -651,6 +669,47 @@ def _parse_csv(self, file_dict, sess, dbgap_config={}, encrypted=True):

return user_projects, user_info

def _get_children(self, dbgap_project):
return self.parent_to_child_studies_mapping.get(dbgap_project.split(".")[0])

def _add_children_for_dbgap_project(
self, dbgap_project, privileges, username, sess, user_projects, dbgap_config
):
"""
Adds the configured child studies for the given dbgap_project, adding it to the provided user_projects. If
parse_consent_code is true, then the consents granted in the provided dbgap_project will also be granted to the
child studies.
"""
parent_phsid = dbgap_project
parse_consent_code = self._get_parse_consent_code(dbgap_config)
child_suffix = ""
if parse_consent_code and re.match(
config["DBGAP_ACCESSION_WITH_CONSENT_REGEX"], dbgap_project
):
parent_phsid_parts = dbgap_project.split(".")
parent_phsid = parent_phsid_parts[0]
child_suffix = "." + parent_phsid_parts[1]

if parent_phsid not in self.parent_to_child_studies_mapping:
return

self.logger.info(
f"found parent study {parent_phsid} and Fence "
"is configured to provide additional access to child studies. Giving user "
f"{username} {privileges} privileges in projects: "
f"{{k + child_suffix: v + child_suffix for k, v in self.parent_to_child_studies_mapping.items()}}."
)
child_studies = self.parent_to_child_studies_mapping.get(parent_phsid, [])
for child_study in child_studies:
self._add_dbgap_project_for_user(
child_study + child_suffix,
privileges,
username,
sess,
user_projects,
dbgap_config,
)

def _add_dbgap_project_for_user(
self, dbgap_project, privileges, username, sess, user_projects, dbgap_config
):
Expand Down Expand Up @@ -705,7 +764,7 @@ def sync_two_phsids_dict(
phsids2_overrides_phsids1=True,
):
"""
Merge pshid1 into phsids2. If `phsids2_overrides_phsids1`, values in
Merge phsids1 into phsids2. If `phsids2_overrides_phsids1`, values in
phsids1 are overridden by values in phsids2. phsids2 ends up containing
the merged dict (see explanation below).
`source1` and `source2`: for logging.
Expand Down Expand Up @@ -1402,7 +1461,7 @@ def _process_user_projects(
phsid = project.split(".")
dbgap_project = phsid[0]
privileges = user_projects[username][project]
if len(phsid) > 1 and self.parse_consent_code:
if len(phsid) > 1 and self._get_parse_consent_code(dbgap_config):
consent_code = phsid[-1]

# c999 indicates full access to all consents and access
Expand Down Expand Up @@ -1548,10 +1607,11 @@ def _sync(self, sess):
# Note: if there are multiple dbgap sftp servers configured, each server's
# config is checked in turn and c999 consents are expanded for every config
# that has `parse_consent_code` enabled
if self.parse_consent_code:
self._grant_all_consents_to_c999_users(
user_projects, user_yaml.project_to_resource
)
for dbgap_config in self.dbGaP:
if self._get_parse_consent_code(dbgap_config):
self._grant_all_consents_to_c999_users(
user_projects, user_yaml.project_to_resource
)

google_update_ex = None

Expand Down Expand Up @@ -1636,6 +1696,14 @@ def _grant_all_consents_to_c999_users(
consent_mapping.setdefault(accession_number["phsid"], set()).add(
".".join([accession_number["phsid"], accession_number["consent"]])
)
children = self._get_children(accession_number["phsid"])
if children:
for child_phs in children:
consent_mapping.setdefault(child_phs, set()).add(
".".join(
[child_phs, accession_number["consent"]]
) # Assign parent consent to child study
)

self.logger.debug(f"consent mapping: {consent_mapping}")

Expand Down Expand Up @@ -2299,63 +2367,6 @@ def _pick_sync_type(self, visa):

return sync_client

def parse_user_visas(self, db_session):
"""
Retrieve all visas from fence db and parse to python dict
Return:
Tuple[[dict, dict]]:
(user_project, user_info) where user_project is a mapping from
usernames to project permissions and user_info is a mapping
from usernames to user details, such as email
Example:
(
{
username: {
'project1': {'read-storage','write-storage'},
'project2': {'read-storage'},
}
},
{
username: {
'email': 'user@example.com',
'display_name': 'display name',
'phone_number': '123-456-789',
'tags': {'dbgap_role': 'PI'}
}
},
)
"""
user_projects = dict()
user_info = dict()

users = db_session.query(User).all()

for user in users:
projects = {}
info = {}
if user.ga4gh_visas_v1:
for visa in user.ga4gh_visas_v1:
project = {}
visa_type = self._pick_sync_type(visa)
encoded_visa = visa.ga4gh_visa
project, info = visa_type._parse_single_visa(
user,
encoded_visa,
visa.expires,
self.parse_consent_code,
)
projects = {**projects, **project}
if projects:
self.auth_source[user.username].add("visas")
user_projects[user.username] = projects
user_info[user.username] = info

return (user_projects, user_info)

def sync_single_user_visas(self, user, ga4gh_visas, sess=None, expires=None):
"""
Sync a single user's visas during login or DRS/data access
Expand All @@ -2377,6 +2388,7 @@ def sync_single_user_visas(self, user, ga4gh_visas, sess=None, expires=None):
"""
self.ras_sync_client = RASVisa(logger=self.logger)
dbgap_config = self.dbGaP[0]
parse_consent_code = self._get_parse_consent_code(dbgap_config)
enable_common_exchange_area_access = dbgap_config.get(
"enable_common_exchange_area_access", False
)
Expand Down Expand Up @@ -2409,7 +2421,7 @@ def sync_single_user_visas(self, user, ga4gh_visas, sess=None, expires=None):
user,
encoded_visa,
visa.expires,
self.parse_consent_code,
parse_consent_code,
)
except Exception:
self.logger.warning(
Expand All @@ -2425,7 +2437,7 @@ def sync_single_user_visas(self, user, ga4gh_visas, sess=None, expires=None):

user_projects = self.parse_projects(user_projects)

if self.parse_consent_code and enable_common_exchange_area_access:
if parse_consent_code and enable_common_exchange_area_access:
self.logger.info(
f"using study to common exchange area mapping: {study_common_exchange_areas}"
)
Expand All @@ -2438,7 +2450,7 @@ def sync_single_user_visas(self, user, ga4gh_visas, sess=None, expires=None):
sess,
)

if self.parse_consent_code:
if parse_consent_code:
self._grant_all_consents_to_c999_users(
user_projects, user_yaml.project_to_resource
)
Expand Down
Loading

0 comments on commit ead31d2

Please sign in to comment.