Skip to content

Commit

Permalink
updating cBioPortals to include token access cohorts, initializing da…
Browse files Browse the repository at this point in the history
…ta models
  • Loading branch information
jessicaw9910 committed Apr 2, 2024
1 parent 66fe44a commit 843d5d1
Show file tree
Hide file tree
Showing 6 changed files with 772 additions and 155 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -183,3 +183,6 @@ $RECYCLE.BIN/
*.lnk

# End of https://www.toptal.com/developers/gitignore/api/osx,windows,linux

# Requests cache directory
requests_cache/
709 changes: 657 additions & 52 deletions poetry.lock

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ tqdm = "4.64.0"
pandas = ">=2,<3"
requests = ">=2.28.1,<3"
requests-cache = ">=0.9.7,<1"
bravado = "^11.0.3"



Expand Down
205 changes: 103 additions & 102 deletions src/missense_kinase_toolkit/cbioportal.py
Original file line number Diff line number Diff line change
@@ -1,95 +1,145 @@
from __future__ import annotations

import os
import re

import requests
from requests.auth import HTTPBasicAuth
import pandas as pd
import os

from missense_kinase_toolkit import requests_wrapper
from bravado.client import SwaggerClient
from bravado.requests_client import RequestsClient


CBIOPORTAL_TOKEN_VAR = "CBIOPORTAL_TOKEN"
CBIOPORTAL_INSTANCE_VAR = "CBIOPORTAL_INSTANCE"


def create_setlist(
input_object: requests.models.Response,
attr: str,
) -> tuple[list, set]:
"""Create a list and set of unique values from a response object
Parameters
----------
input_object : requests.models.Response
Response object from a request
attr : str
Attribute to extract from the response object
def maybe_get_cbioportal_token_from_env() -> str | None:
"""Get the cBioPortal token from the environment
Returns
-------
tuple[list, set]
List and set of unique values from the response object
str | None
cBioPortal token as string if exists, otherwise None
"""
list_output = []
set_output = set()
try:
token = os.environ[CBIOPORTAL_TOKEN_VAR]
except KeyError:
token = None

for entry in input_object:
list_output.append(entry[attr])
set_output.add(entry[attr])
return token

return list_output, set_output

def maybe_get_cbioportal_instance_from_env(
) -> str | None:
"""Get the cBioPortal instance from the environment
def print_counts(
list_input: list,
) -> None:
"""Print the counts of unique values in a list
Returns
-------
str | None
cBioPortal instance as string if exists, otherwise None
"""
try:
instance = os.environ[CBIOPORTAL_INSTANCE_VAR]
except KeyError:
instance = None

return instance

def get_all_mutations_by_study(
str_study_id: str
) -> list | None:
"""Get all mutations for a study from a cBioPortal instance
Parameters
----------
list_input : list
List of values to count
str_study_id : str
Study ID within cBioPortal instance;
e.g. MSKCC clinical sequencing cohort is "msk_impact_2017" and MSKCC clinical sequencing cohort is "mskimpact"
Returns
-------
None
list | None
Mutations for the requested study as returned by the cBioPortal API client
"""
for Unique in set(list_input):
n = list_input.count(Unique)
print(f"{Unique:<15} \t {n:>10}")


def parse_obj2dict(
input_object: requests.models.Response,
) -> dict:
"""Parse a response object into a dictionary
token = maybe_get_cbioportal_token_from_env()

instance = maybe_get_cbioportal_instance_from_env()
if instance is not None:
url = f"https://{instance}/api/v2/api-docs"
else:
url = "https://cbioportal.org/api/v2/api-docs"

if all(v is not None for v in (token, instance)):
http_client = RequestsClient()
http_client.set_api_key(
instance,
f"Bearer {token}",
param_name='Authorization',
param_in='header'
)
cbioportal = SwaggerClient.from_url(
url,
http_client=http_client,
config={
"validate_requests":False,
"validate_responses":False,
"validate_swagger_spec": False
}
)
else:
cbioportal = SwaggerClient.from_url(
url,
config={
"validate_requests":False,
"validate_responses":False,
"validate_swagger_spec": False
}
)

studies = cbioportal.Studies.getAllStudiesUsingGET().result()
study_ids = [study.studyId for study in studies]

if str_study_id in study_ids:
#TODO - add error handling
#TODO - extract multiple studies
muts = cbioportal.Mutations.getMutationsInMolecularProfileBySampleListIdUsingGET(
molecularProfileId=f"{str_study_id}_mutations",
sampleListId=f"{str_study_id}_all",
projection="DETAILED"
).result()
else:
raise ValueError(f"Study {str_study_id} not found in cBioPortal instance {instance}")

return muts


def parse_muts2dataframe(
list_input: list,
) -> pd.DataFrame:
"""Parse a list of abc.Mutation into a dataframe
Parameters
----------
input_object : requests.models.Response
Response object from a request
list_input : list
List of abc.Mutation objects
Returns
-------
dict
Dictionary of values from the response object
pd.DataFrame
Dataframe for the input list of abc.Mutation objects
"""
dict_output = {}

list_dir = dir(input_object[0])

list_dir = dir(list_input[0])
for attr in list_dir:
list_attr = []
for entry in input_object:
for entry in list_input:
try:
add = int(entry[attr])
except ValueError:
add = str(entry[attr])
list_attr.append(add)
dict_output[attr] = list_attr

return dict_output
df = pd.DataFrame.from_dict(dict_output)
return df


def parse_series2dict(
Expand Down Expand Up @@ -152,57 +202,8 @@ def calc_vaf(
return vaf


def get_cbioportal_token() -> str:
    """Get the cBioPortal token from the environment

    Returns
    -------
    str
        cBioPortal token read from the environment variable named by
        CBIOPORTAL_TOKEN_VAR

    Raises
    ------
    KeyError
        If the environment variable named by CBIOPORTAL_TOKEN_VAR is not set
    """
    # Direct indexing (not .get) is deliberate: a missing token surfaces as a
    # KeyError here rather than as an authentication failure later on.
    return os.environ[CBIOPORTAL_TOKEN_VAR]


def get_cbioprotal_data() -> requests.models.Response:
    """Get the cBioPortal API description using token authentication

    Sends a single GET request for the MSKCC cBioPortal ``api-docs`` endpoint
    through the session returned by ``requests_wrapper.get_cached_session()``,
    passing the token from ``get_cbioportal_token()`` as a Bearer token.

    Returns
    -------
    requests.models.Response
        Response to the GET request; callers can inspect e.g.
        ``response.json()["paths"]`` to list the available endpoints

    Raises
    ------
    KeyError
        If the cBioPortal token is not set in the environment
    """
    token = get_cbioportal_token()
    url = "https://cbioportal.mskcc.org/api/v2/api-docs"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {token}",
    }

    # The earlier draft also issued an uncached request with an X-Auth-Token
    # header and a placeholder request with hard-coded credentials and an
    # unclosed file handle, and accessed a non-existent `res.Studies`
    # attribute, so it could never return. One authenticated request is all
    # that is needed.
    return requests_wrapper.get_cached_session().get(url, headers=headers)
muts = get_all_mutations_by_study("mskimpact")
df_muts = parse_muts2dataframe(muts)


# from bravado.client import SwaggerClient
Expand Down
5 changes: 5 additions & 0 deletions src/missense_kinase_toolkit/data_models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# cBioPortal mutations
# Pfam annotations
# UniProt annotations (canonical sequence)
# Kinase lists
# KLIFs annotations
4 changes: 3 additions & 1 deletion src/missense_kinase_toolkit/pfam.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@
from missense_kinase_toolkit import requests_wrapper


def retrieve_pfam(uniprot_id: str) -> pd.DataFrame | str | None:
def retrieve_pfam(
uniprot_id: str,
) -> pd.DataFrame | str | None:
"""Retrieve Pfam domain information for a given UniProt ID using InterPro REST API
Parameters
Expand Down

0 comments on commit 843d5d1

Please sign in to comment.