Skip to content

Commit

Permalink
Uniprot (#21)
Browse files Browse the repository at this point in the history
* added templates and config settings from ASAP Discovery to the docs to improve RTD; updated docstrings throughout; created uniprot module

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* corrected HumanKinaseInfo > KinaseInfo change in test

* added RTD badge to README

* fixed RTD link

* fixed RTD link - non-badge this time

* refactored code so that all existing work resides in databases sub-module

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* updated tests for cbioportal error

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
jessicaw9910 and pre-commit-ci[bot] authored May 3, 2024
1 parent 2486c92 commit 7babab8
Show file tree
Hide file tree
Showing 25 changed files with 148 additions and 91 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ missense-kinase-toolkit

An ETL pipeline package to facilitate structure-based ML for human kinase property prediction

Additional documentation can be found [here](https://stackoverflow.com/questions/75922593/sphinx-readthedocs-and-package-version).
Additional documentation can be found [here](https://missense-kinase-toolkit.readthedocs.io/en/latest/).

### Copyright

Expand Down
1 change: 1 addition & 0 deletions docs/_templates/custom-class-template.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

.. autoclass:: {{ objname }}
:members:
:private-members:
:show-inheritance:
:inherited-members:

Expand Down
2 changes: 0 additions & 2 deletions docs/_templates/custom-module-template.rst
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
# courtesy of ASAP Discovery team

{{ fullname | escape | underline}}

.. automodule:: {{ fullname }}
Expand Down
2 changes: 1 addition & 1 deletion docs/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,4 @@ API Documentation
:template: custom-module-template.rst
:recursive:

missense_kinase_toolkit
missense_kinase_toolkit.databases
2 changes: 2 additions & 0 deletions docs/getting_started.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ This page details how to get started with missense-kinase-toolkit.
Installation
++++++++++++

#TODO add pip install instructions

We have used `poetry` as our default package manager for this project. Once you have cloned the repository and have either installed `poetry` locally or in your environment of interest, you can install the package by running the following command in the root directory of the repository using the provided `poetry.lock` file and the following command

.. code-block:: bash
Expand Down
3 changes: 0 additions & 3 deletions src/missense_kinase_toolkit/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +0,0 @@
from importlib.metadata import version

__version__ = version("missense-kinase-toolkit")
3 changes: 3 additions & 0 deletions src/missense_kinase_toolkit/databases/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from importlib.metadata import version

# Version string of the installed "missense-kinase-toolkit" distribution,
# looked up from package metadata via importlib.metadata.version.
__version__ = version("missense-kinase-toolkit")
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from bravado.client import SwaggerClient
from bravado.requests_client import RequestsClient

from missense_kinase_toolkit import config, io_utils
from missense_kinase_toolkit.databases import config, io_utils


logger = logging.getLogger(__name__)
Expand All @@ -15,6 +15,8 @@ class cBioPortal():
def __init__(self):
"""Initialize cBioPortal Class object.
Upon initialization, cBioPortal API is queried.
Attributes
----------
instance : str
Expand All @@ -27,7 +29,7 @@ def __init__(self):
"""
self.instance = config.get_cbioportal_instance()
self.url = f"https://{self.instance}/api/v2/api-docs"
self._cbioportal = self.get_cbioportal_api()
self._cbioportal = self.query_cbioportal_api()

def _set_api_key(self):
"""Set API key for cBioPortal API.
Expand All @@ -51,8 +53,8 @@ def _set_api_key(self):
print("No API token provided")
return http_client

def get_cbioportal_api(self):
"""Get cBioPortal API as bravado.client.SwaggerClient object.
def query_cbioportal_api(self):
"""Queries cBioPortal API for instance as bravado.client.SwaggerClient object.
Returns
-------
Expand Down Expand Up @@ -95,6 +97,8 @@ def __init__(
) -> None:
"""Initialize Mutations Class object.
Upon initialization, cBioPortal API is queried and mutations for the specified study are retrieved.
Parameters
----------
study_id : str
Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import argparse

from missense_kinase_toolkit import config, io_utils, cbioportal
from missense_kinase_toolkit.databases import config, io_utils, cbioportal

def parsearg_utils():
parser = argparse.ArgumentParser(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

import pandas as pd

from missense_kinase_toolkit import config, io_utils, scrapers, klifs
from missense_kinase_toolkit.databases import config, io_utils, scrapers, klifs

def parsearg_utils():
parser = argparse.ArgumentParser(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import argparse

from missense_kinase_toolkit import config, io_utils
from missense_kinase_toolkit.databases import config, io_utils


def parsearg_utils():
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
"""str: Environment variable for cBioPortal instance; if none provided, default is `www.cbioportal.org`"""
CBIOPORTAL_TOKEN_VAR = "CBIOPORTAL_TOKEN"
"""str: Environment variable for cBioPortal token; if none provided, default is `None`"""
REQUEST_CACHE_VAR = "REQUESTS_CACHE"
REQUESTS_CACHE_VAR = "REQUESTS_CACHE"
"""str: Environment variable for request cache file prefix; if none provided, default is requests_cache"""


Expand Down Expand Up @@ -114,7 +114,7 @@ def maybe_get_cbioportal_token(


def set_request_cache(
val: bool
val: str
) -> None:
"""Set the request cache path in environment variables.
Expand All @@ -128,8 +128,7 @@ def set_request_cache(
None
"""
#TODO: val should be bool but doesn't work with env, fix
os.environ[REQUEST_CACHE_VAR] = str(val)
os.environ[REQUESTS_CACHE_VAR] = val


def maybe_get_request_cache(
Expand All @@ -143,6 +142,6 @@ def maybe_get_request_cache(
"""
try:
return os.environ[REQUEST_CACHE_VAR]
return os.environ[REQUESTS_CACHE_VAR]
except KeyError:
return None
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import requests

from missense_kinase_toolkit import requests_wrapper, utils_requests
from missense_kinase_toolkit.databases import requests_wrapper, utils_requests


def maybe_get_symbol_from_hgnc_search(
Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ class KLIFS():
def __init__(self):
"""Initialize KLIFS Class object.
Upon initialization, KLIFS API is queried.
Attributes
----------
url : str
Expand All @@ -20,9 +22,9 @@ def __init__(self):
"""
self.url = "https://dev.klifs.net/swagger_v2/swagger.json"
self._klifs = self.get_klifs_api()
self._klifs = self.query_klifs_api()

def get_klifs_api(self):
def query_klifs_api(self):
"""Get KLIFS API as bravado.client.SwaggerClient object.
Returns
Expand Down Expand Up @@ -60,6 +62,8 @@ def __init__(
) -> None:
"""Initialize KinaseInfo Class object.
Upon initialization, KLIFS API is queried and kinase information for the specified kinase is retrieved.
Parameters
----------
kinase_name : str
Expand All @@ -80,9 +84,9 @@ def __init__(
super().__init__()
self.kinase_name = kinase_name
self.species = species
self._kinase_info = self.get_kinase_info()
self._kinase_info = self.query_kinase_info()

def get_kinase_info(
def query_kinase_info(
self
) -> dict[str, str | int | None]:
"""Get information about a kinase from KLIFS.
Expand Down Expand Up @@ -133,3 +137,7 @@ def get_kinase_name(self):
def get_species(self):
"""Get species of the kinase."""
return self.species

def get_kinase_info(self):
"""Get information about the kinase."""
return self._kinase_info
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import pandas as pd

from missense_kinase_toolkit import requests_wrapper, utils_requests
from missense_kinase_toolkit.databases import requests_wrapper, utils_requests


def retrieve_pfam(
Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import pandas as pd

from missense_kinase_toolkit import requests_wrapper
from missense_kinase_toolkit.databases import requests_wrapper


def kinhub(
Expand Down Expand Up @@ -52,6 +52,7 @@ def kinhub(
df_kinhub = pd.DataFrame.from_dict(dict_kinhub)
# df_kinhub = clean_names(df_kinhub)

# aggregate rows with the same HGNC Name (e.g., multiple kinase domains like JAK)
list_cols = df_kinhub.columns.to_list()
list_cols.remove("HGNC Name")
df_kinhub_agg = df_kinhub.groupby(["HGNC Name"], as_index=False, sort=False).agg(set)
Expand Down
File renamed without changes.
109 changes: 109 additions & 0 deletions tests/test_databases.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
"""
Unit and regression test for the missense_kinase_toolkit package.
"""

# Import package, test suite, and other packages as needed
import pytest


def test_missense_kinase_toolkit_database_imported():
    """Verify that importing the databases sub-package registers it in ``sys.modules``."""
    import sys
    import missense_kinase_toolkit.databases

    module_name = "missense_kinase_toolkit.databases"
    assert module_name in sys.modules


def test_config():
    """Round-trip each configuration value through its setter and getter.

    The output directory, request cache, cBioPortal instance and cBioPortal
    token are each set to a dummy value and read back. The process
    environment is snapshotted first and restored afterwards so that the
    dummy values (in particular the fake cBioPortal token and instance) do
    not leak into tests that run later in the same session.
    """
    import os

    from missense_kinase_toolkit.databases import config

    # the config module keeps its settings in environment variables, so the
    # whole environment is saved and put back once the checks are done
    env_snapshot = dict(os.environ)
    try:
        # test that the function to set the output directory works
        config.set_output_dir("test")
        assert config.get_output_dir() == "test"

        # test that the function to set the request cache works
        config.set_request_cache("test")
        assert config.maybe_get_request_cache() == "test"

        # test that the function to set the cBioPortal instance works
        config.set_cbioportal_instance("test")
        assert config.get_cbioportal_instance() == "test"

        # test that the function to set the cBioPortal token works
        config.set_cbioportal_token("test")
        assert config.maybe_get_cbioportal_token() == "test"
    finally:
        # restore the environment exactly as it was, even if an assertion failed
        os.environ.clear()
        os.environ.update(env_snapshot)


def test_cbioportal():
    """Smoke-test the public cBioPortal instance through the ``cBioPortal`` client.

    Checks that the server reports an "UP" status and that the
    ``msk_impact_2017`` study (Zehir cohort) is listed among the studies.
    """
    from missense_kinase_toolkit.databases import config, cbioportal

    # point the client at the public instance before it is constructed
    config.set_cbioportal_instance("www.cbioportal.org")

    # NOTE: _set_api_key() is not called directly by this test

    # constructing the client queries the cBioPortal API
    client = cbioportal.cBioPortal()
    api = client._cbioportal

    # test that server status is up
    status_payload = api.Server_running_status.getServerStatusUsingGET().response().result
    assert status_payload["status"] == "UP"

    # test that Zehir cohort is available
    studies = api.Studies.getAllStudiesUsingGET().result()
    study_ids = [entry.studyId for entry in studies]
    assert "msk_impact_2017" in study_ids


def test_io_utils():
    """Exercise the CSV save/load/concatenate helpers and ``convert_str2list``.

    Two small CSV files are written with ``OUTPUT_DIR`` set to the current
    directory. They are removed in a ``finally`` block so that a failing
    assertion does not leave ``test1.csv``/``test2.csv`` behind, where the
    ``*test*.csv`` glob would pick them up on the next run.
    """
    from missense_kinase_toolkit.databases import io_utils
    import pandas as pd
    import os

    os.environ["OUTPUT_DIR"] = "."

    df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
    created_files = ("test1.csv", "test2.csv")
    try:
        # test that the functions to save and load dataframes work
        io_utils.save_dataframe_to_csv(df, "test1.csv")
        df_read = io_utils.load_csv_to_dataframe("test1.csv")
        assert df.equals(df_read)

        # test that the function to concatenate csv files with glob works
        io_utils.save_dataframe_to_csv(df, "test2.csv")
        df_concat = io_utils.concatenate_csv_files_with_glob("*test*.csv")
        assert df_concat.equals(pd.concat([df, df]))
    finally:
        # remove the files created, whether or not the checks above passed;
        # the existence guard keeps cleanup from masking the original failure
        for filename in created_files:
            if os.path.exists(filename):
                os.remove(filename)

    # test that the function to convert a string to a list works
    assert io_utils.convert_str2list("a,b,c") == ["a", "b", "c"]
    assert io_utils.convert_str2list("a, b, c") == ["a", "b", "c"]


def test_kinhub_scraper():
    """Check the dimensions and key columns of the table returned by ``scrapers.kinhub``."""
    from missense_kinase_toolkit.databases import scrapers

    kinhub_table = scrapers.kinhub()

    # expected dimensions of the scraped table
    assert kinhub_table.shape[0] == 517
    assert kinhub_table.shape[1] == 8

    # identifier columns that must be present
    column_names = kinhub_table.columns
    assert "HGNC Name" in column_names
    assert "UniprotID" in column_names


def test_klifs_KinaseInfo():
    """Check the KLIFS record retrieved for EGFR against known reference values.

    The record is read through the public ``get_kinase_info()`` accessor
    rather than the private ``_kinase_info`` attribute, so the test
    exercises the class's public interface.
    """
    from missense_kinase_toolkit.databases import klifs

    dict_egfr = klifs.KinaseInfo("EGFR").get_kinase_info()

    # reference values expected for EGFR, keyed by KLIFS field name
    expected_fields = {
        "family": "EGFR",
        "full_name": "epidermal growth factor receptor",
        "gene_name": "EGFR",
        "group": "TK",
        "iuphar": 1797,
        "kinase_ID": 406,
        "name": "EGFR",
        "pocket": "KVLGSGAFGTVYKVAIKELEILDEAYVMASVDPHVCRLLGIQLITQLMPFGCLLDYVREYLEDRRLVHRDLAARNVLVITDFGLA",
        "species": "Human",
        "uniprot": "P00533",
    }
    for field, expected in expected_fields.items():
        # include the field name so a failure identifies which value differed
        assert dict_egfr[field] == expected, field
Loading

0 comments on commit 7babab8

Please sign in to comment.