diff --git a/README.md b/README.md index b2ba44c..604c276 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@ missense-kinase-toolkit An ETL pipeline package to facilitate structure-based ML for human kinase property prediction -Additional documentation can be found [here](https://stackoverflow.com/questions/75922593/sphinx-readthedocs-and-package-version). +Additional documentation can be found [here](https://missense-kinase-toolkit.readthedocs.io/en/latest/). ### Copyright diff --git a/docs/_templates/custom-class-template.rst b/docs/_templates/custom-class-template.rst index b29757c..4ec6877 100644 --- a/docs/_templates/custom-class-template.rst +++ b/docs/_templates/custom-class-template.rst @@ -4,6 +4,7 @@ .. autoclass:: {{ objname }} :members: + :private-members: :show-inheritance: :inherited-members: diff --git a/docs/_templates/custom-module-template.rst b/docs/_templates/custom-module-template.rst index c4b447e..6adfe40 100644 --- a/docs/_templates/custom-module-template.rst +++ b/docs/_templates/custom-module-template.rst @@ -1,5 +1,3 @@ -# courtesy of ASAP Discovery team - {{ fullname | escape | underline}} .. automodule:: {{ fullname }} diff --git a/docs/api.rst b/docs/api.rst index 8635d12..0b48372 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -6,4 +6,4 @@ API Documentation :template: custom-module-template.rst :recursive: - missense_kinase_toolkit + missense_kinase_toolkit.databases diff --git a/docs/getting_started.rst b/docs/getting_started.rst index b9bafb9..3ad4e73 100644 --- a/docs/getting_started.rst +++ b/docs/getting_started.rst @@ -6,6 +6,8 @@ This page details how to get started with missense-kinase-toolkit. Installation ++++++++++++ +#TODO add pip install instructions + We have used `poetry` as our default package manager for this project. 
Once you have cloned the repository and have either installed `poetry` locally or in your environment of interest, you can install the package by running the following command in the root directory of the repository using the provided `poetry.lock` file and the following command .. code-block:: bash diff --git a/src/missense_kinase_toolkit/__init__.py b/src/missense_kinase_toolkit/__init__.py index 2ff4e16..e69de29 100644 --- a/src/missense_kinase_toolkit/__init__.py +++ b/src/missense_kinase_toolkit/__init__.py @@ -1,3 +0,0 @@ -from importlib.metadata import version - -__version__ = version("missense-kinase-toolkit") diff --git a/src/missense_kinase_toolkit/databases/__init__.py b/src/missense_kinase_toolkit/databases/__init__.py new file mode 100644 index 0000000..2ff4e16 --- /dev/null +++ b/src/missense_kinase_toolkit/databases/__init__.py @@ -0,0 +1,3 @@ +from importlib.metadata import version + +__version__ = version("missense-kinase-toolkit") diff --git a/src/missense_kinase_toolkit/cbioportal.py b/src/missense_kinase_toolkit/databases/cbioportal.py similarity index 91% rename from src/missense_kinase_toolkit/cbioportal.py rename to src/missense_kinase_toolkit/databases/cbioportal.py index cd54b2e..a1b755b 100644 --- a/src/missense_kinase_toolkit/cbioportal.py +++ b/src/missense_kinase_toolkit/databases/cbioportal.py @@ -4,7 +4,7 @@ from bravado.client import SwaggerClient from bravado.requests_client import RequestsClient -from missense_kinase_toolkit import config, io_utils +from missense_kinase_toolkit.databases import config, io_utils logger = logging.getLogger(__name__) @@ -15,6 +15,8 @@ class cBioPortal(): def __init__(self): """Initialize cBioPortal Class object. + Upon initialization, cBioPortal API is queried. 
+ Attributes ---------- instance : str @@ -27,7 +29,7 @@ def __init__(self): """ self.instance = config.get_cbioportal_instance() self.url = f"https://{self.instance}/api/v2/api-docs" - self._cbioportal = self.get_cbioportal_api() + self._cbioportal = self.query_cbioportal_api() def _set_api_key(self): """Set API key for cBioPortal API. @@ -51,8 +53,8 @@ def _set_api_key(self): print("No API token provided") return http_client - def get_cbioportal_api(self): - """Get cBioPortal API as bravado.client.SwaggerClient object. + def query_cbioportal_api(self): + """Queries cBioPortal API for instance as bravado.client.SwaggerClient object. Returns ------- @@ -95,6 +97,8 @@ def __init__( ) -> None: """Initialize Mutations Class object. + Upon initialization, cBioPortal API is queried and mutations for specified study are retrieved. + Parameters ---------- study_id : str diff --git a/src/missense_kinase_toolkit/cli/__init__.py b/src/missense_kinase_toolkit/databases/cli/__init__.py similarity index 100% rename from src/missense_kinase_toolkit/cli/__init__.py rename to src/missense_kinase_toolkit/databases/cli/__init__.py diff --git a/src/missense_kinase_toolkit/cli/extract_cbioportal.py b/src/missense_kinase_toolkit/databases/cli/extract_cbioportal.py similarity index 95% rename from src/missense_kinase_toolkit/cli/extract_cbioportal.py rename to src/missense_kinase_toolkit/databases/cli/extract_cbioportal.py index 67a454c..0905961 100755 --- a/src/missense_kinase_toolkit/cli/extract_cbioportal.py +++ b/src/missense_kinase_toolkit/databases/cli/extract_cbioportal.py @@ -2,7 +2,7 @@ import argparse -from missense_kinase_toolkit import config, io_utils, cbioportal +from missense_kinase_toolkit.databases import config, io_utils, cbioportal def parsearg_utils(): parser = argparse.ArgumentParser( diff --git a/src/missense_kinase_toolkit/cli/extract_kinase_annotations.py b/src/missense_kinase_toolkit/databases/cli/extract_kinase_annotations.py similarity index 95% rename from
src/missense_kinase_toolkit/cli/extract_kinase_annotations.py rename to src/missense_kinase_toolkit/databases/cli/extract_kinase_annotations.py index b10c331..c34895d 100755 --- a/src/missense_kinase_toolkit/cli/extract_kinase_annotations.py +++ b/src/missense_kinase_toolkit/databases/cli/extract_kinase_annotations.py @@ -4,7 +4,7 @@ import pandas as pd -from missense_kinase_toolkit import config, io_utils, scrapers, klifs +from missense_kinase_toolkit.databases import config, io_utils, scrapers, klifs def parsearg_utils(): parser = argparse.ArgumentParser( diff --git a/src/missense_kinase_toolkit/cli/transform_cbioportal.py b/src/missense_kinase_toolkit/databases/cli/transform_cbioportal.py similarity index 97% rename from src/missense_kinase_toolkit/cli/transform_cbioportal.py rename to src/missense_kinase_toolkit/databases/cli/transform_cbioportal.py index de524a1..096d0a9 100755 --- a/src/missense_kinase_toolkit/cli/transform_cbioportal.py +++ b/src/missense_kinase_toolkit/databases/cli/transform_cbioportal.py @@ -2,7 +2,7 @@ import argparse -from missense_kinase_toolkit import config, io_utils +from missense_kinase_toolkit.databases import config, io_utils def parsearg_utils(): diff --git a/src/missense_kinase_toolkit/config.py b/src/missense_kinase_toolkit/databases/config.py similarity index 93% rename from src/missense_kinase_toolkit/config.py rename to src/missense_kinase_toolkit/databases/config.py index 03dba71..e6327ca 100644 --- a/src/missense_kinase_toolkit/config.py +++ b/src/missense_kinase_toolkit/databases/config.py @@ -8,7 +8,7 @@ """str: Environment variable for cBioPortal instance; if none provided, default is `www.cbioportal.org`""" CBIOPORTAL_TOKEN_VAR = "CBIOPORTAL_TOKEN" """str: Environment variable for cBioPortal token; if none provided, default is `None`""" -REQUEST_CACHE_VAR = "REQUESTS_CACHE" +REQUESTS_CACHE_VAR = "REQUESTS_CACHE" """str: Environment variable for request cache file prefix; if none provided, default is requests_cache""" 
@@ -114,7 +114,7 @@ def maybe_get_cbioportal_token( def set_request_cache( - val: bool + val: str ) -> None: """Set the request cache path in environment variables. @@ -128,8 +128,7 @@ def set_request_cache( None """ - #TODO: val should be bool but doesn't work with env, fix - os.environ[REQUEST_CACHE_VAR] = str(val) + os.environ[REQUESTS_CACHE_VAR] = val def maybe_get_request_cache( @@ -143,6 +142,6 @@ def maybe_get_request_cache( """ try: - return os.environ[REQUEST_CACHE_VAR] + return os.environ[REQUESTS_CACHE_VAR] except KeyError: return None diff --git a/src/missense_kinase_toolkit/data_models.py b/src/missense_kinase_toolkit/databases/data_models.py similarity index 100% rename from src/missense_kinase_toolkit/data_models.py rename to src/missense_kinase_toolkit/databases/data_models.py diff --git a/src/missense_kinase_toolkit/hgnc.py b/src/missense_kinase_toolkit/databases/hgnc.py similarity index 98% rename from src/missense_kinase_toolkit/hgnc.py rename to src/missense_kinase_toolkit/databases/hgnc.py index a84d820..4e8d92e 100644 --- a/src/missense_kinase_toolkit/hgnc.py +++ b/src/missense_kinase_toolkit/databases/hgnc.py @@ -1,6 +1,6 @@ import requests -from missense_kinase_toolkit import requests_wrapper, utils_requests +from missense_kinase_toolkit.databases import requests_wrapper, utils_requests def maybe_get_symbol_from_hgnc_search( diff --git a/src/missense_kinase_toolkit/io_utils.py b/src/missense_kinase_toolkit/databases/io_utils.py similarity index 100% rename from src/missense_kinase_toolkit/io_utils.py rename to src/missense_kinase_toolkit/databases/io_utils.py diff --git a/src/missense_kinase_toolkit/klifs.py b/src/missense_kinase_toolkit/databases/klifs.py similarity index 88% rename from src/missense_kinase_toolkit/klifs.py rename to src/missense_kinase_toolkit/databases/klifs.py index c94266f..95d9b16 100644 --- a/src/missense_kinase_toolkit/klifs.py +++ b/src/missense_kinase_toolkit/databases/klifs.py @@ -11,6 +11,8 @@ class KLIFS(): def 
__init__(self): """Initialize KLIFS Class object. + Upon initialization, KLIFS API is queried. + Attributes ---------- url : str @@ -20,9 +22,9 @@ def __init__(self): """ self.url = "https://dev.klifs.net/swagger_v2/swagger.json" - self._klifs = self.get_klifs_api() + self._klifs = self.query_klifs_api() - def get_klifs_api(self): + def query_klifs_api(self): """Get KLIFS API as bravado.client.SwaggerClient object. Returns @@ -60,6 +62,8 @@ def __init__( ) -> None: """Initialize KinaseInfo Class object. + Upon initialization, KLIFS API is queried and kinase information for specified kinase is retrieved. + Parameters ---------- kinase_name : str @@ -80,9 +84,9 @@ def __init__( super().__init__() self.kinase_name = kinase_name self.species = species - self._kinase_info = self.get_kinase_info() + self._kinase_info = self.query_kinase_info() - def get_kinase_info( + def query_kinase_info( self ) -> dict[str, str | int | None]: """Get information about a kinase from KLIFS. @@ -133,3 +137,7 @@ def get_kinase_name(self): def get_species(self): """Get species of the kinase.""" return self.species + + def get_kinase_info(self): + """Get information about the kinase.""" + return self._kinase_info diff --git a/src/missense_kinase_toolkit/pfam.py b/src/missense_kinase_toolkit/databases/pfam.py similarity index 97% rename from src/missense_kinase_toolkit/pfam.py rename to src/missense_kinase_toolkit/databases/pfam.py index d99dbe1..d929df5 100644 --- a/src/missense_kinase_toolkit/pfam.py +++ b/src/missense_kinase_toolkit/databases/pfam.py @@ -2,7 +2,7 @@ import pandas as pd -from missense_kinase_toolkit import requests_wrapper, utils_requests +from missense_kinase_toolkit.databases import requests_wrapper, utils_requests def retrieve_pfam( diff --git a/src/missense_kinase_toolkit/py.typed b/src/missense_kinase_toolkit/databases/py.typed similarity index 100% rename from src/missense_kinase_toolkit/py.typed rename to src/missense_kinase_toolkit/databases/py.typed diff --git
a/src/missense_kinase_toolkit/requests_wrapper.py b/src/missense_kinase_toolkit/databases/requests_wrapper.py similarity index 100% rename from src/missense_kinase_toolkit/requests_wrapper.py rename to src/missense_kinase_toolkit/databases/requests_wrapper.py diff --git a/src/missense_kinase_toolkit/scrapers.py b/src/missense_kinase_toolkit/databases/scrapers.py similarity index 92% rename from src/missense_kinase_toolkit/scrapers.py rename to src/missense_kinase_toolkit/databases/scrapers.py index b15868f..f1a4837 100644 --- a/src/missense_kinase_toolkit/scrapers.py +++ b/src/missense_kinase_toolkit/databases/scrapers.py @@ -1,6 +1,6 @@ import pandas as pd -from missense_kinase_toolkit import requests_wrapper +from missense_kinase_toolkit.databases import requests_wrapper def kinhub( @@ -52,6 +52,7 @@ def kinhub( df_kinhub = pd.DataFrame.from_dict(dict_kinhub) # df_kinhub = clean_names(df_kinhub) + # aggregate rows with the same HGNC Name (e.g., multiple kinase domains like JAK) list_cols = df_kinhub.columns.to_list() list_cols.remove("HGNC Name") df_kinhub_agg = df_kinhub.groupby(["HGNC Name"], as_index=False, sort=False).agg(set) diff --git a/src/missense_kinase_toolkit/uniprot.py b/src/missense_kinase_toolkit/databases/uniprot.py similarity index 100% rename from src/missense_kinase_toolkit/uniprot.py rename to src/missense_kinase_toolkit/databases/uniprot.py diff --git a/src/missense_kinase_toolkit/utils_requests.py b/src/missense_kinase_toolkit/databases/utils_requests.py similarity index 100% rename from src/missense_kinase_toolkit/utils_requests.py rename to src/missense_kinase_toolkit/databases/utils_requests.py diff --git a/tests/test_databases.py b/tests/test_databases.py new file mode 100644 index 0000000..8b75546 --- /dev/null +++ b/tests/test_databases.py @@ -0,0 +1,109 @@ +""" +Unit and regression test for the missense_kinase_toolkit package. 
+""" + +# Import package, test suite, and other packages as needed +import pytest + + +def test_missense_kinase_toolkit_database_imported(): + """Test if module is imported.""" + import sys + import missense_kinase_toolkit.databases + + assert "missense_kinase_toolkit.databases" in sys.modules + + +def test_config(): + from missense_kinase_toolkit.databases import config + + # test that the function to set the output directory works + config.set_output_dir("test") + assert config.get_output_dir() == "test" + + # test that the function to set the request cache works + config.set_request_cache("test") + assert config.maybe_get_request_cache() == "test" + + # test that the function to set the cBioPortal instance works + config.set_cbioportal_instance("test") + assert config.get_cbioportal_instance() == "test" + + # test that the function to set the cBioPortal token works + config.set_cbioportal_token("test") + assert config.maybe_get_cbioportal_token() == "test" + + +def test_cbioportal(): + from missense_kinase_toolkit.databases import config, cbioportal + + config.set_cbioportal_instance("www.cbioportal.org") + + # test that the function to set the API key for cBioPortal works + # cbioportal.cBioPortal()._set_api_key() + + # test that the function to query the cBioPortal API works + cbioportal_instance = cbioportal.cBioPortal() + + # test that server status is up + assert cbioportal_instance._cbioportal.Server_running_status.getServerStatusUsingGET().response().result["status"] == "UP" + + # test that Zehir cohort is available + list_studies = cbioportal_instance._cbioportal.Studies.getAllStudiesUsingGET().result() + list_study_ids = [study.studyId for study in list_studies] + assert "msk_impact_2017" in list_study_ids + + +def test_io_utils(): + from missense_kinase_toolkit.databases import io_utils + import pandas as pd + import os + + os.environ["OUTPUT_DIR"] = "." 
+ + # test that the functions to save and load dataframes work + df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) + io_utils.save_dataframe_to_csv(df, "test1.csv") + df_read = io_utils.load_csv_to_dataframe("test1.csv") + assert df.equals(df_read) + + # test that the function to concatenate csv files with glob works + io_utils.save_dataframe_to_csv(df, "test2.csv") + df_concat = io_utils.concatenate_csv_files_with_glob("*test*.csv") + assert df_concat.equals(pd.concat([df, df])) + + # remove the files created + os.remove("test1.csv") + os.remove("test2.csv") + + # test that the function to convert a string to a list works + assert io_utils.convert_str2list("a,b,c") == ["a", "b", "c"] + assert io_utils.convert_str2list("a, b, c") == ["a", "b", "c"] + + +def test_kinhub_scraper(): + from missense_kinase_toolkit.databases import scrapers + + df_kinhub = scrapers.kinhub() + + assert df_kinhub.shape[0] == 517 + assert df_kinhub.shape[1] == 8 + assert "HGNC Name" in df_kinhub.columns + assert "UniprotID" in df_kinhub.columns + + +def test_klifs_KinaseInfo(): + from missense_kinase_toolkit.databases import klifs + + dict_egfr = klifs.KinaseInfo("EGFR")._kinase_info + + assert dict_egfr["family"] == "EGFR" + assert dict_egfr["full_name"] == "epidermal growth factor receptor" + assert dict_egfr["gene_name"] == "EGFR" + assert dict_egfr["group"] == "TK" + assert dict_egfr["iuphar"] == 1797 + assert dict_egfr["kinase_ID"] == 406 + assert dict_egfr["name"] == "EGFR" + assert dict_egfr["pocket"] == "KVLGSGAFGTVYKVAIKELEILDEAYVMASVDPHVCRLLGIQLITQLMPFGCLLDYVREYLEDRRLVHRDLAARNVLVITDFGLA" + assert dict_egfr["species"] == "Human" + assert dict_egfr["uniprot"] == "P00533" diff --git a/tests/test_missense_kinase_toolkit.py b/tests/test_missense_kinase_toolkit.py deleted file mode 100644 index 41f89a4..0000000 --- a/tests/test_missense_kinase_toolkit.py +++ /dev/null @@ -1,65 +0,0 @@ -""" -Unit and regression test for the missense_kinase_toolkit package. 
-""" - -# Import package, test suite, and other packages as needed -import sys - -import pytest - -import missense_kinase_toolkit - - -def test_missense_kinase_toolkit_imported(): - """Sample test, will always pass so long as import statement worked.""" - assert "missense_kinase_toolkit" in sys.modules - - -def test_kinhub_scraper(): - from missense_kinase_toolkit import scrapers - - df_kinhub = scrapers.kinhub() - - assert df_kinhub.shape[0] == 517 - assert df_kinhub.shape[1] == 8 - assert "HGNC Name" in df_kinhub.columns - assert "UniprotID" in df_kinhub.columns - - -def test_klifs_KinaseInfo(): - from missense_kinase_toolkit import klifs - - dict_egfr = klifs.KinaseInfo("EGFR")._kinase_info - - assert dict_egfr["family"] == "EGFR" - assert dict_egfr["full_name"] == "epidermal growth factor receptor" - assert dict_egfr["gene_name"] == "EGFR" - assert dict_egfr["group"] == "TK" - assert dict_egfr["iuphar"] == 1797 - assert dict_egfr["kinase_ID"] == 406 - assert dict_egfr["name"] == "EGFR" - assert dict_egfr["pocket"] == "KVLGSGAFGTVYKVAIKELEILDEAYVMASVDPHVCRLLGIQLITQLMPFGCLLDYVREYLEDRRLVHRDLAARNVLVITDFGLA" - assert dict_egfr["species"] == "Human" - assert dict_egfr["uniprot"] == "P00533" - - -def test_io_utils(): - from missense_kinase_toolkit import io_utils - import pandas as pd - import os - - os.environ["OUTPUT_DIR"] = "." - df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) - io_utils.save_dataframe_to_csv(df, "test1.csv") - df_read = io_utils.load_csv_to_dataframe("test1.csv") - assert df.equals(df_read) - - io_utils.save_dataframe_to_csv(df, "test2.csv") - df_concat = io_utils.concatenate_csv_files_with_glob("*test*.csv") - assert df_concat.equals(pd.concat([df, df])) - - os.remove("test1.csv") - os.remove("test2.csv") - - assert io_utils.convert_str2list("a,b,c") == ["a", "b", "c"] - assert io_utils.convert_str2list("a, b, c") == ["a", "b", "c"]