diff --git a/pywikipathways/download_pathway_archive.py b/pywikipathways/download_pathway_archive.py
index 2e556f6..407e301 100644
--- a/pywikipathways/download_pathway_archive.py
+++ b/pywikipathways/download_pathway_archive.py
@@ -6,6 +6,31 @@ from .list_organisms import *
 
 
 def download_pathway_archive(date='current', organism=None, format='gpml', destpath='./'):
+    """Download Pathway Archive
+
+    Access the monthly archives of pathway content from WikiPathways.
+
+    If you do not specify an organism, then an archive file will not be downloaded.
+    Instead, the archive will be opened in a tab in your default browser.
+
+    Args:
+        date (str, optional): The timestamp for a monthly release (e.g., 20171010)
+            or "current" (default) for the latest release.
+        organism (str, optional): A particular species. See `list_organisms`.
+        format (str, optional): Either "gpml" (default), "gmt", or "svg".
+        destpath (str, optional): Destination path for the file to be downloaded to.
+            Default is the current working directory.
+
+    Returns:
+        str: Filename of the downloaded file or an opened tab in the default browser.
+
+    Examples:
+        >>> download_pathway_archive() # open in browser
+        >>> download_pathway_archive(format="gmt") # open in browser
+        >>> download_pathway_archive(date="20230710", format="svg") # open in browser
+        >>> download_pathway_archive(date="20230710", organism="Mus musculus", format="svg") # download file
+        >>> download_pathway_archive(organism="Mus musculus") # download file
+    """
     # get validated format
     if not format in ['gpml', 'gmt', 'svg']:
         sys.exit(format + " is not in ['gpml', 'gmt', 'svg']. Please specify one of these.")
@@ -24,18 +49,18 @@ def download_pathway_archive(date='current', organism=None, format='gpml', destp
     # download specific file, or...
     if organism:
         if date == 'current':
-            curr_files = pandas.read_html("https://wikipathways-data.wmcloud.org/current/" + format)[0]['Filename']
-            filename = curr_files[curr_files.str.contains(organism.replace(" ", "_"))]
-            filename = list(filename)[0]
-            if not True in curr_files.str.contains(organism.replace(" ", "_")):
+            curr_files = pandas.read_html("https://data.wikipathways.org/current/" + format)[0]["File Name"]
+            matches = curr_files[curr_files.str.contains(organism.replace(" ", "_"), regex=False, na=False)]
+            if matches.empty:
                 sys.exit('Could not find a file matching your specifications. Try browsing http://data.wikipathways.org.')
+            filename = matches.iloc[0]
         else:
-            if requests.get("https://wikipathways-data.wmcloud.org/" + date).ok:
-                ext = ".zip"
-                if format == 'gmt':
-                    ext = ".gmt"
-                filename = "-".join(['wikipathways', date, format, organism.replace(" ", "_")]) + ext
-        url = "/".join(['http://data.wikipathways.org', date, format, filename])
+            ext = ".zip"
+            if format == 'gmt':
+                ext = ".gmt"
+            filename = "-".join(['wikipathways', date, format, organism.replace(" ", "_")]) + ext
+        url = "/".join(['https://data.wikipathways.org', date, format, filename])
         r = requests.get(url)
+        r.raise_for_status()
         file = open(filename, "wb")
         file.write(r.content)
diff --git a/pywikipathways/list_organisms.py b/pywikipathways/list_organisms.py
index a323c51..3db7174 100644
--- a/pywikipathways/list_organisms.py
+++ b/pywikipathways/list_organisms.py
@@ -1,5 +1,16 @@
-from .utilities import *
+import requests
 
 def list_organisms():
-    res = wikipathways_get('listOrganisms', {'format': 'json'})
-    return res['organisms']
+    """List Organisms.
+
+    Retrieve the list of organisms supported by WikiPathways
+
+    Returns:
+        list: A list of organisms
+
+    Example:
+        >>> list_organisms()
+    """
+    res = requests.get("https://www.wikipathways.org/json/listOrganisms.json")
+    res.raise_for_status()
+    return res.json()['organisms']
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/test_download_pathway_archive.py b/tests/test_download_pathway_archive.py
new file mode 100644
index 0000000..c339a31
--- /dev/null
+++ b/tests/test_download_pathway_archive.py
@@ -0,0 +1,13 @@
+import re
+
+import pytest
+from pywikipathways.download_pathway_archive import *
+
+
+@pytest.mark.parametrize("fmt, ext", [("gpml", "zip"), ("gmt", "gmt"), ("svg", "zip")])
+def test_successful_download(fmt, ext, tmp_path, monkeypatch):
+    # 'current' resolves to the latest monthly release, so match the date
+    # stamp by pattern instead of pinning one release.
+    monkeypatch.chdir(tmp_path)  # keep downloaded archives out of the repo
+    filename = download_pathway_archive(date='current', organism='Mus musculus', format=fmt)
+    assert re.fullmatch(rf"wikipathways-\d{{8}}-{fmt}-Mus_musculus\.{ext}", filename)