Skip to content

Commit

Permalink
Fix issue #8 "can't download pathway archive" (#9)
Browse files Browse the repository at this point in the history
* Use the new JSON-API #8

* Add docstring

* Add __init__.py to tests

* Add test_download_pathway_archive.py to tests

* Refactor download_pathway_archive.py to use new JSON-API endpoint
  • Loading branch information
kozo2 authored Sep 23, 2024
1 parent e6676b7 commit a0f95d8
Show file tree
Hide file tree
Showing 4 changed files with 60 additions and 13 deletions.
43 changes: 33 additions & 10 deletions pywikipathways/download_pathway_archive.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,31 @@
from .list_organisms import *

def download_pathway_archive(date='current', organism=None, format='gpml', destpath='./'):
"""Download Pathway Archive
Access the monthly archives of pathway content from WikiPathways.
If you do not specify an organism, then an archive file will not be downloaded.
Instead, the archive will be opened in a tab in your default browser.
Args:
date (str, optional): The timestamp for a monthly release (e.g., 20171010)
or "current" (default) for the latest release.
organism (str, optional): A particular species. See `list_organisms`.
format (str, optional): Either "gpml" (default), "gmt", or "svg".
destpath (str, optional): Destination path for the file to be downloaded to.
Default is the current working directory.
Returns:
str: Filename of the downloaded file or an opened tab in the default browser.
Examples:
>>> download_pathway_archive() # open in browser
>>> download_pathway_archive(format="gmt") # open in browser
>>> download_pathway_archive(date="20230710", format="svg") # open in browser
>>> download_pathway_archive(date="20230710", organism="Mus musculus", format="svg") # download file
>>> download_pathway_archive(organism="Mus musculus") # download file
"""
# get validated format
if not format in ['gpml', 'gmt', 'svg']:
sys.exit(format + " is not in ['gpml', 'gmt', 'svg']. Please specify one of these.")
Expand All @@ -24,18 +49,16 @@ def download_pathway_archive(date='current', organism=None, format='gpml', destp
# download specific file, or...
if organism:
if date == 'current':
curr_files = pandas.read_html("https://wikipathways-data.wmcloud.org/current/" + format)[0]['Filename']
filename = curr_files[curr_files.str.contains(organism.replace(" ", "_"))]
filename = list(filename)[0]
if not True in curr_files.str.contains(organism.replace(" ", "_")):
curr_files = pandas.read_html("https://data.wikipathways.org/current/" + format)[0]["File Name"]
filename = curr_files[curr_files.str.contains(organism.replace(" ", "_"))].iloc[0]
if len(filename) == 0:
sys.exit('Could not find a file matching your specifications. Try browsing http://data.wikipathways.org.')
else:
if requests.get("https://wikipathways-data.wmcloud.org/" + date).ok:
ext = ".zip"
if format == 'gmt':
ext = ".gmt"
filename = "-".join(['wikipathways', date, format, organism.replace(" ", "_")]) + ext
url = "/".join(['http://data.wikipathways.org', date, format, filename])
ext = ".zip"
if format == 'gmt':
ext = ".gmt"
filename = "-".join(['wikipathways', date, format, organism.replace(" ", "_")]) + ext
url = "/".join(['https://data.wikipathways.org', date, format, filename])
r = requests.get(url)
file = open(filename, "wb")
file.write(r.content)
Expand Down
17 changes: 14 additions & 3 deletions pywikipathways/list_organisms.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,16 @@
from .utilities import *
import requests

def list_organisms():
    """List organisms.

    Retrieve the list of organisms supported by WikiPathways from the
    static ``listOrganisms.json`` endpoint.

    Returns:
        list: A list of organisms (the value stored under the
        ``organisms`` key of the JSON response).

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not respond in time.

    Example:
        >>> list_organisms()
    """
    # requests applies no timeout by default; without one a stalled server
    # would block the caller forever.
    res = requests.get(
        "https://www.wikipathways.org/json/listOrganisms.json",
        timeout=30,
    )
    # Surface HTTP errors explicitly instead of failing later on a bad body.
    res.raise_for_status()
    return res.json()['organisms']
Empty file added tests/__init__.py
Empty file.
13 changes: 13 additions & 0 deletions tests/test_download_pathway_archive.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import pytest
from pywikipathways.download_pathway_archive import *

def test_successful_download():
    """Download the current Mus musculus archive in each supported format.

    Checks that the returned filename has the expected
    ``wikipathways-<date>-<format>-Mus_musculus.<ext>`` shape, where the
    extension is ``gmt`` for the gmt format and ``zip`` otherwise.
    """
    import re  # local import keeps this test self-contained

    # date='current' resolves to whichever release is latest, and archives
    # are published monthly, so accept any 8-digit date stamp instead of
    # pinning one specific release.
    expected_ext = {'gpml': 'zip', 'gmt': 'gmt', 'svg': 'zip'}
    for fmt, ext in expected_ext.items():
        filename = download_pathway_archive(date='current', organism='Mus musculus', format=fmt)
        pattern = r'wikipathways-\d{8}-' + fmt + r'-Mus_musculus\.' + ext
        # Include the offending filename in the failure output.
        assert re.fullmatch(pattern, filename), filename

0 comments on commit a0f95d8

Please sign in to comment.