diff --git a/.gitignore b/.gitignore index 87c7e64..c9e554a 100644 --- a/.gitignore +++ b/.gitignore @@ -132,3 +132,4 @@ bert_model/ bsp_model/ word2vec_model/ .pypirc +docs/_build diff --git a/.travis.yml b/.travis.yml index 32ac666..c5ab8b8 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,7 +1,5 @@ language: python python: - - "3.6" - - "3.7" - "3.8" before_install: @@ -9,6 +7,7 @@ before_install: - pip install -U pytest - pip install codecov - pip install -U "bio-embeddings[all] @ git+https://github.com/sacdallago/bio_embeddings.git" + - pip install git+https://github.com/facebookresearch/esm.git install: - pip install -r requirements.txt @@ -16,6 +15,6 @@ env: - CODECOV_TOKEN=4f9eafa3-0ca3-48e4-9841-8fb25ff5d7c6 script: - - pytest --verbose --color=yes phages2050 + - pytest --verbose --color=yes phages2050/features after_success: - codecov diff --git a/CHANGELOG.md b/CHANGELOG.md index 89a00a3..ca65929 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,14 @@ All notable changes to this project will be documented in this file. +## [0.0.8] - 11.10.2020 +### Added +* Initial online documentation; + +### Changed +* Migrated the codebase to Python 3.8+; + + ## [0.0.7] - 18.09.2020 ### Added * new extention for `embedding` module with ESM transformer-based protein embedding model; diff --git a/README.md b/README.md index fb0a25d..489076d 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ > "Keep calm, use AI for phages and stop AMR" -PHAGES2050 is a novel [Python 3.6+ programming language](https://python.org) framework to boost bacteriophage research & therapy and infrastructure in order to achieve the full potential to fight against antimicrobial resistant bacteria within **Natural Language Processing (NLP)** and **Deep Learning**. 
+PHAGES2050 is a novel [Python 3.8+ programming language](https://python.org) framework to boost bacteriophage research & therapy and infrastructure in order to achieve the full potential to fight against antimicrobial-resistant bacteria within **Natural Language Processing (NLP)** and **Deep Learning**. Our project is about developing a AI-based framework for microbiologists and bioinformaticians who hunt, explore and classify phages. Applying the framework will shorten the duration of computational methods required to match phages with bacteria for specific patient cases. Having such organised framework at hand and freely-available will help develop personalized phage therapy and make it accessible to people worldwide. @@ -73,7 +73,7 @@ _PHAGES2050_ can be installed by running: pip install phages2050 ``` -It requires Python 3.6.0+ to run. You can also use Conda: +It requires Python 3.8.0+ to run. You can also use Conda: ``` conda install -c conda-forge phages2050 @@ -98,7 +98,7 @@ pip install git+https://github.com/facebookresearch/esm.git ## Community and Contributions -Happy to see you willing to make the PHAGES2050 better. Development on the latest stable version of Python 3+ is preferred. As of this writing it's 3.6. You can use any operating system. +Happy to see you willing to make PHAGES2050 better. Development on the latest stable version of Python 3+ is preferred. As of this writing it's 3.8. You can use any operating system. If you're fixing a bug or adding a new feature, add a test with *[pytest](https://github.com/pytest-dev/pytest)* and check the code with *[Black](https://github.com/psf/black/)* and *[mypy](https://github.com/python/mypy)*. Before adding any large feature, first open an issue for us to discuss the idea with the core devs and community. 
diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 0000000..d4bb2cb --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = . +BUILDDIR = _build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/_static/images/phages2050-logo.png b/docs/_static/images/phages2050-logo.png new file mode 100644 index 0000000..dee70be Binary files /dev/null and b/docs/_static/images/phages2050-logo.png differ diff --git a/docs/conf.py b/docs/conf.py new file mode 100644 index 0000000..82bd87d --- /dev/null +++ b/docs/conf.py @@ -0,0 +1,61 @@ +# Configuration file for the Sphinx documentation builder. +# +# This file only contains a selection of the most common options. For a full +# list see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Path setup -------------------------------------------------------------- + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. 
+# +import os +import sys +sys.path.insert(0, os.path.abspath('..')) + + +# -- Project information ----------------------------------------------------- + +project = 'PHAGES2050' +copyright = '2020, Piotr Tynecki, Iwona Świętochowska, Yana Minina, Przemysław Mitura, Wojciech Łaguna' +author = 'Piotr Tynecki, Iwona Świętochowska, Yana Minina, Przemysław Mitura, Wojciech Łaguna' + +# The full version, including alpha/beta/rc tags +release = version = '0.0.8' + + +# -- General configuration --------------------------------------------------- + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [ + 'sphinx.ext.autodoc', + 'sphinx.ext.viewcode' +] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This pattern also affects html_static_path and html_extra_path. +exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] + + +# -- Options for HTML output ------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +html_theme = 'sphinx_rtd_theme' + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] + +html_logo = '_static/images/phages2050-logo.png' + +master_doc = 'index' diff --git a/docs/index.rst b/docs/index.rst new file mode 100644 index 0000000..acf22ba --- /dev/null +++ b/docs/index.rst @@ -0,0 +1,17 @@ +Welcome to PHAGES2050's documentation! 
+====================================== + +PHAGES2050 is a novel Python 3.8+ programming language framework to boost bacteriophage research & therapy. + +.. toctree:: + :maxdepth: 2 + :caption: Contents: + + modules + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` diff --git a/docs/make.bat b/docs/make.bat new file mode 100644 index 0000000..2119f51 --- /dev/null +++ b/docs/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=. +set BUILDDIR=_build + +if "%1" == "" goto help + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.http://sphinx-doc.org/ + exit /b 1 +) + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/docs/modules.rst b/docs/modules.rst new file mode 100644 index 0000000..76410eb --- /dev/null +++ b/docs/modules.rst @@ -0,0 +1,8 @@ +PHAGES2050 +========== + +.. toctree:: + :maxdepth: 4 + + phages2050 + setup diff --git a/docs/phages2050.classifiers.proteins.rst b/docs/phages2050.classifiers.proteins.rst new file mode 100644 index 0000000..b3cba68 --- /dev/null +++ b/docs/phages2050.classifiers.proteins.rst @@ -0,0 +1,21 @@ +phages2050.classifiers.proteins package +======================================= + +Submodules +---------- + +phages2050.classifiers.proteins.structural\_protein module +---------------------------------------------------------- + +.. 
automodule:: phages2050.classifiers.proteins.structural_protein + :members: + :undoc-members: + :show-inheritance: + +Module contents +--------------- + +.. automodule:: phages2050.classifiers.proteins + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/phages2050.classifiers.rst b/docs/phages2050.classifiers.rst new file mode 100644 index 0000000..48624a5 --- /dev/null +++ b/docs/phages2050.classifiers.rst @@ -0,0 +1,18 @@ +phages2050.classifiers package +============================== + +Subpackages +----------- + +.. toctree:: + :maxdepth: 4 + + phages2050.classifiers.proteins + +Module contents +--------------- + +.. automodule:: phages2050.classifiers + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/phages2050.crawlers.millardlab.rst b/docs/phages2050.crawlers.millardlab.rst new file mode 100644 index 0000000..c3e48c6 --- /dev/null +++ b/docs/phages2050.crawlers.millardlab.rst @@ -0,0 +1,21 @@ +phages2050.crawlers.millardlab package +====================================== + +Submodules +---------- + +phages2050.crawlers.millardlab.crawler module +--------------------------------------------- + +.. automodule:: phages2050.crawlers.millardlab.crawler + :members: + :undoc-members: + :show-inheritance: + +Module contents +--------------- + +.. automodule:: phages2050.crawlers.millardlab + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/phages2050.crawlers.ncbi.rst b/docs/phages2050.crawlers.ncbi.rst new file mode 100644 index 0000000..ab1264a --- /dev/null +++ b/docs/phages2050.crawlers.ncbi.rst @@ -0,0 +1,10 @@ +phages2050.crawlers.ncbi package +================================ + +Module contents +--------------- + +.. 
automodule:: phages2050.crawlers.ncbi + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/phages2050.crawlers.rst b/docs/phages2050.crawlers.rst new file mode 100644 index 0000000..b8b4ff3 --- /dev/null +++ b/docs/phages2050.crawlers.rst @@ -0,0 +1,19 @@ +phages2050.crawlers package +=========================== + +Subpackages +----------- + +.. toctree:: + :maxdepth: 4 + + phages2050.crawlers.millardlab + phages2050.crawlers.ncbi + +Module contents +--------------- + +.. automodule:: phages2050.crawlers + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/phages2050.embeddings.nucleotides.rst b/docs/phages2050.embeddings.nucleotides.rst new file mode 100644 index 0000000..3896a1f --- /dev/null +++ b/docs/phages2050.embeddings.nucleotides.rst @@ -0,0 +1,21 @@ +phages2050.embeddings.nucleotides package +========================================= + +Submodules +---------- + +phages2050.embeddings.nucleotides.word2vec module +------------------------------------------------- + +.. automodule:: phages2050.embeddings.nucleotides.word2vec + :members: + :undoc-members: + :show-inheritance: + +Module contents +--------------- + +.. automodule:: phages2050.embeddings.nucleotides + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/phages2050.embeddings.proteins.rst b/docs/phages2050.embeddings.proteins.rst new file mode 100644 index 0000000..642620a --- /dev/null +++ b/docs/phages2050.embeddings.proteins.rst @@ -0,0 +1,29 @@ +phages2050.embeddings.proteins package +====================================== + +Submodules +---------- + +phages2050.embeddings.proteins.bert module +------------------------------------------ + +.. automodule:: phages2050.embeddings.proteins.bert + :members: + :undoc-members: + :show-inheritance: + +phages2050.embeddings.proteins.esm module +----------------------------------------- + +.. 
automodule:: phages2050.embeddings.proteins.esm + :members: + :undoc-members: + :show-inheritance: + +Module contents +--------------- + +.. automodule:: phages2050.embeddings.proteins + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/phages2050.embeddings.rst b/docs/phages2050.embeddings.rst new file mode 100644 index 0000000..b0bf14d --- /dev/null +++ b/docs/phages2050.embeddings.rst @@ -0,0 +1,19 @@ +phages2050.embeddings package +============================= + +Subpackages +----------- + +.. toctree:: + :maxdepth: 4 + + phages2050.embeddings.nucleotides + phages2050.embeddings.proteins + +Module contents +--------------- + +.. automodule:: phages2050.embeddings + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/phages2050.features.extractors.rst b/docs/phages2050.features.extractors.rst new file mode 100644 index 0000000..25882ae --- /dev/null +++ b/docs/phages2050.features.extractors.rst @@ -0,0 +1,21 @@ +phages2050.features.extractors package +====================================== + +Submodules +---------- + +phages2050.features.extractors.proteins module +---------------------------------------------- + +.. automodule:: phages2050.features.extractors.proteins + :members: + :undoc-members: + :show-inheritance: + +Module contents +--------------- + +.. automodule:: phages2050.features.extractors + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/phages2050.features.io.rst b/docs/phages2050.features.io.rst new file mode 100644 index 0000000..ab5dceb --- /dev/null +++ b/docs/phages2050.features.io.rst @@ -0,0 +1,21 @@ +phages2050.features.io package +============================== + +Submodules +---------- + +phages2050.features.io.fasta module +----------------------------------- + +.. automodule:: phages2050.features.io.fasta + :members: + :undoc-members: + :show-inheritance: + +Module contents +--------------- + +.. 
automodule:: phages2050.features.io + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/phages2050.features.rst b/docs/phages2050.features.rst new file mode 100644 index 0000000..700e338 --- /dev/null +++ b/docs/phages2050.features.rst @@ -0,0 +1,20 @@ +phages2050.features package +=========================== + +Subpackages +----------- + +.. toctree:: + :maxdepth: 4 + + phages2050.features.extractors + phages2050.features.io + phages2050.features.transformers + +Module contents +--------------- + +.. automodule:: phages2050.features + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/phages2050.features.transformers.rst b/docs/phages2050.features.transformers.rst new file mode 100644 index 0000000..641275f --- /dev/null +++ b/docs/phages2050.features.transformers.rst @@ -0,0 +1,21 @@ +phages2050.features.transformers package +======================================== + +Submodules +---------- + +phages2050.features.transformers.kmers module +--------------------------------------------- + +.. automodule:: phages2050.features.transformers.kmers + :members: + :undoc-members: + :show-inheritance: + +Module contents +--------------- + +.. automodule:: phages2050.features.transformers + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/phages2050.rst b/docs/phages2050.rst new file mode 100644 index 0000000..c19a193 --- /dev/null +++ b/docs/phages2050.rst @@ -0,0 +1,21 @@ +phages2050 package +================== + +Subpackages +----------- + +.. toctree:: + :maxdepth: 4 + + phages2050.classifiers + phages2050.crawlers + phages2050.embeddings + phages2050.features + +Module contents +--------------- + +.. automodule:: phages2050 + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/setup.rst b/docs/setup.rst new file mode 100644 index 0000000..552eb49 --- /dev/null +++ b/docs/setup.rst @@ -0,0 +1,7 @@ +setup module +============ + +.. 
automodule:: setup + :members: + :undoc-members: + :show-inheritance: diff --git a/phages2050/embeddings/proteins/bert.py b/phages2050/embeddings/proteins/bert.py index 178a05c..bc4a712 100644 --- a/phages2050/embeddings/proteins/bert.py +++ b/phages2050/embeddings/proteins/bert.py @@ -10,7 +10,7 @@ from fake_useragent import UserAgent -from bio_embeddings.embed.bert_embedder import BertEmbedder +from bio_embeddings.embed.prottrans_bert_bfd_embedder import ProtTransBertBFDEmbedder import torch @@ -96,7 +96,7 @@ def __init__(self, model_dir: str, cuda_device: int = None): if not os.path.exists(self.model_dir): raise Exception("BERT model wasn't downloaded yet") - self.embedder = BertEmbedder(model_directory=self.model_dir) + self.embedder = ProtTransBertBFDEmbedder(model_directory=self.model_dir) self.cuda_device = cuda_device # Select GPU card (if you have more than one) diff --git a/requirements.txt b/requirements.txt index f35faab..fcc8260 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,13 +1,13 @@ black==19.10b0 lxml==4.5.2 -pandas==1.1.2 +pandas==1.1.3 requests==2.24.0 biopython==1.78 fake-useragent==0.1.11 -joblib==0.16.0 +joblib==0.17.0 pandarallel==1.5.1 scikit-learn==0.22.2.post1 gensim==3.8.3 numpy==1.19.2 -pytest==6.0.2 +pytest==6.1.1 coverage==5.3 diff --git a/setup.py b/setup.py index b8b3353..d81983f 100644 --- a/setup.py +++ b/setup.py @@ -8,7 +8,7 @@ long_description = (CURRENT_DIR / "README.md").read_text(encoding="utf8") description = ( - "PHAGES2050 is a novel Python 3.6+ programming language framework" + "PHAGES2050 is a novel Python 3.8+ programming language framework" " to boost bacteriophage research & therapy" ) @@ -17,7 +17,7 @@ dependencies = list(map(str.strip, filter(None, dependencies.split("\n")))) -version = "0.0.7" +version = "0.0.8" setup( name="phages2050", @@ -30,7 +30,7 @@ author_email="p.tynecki@doktoranci.pb.edu.pl", url="https://github.com/ptynecki/PHAGES2050", 
download_url=f"https://github.com/ptynecki/PHAGES2050/archive/v{version}.tar.gz", - setup_requires=["setuptools>=49.6.0", "wheel>=0.35.1"], + setup_requires=["setuptools>=50.3.0", "wheel>=0.35.1"], install_requires=dependencies, packages=[ "phages2050", @@ -61,7 +61,7 @@ classifiers=[ "Development Status :: 3 - Alpha", "License :: OSI Approved :: MIT License", - "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.8", ], - python_requires=">=3.6", + python_requires=">=3.8", )