Skip to content

Commit

Permalink
BREAKING CHANGE: GenomeInfo -> AssemblyInfo migration (#6)
Browse files Browse the repository at this point in the history
  • Loading branch information
alejandrogzi authored Jul 29, 2024
1 parent 0f6d56e commit e8ccea9
Show file tree
Hide file tree
Showing 20 changed files with 89 additions and 89 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1 +1 @@
# genomeinfo
# assemblyinfo
12 changes: 6 additions & 6 deletions genomeinfo/__init__.py → assemblyinfo/__init__.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,20 @@
from __future__ import annotations

from . import core
from .interface import GenomeInfo
from .interface import AssemblyInfo

for module in [core]:
for name, func in module.__dict__.items():
if callable(func) and not name.startswith("_"):
setattr(GenomeInfo, name, func)
setattr(AssemblyInfo, name, func)

__all__ = ["GenomeInfo"]
__all__ = ["AssemblyInfo"]


_db: GenomeInfo | None = None
_db: AssemblyInfo | None = None

def connect() -> GenomeInfo:
def connect() -> AssemblyInfo:
global _db
if _db is None:
_db = GenomeInfo()
_db = AssemblyInfo()
return _db
File renamed without changes.
File renamed without changes.
4 changes: 2 additions & 2 deletions genomeinfo/core/__init__.py → assemblyinfo/core/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
get_refseq_accession,
)
from .assembly import (
AssemblyInfo,
Assembly,
assembly_info,
)
from .chrom import (
Expand Down Expand Up @@ -52,6 +52,6 @@
"get_chromsizes",
"get_chrom_eq",
"get_seqinfo",
"AssemblyInfo",
"Assembly",
"assembly_info",
]
8 changes: 4 additions & 4 deletions genomeinfo/core/acc.py → assemblyinfo/core/acc.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def get_genbank_accession(cls, patch: str) -> str:
Examples
--------
>>> GenomeInfo.get_genbank_accession("GRCh38.p14")
>>> AssemblyInfo.get_genbank_accession("GRCh38.p14")
"""
if not patch:
raise ValueError("ERROR: you must provide a patch!")
Expand Down Expand Up @@ -60,7 +60,7 @@ def get_refseq_accession(cls, patch: str) -> str:
Examples
--------
>>> GenomeInfo.get_refseq_accession("GRCh38.p14")
>>> AssemblyInfo.get_refseq_accession("GRCh38.p14")
"""
if not patch:
raise ValueError("ERROR: you must provide a patch!")
Expand Down Expand Up @@ -91,7 +91,7 @@ def get_patch_from_accession(cls, accession: str) -> List[str]:
Examples
--------
>>> GenomeInfo.get_patch_from_accession("GCA_000001405.29")
>>> AssemblyInfo.get_patch_from_accession("GCA_000001405.29")
"""
if not accession:
raise ValueError("ERROR: you must provide an accession!")
Expand Down Expand Up @@ -130,7 +130,7 @@ def get_assembly_from_accession(cls, accession: str) -> List[str]:
Examples
--------
>>> GenomeInfo.get_assembly_from_accession("GCA_000001405.29")
>>> AssemblyInfo.get_assembly_from_accession("GCA_000001405.29")
"""
if not accession:
raise ValueError("ERROR: you must provide an accession!")
Expand Down
12 changes: 6 additions & 6 deletions genomeinfo/core/assembly.py → assemblyinfo/core/assembly.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@
from .chrom import filter_chromosome_data
from .info import get_assembly_metadata

__all__ = ["AssemblyInfo", "assembly_info"]
__all__ = ["Assembly", "assembly_info"]


@dataclass
class AssemblyInfo:
class Assembly:
"""
A dataclass to store assembly information.
"""
Expand All @@ -35,7 +35,7 @@ def chromeq(self) -> Dict[str, Dict[str, str]]:
return pd.DataFrame(self.aliases).T

def __repr__(self) -> str:
    """
    Return a compact one-line summary of the assembly.

    The pieces are joined through implicit string-literal concatenation.
    Separating them with commas inside the parentheses would build a tuple
    instead, and ``repr()`` raises ``TypeError`` when ``__repr__`` returns
    anything other than a string.

    Returns
    -------
    str
        ``Assembly(assembly=..., species=..., common_name=...)``
    """
    return (
        f"Assembly(assembly={self.assembly}, "
        f"species={self.species}, "
        f"common_name={self.common_name})"
    )

Expand All @@ -47,7 +47,7 @@ def assembly_info(
roles: Optional[List[str]] = None,
units: Optional[List[str]] = None,
length: Optional[str] = None,
) -> AssemblyInfo:
) -> Assembly:
"""
Get assembly information for a given assembly.
Parameters
Expand All @@ -64,7 +64,7 @@ def assembly_info(
Chromosome length.
Returns
-------
AssemblyInfo
Assembly
Assembly information.
Examples
--------
Expand Down Expand Up @@ -92,7 +92,7 @@ def assembly_info(

metadata = get_assembly_metadata(cls, assembly=assembly)

return AssemblyInfo(
return Assembly(
assembly=assembly,
species=metadata["species"],
common_name=metadata["common_name"],
Expand Down
10 changes: 5 additions & 5 deletions genomeinfo/core/chrom.py → assemblyinfo/core/chrom.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def filter_chromosome_data(
Examples
--------
>>> GenomeInfo.filter_chromosome_data("hg38", roles=["assembled"])
>>> AssemblyInfo.filter_chromosome_data("hg38", roles=["assembled"])
"""
if assembly in cls._data["assembly"].tolist():
group = "assembly"
Expand Down Expand Up @@ -121,7 +121,7 @@ def get_chromnames(
Examples
--------
>>> GenomeInfo.get_chromnames("hg38", provider="ucsc")
>>> AssemblyInfo.get_chromnames("hg38", provider="ucsc")
"""
if not provider or provider == "ucsc":
colname = "name"
Expand Down Expand Up @@ -173,7 +173,7 @@ def get_chromsizes(
Examples
--------
>>> GenomeInfo.get_chromsizes("hg38", provider="ucsc")
>>> AssemblyInfo.get_chromsizes("hg38", provider="ucsc")
"""
if not provider or provider == "ucsc":
colname = "name"
Expand Down Expand Up @@ -226,7 +226,7 @@ def get_chrom_eq(
Examples
--------
>>> GenomeInfo.get_chrom_eq("hg38", providers=["ucsc", "genbank"])
>>> AssemblyInfo.get_chrom_eq("hg38", providers=["ucsc", "genbank"])
"""
if not providers:
providers = ["name", "ncbi", "genbank", "refseq"]
Expand Down Expand Up @@ -257,7 +257,7 @@ def get_seqinfo(cls, assembly: str) -> pd.DataFrame:
Examples
--------
>>> GenomeInfo.get_seqinfo("hg38")
>>> AssemblyInfo.get_seqinfo("hg38")
"""
if assembly in cls._data["assembly"].tolist():
group = "assembly"
Expand Down
34 changes: 17 additions & 17 deletions genomeinfo/core/info.py → assemblyinfo/core/info.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,14 @@

def get_db(cls) -> pd.DataFrame:
"""
Returns the GenomeInfo database.
Returns the AssemblyInfo database.
This method returns the database stored in the class attribute `_data`.
Parameters
----------
cls : type
The class object containing the GenomeInfo data.
The class object containing the AssemblyInfo data.
Returns
-------
Expand All @@ -37,24 +37,24 @@ def get_db(cls) -> pd.DataFrame:
Examples
--------
>>> GenomeInfo.get_db()
>>> AssemblyInfo.get_db()
"""

return cls._data


def info(cls) -> str:
"""
Display information about available entries in GenomeInfo.
Display information about available entries in AssemblyInfo.
This method prints a formatted message showing the unique values for
species, UCSC assemblies, and NCBI assemblies available in the
GenomeInfo data.
AssemblyInfo data.
Parameters
----------
cls : type
The class object containing the GenomeInfo data.
The class object containing the AssemblyInfo data.
Returns
-------
Expand All @@ -63,9 +63,9 @@ def info(cls) -> str:
Examples
--------
>>> GenomeInfo.info()
>>> AssemblyInfo.info()
```
GenomeInfo available entries:
AssemblyInfo available entries:
Species:
human, mouse, rat
Assemblies (UCSC):
Expand Down Expand Up @@ -123,7 +123,7 @@ def get_info(cls, key: str, value: Optional[str]) -> pd.DataFrame:
Examples
--------
>>> GenomeInfo.get_info("species", "homo_sapiens")
>>> AssemblyInfo.get_info("species", "homo_sapiens")
"""
if value is None:
raise ValueError(f"ERROR! Pick a {key}: {cls._data[key].unique()}")
Expand Down Expand Up @@ -163,7 +163,7 @@ def get_species_info(cls, species: Optional[str] = None) -> str:
Examples
--------
>>> GenomeInfo.get_species_info("species", "homo_sapiens")
>>> AssemblyInfo.get_species_info("homo_sapiens")
```
Genome Information for homo_sapiens:
Common Names:
Expand Down Expand Up @@ -201,7 +201,7 @@ def get_organism_info(cls, organism: Optional[str] = None) -> str:
Examples
--------
>>> GenomeInfo.get_species_info("species", "homo_sapiens")
>>> AssemblyInfo.get_organism_info("human")
```
Genome Information for human:
Species:
Expand Down Expand Up @@ -249,7 +249,7 @@ def get_assembly_metadata(cls, assembly: Optional[str] = None) -> Dict[str, Any]
Examples
--------
>>> GenomeInfo.get_assembly_metadata("hg38")
>>> AssemblyInfo.get_assembly_metadata("hg38")
{
'assembly_level': 'Chromosome',
'assembly_method': None,
Expand Down Expand Up @@ -316,7 +316,7 @@ def build_assembly_info(cls, local_db: pd.DataFrame, assembly: str) -> Dict[str,
Examples
--------
>>> GenomeInfo.build_assembly_info(local_db, "hg38")
>>> AssemblyInfo.build_assembly_info(local_db, "hg38")
"""
if len(local_db.patch) > 1:
latest = sorted(local_db.patch.tolist(), key=get_version, reverse=True)[0]
Expand Down Expand Up @@ -357,7 +357,7 @@ def available_assemblies(cls, provider: Optional[str] = None) -> List[str]:
Examples
--------
>>> GenomeInfo.available_assemblies()
>>> AssemblyInfo.available_assemblies()
```
['WS144',
'WBcel215',
Expand Down Expand Up @@ -397,7 +397,7 @@ def available_patches(cls, assembly: Optional[str] = None) -> List[str]:
Examples
--------
>>> GenomeInfo.available_patches('GRCh38')
>>> AssemblyInfo.available_patches('GRCh38')
```
['GRCh38',
'GRCh38.p1',
Expand Down Expand Up @@ -433,7 +433,7 @@ def available_species(cls) -> List[str]:
Examples
--------
>>> GenomeInfo.available_species()
>>> AssemblyInfo.available_species()
```
['homo_sapiens', 'mus_musculus']
```
Expand Down Expand Up @@ -462,7 +462,7 @@ def available_accessions(cls, assembly: str) -> List[str]:
Examples
---------
>>> GenomeInfo.available_accessions('hg38')
>>> AssemblyInfo.available_accessions('hg38')
```
['GCA_000001405.15',
'GCA_000001405.16',
Expand Down
File renamed without changes.
File renamed without changes.
6 changes: 3 additions & 3 deletions genomeinfo/interface.py → assemblyinfo/interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@

import pyarrow.parquet as pq

__all__ = ["GenomeInfo"]
__all__ = ["AssemblyInfo"]


class GenomeInfo:
class AssemblyInfo:
_instance = None
_db_path = Path(__file__).parent / "data" / "db.parquet"

Expand All @@ -23,5 +23,5 @@ def _load_db(self) -> None:

@classmethod
def connect(cls):
"""Returns the singleton instance of GenomeInfo."""
"""Returns the singleton instance of AssemblyInfo."""
return cls()
4 changes: 2 additions & 2 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@
# -- Project information -----------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information

info = metadata("genomeinfo")
info = metadata("assemblyinfo")
project_name = info["Name"]
project = "GenomeInfo"
project = "AssemblyInfo"
author = "Open2C"
copyright = f"{datetime.now():%Y}, {author}."
version = info["Version"]
Expand Down
8 changes: 4 additions & 4 deletions docs/index.rst
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
.. GenomeInfo documentation master file, created by
.. assemblyinfo documentation master file, created by
sphinx-quickstart on Sat Jul 6 23:33:22 2024.
You can adapt this file completely to your liking, but it should at least
contain the root `toctree` directive.
genomeinfo
==========
assemblyinfo
============

`Genomeinfo <https://github.com/open2c/genomeinfo>`_ is a library to interact with genome metadata from different providers in python. This package is build on top of NCBI and UCSC data, allowing fast and accurate interconversions between chromosome names, latest assemblies and more!
`Assemblyinfo <https://github.com/open2c/assemblyinfo>`_ is a library to interact with genome metadata from different providers in Python. This package is built on top of NCBI and UCSC data, allowing fast and accurate interconversions between chromosome names, latest assemblies and more!

.. toctree::
:maxdepth: 2
Expand Down
6 changes: 3 additions & 3 deletions docs/quickstart.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,18 @@
## Installation

```
$ pip install genomeinfo
$ pip install assemblyinfo
```

To install the latest development version of assemblyinfo from GitHub, first make a local clone of the GitHub repository:

```
$ git clone https://github.com/open2c/genomeinfo
$ git clone https://github.com/open2c/assemblyinfo
```

Then, compile and install assemblyinfo in [development mode](https://setuptools.readthedocs.io/en/latest/setuptools.html#development-mode). This installs the package without moving it to a system folder, and thus allows for testing changes to the python code on the fly.

```
$ cd bioframe
$ cd assemblyinfo
$ pip install -e ./
```
Loading

0 comments on commit e8ccea9

Please sign in to comment.