diff --git a/genomeinfo/__init__.py b/genomeinfo/__init__.py index 6e66689..e10b202 100644 --- a/genomeinfo/__init__.py +++ b/genomeinfo/__init__.py @@ -1,5 +1,7 @@ -from .interface import GenomeInfo +from __future__ import annotations + from . import core +from .interface import GenomeInfo for module in [core]: for name, func in module.__dict__.items(): @@ -7,3 +9,12 @@ setattr(GenomeInfo, name, func) __all__ = ["GenomeInfo"] + + +_db: GenomeInfo | None = None + +def connect() -> GenomeInfo: + global _db + if _db is None: + _db = GenomeInfo() + return _db diff --git a/genomeinfo/build/schema.py b/genomeinfo/build/schema.py index bd50b22..a809459 100644 --- a/genomeinfo/build/schema.py +++ b/genomeinfo/build/schema.py @@ -60,11 +60,9 @@ "WBcel235": "ce11", "WS190": "ce6", "WS195": np.NaN, - "WS195": np.NaN, "ASM2820141v1": np.NaN, } DROSOPHILA_ASSEMBLY_MAP = { - "Release_5": "dm3", "Release_5": "dm3", "Release_6": "dm6", "Release_6_plus_ISO1_MT": "dm6", diff --git a/genomeinfo/core/chrom.py b/genomeinfo/core/chrom.py index 376bc4c..6da8c68 100644 --- a/genomeinfo/core/chrom.py +++ b/genomeinfo/core/chrom.py @@ -1,4 +1,4 @@ -from typing import List, Optional +from __future__ import annotations import pandas as pd @@ -14,9 +14,9 @@ def filter_chromosome_data( cls, assembly: str, - roles: Optional[List[str]] = None, - units: Optional[List[str]] = None, - length: Optional[str] = None, + roles: list[str] | None = None, + units: list[str] | None = None, + length: str | None = None, ) -> pd.DataFrame: """ Filters the chromosome data based on the provided parameters. @@ -25,9 +25,9 @@ def filter_chromosome_data( ---------- assembly : str The assembly name to filter by. - roles : Optional[List[str]] + roles : list[str], optional The roles to filter by. - units : Optional[List[str]] + units : list[str], optional The units to filter by. length : Optional[str] The length condition to filter by (e.g., '> 1000'). @@ -88,11 +88,11 @@ def filter_chromosome_data( def get_chromnames( cls, assembly: str, - provider: Optional[str] = None, - roles: Optional[List[str]] = None, - units: Optional[List[str]] = None, - length: Optional[str] = None, -) -> List[str]: + provider: str | None = None, + roles: list[str] | None = None, + units: list[str] | None = None, + length: str | None = None, +) -> list[str]: """ Returns the chromosome names for the specified assembly. @@ -140,10 +140,10 @@ def get_chromnames( def get_chromsizes( cls, assembly: str, - provider: Optional[str] = None, - roles: Optional[List[str]] = None, - units: Optional[List[str]] = None, - length: Optional[str] = None, + provider: str | None = None, + roles: list[str] | None = None, + units: list[str] | None = None, + length: str | None = None, ) -> pd.Series: """ Returns the chromosome sizes for the specified assembly. @@ -193,10 +193,10 @@ def get_chromsizes( def get_chrom_eq( cls, assembly: str, - providers: Optional[List[str]] = None, - roles: Optional[List[str]] = None, - units: Optional[List[str]] = None, - length: Optional[str] = None, + providers: list[str] | None = None, + roles: list[str] | None = None, + units: list[str] | None = None, + length: str | None = None, ) -> pd.DataFrame: """ Returns the chromosome equivalence for the specified assembly. diff --git a/genomeinfo/core/info.py b/genomeinfo/core/info.py index 13e54bd..a9ed725 100644 --- a/genomeinfo/core/info.py +++ b/genomeinfo/core/info.py @@ -161,10 +161,6 @@ def get_species_info(cls, species: Optional[str] = None) -> str: species : Optional[str] The species name to filter by. - Returns - ---------- - NoReturn - Examples -------- >>> GenomeInfo.get_species_info("species", "homo_sapiens") @@ -203,10 +199,6 @@ def get_organism_info(cls, organism: Optional[str] = None) -> str: organism : Optional[str] The common name of the organism to filter by. - Returns - ---------- - NoReturn - Examples -------- >>> GenomeInfo.get_species_info("species", "homo_sapiens") @@ -334,14 +326,14 @@ def build_assembly_info(cls, local_db: pd.DataFrame, assembly: str) -> Dict[str, f"patch=='{local_db.assembly.unique()[0]}'" ).metadata.tolist()[0] - return core | { + return dict(core, **{ "species": local_db.species.unique()[0], "common_name": local_db.common_name.unique()[0], "synonyms": [local_db.assembly.unique()[0], local_db.assembly_ucsc.unique()[0]], "patches": local_db.patch.tolist(), "genbank": local_db.genbank_accession.tolist(), "refseq": local_db.refseq_accession.tolist(), - } + }) def available_assemblies(cls, provider: Optional[str] = None) -> List[str]: diff --git a/genomeinfo/interface.py b/genomeinfo/interface.py index a58c175..28b60c8 100644 --- a/genomeinfo/interface.py +++ b/genomeinfo/interface.py @@ -1,10 +1,8 @@ -import pandas as pd -import pyarrow.parquet as pq -import pyarrow as pa -import numpy as np -from typing import Self, NoReturn +from __future__ import annotations + from pathlib import Path -import os + +import pyarrow.parquet as pq __all__ = ["GenomeInfo"] @@ -13,17 +11,17 @@ class GenomeInfo: _instance = None _db_path = Path(__file__).parent / "data" / "db.parquet" - def __new__(cls, *args, **kwargs) -> Self: + def __new__(cls, *args, **kwargs): if cls._instance is None: - cls._instance = super(GenomeInfo, cls).__new__(cls) + cls._instance = super().__new__(cls) cls._instance._load_db() return cls._instance - def _load_db(self) -> NoReturn: + def _load_db(self) -> None: """Private method to connect to the database.""" self._data = pq.read_table(self._db_path).to_pandas() @classmethod - def connect(cls) -> Self: + def connect(cls): """Returns the singleton instance of GenomeInfo.""" return cls()