Skip to content

Commit

Permalink
Merge pull request #24 from TheJacksonLaboratory/G3-75-all-geneweaver-packages-need-to-upgrade-pydantic-2-0
Browse files Browse the repository at this point in the history

G3-75: Upgrade pydantic to >=2
  • Loading branch information
bergsalex authored Jul 11, 2024
2 parents 573f419 + 2c91c6b commit c02820a
Show file tree
Hide file tree
Showing 15 changed files with 255 additions and 142 deletions.
207 changes: 150 additions & 57 deletions poetry.lock

Large diffs are not rendered by default.

6 changes: 4 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "geneweaver-core"
version = "0.10.0a2"
version = "0.10.0a3"
description = "The core of the Jax-Geneweaver Python library"
authors = ["Jax Computational Sciences <[email protected]>"]
readme = "README.md"
Expand All @@ -13,11 +13,12 @@ packages = [

[tool.poetry.dependencies]
python = "^3.9"
pydantic = { extras = ["dotenv"], version = "^1.10" }
pydantic = { extras = ["dotenv"], version = "^2.8" }
openpyxl = "^3.1.2"
numpy = ">=1.22,<2"
pandas = ">=1.5,<3"
requests = "^2.32.0"
pydantic-settings = "^2.3.4"

[tool.poetry.group.dev.dependencies]
pylint = "^2.15.4"
Expand All @@ -26,6 +27,7 @@ pydocstyle = "^6.1.1"
pytest-cov = "^4.0.0"
geneweaver-testing = "^0.1.0"


[tool.ruff]
select = ['F', 'E', 'W', 'A', 'C90', 'N', 'B', 'ANN', 'D', 'I', 'ERA', 'PD', 'NPY', 'PT']

Expand Down
15 changes: 7 additions & 8 deletions src/geneweaver/core/config_class.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,24 +4,23 @@
https://pydantic-docs.helpmanual.io/usage/settings/
"""

from pydantic import BaseSettings
from pydantic_settings import BaseSettings, SettingsConfigDict


class ExternalServiceSettings(BaseSettings):
"""External Service Config and Settings Configuration."""

PUBMED_XLM_SVC_URL = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id={0}&retmode=xml"
PUBMED_XLM_SVC_URL: str = (
"https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id={0}&retmode=xml"
)


class CoreSettings(BaseSettings):
"""Root Config and Settings Configuration."""

PROJECT_NAME = "jax-geneweaver-core"
VERSION = "0.0.2"
PROJECT_NAME: str = "jax-geneweaver-core"
VERSION: str = "0.0.2"
LOG_LEVEL: str = "INFO"
SERVICE_URLS: ExternalServiceSettings = ExternalServiceSettings()

class Config:
"""Pydantic Config class."""

env_prefix = "GW_"
model_config = SettingsConfigDict(env_prefix="GW_")
4 changes: 4 additions & 0 deletions src/geneweaver/core/enum.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,8 @@ class Species(_StrToIntMixin, Enum):
SACCHAROMYCES_CEREVISIAE = "Saccharomyces Cerevisiae"
GALLUS_GALLUS = "Gallus Gallus"
CANIS_FAMILIARIS = "Canis Familiaris"
XENOPUS_TROPICALIS = "Xenopus Tropicalis"
XENOPUS_LAEVIS = "Xenopus Laevis"

@staticmethod
def _int_class() -> Enum:
Expand All @@ -243,6 +245,8 @@ class SpeciesInt(_IntToStrMixin, IntEnum):
SACCHAROMYCES_CEREVISIAE = 9
GALLUS_GALLUS = 10
CANIS_FAMILIARIS = 11
XENOPUS_TROPICALIS = 12
XENOPUS_LAEVIS = 13

@staticmethod
def _str_class() -> Enum:
Expand Down
39 changes: 23 additions & 16 deletions src/geneweaver/core/schema/batch.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
"""Module for defining schemas for batch endpoints."""

# ruff: noqa: N805, ANN001, ANN101
from typing import List, Optional, Union
# ruff: noqa: N805, ANN001, ANN101, ANN401
from typing import Any, List, Optional, Type, Union

from geneweaver.core.enum import GeneIdentifierInt, MicroarrayInt, SpeciesInt
from geneweaver.core.parse.score import parse_score
from geneweaver.core.schema.gene import GeneValue
from geneweaver.core.schema.messages import MessageResponse
from geneweaver.core.schema.score import GenesetScoreType
from pydantic import BaseModel, validator
from pydantic import BaseModel, field_validator, model_validator
from typing_extensions import Self

# Header characters which DO NOT need to be space separated.
HEADER_CHARACTERS = {
Expand Down Expand Up @@ -86,17 +87,21 @@ class BatchUploadGeneset(BaseModel):
description: str = ""
values: List[GeneValue]

@validator("species", pre=True)
def initialize_species(cls, v) -> SpeciesInt:
@field_validator("species", mode="before")
@classmethod
def initialize_species(cls: Type["BatchUploadGeneset"], v: Any) -> SpeciesInt:
"""Initialize species."""
if isinstance(v, SpeciesInt):
return v
elif isinstance(v, str):
return SpeciesInt[v.replace(" ", "_").upper()]
return SpeciesInt(v)

@validator("gene_id_type", pre=True)
def initialize_gene_id_type(cls, v) -> Union[GeneIdentifierInt, MicroarrayInt]:
@field_validator("gene_id_type", mode="before")
@classmethod
def initialize_gene_id_type(
cls: Type["BatchUploadGeneset"], v: Any
) -> Union[GeneIdentifierInt, MicroarrayInt]:
"""Initialize gene id type."""
if isinstance(v, GeneIdentifierInt) or isinstance(v, MicroarrayInt):
return v
Expand All @@ -111,29 +116,31 @@ def initialize_gene_id_type(cls, v) -> Union[GeneIdentifierInt, MicroarrayInt]:
]
return MicroarrayInt(v)

@validator("score", pre=True)
def initialize_score(cls, v) -> GenesetScoreType:
@field_validator("score", mode="before")
@classmethod
def initialize_score(cls: Type["BatchUploadGeneset"], v: Any) -> GenesetScoreType:
"""Initialize score type."""
if isinstance(v, GenesetScoreType):
return v
elif isinstance(v, dict):
return GenesetScoreType(**v)
return parse_score(v)

@validator("private", pre=True)
def private_to_bool(cls, v) -> bool:
@field_validator("private", mode="before")
@classmethod
def private_to_bool(cls: Type["BatchUploadGeneset"], v: Any) -> bool:
"""Convert private str to bool."""
if isinstance(v, bool):
return v
return v.lower() != "public"

@validator("curation_id", pre=True)
def curation_id_to_int(cls, v, values) -> int:
@model_validator(mode="after")
def curation_id_to_int(self) -> Self:
"""Initialize curation id based on `private` value."""
if not v:
if not self.curation_id:
# If the geneset is private, it should be set to have
# curation tier 5, otherwise it should be set to have
# curation tier 4.
# It should default to private if not specified.
return 5 if values.get("private", True) else 4
return v
self.curation_id = 5 if self.private else 4
return self
14 changes: 5 additions & 9 deletions src/geneweaver/core/schema/gene.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from typing import Any, List, Optional

from geneweaver.core.enum import GeneIdentifier, Species
from pydantic import BaseModel
from pydantic import BaseModel, ConfigDict


class Gene(BaseModel):
Expand All @@ -26,20 +26,16 @@ class GeneRow(BaseModel):
gdb_id: int
sp_id: int
ode_pref: bool
ode_date: Optional[str]
old_ode_gene_ids: Optional[List[int]]
ode_date: Optional[str] = None
old_ode_gene_ids: Optional[List[int]] = None


class GeneValue(BaseModel):
"""A gene value."""

symbol: str
value: float

class Config:
"""Pydantic config."""

allow_mutation = False
model_config = ConfigDict(frozen=True)

def __str__(self: "GeneValue") -> str:
"""Return the gene symbol."""
Expand Down Expand Up @@ -76,4 +72,4 @@ class GeneDatabaseRow(BaseModel):
gdb_shortname: str
gdb_date: str
gdb_precision: int
gdb_linkout_url: Optional[str]
gdb_linkout_url: Optional[str] = None
26 changes: 11 additions & 15 deletions src/geneweaver/core/schema/legacy_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,20 +3,20 @@
from typing import List, Optional

from geneweaver.core.enum import GenesetAccess, ScoreType
from pydantic import BaseModel, HttpUrl
from pydantic import BaseModel, ConfigDict, HttpUrl


class AddGenesetByUserPublication(BaseModel):
"""Publication schema for adding genesets by user."""

pub_abstract: Optional[str]
pub_authors: Optional[str]
pub_journal: Optional[str]
pub_pages: Optional[str]
pub_pubmed: Optional[str]
pub_title: Optional[str]
pub_volume: Optional[str]
pub_year: Optional[str]
pub_abstract: Optional[str] = None
pub_authors: Optional[str] = None
pub_journal: Optional[str] = None
pub_pages: Optional[str] = None
pub_pubmed: Optional[str] = None
pub_title: Optional[str] = None
pub_volume: Optional[str] = None
pub_year: Optional[str] = None


class AddGenesetByUserBase(BaseModel):
Expand All @@ -28,14 +28,10 @@ class AddGenesetByUserBase(BaseModel):
gs_name: str
gs_threshold_type: ScoreType
permissions: GenesetAccess
publication: Optional[AddGenesetByUserPublication]
publication: Optional[AddGenesetByUserPublication] = None
select_groups: List[str]
sp_id: str

class Config:
"""Pydantic config."""

use_enum_values = True
model_config = ConfigDict(use_enum_values=True)


class AddGenesetByUser(AddGenesetByUserBase):
Expand Down
23 changes: 10 additions & 13 deletions src/geneweaver/core/schema/score.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
from typing import Optional

from geneweaver.core.enum import ScoreType
from pydantic import BaseModel, validator
from pydantic import BaseModel, model_validator
from typing_extensions import Self


class GenesetScoreType(BaseModel):
Expand All @@ -29,26 +30,22 @@ def threshold_as_db_string(self) -> str:
else:
return str(self.threshold)

@validator("threshold_low")
def threshold_low_must_be_less_than_threshold(
cls, v: Optional[float], values: dict
) -> Optional[float]:
@model_validator(mode="after")
def threshold_low_must_be_less_than_threshold(self) -> Self:
"""Threshold low must be less than threshold."""
if v is not None and v > values.get("threshold"):
if self.threshold_low is not None and self.threshold_low > self.threshold:
raise ValueError("threshold_low must be less than threshold")
return v
return self

@validator("threshold_low")
def threshold_low_correlation_and_effect_only(
cls, v: Optional[float], values: dict
) -> Optional[float]:
@model_validator(mode="after")
def threshold_low_correlation_and_effect_only(self) -> Self:
"""Threshold low should only be set for correlation and effect score types."""
if v is not None and values.get("score_type") not in [
if self.threshold_low is not None and self.score_type not in [
ScoreType.CORRELATION,
ScoreType.EFFECT,
]:
raise ValueError(
"threshold_low should only be set for "
"correlation and effect score types"
)
return v
return self
6 changes: 3 additions & 3 deletions src/geneweaver/core/schema/species.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ class Species(BaseModel):
id: int # noqa: A003
name: str
taxonomic_id: int
reference_gene_identifier: Optional[GeneIdentifier]
reference_gene_identifier: Optional[GeneIdentifier] = None


class SpeciesRow(BaseModel):
Expand All @@ -22,7 +22,7 @@ class SpeciesRow(BaseModel):
sp_id: int
sp_name: str
sp_taxid: int
sp_ref_gdb_id: Optional[int]
sp_ref_gdb_id: Optional[int] = None
sp_date: datetime.date
sp_biomart_info: Optional[str]
sp_biomart_info: Optional[str] = None
sp_source_data: Json[Any]
10 changes: 7 additions & 3 deletions tests/unit/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,18 +13,22 @@
@pytest.fixture(scope="session")
def core_settings_fields() -> List[str]:
"""Return a list of the pydantic Settings class fields."""
return [field.name for field in CoreSettings.__fields__.values()]
return [name for name in CoreSettings.model_fields.keys()]


@pytest.fixture(scope="session")
def core_settings_required_fields() -> List[str]:
"""Return a list of the pydantic Settings class fields."""
return [field.name for field in CoreSettings.__fields__.values() if field.required]
return [
name for name, field in CoreSettings.model_fields.items() if field.is_required()
]


@pytest.fixture(scope="session")
def core_settings_optional_fields() -> Dict[str, Any]:
"""Return a dict of the optional pydantic Settings class fields."""
return {
f.name: f.default for f in CoreSettings.__fields__.values() if not f.required
name: field.default
for name, field in CoreSettings.model_fields.items()
if not field.is_required()
}
2 changes: 1 addition & 1 deletion tests/unit/publication/pubmed/test_get_publication.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ def test_get_publication(

result = get_publication(pubmed_id)

assert result.dict() == expected.dict()
assert result.model_dump() == expected.model_dump()

mock_get_xml_for_pubmed_id.assert_called_once_with(pubmed_id)
mock_extract_fields.assert_called_once()
2 changes: 1 addition & 1 deletion tests/unit/render/batch/test_format_genest_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,6 @@ def test_format_geneset_metadata_standard(mock_batch_upload_geneset_all_combinat
# Assert that the output is a string and has the expected structure
assert isinstance(formatted_metadata, str)
# Additional assertions can be made based on the specific format of the metadata
assert formatted_metadata.count("\n") == 8
assert formatted_metadata.count("\n") == 9
for char in ["!", "@", "%", "A", ":", "=", "+"]:
assert char in formatted_metadata
4 changes: 3 additions & 1 deletion tests/unit/schema/legacy/test_legacy_schema_create.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
AddGenesetByUserFile,
AddGenesetByUserPublication,
)
from pydantic.networks import AnyUrl


def test_add_gs_by_user_pub_schema(add_geneset_by_user_publication_data: dict) -> None:
Expand Down Expand Up @@ -44,4 +45,5 @@ def test_add_gs_by_user_file_schema(add_geneset_by_user_file_data: dict) -> None
"""Test the AddGenesetByUserFile class."""
gs = AddGenesetByUserFile(**add_geneset_by_user_file_data)
_shared_add_gs_by_user_asserts(gs, add_geneset_by_user_file_data)
assert gs.file_url == add_geneset_by_user_file_data["file_url"]
assert gs.file_url == AnyUrl(add_geneset_by_user_file_data["file_url"])
assert str(gs.file_url) == add_geneset_by_user_file_data["file_url"] + "/"
Loading

0 comments on commit c02820a

Please sign in to comment.