Skip to content

Commit

Permalink
Merge pull request #20 from yaseminbridges/18-use-pheval-as-a-dependency
Browse files Browse the repository at this point in the history
18 use pheval as a dependency
  • Loading branch information
yaseminbridges authored Jan 4, 2024
2 parents 91cac4f + f1590cd commit 3523838
Show file tree
Hide file tree
Showing 7 changed files with 2,363 additions and 2,792 deletions.
4,949 changes: 2,342 additions & 2,607 deletions poetry.lock

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ polars = "^0.17.5"
oaklib = "^0.5.1"
click = "^8.1.3"
ontobio = "^2.8.5"
pheval = {git = "https://github.com/monarch-initiative/pheval.git"}

[tool.poetry.dev-dependencies]
pytest = "^7.2.0"
Expand Down
15 changes: 6 additions & 9 deletions src/phenotype2phenopacket/add/add_genes.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,16 @@

import polars as pl
from phenopackets import Disease

from phenotype2phenopacket.utils.gene_map_utils import (
from pheval.utils.phenopacket_utils import (
GeneIdentifierUpdater,
create_gene_identifier_map,
create_hgnc_dict,
phenopacket_reader,
)

from phenotype2phenopacket.utils.phenopacket_utils import (
PhenopacketInterpretationExtender,
PhenopacketUtil,
phenopacket_reader,
write_phenopacket,
)
from phenotype2phenopacket.utils.utils import all_files
Expand Down Expand Up @@ -66,20 +66,17 @@ def add_genes(
) if phenopacket_with_genes is not None else None


def add_genes_to_directory(
phenopacket_dir: Path, disease_pg: pl.DataFrame, hgnc_data_file: Path, output_dir: Path
):
def add_genes_to_directory(phenopacket_dir: Path, disease_pg: pl.DataFrame, output_dir: Path):
"""
Add known gene-to-phenotype relationships to the interpretations of a directory of phenopackets.
Args:
phenopacket_dir (Path): Directory containing the phenopacket files.
disease_pg (pl.DataFrame): DataFrame containing disease.pg entries.
hgnc_data_file (Path): File path to HGNC data file.
output_dir (Path): Directory to store the updated phenopackets.
"""
hgnc_dict = create_hgnc_dict(hgnc_data_file)
identifier_map = create_gene_identifier_map(hgnc_data_file)
hgnc_dict = create_hgnc_dict()
identifier_map = create_gene_identifier_map()
gene_identifier_updater = GeneIdentifierUpdater(
gene_identifier="ensembl_id", hgnc_data=hgnc_dict, identifier_map=identifier_map
)
Expand Down
10 changes: 0 additions & 10 deletions src/phenotype2phenopacket/cli_add.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,6 @@
help="Path to disease.pg data file.",
type=Path,
)
@click.option(
"--hgnc-data",
"-h",
required=True,
help="Path to hgnc_full_set data file.",
type=Path,
)
@click.option(
"--output-dir",
"-o",
Expand All @@ -38,7 +31,6 @@
def add_genes_command(
phenopacket_dir: Path,
disease_pg: Path,
hgnc_data: Path,
output_dir: Path,
):
"""
Expand All @@ -47,14 +39,12 @@ def add_genes_command(
Args:
phenopacket_dir (Path): Directory containing the phenopacket files.
disease_pg (Path): Path to the disease.pg file.
hgnc_data (Path): Path to the HGNC data file.
output_dir (Path): Directory to store the updated phenopackets.
"""
output_dir.mkdir(exist_ok=True)
disease_pg_df = read_disease_pg(disease_pg)
add_genes_to_directory(
phenopacket_dir,
disease_pg_df,
hgnc_data,
output_dir,
)
116 changes: 0 additions & 116 deletions src/phenotype2phenopacket/utils/gene_map_utils.py

This file was deleted.

50 changes: 9 additions & 41 deletions src/phenotype2phenopacket/utils/phenopacket_utils.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import json
import re
import secrets
import signal
Expand All @@ -9,15 +8,13 @@
from typing import List, Union

import polars as pl
from google.protobuf.json_format import MessageToJson, Parse
from google.protobuf.timestamp_pb2 import Timestamp
from oaklib.implementations import ProntoImplementation
from ontobio import Ontology
from phenopackets import (
Age,
Diagnosis,
Disease,
Family,
GeneDescriptor,
GenomicInterpretation,
Individual,
Expand All @@ -29,8 +26,8 @@
Resource,
TimeElement,
)
from pheval.utils.phenopacket_utils import GeneIdentifierUpdater, create_json_message

from phenotype2phenopacket.utils.gene_map_utils import GeneIdentifierUpdater
from phenotype2phenopacket.utils.utils import is_float


Expand Down Expand Up @@ -119,25 +116,6 @@ class PhenopacketFile:
phenopacket_path: Path


def phenopacket_reader(file: Path):
    """
    Read a Phenopacket file and return its contents as a Phenopacket or Family object.

    Args:
        file (Path): Path to the Phenopacket file
    Returns:
        Union[Phenopacket, Family]: Contents of the Phenopacket file as a Phenopacket or Family object
    """
    # The context manager closes the handle even when json.load raises on
    # malformed input; a separate name keeps the `file` argument intact.
    with open(file, "r") as phenopacket_file:
        phenopacket = json.load(phenopacket_file)
    # A "proband" entry selects the Family message; anything else is parsed
    # as a Phenopacket.
    message = Family() if "proband" in phenopacket else Phenopacket()
    return Parse(json.dumps(phenopacket), message)


def create_phenopacket_file_name_from_disease(disease_name: str) -> Path:
"""
Create a Phenopacket file name from the disease.
Expand All @@ -149,19 +127,6 @@ def create_phenopacket_file_name_from_disease(disease_name: str) -> Path:
return Path(normalised_string.replace(" ", "_") + ".json")


def create_json_message(phenopacket: Phenopacket) -> str:
    """
    Serialise a Phenopacket into a JSON string for writing to a file.

    Args:
        phenopacket (Phenopacket): The message to serialise.
    Returns:
        str: The JSON text produced by MessageToJson for the given message.
    """
    json_message = MessageToJson(phenopacket)
    return json_message


def write_phenopacket(phenopacket: Phenopacket, output_file: Path) -> None:
"""
Write a Phenopacket object to a file in JSON format.
Expand All @@ -172,11 +137,14 @@ def write_phenopacket(phenopacket: Phenopacket, output_file: Path) -> None:
"""
phenopacket_json = create_json_message(phenopacket)
suffix = 1
while Path(
output_file.parents[0].joinpath(f"{output_file.stem}_patient_{suffix}.json")
).is_file():
suffix += 1
output_file = output_file.parents[0].joinpath(f"{output_file.stem}_patient_{suffix}.json")
if "_patient_" not in output_file.stem:
while Path(
output_file.parents[0].joinpath(f"{output_file.stem}_patient_{suffix}.json")
).is_file():
suffix += 1
output_file = output_file.parents[0].joinpath(f"{output_file.stem}_patient_{suffix}.json")
else:
pass
with open(output_file, "w") as file:
file.write(phenopacket_json)
file.close()
Expand Down
14 changes: 5 additions & 9 deletions tests/test_phenopacket_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,13 @@
Resource,
TimeElement,
)
from polars.testing import assert_frame_equal

from phenotype2phenopacket.utils.gene_map_utils import (
from pheval.utils.phenopacket_utils import (
GeneIdentifierUpdater,
create_gene_identifier_map,
create_hgnc_dict,
)
from polars.testing import assert_frame_equal

from phenotype2phenopacket.utils.phenopacket_utils import (
OnsetTerm,
PhenopacketInterpretationExtender,
Expand Down Expand Up @@ -1183,12 +1183,8 @@ def setUpClass(cls) -> None:
}
cls.gene_identifier_updater = GeneIdentifierUpdater(
gene_identifier="ensembl_id",
hgnc_data=create_hgnc_dict(
Path("./src/phenotype2phenopacket/resources/hgnc_complete_set_2023-04-01.txt")
),
identifier_map=create_gene_identifier_map(
Path("./src/phenotype2phenopacket/resources/hgnc_complete_set_2023-04-01.txt")
),
hgnc_data=create_hgnc_dict(),
identifier_map=create_gene_identifier_map(),
)

def test_create_gene_genomic_interpretation(self):
Expand Down

0 comments on commit 3523838

Please sign in to comment.