Skip to content

Commit

Permalink
Merge pull request #25 from yaseminbridges/24-sigalrm-doesnt-work-on-…
Browse files Browse the repository at this point in the history
…windows

Remove dependency on SIGALRM
  • Loading branch information
yaseminbridges authored Jan 31, 2024
2 parents 4ab4e47 + 169a60f commit 7dccebb
Show file tree
Hide file tree
Showing 3 changed files with 57 additions and 43 deletions.
8 changes: 5 additions & 3 deletions src/phenotype2phenopacket/add/add_genes.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,9 +61,11 @@ def add_genes(
omim_disease_phenotype_gene_map=filtered_disease_pg,
gene_identifier_updater=gene_identifier_updater,
)
write_phenopacket(
phenopacket_with_genes, output_dir.joinpath(phenopacket_path.name)
) if phenopacket_with_genes is not None else None
(
write_phenopacket(phenopacket_with_genes, output_dir.joinpath(phenopacket_path.name))
if phenopacket_with_genes is not None
else None
)


def add_genes_to_directory(phenopacket_dir: Path, disease_pg: pl.DataFrame, output_dir: Path):
Expand Down
54 changes: 30 additions & 24 deletions src/phenotype2phenopacket/utils/phenopacket_utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import re
import secrets
import signal
import threading
import warnings
from copy import copy
from dataclasses import dataclass
Expand Down Expand Up @@ -99,10 +99,6 @@ class OnsetTerm:
}


# Signal handler with the (signum, frame) signature expected by signal.signal.
# It is registered for SIGALRM via signal.signal(signal.SIGALRM, handler) and
# armed with signal.alarm(time_limit) in the term-filtering code shown below.
# Neither argument is used: the handler only raises TimeoutError so the
# surrounding try/except TimeoutError can abandon the filtering loop.
def handler(signum, frame):
raise TimeoutError("Took too long to filter terms.")


@dataclass
class PhenopacketFile:
"""
Expand Down Expand Up @@ -352,22 +348,32 @@ def filter_phenotype_entries(self, frequency_df: pl.DataFrame, max_number: int):
It sets a time limit for execution and handles timeouts by returning filtered results or sampled data.
"""
time_limit = 15
signal.signal(signal.SIGALRM, handler)
signal.alarm(time_limit)
try:
while len(self.filtered_df) < max_number:
for phenotype_entry in frequency_df.rows(named=True):
if len(self.filtered_df) >= max_number:
break
self.check_frequency(phenotype_entry)
return pl.from_dicts(self.filtered_df)
except TimeoutError:

def worker():
    """Fill ``self.filtered_df`` until ``max_number`` entries exist or a stop is requested.

    Runs in a background thread started by the enclosing method. It walks the
    rows of ``frequency_df`` repeatedly and passes each one to
    ``self.check_frequency``.

    The enclosing method sets ``stop_event`` once the join timeout elapses.
    Both loops consult it so the thread finishes promptly instead of
    continuing to append to ``self.filtered_df`` after the caller has already
    built its result from that list.
    """
    try:
        # ``stop_event`` is bound in the enclosing scope before the thread is
        # started, so it is always available by the time this body runs.
        while len(self.filtered_df) < max_number and not stop_event.is_set():
            for phenotype_entry in frequency_df.rows(named=True):
                if len(self.filtered_df) >= max_number or stop_event.is_set():
                    break
                self.check_frequency(phenotype_entry)
    except Exception as e:
        # Best-effort: report the failure and let the caller fall back to
        # whatever has been collected so far.
        print("Error in worker thread:", e)

thread = threading.Thread(target=worker)
thread.daemon = True
stop_event = threading.Event()
thread.start()
thread.join(timeout=time_limit)

if thread.is_alive():
stop_event.set()
print("Timed out!")
if len(self.filtered_df) == 0:
return frequency_df.sample(n=max_number)
else:
return pl.from_dicts(self.filtered_df)
finally:
signal.alarm(0)
else:
return pl.from_dicts(self.filtered_df)

def get_patient_terms(self) -> pl.DataFrame:
"""
Expand Down Expand Up @@ -477,10 +483,10 @@ def get_parents_of_terms(self, phenotype_entry: dict, steps: int) -> dict:
return phenotype_entry
for _i in range(steps):
parents = self.ontology.hierarchical_parents(term_id)
parent = self.secret_rand.choice(parents)
if not parents:
warnings.warn(f"No parents found for term {term}", stacklevel=2)
return phenotype_entry
parent = self.secret_rand.choice(parents)
rels = self.ontology.entity_alias_map(parent)
term = "".join(rels[(list(rels.keys())[0])])
if (
Expand Down Expand Up @@ -607,9 +613,9 @@ def create_individual(self, onset_range: OnsetTerm = None) -> Individual:
age = None
return Individual(
id="patient1",
time_at_last_encounter=TimeElement(age=Age(iso8601duration=f"P{age}Y"))
if age is not None
else None,
time_at_last_encounter=(
TimeElement(age=Age(iso8601duration=f"P{age}Y")) if age is not None else None
),
)

def create_onset(self, phenotype_annotation_entry: dict) -> TimeElement:
Expand Down Expand Up @@ -876,9 +882,9 @@ def create_gene_genomic_interpretation(
)
return GenomicInterpretation(
subject_or_biosample_id="patient1",
interpretation_status=4
if gene_to_phenotype_entry["disease_name"].startswith("?") is False
else 0,
interpretation_status=(
4 if gene_to_phenotype_entry["disease_name"].startswith("?") is False else 0
),
gene=GeneDescriptor(
value_id=gene_identifier_updater.find_identifier(gene_symbol),
symbol=gene_symbol,
Expand Down
38 changes: 22 additions & 16 deletions tests/test_phenopacket_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -868,14 +868,17 @@ def test_remove_terms_to_be_randomised(self):
def test_alter_term_specificity_less_specific(self):
mock_return_value = 0.4
mock_steps_value = 1
with patch.object(
self.synthetic_patient_generator,
"return_less_or_more_specific",
return_value=mock_return_value,
), patch.object(
self.synthetic_patient_generator,
"get_number_of_steps_for_randomisation",
side_effect=[mock_steps_value, mock_steps_value],
with (
patch.object(
self.synthetic_patient_generator,
"return_less_or_more_specific",
return_value=mock_return_value,
),
patch.object(
self.synthetic_patient_generator,
"get_number_of_steps_for_randomisation",
side_effect=[mock_steps_value, mock_steps_value],
),
):
altered_phenotype = self.synthetic_patient_generator.alter_term_specificity(
[],
Expand Down Expand Up @@ -918,14 +921,17 @@ def test_alter_term_specificity_less_specific(self):
def test_alter_term_specificity_more_specific(self):
mock_return_value = 0.8
mock_steps_value = 1
with patch.object(
self.synthetic_patient_generator,
"return_less_or_more_specific",
return_value=mock_return_value,
), patch.object(
self.synthetic_patient_generator,
"get_number_of_steps_for_randomisation",
side_effect=[mock_steps_value, mock_steps_value],
with (
patch.object(
self.synthetic_patient_generator,
"return_less_or_more_specific",
return_value=mock_return_value,
),
patch.object(
self.synthetic_patient_generator,
"get_number_of_steps_for_randomisation",
side_effect=[mock_steps_value, mock_steps_value],
),
):
altered_phenotype = self.synthetic_patient_generator.alter_term_specificity(
[
Expand Down

0 comments on commit 7dccebb

Please sign in to comment.