Skip to content

Commit

Permalink
Merge pull request #236 from MannLabs/linting_IV
Browse files (browse the repository at this point in the history)
Linting IV
  • Loading branch information
mschwoer authored Jan 9, 2025
2 parents 4e2b51f + 68e9571 commit 9e0e1eb
Show file tree
Hide file tree
Showing 12 changed files with 147 additions and 123 deletions.
2 changes: 1 addition & 1 deletion alphabase/peptide/precursor.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ def calc_precursor_mz(precursor_df: pd.DataFrame, batch_size: int = 500000):

warnings.warn(
"`alphabase.peptide.precursor.calc_precursor_mz()` is deprecated, "
"it will be removed in alphabse>=2.0.0. "
"it will be removed in alphabase>=2.0.0. "
"Please use `alphabase.peptide.precursor.update_precursor_mz()` instead.",
FutureWarning,
)
Expand Down
10 changes: 5 additions & 5 deletions alphabase/psm_reader/alphapept_reader.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from pathlib import Path
from typing import Optional
from typing import Optional, Tuple

import h5py
import numba
Expand All @@ -15,7 +15,7 @@


@numba.njit
def parse_ap(precursor):
def parse_ap(precursor: str) -> Tuple[str, str, str, str, int]:
"""Parser to parse peptide strings."""
items = precursor.split("_")
decoy = 1 if len(items) == 3 else 0 # noqa: PLR2004 magic value
Expand Down Expand Up @@ -53,8 +53,8 @@ def __init__(
*,
column_mapping: Optional[dict] = None,
modification_mapping: Optional[dict] = None,
fdr=0.01,
keep_decoy=False,
fdr: float = 0.01,
keep_decoy: bool = False,
**kwargs,
):
"""Reading PSMs from alphapept's *.ms_data.hdf."""
Expand All @@ -73,7 +73,7 @@ def _init_column_mapping(self) -> None:
def _init_modification_mapping(self) -> None:
self.modification_mapping = psm_reader_yaml["alphapept"]["modification_mapping"]

def _load_file(self, filename):
def _load_file(self, filename: str) -> pd.DataFrame:
with h5py.File(filename, "r") as _hdf:
dataset = _hdf[self.hdf_dataset]
df = pd.DataFrame({col: dataset[col] for col in dataset})
Expand Down
52 changes: 29 additions & 23 deletions alphabase/psm_reader/dia_psm_reader.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Optional
from typing import List, Optional

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -26,13 +26,16 @@ def __init__( # noqa: PLR0913 many arguments in function definition
*,
column_mapping: Optional[dict] = None,
modification_mapping: Optional[dict] = None,
fdr=0.01,
keep_decoy=False,
fixed_C57=False,
mod_seq_columns=psm_reader_yaml["spectronaut"]["mod_seq_columns"],
rt_unit="minute",
fdr: float = 0.01,
keep_decoy: bool = False,
fixed_C57: bool = False, # noqa: N803 TODO: make this *,fixed_c57 (breaking)
mod_seq_columns: Optional[List[str]] = None,
rt_unit: str = "minute",
**kwargs,
):
if mod_seq_columns is None:
mod_seq_columns = psm_reader_yaml["spectronaut"]["mod_seq_columns"]

super().__init__(
column_mapping=column_mapping,
modification_mapping=modification_mapping,
Expand All @@ -50,7 +53,7 @@ def __init__( # noqa: PLR0913 many arguments in function definition
def _init_column_mapping(self) -> None:
self.column_mapping = psm_reader_yaml["spectronaut"]["column_mapping"]

def _load_file(self, filename):
def _load_file(self, filename: str) -> pd.DataFrame:
self.csv_sep = self._get_table_delimiter(filename)
df = pd.read_csv(filename, sep=self.csv_sep, keep_default_na=False)
self._find_mod_seq_column(df)
Expand All @@ -71,13 +74,16 @@ def __init__( # noqa: PLR0913 many arguments in function definition
*,
column_mapping: Optional[dict] = None,
modification_mapping: Optional[dict] = None,
fdr=0.01,
keep_decoy=False,
fixed_C57=False,
mod_seq_columns=psm_reader_yaml["spectronaut"]["mod_seq_columns"],
fdr: float = 0.01,
keep_decoy: bool = False,
fixed_C57: bool = False, # noqa: N803 TODO: make this *,fixed_c57 (breaking)
mod_seq_columns: Optional[List[str]] = None,
**kwargs,
):
"""SWATH or OpenSWATH library, similar to `SpectronautReader`."""
if mod_seq_columns is None:
mod_seq_columns = psm_reader_yaml["spectronaut"]["mod_seq_columns"]

super().__init__(
column_mapping=column_mapping,
modification_mapping=modification_mapping,
Expand All @@ -95,10 +101,10 @@ def __init__( # noqa: PLR0913 many arguments in function definition
*,
column_mapping: Optional[dict] = None,
modification_mapping: Optional[dict] = None,
fdr=0.01,
keep_decoy=False,
fixed_C57=False,
rt_unit="minute",
fdr: float = 0.01,
keep_decoy: bool = False,
fixed_C57: bool = False, # noqa: N803 TODO: make this *,fixed_c57 (breaking)
rt_unit: str = "minute",
**kwargs,
):
"""Also similar to `MaxQuantReader`,
Expand All @@ -120,12 +126,12 @@ def __init__( # noqa: PLR0913 many arguments in function definition
def _init_column_mapping(self) -> None:
self.column_mapping = psm_reader_yaml["diann"]["column_mapping"]

def _load_file(self, filename):
def _load_file(self, filename: str) -> pd.DataFrame:
self.csv_sep = self._get_table_delimiter(filename)
return pd.read_csv(filename, sep=self.csv_sep, keep_default_na=False)

def _post_process(self, origin_df: pd.DataFrame) -> None:
super()._post_process(origin_df)
def _post_process(self) -> None:
super()._post_process()
self._psm_df.rename(
columns={PsmDfCols.SPEC_IDX: PsmDfCols.DIANN_SPEC_INDEX}, inplace=True
)
Expand All @@ -149,10 +155,10 @@ def __init__( # noqa: PLR0913 many arguments in function definition
*,
column_mapping: Optional[dict] = None,
modification_mapping: Optional[dict] = None,
fdr=0.01,
keep_decoy=False,
fixed_C57=False,
rt_unit="minute",
fdr: float = 0.01,
keep_decoy: bool = False,
fixed_C57: bool = False, # noqa: N803 TODO: make this *,fixed_c57 (breaking)
rt_unit: str = "minute",
**kwargs,
):
super().__init__(
Expand All @@ -172,7 +178,7 @@ def __init__( # noqa: PLR0913 many arguments in function definition
def _init_column_mapping(self) -> None:
self.column_mapping = psm_reader_yaml["spectronaut_report"]["column_mapping"]

def _load_file(self, filename):
def _load_file(self, filename: str) -> pd.DataFrame:
self.mod_seq_column = "ModifiedSequence"
self.csv_sep = self._get_table_delimiter(filename)
df = pd.read_csv(filename, sep=self.csv_sep, keep_default_na=False)
Expand Down
7 changes: 5 additions & 2 deletions alphabase/psm_reader/keys.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
from typing import Any, List, NoReturn


class ConstantsClass(type):
"""A metaclass for classes that should only contain string constants."""

def __setattr__(cls, name, value):
def __setattr__(cls, name: Any, value: Any) -> NoReturn: # noqa: ANN401
raise TypeError("Constants class cannot be modified")

def get_values(cls):
def get_values(cls) -> List[str]:
"""Get all user-defined string values of the class."""
return [
value
Expand Down
25 changes: 14 additions & 11 deletions alphabase/psm_reader/maxquant_reader.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import copy
import warnings
from typing import Optional
from typing import List, Optional

import numba
import numpy as np
Expand Down Expand Up @@ -31,7 +31,7 @@
@numba.njit
def replace_parentheses_with_brackets(
modseq: str,
):
) -> str:
mod_depth = 0
for i, aa in enumerate(modseq):
if aa == "(":
Expand All @@ -53,7 +53,7 @@ def replace_parentheses_with_brackets(
def parse_mod_seq(
modseq: str,
mod_sep: str = "()",
fixed_C57: bool = True,
fixed_C57: bool = True, # noqa: FBT001, FBT002, N803 TODO: make this *,fixed_c57 (breaking)
) -> tuple:
"""Extract modifications and sites from the modified sequence (modseq).
Expand Down Expand Up @@ -137,10 +137,10 @@ def __init__( # noqa: PLR0913 many arguments in function definition
*,
column_mapping: Optional[dict] = None,
modification_mapping: Optional[dict] = None,
fdr=0.01,
keep_decoy=False,
fixed_C57=True,
mod_seq_columns=None,
fdr: float = 0.01,
keep_decoy: bool = False,
fixed_C57: bool = True, # noqa: N803 TODO: make this *,fixed_c57 (breaking)
mod_seq_columns: Optional[List[str]] = None,
**kwargs,
):
"""Reader for MaxQuant msms.txt and evidence.txt.
Expand Down Expand Up @@ -174,7 +174,10 @@ def __init__( # noqa: PLR0913 many arguments in function definition
"""
if mod_seq_columns is None:
mod_seq_columns = ["Modified sequence"]
mod_seq_columns = [
"Modified sequence"
] # TODO: why not take from psm_reader.yaml?

super().__init__(
column_mapping=column_mapping,
modification_mapping=modification_mapping,
Expand All @@ -187,7 +190,7 @@ def __init__( # noqa: PLR0913 many arguments in function definition
self._mod_seq_columns = mod_seq_columns
self.mod_seq_column = "Modified sequence"

def _find_mod_seq_column(self, df) -> None:
def _find_mod_seq_column(self, df: pd.DataFrame) -> None:
for mod_seq_col in self._mod_seq_columns:
if mod_seq_col in df.columns:
self.mod_seq_column = mod_seq_col
Expand Down Expand Up @@ -241,7 +244,7 @@ def _extend_mod_brackets(self) -> None:

self.modification_mapping[key] = list(mod_set)

def _translate_decoy(self, origin_df=None) -> None:
def _translate_decoy(self) -> None:
if PsmDfCols.DECOY in self._psm_df.columns:
self._psm_df[PsmDfCols.DECOY] = (
self._psm_df[PsmDfCols.DECOY] == "-"
Expand All @@ -250,7 +253,7 @@ def _translate_decoy(self, origin_df=None) -> None:
def _init_column_mapping(self) -> None:
self.column_mapping = psm_reader_yaml["maxquant"]["column_mapping"]

def _load_file(self, filename):
def _load_file(self, filename: str) -> pd.DataFrame:
csv_sep = self._get_table_delimiter(filename)
df = pd.read_csv(filename, sep=csv_sep, keep_default_na=False)
self._find_mod_seq_column(df)
Expand Down
34 changes: 15 additions & 19 deletions alphabase/psm_reader/msfragger_reader.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Optional
from typing import List, Optional, Tuple

import numpy as np
import pandas as pd
Expand All @@ -15,15 +15,17 @@
)


def _is_fragger_decoy(proteins):
def _is_fragger_decoy(proteins: List[str]) -> bool:
return all(prot.lower().startswith("rev_") for prot in proteins)


mass_mapped_mods = psm_reader_yaml["msfragger_pepxml"]["mass_mapped_mods"]
mod_mass_tol = psm_reader_yaml["msfragger_pepxml"]["mod_mass_tol"]


def _get_mods_from_masses(sequence, msf_aa_mods): # noqa: PLR0912, C901 many branches, too complex TODO: refactor
def _get_mods_from_masses( # noqa: PLR0912, C901 too many branches, too complex TODO: refactor
sequence: str, msf_aa_mods: List[str]
) -> Tuple[str, str, str, str]:
mods = []
mod_sites = []
aa_mass_diffs = []
Expand Down Expand Up @@ -81,12 +83,6 @@ def _get_mods_from_masses(sequence, msf_aa_mods): # noqa: PLR0912, C901 many br
class MSFragger_PSM_TSV_Reader(PSMReaderBase): # noqa: N801 name should use CapWords convention TODO: refactor
def __init__(
self,
*,
column_mapping: Optional[dict] = None,
modification_mapping: Optional[dict] = None,
fdr=0.01,
keep_decoy=False,
rt_unit="second",
**kwargs,
):
raise NotImplementedError("MSFragger_PSM_TSV_Reader for psm.tsv")
Expand All @@ -98,10 +94,10 @@ def __init__( # noqa: PLR0913 many arguments in function definition
*,
column_mapping: Optional[dict] = None,
modification_mapping: Optional[dict] = None,
fdr=0.001, # refers to E-value in the PepXML
keep_decoy=False,
rt_unit="second",
keep_unknown_aa_mass_diffs=False,
fdr: float = 0.001, # refers to E-value in the PepXML
keep_decoy: bool = False,
rt_unit: str = "second",
keep_unknown_aa_mass_diffs: bool = False,
**kwargs,
):
"""MSFragger is not fully supported as we can only access the pepxml file."""
Expand All @@ -121,7 +117,7 @@ def _init_column_mapping(self) -> None:
def _translate_modifications(self) -> None:
pass

def _load_file(self, filename):
def _load_file(self, filename: str) -> pd.DataFrame:
msf_df = pepxml.DataFrame(filename)
msf_df.fillna("", inplace=True)
if "ion_mobility" in msf_df.columns:
Expand All @@ -133,7 +129,7 @@ def _load_file(self, filename):
self.column_mapping[PsmDfCols.TO_REMOVE] = "to_remove"
return msf_df

def _translate_decoy(self, origin_df=None) -> None:
def _translate_decoy(self) -> None:
self._psm_df[PsmDfCols.DECOY] = (
self._psm_df[PsmDfCols.PROTEINS].apply(_is_fragger_decoy).astype(np.int8)
)
Expand All @@ -144,11 +140,11 @@ def _translate_decoy(self, origin_df=None) -> None:
if not self._keep_decoy:
self._psm_df[PsmDfCols.TO_REMOVE] += self._psm_df[PsmDfCols.DECOY] > 0

def _translate_score(self, origin_df=None) -> None:
def _translate_score(self) -> None:
# evalue score
self._psm_df[PsmDfCols.SCORE] = -np.log(self._psm_df[PsmDfCols.SCORE] + 1e-100)

def _load_modifications(self, msf_df) -> None:
def _load_modifications(self, msf_df: pd.DataFrame) -> None:
if len(msf_df) == 0:
self._psm_df[PsmDfCols.MODS] = ""
self._psm_df[PsmDfCols.MOD_SITES] = ""
Expand Down Expand Up @@ -176,8 +172,8 @@ def _load_modifications(self, msf_df) -> None:
inplace=True,
)

def _post_process(self, origin_df: pd.DataFrame) -> None:
super()._post_process(origin_df)
def _post_process(self) -> None:
super()._post_process()
self._psm_df = (
self._psm_df.query(f"{PsmDfCols.TO_REMOVE}==0")
.drop(columns=PsmDfCols.TO_REMOVE)
Expand Down
Loading

0 comments on commit 9e0e1eb

Please sign in to comment.