Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Linting iii #235

Merged
merged 17 commits into from
Jan 9, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions alphabase/psm_reader/alphapept_reader.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import os
from pathlib import Path
from typing import Optional
mschwoer marked this conversation as resolved.
Show resolved Hide resolved

import h5py
@@ -18,7 +18,7 @@
def parse_ap(precursor):
"""Parser to parse peptide strings."""
items = precursor.split("_")
decoy = 1 if len(items) == 3 else 0
decoy = 1 if len(items) == 3 else 0 # noqa: PLR2004 magic value
modseq = items[0]
charge = items[-1]

@@ -77,7 +77,7 @@ def _load_file(self, filename):
with h5py.File(filename, "r") as _hdf:
dataset = _hdf[self.hdf_dataset]
df = pd.DataFrame({col: dataset[col] for col in dataset})
df[PsmDfCols.RAW_NAME] = os.path.basename(filename)[: -len(".ms_data.hdf")]
df[PsmDfCols.RAW_NAME] = Path(filename).name[: -len(".ms_data.hdf")]
df["precursor"] = df["precursor"].str.decode("utf-8")
# df['naked_sequence'] = df['naked_sequence'].str.decode('utf-8')
if "scan_no" in df.columns:
8 changes: 4 additions & 4 deletions alphabase/psm_reader/dia_psm_reader.py
Original file line number Diff line number Diff line change
@@ -21,7 +21,7 @@ class SpectronautReader(MaxQuantReader):

"""

def __init__(
def __init__( # noqa: PLR0913 many arguments in function definition
self,
*,
column_mapping: Optional[dict] = None,
@@ -66,7 +66,7 @@ def _load_file(self, filename):


class SwathReader(SpectronautReader):
def __init__(
def __init__( # noqa: PLR0913 many arguments in function definition
self,
*,
column_mapping: Optional[dict] = None,
@@ -90,7 +90,7 @@ def __init__(


class DiannReader(SpectronautReader):
def __init__(
def __init__( # noqa: PLR0913 many arguments in function definition
self,
*,
column_mapping: Optional[dict] = None,
@@ -144,7 +144,7 @@ class SpectronautReportReader(MaxQuantReader):

"""

def __init__(
def __init__( # noqa: PLR0913 many arguments in function definition
self,
*,
column_mapping: Optional[dict] = None,
38 changes: 19 additions & 19 deletions alphabase/psm_reader/maxquant_reader.py
Original file line number Diff line number Diff line change
@@ -18,8 +18,8 @@
warnings.filterwarnings("always")

mod_to_unimod_dict = {}
for mod_name, unimod_id in MOD_DF[["mod_name", "unimod_id"]].values:
unimod_id = int(unimod_id)
for mod_name, unimod_id_ in MOD_DF[["mod_name", "unimod_id"]].to_numpy():
mschwoer marked this conversation as resolved.
Show resolved Hide resolved
unimod_id = int(unimod_id_)
if unimod_id in (-1, "-1"):
continue
if mod_name[-2] == "@":
@@ -81,14 +81,14 @@ def parse_mod_seq(
0 for N-term; -1 for C-term; 1 to N for normal modifications.

"""
PeptideModSeq = modseq
peptide_mod_seq = modseq
underscore_for_ncterm = modseq[0] == "_"
mod_list = []
site_list = []
site = PeptideModSeq.find(mod_sep[0])
site = peptide_mod_seq.find(mod_sep[0])
while site != -1:
site_end = PeptideModSeq.find(mod_sep[1], site + 1) + 1
if site_end < len(PeptideModSeq) and PeptideModSeq[site_end] == mod_sep[1]:
site_end = peptide_mod_seq.find(mod_sep[1], site + 1) + 1
if site_end < len(peptide_mod_seq) and peptide_mod_seq[site_end] == mod_sep[1]:
site_end += 1
if underscore_for_ncterm:
site_list.append(site - 1)
@@ -97,42 +97,42 @@ def parse_mod_seq(
start_mod = site
if start_mod > 0:
start_mod -= 1
mod_list.append(PeptideModSeq[start_mod:site_end])
PeptideModSeq = PeptideModSeq[:site] + PeptideModSeq[site_end:]
site = PeptideModSeq.find(mod_sep[0], site)
mod_list.append(peptide_mod_seq[start_mod:site_end])
peptide_mod_seq = peptide_mod_seq[:site] + peptide_mod_seq[site_end:]
site = peptide_mod_seq.find(mod_sep[0], site)

# patch for phos. How many other modification formats does MQ have?
site = PeptideModSeq.find("p")
site = peptide_mod_seq.find("p")
while site != -1:
mod_list.append(PeptideModSeq[site : site + 2])
mod_list.append(peptide_mod_seq[site : site + 2])
site_list = [i - 1 if i > site else i for i in site_list]
if underscore_for_ncterm:
site_list.append(site)
else:
site_list.append(site + 1)
PeptideModSeq = PeptideModSeq[:site] + PeptideModSeq[site + 1 :]
site = PeptideModSeq.find("p", site)
peptide_mod_seq = peptide_mod_seq[:site] + peptide_mod_seq[site + 1 :]
site = peptide_mod_seq.find("p", site)

if fixed_C57:
site = PeptideModSeq.find("C")
site = peptide_mod_seq.find("C")
while site != -1:
if underscore_for_ncterm:
site_list.append(site)
else:
site_list.append(site + 1)
mod_list.append("C" + "Carbamidomethyl (C)".join(mod_sep))
site = PeptideModSeq.find("C", site + 1)
sequence = PeptideModSeq.strip("_")
nAA = len(sequence)
site = peptide_mod_seq.find("C", site + 1)
sequence = peptide_mod_seq.strip("_")
n_aa = len(sequence)
return (
sequence,
";".join(mod_list),
";".join([str(i) if i <= nAA else "-1" for i in site_list]),
";".join([str(i) if i <= n_aa else "-1" for i in site_list]),
)


class MaxQuantReader(PSMReaderBase):
def __init__(
def __init__( # noqa: PLR0913 many arguments in function definition
self,
*,
column_mapping: Optional[dict] = None,
8 changes: 4 additions & 4 deletions alphabase/psm_reader/msfragger_reader.py
Original file line number Diff line number Diff line change
@@ -23,7 +23,7 @@ def _is_fragger_decoy(proteins):
mod_mass_tol = psm_reader_yaml["msfragger_pepxml"]["mod_mass_tol"]


def _get_mods_from_masses(sequence, msf_aa_mods):
def _get_mods_from_masses(sequence, msf_aa_mods): # noqa: PLR0912, C901 many branches, too complex TODO: refactor
mods = []
mod_sites = []
aa_mass_diffs = []
@@ -78,7 +78,7 @@ def _get_mods_from_masses(sequence, msf_aa_mods):
)


class MSFragger_PSM_TSV_Reader(PSMReaderBase):
class MSFragger_PSM_TSV_Reader(PSMReaderBase): # noqa: N801 name should use CapWords convention TODO: refactor
def __init__(
self,
*,
@@ -93,7 +93,7 @@ def __init__(


class MSFraggerPepXML(PSMReaderBase):
def __init__(
def __init__( # noqa: PLR0913 many arguments in function definition
self,
*,
column_mapping: Optional[dict] = None,
@@ -129,7 +129,7 @@ def _load_file(self, filename):
msf_df[PsmDfCols.RAW_NAME] = (
msf_df["spectrum"].str.split(".").apply(lambda x: x[0])
)
msf_df["to_remove"] = 0 # TODO revisit
msf_df["to_remove"] = 0 # TODO: revisit
self.column_mapping[PsmDfCols.TO_REMOVE] = "to_remove"
return msf_df

56 changes: 30 additions & 26 deletions alphabase/psm_reader/pfind_reader.py
Original file line number Diff line number Diff line change
@@ -12,7 +12,7 @@
)


def convert_one_pFind_mod(mod):
def _convert_one_pfind_mod(mod: str) -> Optional[str]: # noqa: C901 too complex (11 > 10) TODO: refactor
if mod[-1] == ")":
mod = mod[: (mod.find("(") - 1)]
idx = mod.rfind("[")
@@ -22,40 +22,44 @@ def convert_one_pFind_mod(mod):
idx = mod.rfind("[")
name = mod[:idx]
site = mod[(idx + 1) : -1]

if len(site) == 1:
return name + "@" + site
if site == "AnyN-term":
return name + "@" + "Any_N-term"
if site == "ProteinN-term":
return name + "@" + "Protein_N-term"
if site.startswith("AnyN-term"):
return name + "@" + site[-1] + "^Any_N-term"
if site.startswith("ProteinN-term"):
return name + "@" + site[-1] + "^Protein_N-term"
if site == "AnyC-term":
return name + "@" + "Any_C-term"
if site == "ProteinC-term":
return name + "@" + "Protein_C-term"
if site.startswith("AnyC-term"):
return name + "@" + site[-1] + "^Any_C-term"
if site.startswith("ProteinC-term"):
return name + "@" + site[-1] + "^Protein_C-term"
return None


def translate_pFind_mod(mod_str):
return_value = name + "@" + site
elif site == "AnyN-term":
return_value = name + "@" + "Any_N-term"
elif site == "ProteinN-term":
return_value = name + "@" + "Protein_N-term"
elif site.startswith("AnyN-term"):
return_value = name + "@" + site[-1] + "^Any_N-term"
elif site.startswith("ProteinN-term"):
return_value = name + "@" + site[-1] + "^Protein_N-term"
elif site == "AnyC-term":
return_value = name + "@" + "Any_C-term"
elif site == "ProteinC-term":
return_value = name + "@" + "Protein_C-term"
elif site.startswith("AnyC-term"):
return_value = name + "@" + site[-1] + "^Any_C-term"
elif site.startswith("ProteinC-term"):
return_value = name + "@" + site[-1] + "^Protein_C-term"
else:
return_value = None

return return_value


def translate_pFind_mod(mod_str): # noqa: N802 name `translate_pFind_mod` should be lowercase TODO: used by peptdeep
if not mod_str:
return ""
ret_mods = []
for mod in mod_str.split(";"):
mod = convert_one_pFind_mod(mod)
for mod_ in mod_str.split(";"):
mod = _convert_one_pfind_mod(mod_)
if not mod or mod not in ap_mod.MOD_INFO_DICT:
return pd.NA
ret_mods.append(mod)
return ";".join(ret_mods)


def get_pFind_mods(pfind_mod_str):
def get_pFind_mods(pfind_mod_str): # noqa: N802 name `get_pFind_mods` should be lowercase TODO: used by peptdeep
pfind_mod_str = pfind_mod_str.strip(";")
if not pfind_mod_str:
return "", ""
@@ -84,7 +88,7 @@ def parse_pfind_protein(protein, keep_reverse=True):
)


class pFindReader(PSMReaderBase):
class pFindReader(PSMReaderBase): # noqa: N801 name `pFindReader` should use CapWords convention TODO: used by peptdeep, alpharaw
def __init__(
self,
*,
8 changes: 3 additions & 5 deletions alphabase/psm_reader/psm_reader.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import copy
import os
import warnings
from pathlib import Path
from typing import NoReturn, Optional

import numpy as np
@@ -77,7 +77,7 @@ def _keep_modifications(mod_str: str, mod_set: set) -> str:


#: See `psm_reader.yaml <https://github.com/MannLabs/alphabase/blob/main/alphabase/constants/const_files/psm_reader.yaml>`_
psm_reader_yaml = load_yaml(os.path.join(CONST_FILE_FOLDER, "psm_reader.yaml"))
psm_reader_yaml = load_yaml(Path(CONST_FILE_FOLDER) / "psm_reader.yaml")


class PSMReaderBase:
@@ -268,9 +268,7 @@ def load(self, _file) -> pd.DataFrame:
return self.import_file(_file)

def import_files(self, file_list: list):
df_list = []
for _file in file_list:
df_list.append(self.import_file(_file))
df_list = [self.import_file(file) for file in file_list]
self._psm_df = pd.concat(df_list, ignore_index=True)
return self._psm_df

Loading
Loading