From 85c243c9efbec4c7e78c88adf99d674affaeba99 Mon Sep 17 00:00:00 2001 From: mschwoerer <82171591+mschwoer@users.noreply.github.com> Date: Fri, 15 Nov 2024 16:06:37 +0100 Subject: [PATCH 1/4] fix some docstrings --- alphabase/psm_reader/dia_psm_reader.py | 4 +- alphabase/psm_reader/keys.py | 1 + alphabase/psm_reader/maxquant_reader.py | 7 +++- alphabase/psm_reader/psm_reader.py | 56 +++++++++++++++---------- alphabase/psm_reader/sage_reader.py | 9 ++++ alphabase/spectral_library/reader.py | 15 +++---- ruff-lint-psm-readers.toml | 4 +- 7 files changed, 60 insertions(+), 36 deletions(-) diff --git a/alphabase/psm_reader/dia_psm_reader.py b/alphabase/psm_reader/dia_psm_reader.py index 09dace3f..71da89c2 100644 --- a/alphabase/psm_reader/dia_psm_reader.py +++ b/alphabase/psm_reader/dia_psm_reader.py @@ -107,9 +107,7 @@ def __init__( # noqa: PLR0913 many arguments in function definition rt_unit: str = "minute", **kwargs, ): - """Also similar to `MaxQuantReader`, - but different in column_mapping and modification_mapping. 
- """ + """Similar to `SpectronautReader` but different in column_mapping and modification_mapping.""" super().__init__( column_mapping=column_mapping, modification_mapping=modification_mapping, diff --git a/alphabase/psm_reader/keys.py b/alphabase/psm_reader/keys.py index cee93ea8..388afeb8 100644 --- a/alphabase/psm_reader/keys.py +++ b/alphabase/psm_reader/keys.py @@ -5,6 +5,7 @@ class ConstantsClass(type): """A metaclass for classes that should only contain string constants.""" def __setattr__(cls, name: Any, value: Any) -> NoReturn: # noqa: ANN401 + """Raise an error when trying to set an attribute.""" raise TypeError("Constants class cannot be modified") def get_values(cls) -> List[str]: diff --git a/alphabase/psm_reader/maxquant_reader.py b/alphabase/psm_reader/maxquant_reader.py index 298668ce..a4b7bac6 100644 --- a/alphabase/psm_reader/maxquant_reader.py +++ b/alphabase/psm_reader/maxquant_reader.py @@ -66,9 +66,9 @@ def parse_mod_seq( separator to indicate the modification section. Defaults to '()' - fixed_C : bool + fixed_C57 : bool If Carbamidomethyl@C is a fixed modification - and not displayed in the sequence. Defaults to True for MaxQuant. + and not displayed in the sequence. Defaults to True. Returns ------- @@ -172,6 +172,9 @@ def __init__( # noqa: PLR0913 many arguments in function definition The columns to find modified sequences, by default ['Modified sequence'] + **kwargs : dict + deprecated + """ if mod_seq_columns is None: mod_seq_columns = [ diff --git a/alphabase/psm_reader/psm_reader.py b/alphabase/psm_reader/psm_reader.py index 2c2becaf..9b4dc97e 100644 --- a/alphabase/psm_reader/psm_reader.py +++ b/alphabase/psm_reader/psm_reader.py @@ -15,18 +15,17 @@ def translate_other_modification(mod_str: str, mod_dict: dict) -> str: - """Translate modifications of `mod_str` to the AlphaBase - format mapped by mod_dict. + """Translate modifications of `mod_str` to the AlphaBase format mapped by mod_dict. 
Parameters ---------- - mod_str : str - mod list in str format, seperated by ';', - e.g. ModA;ModB - mod_dict : dict - translate mod dict from others to AlphaBase, - e.g. for pFind, key=['Phospho[S]','Oxidation[M]'], - value=['Phospho@S','Oxidation@M'] + mod_str : str + mod list in str format, separated by ';', + e.g. ModA;ModB + mod_dict : dict + translate mod dict from others to AlphaBase, + e.g. for pFind, key=['Phospho[S]','Oxidation[M]'], + value=['Phospho@S','Oxidation@M'] Returns ------- @@ -91,7 +90,9 @@ def __init__( rt_unit: str = "minute", **kwargs, ): - """The Base class for all PSMReaders. The key of the sub-classes for different + """The Base class for all PSMReaders. + + The key of the sub-classes for different search engine format is to re-define `column_mapping` and `modification_mapping`. Parameters @@ -112,6 +113,7 @@ def __init__( } ``` Defaults to None. + modification_mapping : dict, optional A dict that maps alphabase's modifications to other engine's. If it is None, this dict will be init by @@ -126,12 +128,21 @@ def __init__( } ``` Defaults to None. + fdr : float, optional FDR level to keep PSMs. Defaults to 0.01. + keep_decoy : bool, optional If keep decoy PSMs in self.psm_df. - Defautls to False. + Defaults to False. + + rt_unit : str, optional + The unit of RT in the search engine result. + Defaults to 'minute'. + + **kwargs: dict + deprecated Attributes ---------- @@ -140,10 +151,8 @@ def __init__( modification_mapping : dict Dict structure same as modification_mapping in Args. We must use self.set_modification_mapping(new_mapping) to update it. - _psm_df : pd.DataFrame - the PSM DataFrame after loading from search engines. psm_df : pd.DataFrame - the getter of self._psm_df + the PSM DataFrame after loading from search engines. keep_fdr : float The only PSMs with FDR<=keep_fdr were returned in self._psm_df. 
keep_decoy : bool @@ -280,8 +289,9 @@ def import_files(self, file_list: List[str]) -> pd.DataFrame: return self._psm_df def import_file(self, _file: str) -> pd.DataFrame: - """This is the main entry function of PSM readers, - it imports the file with following steps: + """Main entry function of PSM readers. + + Imports a file with following steps: ``` origin_df = self._load_file(_file) self._translate_columns(origin_df) @@ -364,6 +374,7 @@ def normalize_rt_by_raw_name(self) -> None: def _load_file(self, filename: str) -> pd.DataFrame: """Load original dataframe from PSM filename. + Different search engines may store PSMs in different ways: tsv, csv, HDF, XML, ... @@ -399,8 +410,7 @@ def _find_mapped_columns(self, origin_df: pd.DataFrame) -> Dict[str, str]: return mapped_columns def _translate_columns(self, origin_df: pd.DataFrame) -> None: - """Translate the dataframe from other search engines - to AlphaBase format. + """Translate the dataframe from other search engines to AlphaBase format. Parameters ---------- @@ -426,6 +436,7 @@ def _translate_columns(self, origin_df: pd.DataFrame) -> None: def _transform_table(self) -> None: """Transform the dataframe format if needed. + Usually only needed in combination with spectral libraries. Parameters @@ -442,7 +453,8 @@ def _transform_table(self) -> None: def _load_modifications(self, origin_df: pd.DataFrame) -> NoReturn: """Read modification information from 'origin_df'. - Some of search engines use modified_sequence, some of them + + Some search engines use modified_sequence, some of them use additional columns to store modifications and the sites. Parameters @@ -483,9 +495,9 @@ def _translate_modifications(self) -> None: ) def _post_process(self) -> None: - """Set 'nAA' columns, remove unknown modifications - and perform other post processings, - e.g. get 'rt_norm', remove decoys, filter FDR... + """Set 'nAA' columns, remove unknown modifications and perform other post processings. + + E.g. 
get 'rt_norm', remove decoys, filter FDR... """ self._psm_df[PsmDfCols.NAA] = self._psm_df[PsmDfCols.SEQUENCE].str.len() diff --git a/alphabase/psm_reader/sage_reader.py b/alphabase/psm_reader/sage_reader.py index 090d0845..7f1dfcb0 100644 --- a/alphabase/psm_reader/sage_reader.py +++ b/alphabase/psm_reader/sage_reader.py @@ -25,6 +25,7 @@ def __init__( mp_process_num: int = 10, ): """Translate Sage style modifications to alphabase style modifications. + A modified sequence like VM[+15.9949]QENSSSFSDLSER will be translated to mods: Oxidation@M, mod_sites: 2. By default, the translation is done by matching the observed mass and location to the UniMod database. If a custom translation dataframe is provided, the translation will be done based on the custom translation dataframe first. @@ -57,6 +58,7 @@ def __init__( def __call__(self, psm_df: pd.DataFrame) -> pd.DataFrame: """Translate modifications in the PSMs to alphabase style modifications. + 1. Discover all modifications in the PSMs. 2. Annotate modifications from custom translation df, if provided. 3. Annotate all remaining modifications from UniMod. @@ -107,6 +109,7 @@ def _annotate_from_custom_translation( self, discovered_modifications_df: pd.DataFrame, translation_df: pd.DataFrame ) -> Tuple[pd.DataFrame, pd.DataFrame]: """Annotate modifications from custom translation df, if provided. + Discovered modifications are first matched using the custom translation dataframe. If no match is found, the modifications are returned for matching using UniMod. @@ -153,6 +156,7 @@ def _annotate_from_unimod( self, discovered_modifications_df: pd.DataFrame, translation_df: pd.DataFrame ) -> pd.DataFrame: """Annotate all remaining modifications from UniMod. + UniMod modification are used from the global MOD_DF. Parameters @@ -476,6 +480,9 @@ def _apply_translate_modifications_mp( mp_process_num : int The number of parallel processes + progress_bar : bool, optional + Whether to show a progress bar. 
Defaults to True + """ df_list = [] with mp.get_context("spawn").Pool(mp_process_num) as p: @@ -497,6 +504,7 @@ def _apply_translate_modifications_mp( def _get_annotated_mod_df() -> pd.DataFrame: """Annotates the modification dataframe for annotation of sage output. + Due to the modified sequence based notation, C-Terminal and sidechain modifications on the last AA could be confused. @@ -526,6 +534,7 @@ def _get_annotated_mod_df() -> pd.DataFrame: def _sage_spec_idx_from_scan_nr(scan_indicator_str: str) -> int: """Extract the spectrum index from the scan_nr field in Sage output. + Sage uses 1-based indexing for spectra, so we need to subtract 1 to convert to 0-based indexing. Parameters diff --git a/alphabase/spectral_library/reader.py b/alphabase/spectral_library/reader.py index 1da44991..d0c04173 100644 --- a/alphabase/spectral_library/reader.py +++ b/alphabase/spectral_library/reader.py @@ -57,6 +57,8 @@ def __init__( # noqa: PLR0913 many arguments in function definition default is 0.01 fixed_C57: bool + If true, the search engine will not show `Carbamidomethyl` in the modified sequences. + By default False mod_seq_columns: list of str List of column names in the csv file containing the modified sequence. @@ -76,8 +78,10 @@ def __init__( # noqa: PLR0913 many arguments in function definition Decoy type for the spectral library. Can be either `pseudo_reverse` or `diann` - """ + **kwargs: dict + deprecated + """ if mod_seq_columns is None: mod_seq_columns = psm_reader_yaml["library_reader_base"]["mod_seq_columns"] @@ -138,6 +142,7 @@ def _find_key_columns(self, lib_df: pd.DataFrame) -> None: def _get_fragment_intensity(self, lib_df: pd.DataFrame) -> pd.DataFrame: # noqa: PLR0912, C901 too many branches, too complex TODO: refactor """Create the self._fragment_intensity dataframe from a given spectral library. + In the process, the input dataframe is converted from long format to a precursor dataframe and returned. 
Parameters @@ -244,9 +249,7 @@ def _get_fragment_intensity(self, lib_df: pd.DataFrame) -> pd.DataFrame: # noqa return df def _load_file(self, filename: str) -> pd.DataFrame: - """Load the spectral library from a csv file. - Reimplementation of `PSMReaderBase._translate_columns`. - """ + """Load the spectral library from a csv file.""" csv_sep = self._get_table_delimiter(filename) df = pd.read_csv( @@ -281,9 +284,7 @@ def _load_file(self, filename: str) -> pd.DataFrame: def _post_process( self, ) -> None: - """Process the spectral library and create the `fragment_intensity`, `fragment_mz`dataframe. - Reimplementation of `PSMReaderBase._post_process`. - """ + """Process the spectral library and create the `fragment_intensity`, `fragment_mz` dataframe.""" # identify unknown modifications len_before = len(self._psm_df) self._psm_df = self._psm_df[~self._psm_df[PsmDfCols.MODS].isna()] diff --git a/ruff-lint-psm-readers.toml b/ruff-lint-psm-readers.toml index 3c6dc61a..eb234586 100644 --- a/ruff-lint-psm-readers.toml +++ b/ruff-lint-psm-readers.toml @@ -32,8 +32,8 @@ ignore = [ # "PD002", #pandas-use-of-inplace-argument # TODO revisit - # these still need to be resolved: - "D", +# # these still need to be resolved: + "D101", "D100", "D103", "D107", "D102", # reader-specific "FA100", # Add `from __future__ import annotations` to simplify `typing.Optional` From f291f2c3dbdc0084c8479708c0c191eec2378e48 Mon Sep 17 00:00:00 2001 From: mschwoerer <82171591+mschwoer@users.noreply.github.com> Date: Fri, 15 Nov 2024 16:09:44 +0100 Subject: [PATCH 2/4] fix some docstrings --- alphabase/psm_reader/alphapept_reader.py | 2 ++ alphabase/psm_reader/dia_psm_reader.py | 2 ++ alphabase/psm_reader/keys.py | 2 ++ alphabase/psm_reader/maxquant_reader.py | 2 ++ alphabase/psm_reader/msfragger_reader.py | 2 ++ alphabase/psm_reader/pfind_reader.py | 2 ++ alphabase/psm_reader/psm_reader.py | 2 ++ alphabase/psm_reader/sage_reader.py | 2 ++ alphabase/spectral_library/reader.py | 2 ++ 
ruff-lint-psm-readers.toml | 5 ++++- 10 files changed, 22 insertions(+), 1 deletion(-) diff --git a/alphabase/psm_reader/alphapept_reader.py b/alphabase/psm_reader/alphapept_reader.py index 6475f912..614c7c00 100644 --- a/alphabase/psm_reader/alphapept_reader.py +++ b/alphabase/psm_reader/alphapept_reader.py @@ -1,3 +1,5 @@ +"""Reader for AlphaPept's *.ms_data.hdf files.""" + from pathlib import Path from typing import Optional, Tuple diff --git a/alphabase/psm_reader/dia_psm_reader.py b/alphabase/psm_reader/dia_psm_reader.py index 71da89c2..5351c438 100644 --- a/alphabase/psm_reader/dia_psm_reader.py +++ b/alphabase/psm_reader/dia_psm_reader.py @@ -1,3 +1,5 @@ +"""Readers for Spectronaut's output library and reports, Swath data and DIANN data.""" + from typing import List, Optional import numpy as np diff --git a/alphabase/psm_reader/keys.py b/alphabase/psm_reader/keys.py index 388afeb8..3f4badba 100644 --- a/alphabase/psm_reader/keys.py +++ b/alphabase/psm_reader/keys.py @@ -1,3 +1,5 @@ +"""Constants for accessing the columns of a PSM dataframe.""" + from typing import Any, List, NoReturn diff --git a/alphabase/psm_reader/maxquant_reader.py b/alphabase/psm_reader/maxquant_reader.py index a4b7bac6..c92b4bc9 100644 --- a/alphabase/psm_reader/maxquant_reader.py +++ b/alphabase/psm_reader/maxquant_reader.py @@ -1,3 +1,5 @@ +"""Reader for MaxQuant data.""" + import copy import warnings from typing import List, Optional diff --git a/alphabase/psm_reader/msfragger_reader.py b/alphabase/psm_reader/msfragger_reader.py index 0fc68b00..70cdc01c 100644 --- a/alphabase/psm_reader/msfragger_reader.py +++ b/alphabase/psm_reader/msfragger_reader.py @@ -1,3 +1,5 @@ +"""MSFragger reader.""" + from typing import List, Optional, Tuple import numpy as np diff --git a/alphabase/psm_reader/pfind_reader.py b/alphabase/psm_reader/pfind_reader.py index 36dc0604..dcba1071 100644 --- a/alphabase/psm_reader/pfind_reader.py +++ b/alphabase/psm_reader/pfind_reader.py @@ -1,3 +1,5 @@ +"""pFind 
reader.""" + from typing import Optional, Tuple, Union import numpy as np diff --git a/alphabase/psm_reader/psm_reader.py b/alphabase/psm_reader/psm_reader.py index 9b4dc97e..58bf9fec 100644 --- a/alphabase/psm_reader/psm_reader.py +++ b/alphabase/psm_reader/psm_reader.py @@ -1,3 +1,5 @@ +"""The base class for all PSM readers and the provider for all readers.""" + import copy import warnings from pathlib import Path diff --git a/alphabase/psm_reader/sage_reader.py b/alphabase/psm_reader/sage_reader.py index 7f1dfcb0..90272a06 100644 --- a/alphabase/psm_reader/sage_reader.py +++ b/alphabase/psm_reader/sage_reader.py @@ -1,3 +1,5 @@ +"""SageReader for reading Sage output files.""" + import logging import multiprocessing as mp import re diff --git a/alphabase/spectral_library/reader.py b/alphabase/spectral_library/reader.py index d0c04173..5f50ebd3 100644 --- a/alphabase/spectral_library/reader.py +++ b/alphabase/spectral_library/reader.py @@ -1,3 +1,5 @@ +"""Module for reading spectral libraries.""" + import warnings from typing import List, Optional diff --git a/ruff-lint-psm-readers.toml b/ruff-lint-psm-readers.toml index eb234586..5d8694f5 100644 --- a/ruff-lint-psm-readers.toml +++ b/ruff-lint-psm-readers.toml @@ -33,7 +33,10 @@ ignore = [ "PD002", #pandas-use-of-inplace-argument # TODO revisit # # these still need to be resolved: - "D101", "D100", "D103", "D107", "D102", + "D101", + "D103", + "D107", + "D102", # reader-specific "FA100", # Add `from __future__ import annotations` to simplify `typing.Optional` From 9cc23f33b2d866a4078f61a0b495405f2af10fbf Mon Sep 17 00:00:00 2001 From: mschwoerer <82171591+mschwoer@users.noreply.github.com> Date: Fri, 15 Nov 2024 16:12:20 +0100 Subject: [PATCH 3/4] fix some docstrings --- alphabase/psm_reader/alphapept_reader.py | 2 ++ alphabase/psm_reader/dia_psm_reader.py | 4 ++++ alphabase/psm_reader/maxquant_reader.py | 2 ++ alphabase/psm_reader/msfragger_reader.py | 5 +++++ alphabase/psm_reader/pfind_reader.py | 3 +++ 
alphabase/psm_reader/psm_reader.py | 4 ++++ alphabase/psm_reader/sage_reader.py | 8 ++++++++ alphabase/spectral_library/reader.py | 2 ++ ruff-lint-psm-readers.toml | 2 +- 9 files changed, 31 insertions(+), 1 deletion(-) diff --git a/alphabase/psm_reader/alphapept_reader.py b/alphabase/psm_reader/alphapept_reader.py index 614c7c00..a398fe5c 100644 --- a/alphabase/psm_reader/alphapept_reader.py +++ b/alphabase/psm_reader/alphapept_reader.py @@ -50,6 +50,8 @@ def parse_ap(precursor: str) -> Tuple[str, str, str, str, int]: class AlphaPeptReader(PSMReaderBase): + """Reader for AlphaPept's *.ms_data.hdf files.""" + def __init__( self, *, diff --git a/alphabase/psm_reader/dia_psm_reader.py b/alphabase/psm_reader/dia_psm_reader.py index 5351c438..b9a2e75b 100644 --- a/alphabase/psm_reader/dia_psm_reader.py +++ b/alphabase/psm_reader/dia_psm_reader.py @@ -71,6 +71,8 @@ def _load_file(self, filename: str) -> pd.DataFrame: class SwathReader(SpectronautReader): + """Reader for SWATH or OpenSWATH library TSV/CSV.""" + def __init__( # noqa: PLR0913 many arguments in function definition self, *, @@ -98,6 +100,8 @@ def __init__( # noqa: PLR0913 many arguments in function definition class DiannReader(SpectronautReader): + """Reader for DIANN data.""" + def __init__( # noqa: PLR0913 many arguments in function definition self, *, diff --git a/alphabase/psm_reader/maxquant_reader.py b/alphabase/psm_reader/maxquant_reader.py index c92b4bc9..5ec5b3fc 100644 --- a/alphabase/psm_reader/maxquant_reader.py +++ b/alphabase/psm_reader/maxquant_reader.py @@ -134,6 +134,8 @@ def parse_mod_seq( class MaxQuantReader(PSMReaderBase): + """Reader for MaxQuant data.""" + def __init__( # noqa: PLR0913 many arguments in function definition self, *, diff --git a/alphabase/psm_reader/msfragger_reader.py b/alphabase/psm_reader/msfragger_reader.py index 70cdc01c..08306b1d 100644 --- a/alphabase/psm_reader/msfragger_reader.py +++ b/alphabase/psm_reader/msfragger_reader.py @@ -83,14 +83,19 @@ def 
_get_mods_from_masses( # noqa: PLR0912, C901 too many branches, too complex class MSFragger_PSM_TSV_Reader(PSMReaderBase): # noqa: N801 name should use CapWords convention TODO: refactor + """Reader for MSFragger's psm.tsv file.""" + def __init__( self, **kwargs, ): + """Constructor.""" raise NotImplementedError("MSFragger_PSM_TSV_Reader for psm.tsv") class MSFraggerPepXML(PSMReaderBase): + """Reader for MSFragger's pep.xml file.""" + def __init__( # noqa: PLR0913 many arguments in function definition self, *, diff --git a/alphabase/psm_reader/pfind_reader.py b/alphabase/psm_reader/pfind_reader.py index dcba1071..95704afd 100644 --- a/alphabase/psm_reader/pfind_reader.py +++ b/alphabase/psm_reader/pfind_reader.py @@ -92,6 +92,8 @@ def parse_pfind_protein(protein: str, *, keep_reverse: bool = True) -> str: class pFindReader(PSMReaderBase): # noqa: N801 name `pFindReader` should use CapWords convention TODO: used by peptdeep, alpharaw + """Reader for pFind's *.txt files.""" + def __init__( self, *, @@ -101,6 +103,7 @@ def __init__( keep_decoy: bool = False, **kwargs, ): + """Reading PSMs from pFind's *.txt.""" super().__init__( column_mapping=column_mapping, modification_mapping=modification_mapping, diff --git a/alphabase/psm_reader/psm_reader.py b/alphabase/psm_reader/psm_reader.py index 58bf9fec..a70d5039 100644 --- a/alphabase/psm_reader/psm_reader.py +++ b/alphabase/psm_reader/psm_reader.py @@ -82,6 +82,8 @@ def _keep_modifications(mod_str: str, mod_set: set) -> str: class PSMReaderBase: + """The Base class for all PSMReaders.""" + def __init__( self, *, @@ -558,6 +560,8 @@ def filter_psm_by_modifications( class PSMReaderProvider: + """A factory class to register and get readers for different PSM types.""" + def __init__(self): self.reader_dict = {} diff --git a/alphabase/psm_reader/sage_reader.py b/alphabase/psm_reader/sage_reader.py index 90272a06..87663a78 100644 --- a/alphabase/psm_reader/sage_reader.py +++ b/alphabase/psm_reader/sage_reader.py @@ -20,6 
+20,8 @@ class SageModificationTranslation: + """Translate Sage style modifications to alphabase style modifications.""" + def __init__( self, custom_translation_df: pd.DataFrame = None, @@ -560,6 +562,8 @@ def _sage_spec_idx_from_scan_nr(scan_indicator_str: str) -> int: class SageReaderBase(PSMReaderBase): + """Base class for SageReader.""" + def __init__( # noqa: PLR0913 many arguments in function definition self, *, @@ -628,6 +632,8 @@ def _translate_modifications(self) -> None: class SageReaderTSV(SageReaderBase): + """Reader for Sage output files in TSV format.""" + def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) @@ -636,6 +642,8 @@ def _load_file(self, filename: str) -> pd.DataFrame: class SageReaderParquet(SageReaderBase): + """Reader for Sage output files in parquet format.""" + def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) diff --git a/alphabase/spectral_library/reader.py b/alphabase/spectral_library/reader.py index 5f50ebd3..6134994a 100644 --- a/alphabase/spectral_library/reader.py +++ b/alphabase/spectral_library/reader.py @@ -16,6 +16,8 @@ class LibraryReaderBase(MaxQuantReader, SpecLibBase): + """Base class for reading spectral libraries.""" + def __init__( # noqa: PLR0913 many arguments in function definition self, charged_frag_types: List[str] = [ diff --git a/ruff-lint-psm-readers.toml b/ruff-lint-psm-readers.toml index 5d8694f5..a1f310c1 100644 --- a/ruff-lint-psm-readers.toml +++ b/ruff-lint-psm-readers.toml @@ -33,7 +33,7 @@ ignore = [ "PD002", #pandas-use-of-inplace-argument # TODO revisit # # these still need to be resolved: - "D101", +# "D101", "D103", "D107", "D102", From 5645032b67c5c57e4dba696ee89577da82b9c398 Mon Sep 17 00:00:00 2001 From: mschwoerer <82171591+mschwoer@users.noreply.github.com> Date: Fri, 15 Nov 2024 16:16:51 +0100 Subject: [PATCH 4/4] fix some docstrings --- alphabase/psm_reader/alphapept_reader.py | 1 + alphabase/psm_reader/dia_psm_reader.py | 3 +++ 
alphabase/psm_reader/maxquant_reader.py | 3 +++ alphabase/psm_reader/msfragger_reader.py | 1 + alphabase/psm_reader/pfind_reader.py | 4 ++++ alphabase/psm_reader/psm_reader.py | 9 +++++++++ alphabase/psm_reader/sage_reader.py | 4 ++++ ruff-lint-psm-readers.toml | 8 +------- 8 files changed, 26 insertions(+), 7 deletions(-) diff --git a/alphabase/psm_reader/alphapept_reader.py b/alphabase/psm_reader/alphapept_reader.py index a398fe5c..b3dfbd8f 100644 --- a/alphabase/psm_reader/alphapept_reader.py +++ b/alphabase/psm_reader/alphapept_reader.py @@ -109,4 +109,5 @@ def _load_modifications(self, df: pd.DataFrame) -> None: def register_readers() -> None: + """Register readers for AlphaPept's *.ms_data.hdf files.""" psm_reader_provider.register_reader("alphapept", AlphaPeptReader) diff --git a/alphabase/psm_reader/dia_psm_reader.py b/alphabase/psm_reader/dia_psm_reader.py index b9a2e75b..172e8a7b 100644 --- a/alphabase/psm_reader/dia_psm_reader.py +++ b/alphabase/psm_reader/dia_psm_reader.py @@ -35,6 +35,7 @@ def __init__( # noqa: PLR0913 many arguments in function definition rt_unit: str = "minute", **kwargs, ): + """Initialize SpectronautReader.""" if mod_seq_columns is None: mod_seq_columns = psm_reader_yaml["spectronaut"]["mod_seq_columns"] @@ -165,6 +166,7 @@ def __init__( # noqa: PLR0913 many arguments in function definition rt_unit: str = "minute", **kwargs, ): + """Initialize SpectronautReportReader.""" super().__init__( column_mapping=column_mapping, modification_mapping=modification_mapping, @@ -194,6 +196,7 @@ def _load_file(self, filename: str) -> pd.DataFrame: def register_readers() -> None: + """Register readers for Spectronaut's output library and reports, Swath data and DIANN data.""" psm_reader_provider.register_reader("spectronaut", SpectronautReader) psm_reader_provider.register_reader("speclib_tsv", SpectronautReader) psm_reader_provider.register_reader("openswath", SwathReader) diff --git a/alphabase/psm_reader/maxquant_reader.py 
b/alphabase/psm_reader/maxquant_reader.py index 5ec5b3fc..ae00441b 100644 --- a/alphabase/psm_reader/maxquant_reader.py +++ b/alphabase/psm_reader/maxquant_reader.py @@ -34,6 +34,7 @@ def replace_parentheses_with_brackets( modseq: str, ) -> str: + """Replace parentheses with brackets in the modified sequence.""" mod_depth = 0 for i, aa in enumerate(modseq): if aa == "(": @@ -212,6 +213,7 @@ def _init_modification_mapping(self) -> None: def set_modification_mapping( self, modification_mapping: Optional[dict] = None ) -> None: + """Set modification mapping.""" super().set_modification_mapping(modification_mapping) self._add_all_unimod() self._extend_mod_brackets() @@ -307,4 +309,5 @@ def _load_modifications(self, origin_df: pd.DataFrame) -> None: def register_readers() -> None: + """Register MaxQuant reader.""" psm_reader_provider.register_reader("maxquant", MaxQuantReader) diff --git a/alphabase/psm_reader/msfragger_reader.py b/alphabase/psm_reader/msfragger_reader.py index 08306b1d..efebddf5 100644 --- a/alphabase/psm_reader/msfragger_reader.py +++ b/alphabase/psm_reader/msfragger_reader.py @@ -189,6 +189,7 @@ def _post_process(self) -> None: def register_readers() -> None: + """Register MSFragger readers.""" psm_reader_provider.register_reader("msfragger_psm_tsv", MSFragger_PSM_TSV_Reader) psm_reader_provider.register_reader("msfragger", MSFragger_PSM_TSV_Reader) psm_reader_provider.register_reader("msfragger_pepxml", MSFraggerPepXML) diff --git a/alphabase/psm_reader/pfind_reader.py b/alphabase/psm_reader/pfind_reader.py index 95704afd..9517279c 100644 --- a/alphabase/psm_reader/pfind_reader.py +++ b/alphabase/psm_reader/pfind_reader.py @@ -51,6 +51,7 @@ def _convert_one_pfind_mod(mod: str) -> Optional[str]: # noqa: C901 too comple def translate_pFind_mod(mod_str: str) -> Union[str, NAType]: # noqa: N802 name `get_pFind_mods` should be lowercase TODO: used by peptdeep + """Translate pFind modification string.""" if not mod_str: return "" ret_mods = [] @@ -63,6 
+64,7 @@ def translate_pFind_mod(mod_str: str) -> Union[str, NAType]: # noqa: N802 name def get_pFind_mods(pfind_mod_str: str) -> Tuple[str, str]: # noqa: N802 name `get_pFind_mods` should be lowercase TODO: used by peptdeep + """Parse pFind modification string.""" pfind_mod_str = pfind_mod_str.strip(";") if not pfind_mod_str: return "", "" @@ -81,6 +83,7 @@ def get_pFind_mods(pfind_mod_str: str) -> Tuple[str, str]: # noqa: N802 name `g def parse_pfind_protein(protein: str, *, keep_reverse: bool = True) -> str: + """Parse pFind protein string.""" proteins = protein.strip("/").split("/") return ";".join( [ @@ -156,5 +159,6 @@ def _load_modifications(self, pfind_df: pd.DataFrame) -> None: def register_readers() -> None: + """Register pFind readers.""" psm_reader_provider.register_reader("pfind", pFindReader) psm_reader_provider.register_reader("pfind3", pFindReader) diff --git a/alphabase/psm_reader/psm_reader.py b/alphabase/psm_reader/psm_reader.py index a70d5039..65598d52 100644 --- a/alphabase/psm_reader/psm_reader.py +++ b/alphabase/psm_reader/psm_reader.py @@ -196,6 +196,7 @@ def __init__( @property def psm_df(self) -> pd.DataFrame: + """Get the PSM DataFrame.""" return self._psm_df def add_modification_mapping(self, modification_mapping: dict) -> None: @@ -237,6 +238,7 @@ def add_modification_mapping(self, modification_mapping: dict) -> None: def set_modification_mapping( self, modification_mapping: Optional[dict] = None ) -> None: + """Set the modification mapping.""" if modification_mapping is None: self._init_modification_mapping() elif isinstance(modification_mapping, str): @@ -288,6 +290,7 @@ def load(self, _file: Union[List[str], str]) -> pd.DataFrame: return self.import_file(_file) def import_files(self, file_list: List[str]) -> pd.DataFrame: + """Import multiple files.""" df_list = [self.import_file(file) for file in file_list] self._psm_df = pd.concat(df_list, ignore_index=True) return self._psm_df @@ -365,10 +368,12 @@ def _normalize_rt(self) -> None: 
).clip(0, 1) def normalize_rt_by_raw_name(self) -> None: + """Normalize RT by raw name.""" if PsmDfCols.RT not in self._psm_df.columns: return if PsmDfCols.RT_NORM not in self._psm_df.columns: self._normalize_rt() + if PsmDfCols.RAW_NAME not in self._psm_df.columns: return for _, df_group in self._psm_df.groupby(PsmDfCols.RAW_NAME): @@ -563,11 +568,13 @@ class PSMReaderProvider: """A factory class to register and get readers for different PSM types.""" def __init__(self): + """Initialize PSMReaderProvider.""" self.reader_dict = {} def register_reader( self, reader_type: str, reader_class: Type[PSMReaderBase] ) -> None: + """Register a reader by reader_type.""" self.reader_dict[reader_type.lower()] = reader_class def get_reader( @@ -580,6 +587,7 @@ def get_reader( keep_decoy: bool = False, **kwargs, ) -> PSMReaderBase: + """Get a reader by reader_type.""" return self.reader_dict[reader_type.lower()]( column_mapping=column_mapping, modification_mapping=modification_mapping, @@ -592,6 +600,7 @@ def get_reader_by_yaml( self, yaml_dict: dict, ) -> PSMReaderBase: + """Get a reader by a yaml dict.""" return self.get_reader(**copy.deepcopy(yaml_dict)) diff --git a/alphabase/psm_reader/sage_reader.py b/alphabase/psm_reader/sage_reader.py index 87663a78..5c4c8543 100644 --- a/alphabase/psm_reader/sage_reader.py +++ b/alphabase/psm_reader/sage_reader.py @@ -576,6 +576,7 @@ def __init__( # noqa: PLR0913 many arguments in function definition mp_process_num: int = 10, **kwargs, ): + """Initialize SageReaderBase.""" self.custom_translation_df = custom_translation_df self.mp_process_num = mp_process_num @@ -635,6 +636,7 @@ class SageReaderTSV(SageReaderBase): """Reader for Sage output files in TSV format.""" def __init__(self, *args, **kwargs): + """Initialize SageReaderTSV.""" super().__init__(*args, **kwargs) def _load_file(self, filename: str) -> pd.DataFrame: @@ -645,6 +647,7 @@ class SageReaderParquet(SageReaderBase): """Reader for Sage output files in parquet format.""" def 
__init__(self, *args, **kwargs): + """Initialize SageReaderParquet.""" super().__init__(*args, **kwargs) def _load_file(self, filename: str) -> pd.DataFrame: @@ -652,5 +655,6 @@ def _load_file(self, filename: str) -> pd.DataFrame: def register_readers() -> None: + """Register Sage readers.""" psm_reader_provider.register_reader("sage_tsv", SageReaderTSV) psm_reader_provider.register_reader("sage_parquet", SageReaderParquet) diff --git a/ruff-lint-psm-readers.toml b/ruff-lint-psm-readers.toml index a1f310c1..9d38be38 100644 --- a/ruff-lint-psm-readers.toml +++ b/ruff-lint-psm-readers.toml @@ -32,13 +32,7 @@ ignore = [ # "PD002", #pandas-use-of-inplace-argument # TODO revisit -# # these still need to be resolved: -# "D101", - "D103", - "D107", - "D102", - - # reader-specific + # psm_reader-specific "FA100", # Add `from __future__ import annotations` to simplify `typing.Optional` # same as pyproject.toml