diff --git a/alphadia/cli.py b/alphadia/cli.py index 599276b5..8d3e86d1 100644 --- a/alphadia/cli.py +++ b/alphadia/cli.py @@ -16,14 +16,18 @@ import alphadia from alphadia import utils +from alphadia.constants.keys import ConfigKeys from alphadia.exceptions import CustomError from alphadia.search_plan import SearchPlan from alphadia.workflow import reporting logger = logging.getLogger() +epilog = "Parameters passed via CLI will overwrite parameters from config file (except for '--file': will be merged)." -parser = argparse.ArgumentParser(description="Search DIA experiments with alphaDIA") +parser = argparse.ArgumentParser( + description="Search DIA experiments with alphaDIA", epilog=epilog +) parser.add_argument( "--version", "-v", @@ -32,17 +36,19 @@ ) parser.add_argument( "--output", + "--output-directory", "-o", type=str, - help="Output directory", + help="Output directory.", nargs="?", default=None, ) parser.add_argument( "--file", + "--raw-path", "-f", type=str, - help="Raw data input files.", + help="Path to raw data input file. Can be passed multiple times.", action="append", default=[], ) @@ -58,21 +64,23 @@ "--regex", "-r", type=str, - help="Regex to match raw files in directory.", + help="Regex to match raw files in 'directory'.", nargs="?", default=".*", ) parser.add_argument( "--library", + "--library-path", "-l", type=str, - help="Spectral library.", + help="Path to spectral library file.", nargs="?", default=None, ) parser.add_argument( "--fasta", - help="Fasta file(s) used to generate or annotate the spectral library.", + "--fasta-path", + help="Path to fasta file used to generate or annotate the spectral library. 
Can be passed multiple times.", action="append", default=[], ) @@ -80,21 +88,22 @@ "--config", "-c", type=str, - help="Config yaml which will be used to update the default config.", + help="Path to config yaml file which will be used to update the default config.", nargs="?", default=None, ) parser.add_argument( "--config-dict", type=str, - help="Python Dict which will be used to update the default config.", + help="Python dictionary which will be used to update the default config.", nargs="?", default="{}", ) parser.add_argument( - "--quant-dir", + "--quant-dir", # TODO deprecate + "--quant-directory", type=str, - help="Directory to save the quantification results (psm & frag parquet files) to be reused in a distributed search", + help="Directory to save the quantification results (psm & frag parquet files) to be reused in a distributed search.", nargs="?", default=None, ) @@ -148,7 +157,7 @@ def _get_raw_path_list_from_args_and_config( list: a list of file paths that match the specified regex pattern. """ - raw_path_list = config.get("raw_path_list", []) + raw_path_list = config.get(ConfigKeys.RAW_PATHS, []) raw_path_list += args.file if (config_directory := config.get("directory")) is not None: @@ -172,17 +181,6 @@ def _get_raw_path_list_from_args_and_config( return raw_path_list -def _get_fasta_list_from_args_and_config( - args: argparse.Namespace, config: dict -) -> list: - """Parse fasta file list from command line arguments and config file, merging them if both are given.""" - - fasta_path_list = config.get("fasta_list", []) - fasta_path_list += args.fasta - - return fasta_path_list - - def run(*args, **kwargs): # parse command line arguments args, unknown = parser.parse_known_args() @@ -203,18 +201,23 @@ def run(*args, **kwargs): ) if output_directory is None: parser.print_help() - print("No output directory specified.") + + print("No output directory specified. 
Please do so via CL-argument or config.") return reporting.init_logging(output_directory) - quant_dir = _get_from_args_or_config( - args, user_config, args_key="quant_dir", config_key="quant_dir" - ) - raw_path_list = _get_raw_path_list_from_args_and_config(args, user_config) - library_path = _get_from_args_or_config( - args, user_config, args_key="library", config_key="library" - ) - fasta_path_list = _get_fasta_list_from_args_and_config(args, user_config) + # TODO revisit the multiple sources of raw files (cli, config, regex, ...) + raw_paths = _get_raw_path_list_from_args_and_config(args, user_config) + cli_params_config = { + **({ConfigKeys.RAW_PATHS: raw_paths} if raw_paths else {}), + **({ConfigKeys.LIBRARY_PATH: args.library} if args.library is not None else {}), + **({ConfigKeys.FASTA_PATHS: args.fasta} if args.fasta else {}), + **( + {ConfigKeys.QUANT_DIRECTORY: args.quant_dir} + if args.quant_dir is not None + else {} + ), + } # TODO rename all output_directory, output_folder => output_path, quant_dir->quant_path (except cli parameter) @@ -222,14 +225,7 @@ def run(*args, **kwargs): matplotlib.use("Agg") try: - SearchPlan( - output_directory, - raw_path_list=raw_path_list, - library_path=library_path, - fasta_path_list=fasta_path_list, - config=user_config, - quant_path=quant_dir, - ).run_plan() + SearchPlan(output_directory, user_config, cli_params_config).run_plan() except Exception as e: if isinstance(e, CustomError): diff --git a/alphadia/constants/default.yaml index 7b44c78b..2276518e 100644 --- a/alphadia/constants/default.yaml +++ b/alphadia/constants/default.yaml @@ -1,10 +1,12 @@ # configuration for the extraction plan version: 1 -library: null +# These values are typically filled via CLI parameters output_directory: null -raw_path_list: [] -output: null +library_path: null +raw_paths: [] +fasta_paths: [] +quant_directory: null general: thread_count: 10 @@ -63,31 +65,31 @@ library_prediction: # composition: 
H(-2)2H(8)13C(2) custom_modifications: # Dimethyl @K channel decoy - Dimethyl:d12@K: + - name: Dimethyl:d12@K composition: H(-2)2H(8)13C(2) # Dimethyl @Any_N-term channel decoy - Dimethyl:d12@Any_N-term: + - name: Dimethyl:d12@Any_N-term composition: H(-2)2H(8)13C(2) # Dimethyl @Protein_N-term channel decoy - Dimethyl:d12@Protein_N-term: + - name: Dimethyl:d12@Protein_N-term composition: H(-2)2H(8)13C(2) # mTRAQ @K channel decoy - mTRAQ:d12@K: + - name: mTRAQ:d12@K composition: H(12)C(1)13C(10)15N(2)O(1) # mTRAQ @Any_N-term channel decoy - mTRAQ:d12@Any_N-term: + - name: mTRAQ:d12@Any_N-term composition: H(12)C(1)13C(14)15N(2)O(1) # mTRAQ @Protein_N-term channel decoy - mTRAQ:d12@Protein_N-term: + - name: mTRAQ:d12@Protein_N-term composition: H(12)C(1)13C(14)15N(2)O(1) # SILAC heavy @K channel decoy - Label:13C(12)@K: + - name: Label:13C(12)@K composition: C(12) search: diff --git a/alphadia/constants/keys.py b/alphadia/constants/keys.py index 1f786966..1b3f04da 100644 --- a/alphadia/constants/keys.py +++ b/alphadia/constants/keys.py @@ -22,3 +22,13 @@ class StatOutputKeys(metaclass=ConstantsClass): MS2_ERROR = "ms2_error" RT_ERROR = "rt_error" MOBILITY_ERROR = "mobility_error" + + +class ConfigKeys(metaclass=ConstantsClass): + """String constants for accessing the config.""" + + OUTPUT_DIRECTORY = "output_directory" + LIBRARY_PATH = "library_path" + RAW_PATHS = "raw_paths" + FASTA_PATHS = "fasta_paths" + QUANT_DIRECTORY = "quant_directory" diff --git a/alphadia/constants/multistep.yaml b/alphadia/constants/multistep.yaml index 1bfbeb38..c1c3875d 100644 --- a/alphadia/constants/multistep.yaml +++ b/alphadia/constants/multistep.yaml @@ -12,6 +12,7 @@ transfer: enabled: True # override settings that could have been set by the user: + quant_directory: null general: save_library: False reuse_quant: False @@ -23,6 +24,7 @@ library: # predict: True # override settings that could have been set by the user: + quant_directory: null general: save_library: False reuse_quant: 
False @@ -37,6 +39,7 @@ mbr: search: target_num_candidates: 5 # override settings that could have been set by the user: + quant_directory: null general: reuse_quant: False library_prediction: diff --git a/alphadia/exceptions.py b/alphadia/exceptions.py index 94efbc7e..e8f93131 100644 --- a/alphadia/exceptions.py +++ b/alphadia/exceptions.py @@ -20,6 +20,9 @@ def msg(self): def detail_msg(self): return self._detail_msg + def __str__(self): + return f"{self._error_code}: {self._msg} {self._detail_msg}" + class BusinessError(CustomError): """Custom error class for 'business' errors. @@ -59,3 +62,33 @@ class NotDiaDataError(BusinessError): _error_code = "NOT_DIA_DATA" _msg = "Could not find cycle shape. Please check if this is a valid DIA data set." + + +class ConfigError(BusinessError): + """Raise when something is wrong with the provided configuration.""" + + _error_code = "CONFIG_ERROR" + + _msg = "Malformed configuration file(s)." + _key = "" + _config_name = "" + + def __init__(self, key: str, config_name: str): + self._key = key + self._config_name = config_name + + +class KeyAddedConfigError(ConfigError): + """Raise when a key should be added to a config.""" + + def __init__(self, key: str, config_name: str): + super().__init__(key, config_name) + self._detail_msg = f"Defining new keys is not allowed when updating a config: key='{self._key}', config_name='{self._config_name}'" + + +class TypeMismatchConfigError(ConfigError): + """Raise when the type of a value does not match the default type.""" + + def __init__(self, key: str, config_name: str, extra_msg: str): + super().__init__(key, config_name) + self._detail_msg = f"Types of values must match default config: key='{self._key}', config_name='{self._config_name}', types='{extra_msg}'" diff --git a/alphadia/libtransform.py b/alphadia/libtransform.py index 805b4073..08a475f4 100644 --- a/alphadia/libtransform.py +++ b/alphadia/libtransform.py @@ -40,10 +40,10 @@ def __call__(self, *args: typing.Any) -> 
typing.Any: return self.forward(*args) else: logger.critical( - f"Input {input} failed validation for {self.__class__.__name__}" + f"Input {args} failed validation for {self.__class__.__name__}" ) raise ValueError( - f"Input {input} failed validation for {self.__class__.__name__}" + f"Input {args} failed validation for {self.__class__.__name__}" ) def validate(self, *args: typing.Any) -> bool: diff --git a/alphadia/search_plan.py b/alphadia/search_plan.py index 2d065d8a..ef8dc5cb 100644 --- a/alphadia/search_plan.py +++ b/alphadia/search_plan.py @@ -8,7 +8,7 @@ import pandas as pd import yaml -from alphadia.constants.keys import StatOutputKeys +from alphadia.constants.keys import ConfigKeys, StatOutputKeys from alphadia.outputtransform import ( SearchPlanOutput, ) @@ -34,11 +34,8 @@ class SearchPlan: def __init__( self, output_directory: str, - raw_path_list: list[str], - library_path: str | None, - fasta_path_list: list[str], - config: dict, - quant_dir: str | None, + config: dict | None = None, + cli_params_config: dict | None = None, ): """Initialize search plan. @@ -48,40 +45,27 @@ def __init__( Parameters ---------- - config: - User configuration. output_directory: Output directory. - library_path: - Library path. - fasta_path_list: - List of fasta paths. - quant_dir: - Quantification directory holding previous results. - raw_path_list - List of raw paths. 
+ config: + Configuration provided by user (loaded from file and/or dictionary) + cli_params_config + config-like dictionary of parameters directly provided by CLI """ - - self._user_config: dict = config - self._output_dir: Path = Path(output_directory) reporting.init_logging(output_directory) - self._library_path: Path | None = ( - None if library_path is None else Path(library_path) + self._output_dir: Path = Path(output_directory) + self._user_config: dict = config if config is not None else {} + self._cli_params_config: dict = ( + cli_params_config if cli_params_config is not None else {} ) - self._fasta_path_list: list[str] = fasta_path_list - self._raw_path_list: list[str] = raw_path_list # these are the default paths if the library step is the only one self._library_step_output_dir: Path = self._output_dir - self._library_quant_dir: Path | None = ( - None if quant_dir is None else Path(quant_dir) - ) # multistep search: self._multistep_config: dict | None = None self._transfer_step_output_dir: Path | None = None - self._mbr_step_library_path: Path | None = None multistep_search_config = self._user_config.get("multistep_search", {}) self._transfer_step_enabled = multistep_search_config.get( @@ -107,16 +91,11 @@ def _update_paths(self) -> None: # in case transfer step is enabled, we need to adjust the library step settings if self._transfer_step_enabled: - self._library_quant_dir = None self._transfer_step_output_dir = self._output_dir / TRANSFER_STEP_NAME # in case mbr step is enabled, we need to adjust the library step settings if self._mbr_step_enabled: - self._library_quant_dir = None self._library_step_output_dir = self._output_dir / LIBRARY_STEP_NAME - self._mbr_step_library_path = ( - self._library_step_output_dir / f"{SearchPlanOutput.LIBRARY_OUTPUT}.hdf" - ) def run_plan(self): """Run the search plan. 
@@ -127,8 +106,6 @@ def run_plan(self): print_logo() print_environment() - # TODO add some logging here on the directories (if they are not logged elsewhere) - extra_config_for_library_step = ( self._multistep_config[LIBRARY_STEP_NAME] if self._transfer_step_enabled or self._mbr_step_enabled @@ -142,7 +119,6 @@ def run_plan(self): # output: DL model self.run_step( self._transfer_step_output_dir, - self._library_path, self._multistep_config[TRANSFER_STEP_NAME], ) @@ -170,9 +146,7 @@ def run_plan(self): logger.info(f"Running step '{LIBRARY_STEP_NAME}'") self.run_step( self._library_step_output_dir, - self._library_path, extra_config_for_library_step, - self._library_quant_dir, ) if self._mbr_step_enabled: @@ -183,31 +157,31 @@ def run_plan(self): self._library_step_output_dir ) + mbr_step_library_path = str( + self._library_step_output_dir / f"{SearchPlanOutput.LIBRARY_OUTPUT}.hdf" + ) + mbr_step_extra_config = ( - self._multistep_config[MBR_STEP_NAME] | optimized_values_config + self._multistep_config[MBR_STEP_NAME] + | optimized_values_config + | {ConfigKeys.LIBRARY_PATH: mbr_step_library_path} ) self.run_step( self._output_dir, - self._mbr_step_library_path, mbr_step_extra_config, ) def run_step( self, output_directory: Path, - library_path: Path | None, extra_config: dict, - quant_dir: Path | None = None, ) -> None: """Run a single step of the search plan.""" step = SearchStep( output_folder=str(output_directory), - raw_path_list=self._raw_path_list, - library_path=None if library_path is None else str(library_path), - fasta_path_list=self._fasta_path_list, config=self._user_config, + cli_config=self._cli_params_config, extra_config=extra_config, - quant_path=None if quant_dir is None else str(quant_dir), ) step.run() diff --git a/alphadia/search_step.py b/alphadia/search_step.py index 959b7c5e..c109ae0d 100644 --- a/alphadia/search_step.py +++ b/alphadia/search_step.py @@ -9,10 +9,16 @@ from alphabase.spectral_library.flat import SpecLibFlat from alphadia import 
libtransform, outputtransform +from alphadia.constants.keys import ConfigKeys from alphadia.exceptions import CustomError from alphadia.workflow import peptidecentric, reporting from alphadia.workflow.base import WorkflowBase -from alphadia.workflow.config import MULTISTEP_SEARCH, USER_DEFINED, Config +from alphadia.workflow.config import ( + MULTISTEP_SEARCH, + USER_DEFINED, + USER_DEFINED_CLI_PARAM, + Config, +) SPECLIB_FILE_NAME = "speclib.hdf" @@ -23,13 +29,10 @@ class SearchStep: def __init__( self, output_folder: str, - raw_path_list: list[str] | None = None, - library_path: str | None = None, - fasta_path_list: list[str] | None = None, config: dict | Config | None = None, - config_base_path: str | None = None, + cli_config: dict | None = None, extra_config: dict | None = None, - quant_path: str | None = None, + config_base_path: str | None = None, ) -> None: """Highest level class to plan a DIA search step. @@ -42,53 +45,34 @@ def __init__( output_folder : str output folder to save the results - raw_path_list : list - list of input file locations - - library_path : str, optional - path to the spectral library file. If not provided, the library is built from fasta files - - fasta_path_list : list, optional - list of fasta file locations to build the library from - - config_base_path : str, optional - user-provided yaml file containing the default config. - config : dict, optional - user-provided dict to update the default config. Can be used for debugging purposes etc. + values to update the default config. Overrides values in `default.yaml` and `config_base_path`. - extra_config : dict, optional - dict to update the final config. Used for multistep searches. + cli_config : dict, optional + additional config values (parameters from the command line). Overrides values in `config`. - quant_path : str, optional - path to directory to save the quantification results (psm & frag parquet files). 
If not provided, the results are saved in the usual workflow folder + extra_config : dict, optional + additional config values (parameters to orchestrate multistep searches). Overrides values in `config` and `cli_config`. + config_base_path : str, optional + absolute path to yaml file containing additional config values. Overrides values in `default.yaml`. """ - if config is None: - config = {} - if fasta_path_list is None: - fasta_path_list = [] - if raw_path_list is None: - raw_path_list = [] - self.output_folder = output_folder os.makedirs(output_folder, exist_ok=True) reporting.init_logging(self.output_folder) - self.raw_path_list = raw_path_list - self.library_path = library_path - self.fasta_path_list = fasta_path_list - self.quant_path = quant_path - - self.spectral_library = None - self._config = self._init_config( - config, extra_config, output_folder, config_base_path + config, cli_config, extra_config, output_folder, config_base_path ) - logger.setLevel(logging.getLevelName(self._config["general"]["log_level"])) + self.raw_path_list = self._config[ConfigKeys.RAW_PATHS] + self.library_path = self._config[ConfigKeys.LIBRARY_PATH] + self.fasta_path_list = self._config[ConfigKeys.FASTA_PATHS] + + self.spectral_library = None + self.init_alphabase() self.load_library() @@ -96,48 +80,64 @@ def __init__( self._log_inputs() + @staticmethod def _init_config( - self, - user_config: dict | Config, - extra_config: dict, + user_config: dict | Config | None, + cli_config: dict | None, + extra_config: dict | None, output_folder: str, config_base_path: str | None, ) -> Config: """Initialize the config with default values and update with user defined values.""" - # default config path is not defined in the function definition to account for different path separators on different OS - if config_base_path is None: - # default yaml config location under /misc/config/config.yaml - config_base_path = os.path.join( - os.path.dirname(__file__), "constants", "default.yaml" - ) - 
- logger.info(f"loading config from {config_base_path}") + default_config_path = os.path.join( + os.path.dirname(__file__), "constants", "default.yaml" + ) + logger.info(f"loading config from {default_config_path}") config = Config() - config.from_yaml(config_base_path) + config.from_yaml(default_config_path) config_updates = [] - # load update config from dict - if isinstance(user_config, dict): - user_config_update = Config(USER_DEFINED) - user_config_update.from_dict(user_config) - config_updates.append(user_config_update) - elif isinstance(user_config, Config): - config_updates.append(user_config) - else: - raise ValueError("'config' parameter must be of type 'dict' or 'Config'") - + if config_base_path is not None: + logger.info(f"loading additional config from {config_base_path}") + user_config_from_file = Config(USER_DEFINED) + user_config_from_file.from_yaml(config_base_path) + config_updates.append(user_config_from_file) + + if user_config is not None: + logger.info("loading additional config provided via CLI") + # load update config from dict + if isinstance(user_config, dict): + user_config_update = Config(USER_DEFINED) + user_config_update.from_dict(user_config) + config_updates.append(user_config_update) + elif isinstance(user_config, Config): + config_updates.append(user_config) + else: + raise ValueError( + "'config' parameter must be of type 'dict' or 'Config'" + ) + + if cli_config is not None: + logger.info("loading additional config provided via CLI parameters") + cli_config_update = Config(USER_DEFINED_CLI_PARAM) + cli_config_update.from_dict(cli_config) + config_updates.append(cli_config_update) + + # this needs to be last if extra_config is not None: extra_config_update = Config(MULTISTEP_SEARCH) extra_config_update.from_dict(extra_config) - # need to overwrite user-defined output folder here - extra_config["output"] = output_folder + # need to overwrite user-defined output folder here to have correct value in config dump + 
extra_config[ConfigKeys.OUTPUT_DIRECTORY] = output_folder config_updates.append(extra_config_update) config.update(config_updates, do_print=True) - if "output" not in config: - config["output"] = output_folder + if ConfigKeys.OUTPUT_DIRECTORY not in config: + config[ConfigKeys.OUTPUT_DIRECTORY] = output_folder + + config.to_yaml(os.path.join(output_folder, "frozen_config.yaml")) return config @@ -162,12 +162,14 @@ def spectral_library(self, spectral_library: SpecLibFlat) -> None: def init_alphabase(self): """Init alphabase by registering custom modifications.""" - # register custom modifications - if "custom_modifications" in self.config: - n_modifications = len(self.config["custom_modifications"]) - logging.info(f"Registering {n_modifications} custom modifications") + new_modifications = {} + for mod in self.config["custom_modifications"]: + new_modifications[mod["name"]] = {"composition": mod["composition"]} + + if new_modifications: + logging.info(f"Registering {len(new_modifications)} custom modifications") - modification.add_new_modifications(self.config["custom_modifications"]) + modification.add_new_modifications(new_modifications) def load_library(self): """ @@ -340,7 +342,7 @@ def _process_raw_file( workflow = peptidecentric.PeptideCentricWorkflow( raw_name, self.config, - quant_path=self.quant_path, + quant_path=self.config["quant_directory"], ) # check if the raw file is already processed diff --git a/alphadia/workflow/base.py b/alphadia/workflow/base.py index 02aff0e1..b9a9e726 100644 --- a/alphadia/workflow/base.py +++ b/alphadia/workflow/base.py @@ -5,6 +5,8 @@ # alpha family imports from alphabase.spectral_library.base import SpecLibBase +from alphadia.constants.keys import ConfigKeys + # alphadia imports from alphadia.data import alpharaw_wrapper, bruker from alphadia.workflow import manager, reporting @@ -52,7 +54,7 @@ def __init__( """ self._instance_name: str = instance_name self._parent_path: str = quant_path or os.path.join( - config["output"], 
QUANT_FOLDER_NAME + config[ConfigKeys.OUTPUT_DIRECTORY], QUANT_FOLDER_NAME ) logger.info(f"Saving quantification results to {self._parent_path}") diff --git a/alphadia/workflow/config.py index 55d7a1f3..a59b9c92 100644 --- a/alphadia/workflow/config.py +++ b/alphadia/workflow/config.py @@ -15,10 +15,13 @@ import yaml +from alphadia.exceptions import KeyAddedConfigError, TypeMismatchConfigError + logger = logging.getLogger() DEFAULT = "default" USER_DEFINED = "user defined" +USER_DEFINED_CLI_PARAM = "user defined (cli)" MULTISTEP_SEARCH = "multistep search" @@ -100,7 +103,10 @@ def _recursive_defaultdict(): tracking_dict = defaultdict(_recursive_defaultdict) current_config = deepcopy(self.config) + for config in configs: + logger.info(f"Updating config with '{config.name}'") + _update( current_config, config.to_dict(), @@ -169,14 +175,12 @@ def _update( Raises ------ - ValueError in these cases: - - a key is not found in the target_config - - the type of the update value does not match the type of the target value - - an item is not found in the target_config + - KeyAddedConfigError: a key is not found in the target_config + - TypeMismatchConfigError: the type of the update value does not match the type of the target value """ for key, update_value in update_config.items(): if key not in target_config: - raise ValueError(f"Key not found in target_config: '{key}'") + raise KeyAddedConfigError(key, config_name) target_value = target_config[key] tracking_value = tracking_dict[key] @@ -189,8 +193,8 @@ def _update( and isinstance(update_value, int | float) ) ): - raise ValueError( - f"Type mismatch for key '{key}': {type(update_value)} != {type(target_value)}" + raise TypeMismatchConfigError( + key, config_name, f"{type(update_value)} != {type(target_value)}" ) if isinstance(target_value, dict): diff --git a/docs/guides/transfer-dimethyl.md index e9804e4a..d5a61968 100644 --- 
a/docs/guides/transfer-dimethyl.md +++ b/docs/guides/transfer-dimethyl.md @@ -153,3 +153,7 @@ multistep_search: transfer_step_enabled: True mbr_step_enabled: True ``` + +In case the multistep search fails at some step, you can restart the failed step by +using the `frozen_config.yaml` that is stored in the respective subfolder. You can of course edit +this file in order to fix the issue that caused the failure. diff --git a/gui/src/main/modules/workflows.js index cb671b75..ffd7b344 100644 --- a/gui/src/main/modules/workflows.js +++ b/gui/src/main/modules/workflows.js @@ -79,15 +79,15 @@ function workflowToConfig(workflow) { let output = {name: workflow.name} if (workflow.library.path != "") { - output["library"] = workflow.library.path + output["library_path"] = workflow.library.path } if (workflow.fasta_list.path != "") { - output["fasta_list"] = workflow.fasta_list.path + output["fasta_paths"] = workflow.fasta_list.path } if (workflow.raw_path_list.path != "") { - output["raw_path_list"] = workflow.raw_path_list.path + output["raw_paths"] = workflow.raw_path_list.path } if (workflow.output_directory.path != "") { diff --git a/misc/distributed_search/parse_parameters.py index 476013e5..27c8f283 100755 --- a/misc/distributed_search/parse_parameters.py +++ b/misc/distributed_search/parse_parameters.py @@ -58,7 +58,7 @@ def safe_add_key(config, parent_key, key, value): safe_add_key(config, "library_prediction", "predict", False) # remove any fasta if one is present in the config file -config.pop("fasta_list", None) +config.pop("fasta_paths", None) # determine chunk size: division of infile rowcount and number of nodes chunk_size = int(np.ceil(infile.shape[0] / int(args.nnodes))) @@ -79,7 +79,7 @@ def safe_add_key(config, parent_key, key, value): # save current chunk indices into chunk-yaml as raw files safe_add_key( - current_config, None, "raw_path_list", 
list(all_filepaths[start_idx:end_idx]) + current_config, None, "raw_paths", list(all_filepaths[start_idx:end_idx]) ) # create folder for current chunk in target directory. Don't create the folder if it already exists. @@ -99,7 +99,7 @@ def safe_add_key(config, parent_key, key, value): sys.exit(1) # set library path in config - safe_add_key(current_config, None, "library", lib_source) + safe_add_key(current_config, None, "library_path", lib_source) # set chunk folder as output_directory in the config safe_add_key(current_config, None, "output_directory", "./") diff --git a/misc/distributed_search/speclib_config.py b/misc/distributed_search/speclib_config.py index 0ad79e67..41e7b221 100755 --- a/misc/distributed_search/speclib_config.py +++ b/misc/distributed_search/speclib_config.py @@ -39,17 +39,17 @@ def safe_add_key(config, parent_key, key, value): # if library and fasta are set, predicting will result in repredicted & annotated library # add fasta_list to config _new_fasta_list = [args.fasta_path] if args.fasta_path else [] -safe_add_key(config, None, "fasta_list", _new_fasta_list) +safe_add_key(config, None, "fasta_paths", _new_fasta_list) # add library path to config _new_library = args.library_path if args.library_path else None -safe_add_key(config, None, "library", _new_library) +safe_add_key(config, None, "library_path", _new_library) # set library prediction to True safe_add_key(config, "library_prediction", "predict", True) # remove rawfiles for prediction step in case some are set -config.pop("raw_path_list", None) +config.pop("raw_paths", None) # set output directory for predicted spectral library safe_add_key(config, None, "output_directory", os.path.join(args.target_directory)) diff --git a/tests/e2e_tests/e2e_test_cases.yaml b/tests/e2e_tests/e2e_test_cases.yaml index 01698f0c..b46093b1 100644 --- a/tests/e2e_tests/e2e_test_cases.yaml +++ b/tests/e2e_tests/e2e_test_cases.yaml @@ -24,10 +24,10 @@ test_cases: target_rt_tolerance: 100 calibration: 
batch_size: 1000 - library: + library_path: # - source_url: https://datashare.biochem.mpg.de/s/Uw2yfNSbApfPpTk # hela_hybrid.hdf - source_url: https://datashare.biochem.mpg.de/s/cNdrN4OJC9AAHhz # hela_hybrid.small.hdf - raw_data: + raw_paths: - source_url: https://datashare.biochem.mpg.de/s/339jg5HtGrwLwDN/download?files=20231017_OA2_TiHe_ADIAMA_HeLa_200ng_Evo011_21min_F-40_05.raw # - source_url: https://datashare.biochem.mpg.de/s/339jg5HtGrwLwDN/download?files=20231017_OA2_TiHe_ADIAMA_HeLa_200ng_Evo011_21min_F-40_06.raw # - source_url: https://datashare.biochem.mpg.de/s/339jg5HtGrwLwDN/download?files=20231017_OA2_TiHe_ADIAMA_HeLa_200ng_Evo011_21min_F-40_07.raw @@ -57,9 +57,9 @@ test_cases: transfer_step_enabled: True mbr_step_enabled: True # dimethyl data (from tutorial) - fasta: + fasta_paths: - source_url: https://datashare.biochem.mpg.de/s/1GiKQSwlPf6YlMm/download?path=%2F&files=2024_01_12_human.fasta - raw_data: + raw_paths: - source_url: https://datashare.biochem.mpg.de/s/1GiKQSwlPf6YlMm/download?path=%2Fraw_data&files=20240408_OA1_Evo12_31min_TiHe_SA_H032_E32_F-40_B1.raw - source_url: https://datashare.biochem.mpg.de/s/1GiKQSwlPf6YlMm/download?path=%2Fraw_data&files=20240408_OA1_Evo12_31min_TiHe_SA_H032_E32_F-40_B2.raw - source_url: https://datashare.biochem.mpg.de/s/1GiKQSwlPf6YlMm/download?path=%2Fraw_data&files=20240408_OA1_Evo12_31min_TiHe_SA_H032_E32_F-40_B3.raw @@ -85,9 +85,9 @@ test_cases: search_output: peptide_level_lfq: true precursor_level_lfq: true - library: + library_path: - source_url: https://datashare.biochem.mpg.de/s/e4jqILnxHPujBBP/download?files=MSFragger_library_60SPD_2.tsv - raw_data: + raw_paths: - source_url: https://datashare.biochem.mpg.de/s/e4jqILnxHPujBBP/download?files=20231218_TIMS03_PaSk_SA_K562_syPASEF_200ng_var_IM0713_S1-E7_1_41539.d - source_url: https://datashare.biochem.mpg.de/s/e4jqILnxHPujBBP/download?files=20231218_TIMS03_PaSk_SA_K562_syPASEF_200ng_var_IM0713_S1-F1_1_41545.d - source_url: 
https://datashare.biochem.mpg.de/s/e4jqILnxHPujBBP/download?files=20231218_TIMS03_PaSk_SA_K562_syPASEF_200ng_var_IM0713_S1-F7_1_41551.d @@ -120,9 +120,9 @@ test_cases: search_output: peptide_level_lfq: true precursor_level_lfq: true - fasta: + fasta_paths: - source_url: https://datashare.biochem.mpg.de/s/WTu3rFZHNeb3uG2/download?files=2024_01_12_human.fasta - raw_data: + raw_paths: - source_url: https://datashare.biochem.mpg.de/s/WTu3rFZHNeb3uG2/download?files=20231024_OA3_TiHe_ADIAMA_HeLa_200ng_Evo01_21min_F-40_iO_before_01.raw - source_url: https://datashare.biochem.mpg.de/s/WTu3rFZHNeb3uG2/download?files=20231024_OA3_TiHe_ADIAMA_HeLa_200ng_Evo01_21min_F-40_iO_before_02.raw - source_url: https://datashare.biochem.mpg.de/s/WTu3rFZHNeb3uG2/download?files=20231024_OA3_TiHe_ADIAMA_HeLa_200ng_Evo01_21min_F-40_iO_before_03.raw @@ -156,9 +156,9 @@ test_cases: search_output: peptide_level_lfq: true precursor_level_lfq: true - fasta: + fasta_paths: - source_url: https://datashare.biochem.mpg.de/s/WTu3rFZHNeb3uG2/download?files=2024_01_12_human.fasta - raw_data: + raw_paths: - source_url: https://datashare.biochem.mpg.de/s/WTu3rFZHNeb3uG2/download?files=20231024_OA3_TiHe_ADIAMA_HeLa_200ng_Evo01_21min_F-40_iO_before_01.raw - source_url: https://datashare.biochem.mpg.de/s/WTu3rFZHNeb3uG2/download?files=20231024_OA3_TiHe_ADIAMA_HeLa_200ng_Evo01_21min_F-40_iO_before_02.raw - source_url: https://datashare.biochem.mpg.de/s/WTu3rFZHNeb3uG2/download?files=20231024_OA3_TiHe_ADIAMA_HeLa_200ng_Evo01_21min_F-40_iO_before_03.raw @@ -191,9 +191,9 @@ test_cases: # search_output: # peptide_level_lfq: true # precursor_level_lfq: true -# library: +# library_path: # - source_url: https://datashare.biochem.mpg.de/s/Q9D8N2mq8vlzQ1f #speclib.mbr.hdf -# raw_data: +# raw_paths: # - source_url: ... 
# metrics: # - BasicStats diff --git a/tests/e2e_tests/prepare_test_data.py b/tests/e2e_tests/prepare_test_data.py index 4bc6a335..261e7c33 100644 --- a/tests/e2e_tests/prepare_test_data.py +++ b/tests/e2e_tests/prepare_test_data.py @@ -55,18 +55,18 @@ def _create_config_file( """Create the config file from paths to the input files and optional extra_config.""" config_to_write = { - "raw_path_list": downloaded_files[YamlKeys.RAW_DATA], + "raw_paths": downloaded_files[YamlKeys.RAW_DATA], "output_directory": os.path.join(target_path, OUTPUT_DIR_NAME), } | extra_config if YamlKeys.LIBRARY in downloaded_files: config_to_write = config_to_write | { - "library": downloaded_files[YamlKeys.LIBRARY][0] + "library_path": downloaded_files[YamlKeys.LIBRARY][0] } if YamlKeys.FASTA in downloaded_files: config_to_write = config_to_write | { - "fasta_list": downloaded_files[YamlKeys.FASTA] + "fasta_paths": downloaded_files[YamlKeys.FASTA] } config_target_path = os.path.join(target_path, DEFAULT_CONFIG_FILE_NAME) diff --git a/tests/unit_tests/test_config.py b/tests/unit_tests/test_config.py index b7c71655..bdf4be82 100644 --- a/tests/unit_tests/test_config.py +++ b/tests/unit_tests/test_config.py @@ -4,6 +4,7 @@ import pytest import yaml +from alphadia.exceptions import KeyAddedConfigError, TypeMismatchConfigError from alphadia.workflow.config import Config generic_default_config = """ @@ -205,7 +206,7 @@ def test_config_update_new_key_raises(): config_2.from_dict({"new_key": 0}) # when - with pytest.raises(ValueError, match="Key not found in target_config: 'new_key'"): + with pytest.raises(KeyAddedConfigError): config_1.update([config_2], do_print=True) @@ -219,8 +220,7 @@ def test_config_update_type_mismatch_raises(): # when with pytest.raises( - ValueError, - match="Type mismatch for key 'simple_value_int': != ", + TypeMismatchConfigError, ): config_1.update([config_2], do_print=True) diff --git a/tests/unit_tests/test_search_plan.py b/tests/unit_tests/test_search_plan.py index 
9224df8f..2380ee7f 100644 --- a/tests/unit_tests/test_search_plan.py +++ b/tests/unit_tests/test_search_plan.py @@ -17,6 +17,13 @@ "some_user_config_key": "some_user_config_value", } +BASE_CLI_PARAMS_CONFIG = { + "raw_paths": ["/raw1"], + "library_path": "/user_provided_library_path", + "fasta_paths": ["/fasta1"], + "quant_directory": "/user_provided_quant_path", +} + def get_search_plan(config): """Helper function to create a SearchPlan object with a given config.""" @@ -25,11 +32,8 @@ def get_search_plan(config): ): return SearchPlan( output_directory="/user_provided_output_path", - raw_path_list=["/raw1"], - library_path="/user_provided_library_path", - fasta_path_list=["/fasta1"], config=config, - quant_dir="/user_provided_quant_path", + cli_params_config=BASE_CLI_PARAMS_CONFIG, ) @@ -47,12 +51,9 @@ def test_runs_plan_without_transfer_and_mbr_steps(mock_plan, mock_init_logging): # could use `mock_plan.assert_has_calls([call(..)])` pattern here but it is harder to read in case of error assert mock_plan.call_args_list[0].kwargs == { "output_folder": "/user_provided_output_path", - "raw_path_list": ["/raw1"], - "library_path": "/user_provided_library_path", - "fasta_path_list": ["/fasta1"], "config": BASE_USER_CONFIG, "extra_config": {}, - "quant_path": "/user_provided_quant_path", + "cli_config": BASE_CLI_PARAMS_CONFIG, } mock_plan.return_value.run.assert_called_once_with() @@ -66,12 +67,7 @@ def test_runs_plan_without_transfer_and_mbr_steps_none_dirs( """Test that the SearchPlan object runs the plan correctly without transfer and mbr steps when all parameters are none or empty.""" search_plan = SearchPlan( - output_directory="/user_provided_output_path", - raw_path_list=[], - library_path=None, - fasta_path_list=[], - config={}, - quant_dir=None, + output_directory="/user_provided_output_path", config={}, cli_params_config={} ) # when @@ -82,12 +78,9 @@ def test_runs_plan_without_transfer_and_mbr_steps_none_dirs( # could use 
`mock_plan.assert_has_calls([call(..)])` pattern here but it is harder to read in case of error assert mock_plan.call_args_list[0].kwargs == { "output_folder": "/user_provided_output_path", - "raw_path_list": [], - "library_path": None, - "fasta_path_list": [], "config": {}, "extra_config": {}, - "quant_path": None, + "cli_config": {}, } mock_plan.return_value.run.assert_called_once_with() @@ -120,20 +113,14 @@ def test_runs_plan_with_transfer_step( # transfer_step assert mock_plan.call_args_list[0].kwargs == { "output_folder": "/user_provided_output_path/transfer", - "raw_path_list": ["/raw1"], - "library_path": "/user_provided_library_path", - "fasta_path_list": ["/fasta1"], "config": BASE_USER_CONFIG | additional_user_config, "extra_config": MOCK_MULTISTEP_CONFIG["transfer"], - "quant_path": None, + "cli_config": BASE_CLI_PARAMS_CONFIG, } # library_step assert mock_plan.call_args_list[1].kwargs == { "output_folder": "/user_provided_output_path", - "raw_path_list": ["/raw1"], - "library_path": "/user_provided_library_path", - "fasta_path_list": ["/fasta1"], "config": BASE_USER_CONFIG | additional_user_config, "extra_config": MOCK_MULTISTEP_CONFIG["library"] | { @@ -143,7 +130,7 @@ def test_runs_plan_with_transfer_step( }, } | dynamic_config, - "quant_path": None, + "cli_config": BASE_CLI_PARAMS_CONFIG, } mock_plan.return_value.run.assert_has_calls([call(), call()]) @@ -177,23 +164,19 @@ def test_runs_plan_with_mbr_step(mock_get_dyn_config, mock_plan, mock_init_loggi # library_step assert mock_plan.call_args_list[0].kwargs == { "output_folder": "/user_provided_output_path/library", - "raw_path_list": ["/raw1"], - "library_path": "/user_provided_library_path", - "fasta_path_list": ["/fasta1"], "config": BASE_USER_CONFIG | additional_user_config, "extra_config": MOCK_MULTISTEP_CONFIG["library"], - "quant_path": None, + "cli_config": BASE_CLI_PARAMS_CONFIG, } # mbr_step assert mock_plan.call_args_list[1].kwargs == { "output_folder": "/user_provided_output_path", - 
"raw_path_list": ["/raw1"], - "library_path": "/user_provided_output_path/library/speclib.mbr.hdf", - "fasta_path_list": ["/fasta1"], "config": BASE_USER_CONFIG | additional_user_config, - "extra_config": MOCK_MULTISTEP_CONFIG["mbr"] | dynamic_config, - "quant_path": None, + "extra_config": MOCK_MULTISTEP_CONFIG["mbr"] + | dynamic_config + | {"library_path": "/user_provided_output_path/library/speclib.mbr.hdf"}, + "cli_config": BASE_CLI_PARAMS_CONFIG, } mock_plan.return_value.run.assert_has_calls([call(), call()]) @@ -229,20 +212,14 @@ def test_runs_plan_with_transfer_and_mbr_steps( # transfer_step assert mock_plan.call_args_list[0].kwargs == { "output_folder": "/user_provided_output_path/transfer", - "raw_path_list": ["/raw1"], - "library_path": "/user_provided_library_path", - "fasta_path_list": ["/fasta1"], "config": BASE_USER_CONFIG | additional_user_config, "extra_config": MOCK_MULTISTEP_CONFIG["transfer"], - "quant_path": None, + "cli_config": BASE_CLI_PARAMS_CONFIG, } # library_step assert mock_plan.call_args_list[1].kwargs == { "output_folder": "/user_provided_output_path/library", - "raw_path_list": ["/raw1"], - "library_path": "/user_provided_library_path", - "fasta_path_list": ["/fasta1"], "config": BASE_USER_CONFIG | additional_user_config, "extra_config": MOCK_MULTISTEP_CONFIG["library"] | { @@ -252,18 +229,19 @@ def test_runs_plan_with_transfer_and_mbr_steps( }, } | dynamic_config, - "quant_path": None, + "cli_config": BASE_CLI_PARAMS_CONFIG, } # mbr_step assert mock_plan.call_args_list[2].kwargs == { "output_folder": "/user_provided_output_path", - "raw_path_list": ["/raw1"], - "library_path": "/user_provided_output_path/library/speclib.mbr.hdf", - "fasta_path_list": ["/fasta1"], "config": BASE_USER_CONFIG | additional_user_config, - "extra_config": MOCK_MULTISTEP_CONFIG["mbr"] | dynamic_config, - "quant_path": None, + "extra_config": MOCK_MULTISTEP_CONFIG["mbr"] + | dynamic_config + | { + "library_path": 
"/user_provided_output_path/library/speclib.mbr.hdf", + }, + "cli_config": BASE_CLI_PARAMS_CONFIG, } mock_plan.return_value.run.assert_has_calls([call(), call(), call()]) diff --git a/tests/unit_tests/test_search_step.py b/tests/unit_tests/test_search_step.py index 63d75a80..61ee0b47 100644 --- a/tests/unit_tests/test_search_step.py +++ b/tests/unit_tests/test_search_step.py @@ -17,8 +17,8 @@ def test_fasta_digest(): tempdir = tempfile.gettempdir() step = search_step.SearchStep( tempdir, - fasta_path_list=[common_contaminants], config={"library_prediction": {"predict": True}}, + cli_config={"fasta_paths": [common_contaminants]}, ) assert len(step.spectral_library.precursor_df) > 0 @@ -30,8 +30,8 @@ def test_fasta_digest(): # predict existing library step = search_step.SearchStep( tempdir, - library_path=speclib_path, config={"library_prediction": {"predict": True}}, + cli_config={"library_path": speclib_path}, ) assert len(step.spectral_library.precursor_df) > 0 assert len(step.spectral_library.fragment_df) > 0 @@ -39,8 +39,8 @@ def test_fasta_digest(): # load existing library without predict step = search_step.SearchStep( tempdir, - library_path=speclib_path, config={"library_prediction": {"predict": False}}, + cli_config={"library_path": speclib_path}, ) assert len(step.spectral_library.precursor_df) > 0 assert len(step.spectral_library.fragment_df) > 0 @@ -68,10 +68,13 @@ def test_library_loading(): for test_dict in test_cases: print("Testing {}".format(test_dict["name"])) + # TODO this is not a unit test test_data_location = DataShareDownloader( test_dict["url"], temp_directory ).download() - step = search_step.SearchStep(temp_directory, library_path=test_data_location) + step = search_step.SearchStep( + temp_directory, {"library_path": test_data_location} + ) assert len(step.spectral_library.precursor_df) > 0 assert len(step.spectral_library.fragment_df) > 0 @@ -81,12 +84,13 @@ def test_custom_modifications(): temp_directory = tempfile.gettempdir() config = { 
- "custom_modifications": { - "ThisModDoesNotExists@K": { + "custom_modifications": [ + { + "name": "ThisModDoesNotExists@K", "composition": "H(10)", }, - } + ] } - step = search_step.SearchStep(temp_directory, [], config=config) # noqa F841 + step = search_step.SearchStep(temp_directory, config=config) # noqa F841 assert "ThisModDoesNotExists@K" in MOD_DF["mod_name"].values diff --git a/tests/unit_tests/test_workflow.py b/tests/unit_tests/test_workflow.py index 807c5518..d297b1e8 100644 --- a/tests/unit_tests/test_workflow.py +++ b/tests/unit_tests/test_workflow.py @@ -307,7 +307,7 @@ def test_workflow_base(): with open(config_path) as f: config = yaml.safe_load(f) - config["output"] = tempfile.gettempdir() + config["output_directory"] = tempfile.gettempdir() workflow_name = Path(file).stem @@ -317,10 +317,10 @@ def test_workflow_base(): ) my_workflow.load(file, pd.DataFrame({})) - assert my_workflow.config["output"] == config["output"] + assert my_workflow.config["output_directory"] == config["output_directory"] assert my_workflow.instance_name == workflow_name assert my_workflow.parent_path == os.path.join( - config["output"], base.QUANT_FOLDER_NAME + config["output_directory"], base.QUANT_FOLDER_NAME ) assert my_workflow.path == os.path.join( my_workflow.parent_path, workflow_name @@ -420,7 +420,7 @@ def create_workflow_instance(): config = Config() config.from_yaml(config_base_path) - config["output"] = tempfile.mkdtemp() + config["output_directory"] = tempfile.mkdtemp() workflow = peptidecentric.PeptideCentricWorkflow( "test", config,