diff --git a/README.md b/README.md index 59ab9348..9ee28e86 100644 --- a/README.md +++ b/README.md @@ -7,6 +7,10 @@ For the complete TOSTADAS documentation, please see the [Wiki](https://github.com/CDCgov/tostadas/wiki) +## Warnings +### Plugin Compatibility Warning +❗ Important Note: This pipeline uses the nf-schema plugin to validate pipeline parameters. Users with Nextflow version 24 or later may encounter a warning message indicating that the plugin must be installed. To resolve this warning message, please install the plugin manually by following the [Nextflow offline plugin installation instructions](https://www.nextflow.io/docs/latest/plugins.html#offline-usage). + ## Overview **T O S T A D A S** **T**oolkit for **O**pen **S**equence **T**riage, **A**nnotation, and **DA**tabase **S**ubmission @@ -41,28 +45,17 @@ bash Mambaforge-$(uname)-$(uname -m).sh -b -p $HOME/mambaforge ``` export PATH="$HOME/mambaforge/bin:$PATH" ``` -### 3. Create and activate a conda environment - - **3a. Create an empty conda environment** -``` -conda create --name tostadas -``` -This conda environment will be used to install Nextflow. - - **3b. Activate the environment** -``` -conda activate tostadas -``` -Verify which environment is active by running the following conda command: `conda env list`. The active environment will be denoted with an asterisk * - -### 4. Install Nextflow using mamba and the bioconda Channel +### 3. Install Nextflow using mamba and the bioconda Channel ``` mamba install -c bioconda nextflow ``` -### 5. Update the default submissions config file with your NCBI username and password, and run the following nextflow command to execute the scripts with default parameters and the local run environment: +### 4. Update the default submissions config file with your NCBI username and password ``` # update this config file (you don't have to use vim) vim conf/submission_config.yaml +``` +### 5. 
Run the workflow with default parameters and the local run environment: +``` # test command for virus reads nextflow run main.nf -profile test, --virus ``` diff --git a/modules/local/general_util/validate_params/main.nf b/modules/local/general_util/validate_params/main.nf deleted file mode 100644 index 9ac42542..00000000 --- a/modules/local/general_util/validate_params/main.nf +++ /dev/null @@ -1,144 +0,0 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - VALIDATE PARAMS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -// Include the plugin -include { validateParameters; paramsSummaryLog; samplesheetToList } from 'plugin/nf-schema' - -// Validate input parameters -validateParameters() - -// Print summary of supplied parameters -log.info paramsSummaryLog(workflow) - - -process VALIDATE_PARAMS { - - exec: - // check that metadata file is provided - assert params.meta_path - - // check annotation paths - if ( params.annotation ) { - if ( params.repeatmasker_liftoff ) { - assert params.ref_fasta_path - assert params.ref_fasta_path - assert params.ref_gff_path - assert params.repeat_library - } - if ( params.vadr ) { - assert params.vadr_models_dir - } - if ( params.bakta ) { - if ( !params.download_bakta_db ) { - assert params.bakta_db_path - } - } - } - - // check output directories - // assert params.output_dir - // assert params.val_output_dir - // assert params.submission_output_dir - - // if ( params.liftoff == true ) { - // assert params.final_liftoff_output_dir - // } - // if ( params.vadr == true ) { - // assert params.vadr_output_dir - // } - // if ( params.bakta == true ) { - // assert params.bakta_output_dir - // } - - // check liftoff params with int or float values - if ( params.repeatmasker_liftoff == true ) { - // Check whether populated or not - assert params.lift_parallel_processes == 0 || params.lift_parallel_processes - assert params.lift_mismatch - assert 
params.lift_gap_open - assert params.lift_gap_extend - assert params.lift_print_version_exit == true || params.lift_print_version_exit == false - assert params.lift_print_help_exit == true || params.lift_print_help_exit == false - - // Check data types - expected_liftoff_strings = [ - "lift_minimap_path": params.lift_minimap_path, - "lift_feature_database_name": params.lift_feature_database_name - ] - - expected_liftoff_integers = [ - "lift_parallel_processes" : params.lift_parallel_processes, - "lift_mismatch": params.lift_mismatch, - "lift_gap_open": params.lift_gap_open, - "lift_gap_extend": params.lift_gap_extend - ] - - expected_liftoff_floats = [ - "lift_coverage_threshold": params.lift_coverage_threshold, - "lift_child_feature_align_threshold": params.lift_child_feature_align_threshold, - "lift_copy_threshold": params.lift_copy_threshold, - "lift_distance_scaling_factor": params.lift_distance_scaling_factor, - "lift_flank": params.lift_flank, - "lift_overlap": params.lift_overlap - ] - - expected_liftoff_strings.each { key, value -> - if ( expected_liftoff_strings[key] instanceof String == false ) { - throw new Exception("Value must be of string type: $value used for $key parameter") - } - } - - expected_liftoff_integers.each { key, value -> - if ( expected_liftoff_integers[key] instanceof Integer == false ) { - throw new Exception("Value must be of integer type: $value used for $key parameter") - } - } - - expected_liftoff_floats.each { key, value -> - if ( expected_liftoff_floats[key] instanceof Integer == true || expected_liftoff_floats[key] instanceof String == true ) { - throw new Exception("Value must be of float type and not integer or string: $value used for $key parameter") - } - } - } - - // check bakta specific params - if ( params.bakta == true ) { - assert params.meta_path - assert params.bakta_min_contig_length - assert params.bakta_translation_table - assert params.bakta_genus - assert params.bakta_species - assert params.bakta_strain - assert 
params.bakta_plasmid - assert params.bakta_locus - assert params.bakta_locus_tag - } - - // check list of params with bool values - assert params.clear_nextflow_log == true || params.clear_nextflow_log == false - assert params.clear_work_dir == true || params.clear_work_dir == false - assert params.submission == true || params.submission == false - assert params.cleanup == true || params.cleanup == false - assert params.overwrite_output == true || params.overwrite_output == false - assert params.val_date_format_flag == 's' || params.val_date_format_flag == 'o' || params.val_date_format_flag == 'v' - assert params.val_keep_pi == true || params.val_keep_pi == false - - // check types for inputs - expected_strings = [ - "ref_fasta_path": params.ref_fasta_path, - "ref_gff_path": params.ref_gff_path, - "meta_path": params.meta_path, - "output_dir": params.output_dir, - ] - expected_strings.each { key, value -> - if (!(value instanceof String || value instanceof org.codehaus.groovy.runtime.GStringImpl)) { - throw new Exception("Value must be of string type: $value used for $key parameter") - } - } - - output: - val true -} diff --git a/modules/local/general_util/validate_params/update-function.nf b/modules/local/general_util/validate_params/update-function.nf deleted file mode 100644 index e69de29b..00000000 diff --git a/nextflow_schema.json b/nextflow_schema.json index 31f1ca2d..e2b28280 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -10,19 +10,20 @@ "format": "file-path", "description": "Meta-data file path for samples", "default": "${projectDir}/assets/metadata_template.xlsx", - "pattern": "^\\S+\\.xlsx$" + "pattern": "^\\S+\\.(xlsx|xls|xlsm|xlsb|csv)$" }, "ref_fasta_path": { "type": "string", "format": "file-path", "description": "Reference Sequence file path", - "default": "${projectDir}/assets/ref/ref.MPXV.NC063383.v7.fasta" + "default": "${projectDir}/assets/ref/ref.MPXV.NC063383.v7.fasta", + "pattern": 
"^\\S+\\.(fasta|fa|fna|ffn|faa|frn|fasta\\.gz|fa\\.gz|fna\\.gz|ffn\\.gz|faa\\.gz|frn\\.gz|fasta\\.zip|fa\\.zip|fna\\.zip|ffn\\.zip|faa\\.zip|frn\\.zip|fasta\\.bz2|fa\\.bz2|fna\\.bz2|ffn\\.bz2|faa\\.bz2|frn\\.bz2|fasta\\.tar\\.bz2|fa\\.tar\\.bz2|fna\\.tar\\.bz2|ffn\\.tar\\.bz2|faa\\.tar\\.bz2|frn\\.tar\\.bz2|fasta\\.tar|fa\\.tar|fna\\.tar|ffn\\.tar|faa\\.tar|frn\\.tar|fasta\\.tar\\.gz|fa\\.tar\\.gz|fna\\.tar\\.gz|ffn\\.tar\\.gz|faa\\.tar\\.gz|frn\\.tar\\.gz)$" }, "ref_gff_path": { "type": "string", "format": "file-path", - "description": "Reference gff file path for annotation", - "default": "${projectDir}/assets/ref/ref.MPXV.NC063383.v7.gff" + "default": "${projectDir}/assets/ref/ref.MPXV.NC063383.v7.gff", + "pattern": "^\\S+\\.(gff|gff3)$" }, "output_dir": { "type": "string", @@ -56,8 +57,8 @@ }, "custom_fields_file": { "type": "string", - "description": "Path to the JSON file containing custom metadata fields and their information", - "default": "${projectDir}/assets/custom_meta_fields/example_custom_fields.json" + "default": "${projectDir}/assets/custom_meta_fields/example_custom_fields.json", + "pattern": "^\\S+\\.json$" }, "final_liftoff_output_dir": { "type": "string", @@ -90,7 +91,8 @@ "lift_unmapped_features_file_name": { "type": "string", "description": "Name of unmapped features file name", - "default": "output.unmapped_features.txt" + "default": "output.unmapped_features.txt", + "pattern": "^\\S+\\.txt$" }, "lift_copy_threshold": { "type": "number", @@ -249,7 +251,8 @@ "submission_config": { "type": "string", "description": "Configuration file for submission to public repos", - "default": "${projectDir}/bin/config_files/.yaml" + "default": "${projectDir}/bin/config_files/.yaml", + "pattern": "^\\S+\\.(yaml|yml)$" }, "submission_wait_time": { "type": "integer", @@ -297,7 +300,8 @@ "repeat_library": { "type": "string", "description": "Path to the repeat library file used by RepeatMasker.", - "default": "${projectDir}/assets/lib/MPOX_repeats_lib.fasta" + 
"default": "${projectDir}/assets/lib/MPOX_repeats_lib.fasta", + "pattern": "^\\S+\\.(fasta|fa|fna|ffn|faa|frn|fasta\\.gz|fa\\.gz|fna\\.gz|ffn\\.gz|faa\\.gz|frn\\.gz|fasta\\.zip|fa\\.zip|fna\\.zip|ffn\\.zip|faa\\.zip|frn\\.zip|fasta\\.bz2|fa\\.bz2|fna\\.bz2|ffn\\.bz2|faa\\.bz2|frn\\.bz2|fasta\\.tar\\.bz2|fa\\.tar\\.bz2|fna\\.tar\\.bz2|ffn\\.tar\\.bz2|faa\\.tar\\.bz2|frn\\.tar\\.bz2|fasta\\.tar|fa\\.tar|fna\\.tar|ffn\\.tar|faa\\.tar|frn\\.tar|fasta\\.tar\\.gz|fa\\.tar\\.gz|fna\\.tar\\.gz|ffn\\.tar\\.gz|faa\\.tar\\.gz|frn\\.tar\\.gz)$" }, "gisaid": { "description": "Flag to enable or disable submission to GISAID." @@ -383,7 +387,8 @@ }, "lift_feature_types": { "type": "string", - "default": "${projectDir}/assets/feature_types.txt" + "default": "${projectDir}/assets/feature_types.txt", + "pattern": "^\\S+\\.txt$" }, "processed_samples": { "type": "string", @@ -429,7 +434,10 @@ "default": "${projectDir}/environment.yml" } }, - - "required": ["meta_path", "ref_fasta_path", "ref_gff_path", "species"] + "required": [ + "meta_path", + "ref_fasta_path", + "ref_gff_path", + "species" + ] } - diff --git a/workflows/tostadas.nf b/workflows/tostadas.nf index 76c9cceb..bedc7c3a 100644 --- a/workflows/tostadas.nf +++ b/workflows/tostadas.nf @@ -9,7 +9,7 @@ nextflow.enable.dsl=2 // get the utility processes / subworkflows // include { CHECK_FILES } from "../modules/local/general_util/check_files/main" // include { RUN_UTILITY } from "../subworkflows/local/utility" -include { VALIDATE_PARAMS } from '../modules/local/general_util/validate_params/main' +include { validateParameters; paramsSummaryLog; samplesheetToList } from 'plugin/nf-schema' include { GET_WAIT_TIME } from "../modules/local/general_util/get_wait_time/main" @@ -45,9 +45,12 @@ workflow TOSTADAS { exit 0 } - // validate params - VALIDATE_PARAMS() - + // validate input parameters + validateParameters() + + // print summary of supplied parameters + log.info paramsSummaryLog(workflow) + // run metadata validation process 
METADATA_VALIDATION ( params.meta_path