From 7fb762b1febf74e2a9c2cb5a67251072f178023d Mon Sep 17 00:00:00 2001 From: Peter Belmann Date: Mon, 28 Oct 2024 16:11:43 +0100 Subject: [PATCH 1/2] feat(config): allow nextflow run usage (#373) * feat(config): download preset config file from github * feat(tests): adjust tests for using the preset mode * doc(quickstart): remove typo * feat(documentation): update github pull request template * fix(tests): use github pull request number instead of event number --- .github/pull_request_template.md | 10 +++--- .github/workflows/workflow_modules.yml | 9 +++--- docs/quickstart.md | 5 ++- modules/assembly/shortReadAssembler.nf | 4 +-- nextflow.config | 44 +++++++++++++++++++++++--- 5 files changed, 52 insertions(+), 20 deletions(-) diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index fdc2e195..091774b9 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -10,18 +10,16 @@ Thank you for submitting this PR. Before merge: -* The PR must be reviewed by one of the team members. +* A PR must be reviewed by one of the team members. -* Please check if anything in the Readme must be adjusted, or added (development-setup, production-setup, user-guide). +* Please check if anything in the documentation must be adjusted, or added (development-setup, production-setup, user-guide). -* PRs with new modules or workflow interfaces must include tests according to the developer [guidelines](https://openstack.cebitec.uni-bielefeld.de:8080/swift/v1/meta-omics-toolkit/master.html#developer_guidelines). +* PRs with new modules or workflow interfaces must include tests according to the developer [guidelines](https://metagenomics.github.io/metagenomics-tk/latest/developer_guidelines/). -* The new code is readable, well commented and should adhere to our developer [guidelines](https://openstack.cebitec.uni-bielefeld.de:8080/swift/v1/meta-omics-toolkit/master.html#developer_guidelines). +* The new code is readable, well commented and should adhere to our developer [guidelines](https://metagenomics.github.io/metagenomics-tk/latest/developer_guidelines/). * Before merging it must be checked if a squash of commits is required. - - diff --git a/.github/workflows/workflow_modules.yml b/.github/workflows/workflow_modules.yml index c3e9ed78..6e46d5f8 100644 --- a/.github/workflows/workflow_modules.yml +++ b/.github/workflows/workflow_modules.yml @@ -6,7 +6,7 @@ concurrency: env: WORK_DIR: "/vol/spool/${{ github.ref_name }}" PROFILE: "slurm" - PR_NUMBER: ${{ github.event.number }} + PR_NUMBER: ${{ github.event.pull_request.number }} EMGB_KEGG_DB: "/vol/spool/emgb/annotatedgenes2json_db_kegg-mirror-2022-12" EMGB_DB_TITLES: "/vol/spool/emgb/uniref90.titles.tsv.gz" jobs: @@ -244,7 +244,8 @@ jobs: runs-on: [ self-hosted, slurm] needs: [codePolicy] steps: - - name: Test Dereplication + - uses: actions/checkout@v4 + - name: Test Quickstart run: | bash ./scripts/test_quickstart.sh || exit 1 @@ -572,7 +573,7 @@ jobs: VERSION=$(sort VERSIONS.txt | tail -n 1) OUTPUT=outputEasy bash ./scripts/test_settings.sh \ - " --preset --scratch /vol/scratch --input.paired.path test_data/fullPipeline/reads_split.tsv --highmemLarge=28,2000 --s3SignIn false --databases=/vol/scratch/databases/ --output=${OUTPUT} " \ + " --preset --template default/fullPipeline_illumina_nanpore.yml --scratch /vol/scratch --input.paired.path test_data/fullPipeline/reads_split.tsv --highmemLarge=28,2000 --s3SignIn false --databases=/vol/scratch/databases/ --output=${OUTPUT} " \ " " "${WORK_DIR}" ${PROFILE} ${VERSION} "preset" || exit 1 bash ./scripts/check_parameter.sh ${OUTPUT} || exit 1 - name: Test whether settings in default mode can be updated @@ -580,7 +581,7 @@ jobs: VERSION=$(sort VERSIONS.txt | tail -n 1) OUTPUT=outputDefault bash ./scripts/test_settings.sh \ - " --scratch /vol/scratch --resources.highmemLarge.memory=2000 --s3SignIn false --databases=/vol/scratch/databases/ --output=${OUTPUT} " \ + " --scratch /vol/scratch --template default/fullPipeline_illumina_nanpore.yml --resources.highmemLarge.memory=2000 --s3SignIn false --databases=/vol/scratch/databases/ --output=${OUTPUT} " \ "" "${WORK_DIR}" ${PROFILE} ${VERSION} "" || exit 1 bash ./scripts/check_parameter.sh ${OUTPUT} || exit 1 diff --git a/docs/quickstart.md b/docs/quickstart.md index eeaf0e02..b6fde145 100644 --- a/docs/quickstart.md +++ b/docs/quickstart.md @@ -8,9 +8,8 @@ You will need at least 250 GB of disk space. The disk were your docker images an ## Requirements 1. docker: Install Docker by following the official Docker installation [instructions](https://docs.docker.com/engine/install/ubuntu/). -2. make: You can install make on Ubuntu via `sudo apt install make` -3. java: In order to run Nextflow you need to install Java on your machine which can be achieved via `sudo apt install default-jre` -4. Resources: +2. make: You can install make on Ubuntu via `sudo apt install make`. +3. java: In order to run Nextflow you need to install Java on your machine which can be achieved via `sudo apt install default-jre`. ## Preparation diff --git a/modules/assembly/shortReadAssembler.nf b/modules/assembly/shortReadAssembler.nf index deaf0005..8f5ca10e 100644 --- a/modules/assembly/shortReadAssembler.nf +++ b/modules/assembly/shortReadAssembler.nf @@ -373,11 +373,11 @@ workflow _wCalculateMegahitResources { modelType = Channel.empty() if(params.steps.containsKey("assembly") && params.steps.assembly.containsKey("megahit") \ && params?.steps?.assembly?.megahit?.additionalParams.contains("meta-sensitive")){ - model = Channel.value(file("${baseDir}/models/assembler/megahit/sensitive.pkl")) + model = Channel.value(file("${projectDir}/models/assembler/megahit/sensitive.pkl")) modelType = Channel.value("sensitive") } else { modelType = Channel.value("default") - model = Channel.value(file("${baseDir}/models/assembler/megahit/default.pkl")) + model = Channel.value(file("${projectDir}/models/assembler/megahit/default.pkl")) } resourceType.predict | join(nonpareil | splitCsv(header: true, sep: '\t') \ diff --git a/nextflow.config b/nextflow.config index c74c24bc..d5f6d7bc 100644 --- a/nextflow.config +++ b/nextflow.config @@ -46,13 +46,45 @@ if("dry-run" in params.keySet()){ System.out.println("Dry-run is enabled!") } +def configUrlString = "https://raw.githubusercontent.com/metagenomics/metagenomics-tk/refs/tags/" + manifest.version + "/default/fullPipeline_illumina_nanpore.yml" -template = (params.containsKey("template") && params.containsKey("preset")) ? params.template : "default/fullPipeline_illumina_nanpore.yml" +/* +* +* Check if --preset is set and/or a template for a Toolkit configuration file is provided. +* Depending on the setting the corresponding configuration file is loaded. +* +*/ +def loadYAML(configUrlString){ + if((params.containsKey("template") && params.containsKey("preset"))){ + return new Yaml().load(new File(params.template)) + } + if(params.containsKey("preset")){ + try { + // Step 1: Fetch the remote YAML content + def url = new URL(configUrlString) + def connection = url.openConnection() + connection.setRequestMethod('GET') + + if (connection.responseCode == HttpURLConnection.HTTP_OK) { + def reader = connection.inputStream.withReader { it.text } + + // Step 2: Parse the YAML content + return new Yaml().load(reader) + + } else { + println "GET request not worked, response code: ${connection.responseCode}" + System.exit(1) + } + } catch (Exception e) { + e.printStackTrace() + } + } -defaultYmlFile = new File("${baseDir}/" + template) -preLoadYML = new Yaml().load(defaultYmlFile.text) + return null +} +preLoadYML = loadYAML(configUrlString) /* @@ -64,15 +96,17 @@ def getPresetSteps(){ disableModules = ["metabolomics", "annotation", "fragmentRecruitment"] steps = preLoadYML["steps"] + stepsKeys = steps.keySet() def stepsMap = [:] System.out.println(PRETTY_SEPARATOR) System.out.println("The following modules will be executed:\n") + for(step in stepsKeys){ + if(!params.keySet().contains("no-" + step) && ! disableModules.contains(step)){ - System.out.println(step) stepsMap[step] = steps[step] } } @@ -543,7 +577,7 @@ profiles { docker { fixOwnership = true enabled = true - } + } process { cache = 'lenient' From 3d88be6b60e47964f0d72aa6f94c18d5517d14c0 Mon Sep 17 00:00:00 2001 From: Peter Belmann Date: Mon, 28 Oct 2024 21:19:05 +0000 Subject: [PATCH 2/2] feat(docker): use host network instead of docker bridge --- lib/Utils.groovy | 5 +++++ modules/annotation/module.nf | 11 ++++++----- modules/fragmentRecruitment/mashScreen.nf | 2 ++ modules/magAttributes/module.nf | 6 +++--- modules/plasmids/module.nf | 2 +- modules/plasmids/processes.nf | 6 +++--- modules/qualityControl/ontQC.nf | 2 ++ modules/qualityControl/shortReadQC.nf | 2 ++ 8 files changed, 24 insertions(+), 12 deletions(-) diff --git a/lib/Utils.groovy b/lib/Utils.groovy index 37c6bed1..5390259c 100644 --- a/lib/Utils.groovy +++ b/lib/Utils.groovy @@ -31,6 +31,11 @@ class Utils { } } + static String getDockerNetwork(){ + return " --net=host "; + } + + static String getCreateDatabaseDirCommand(db){ return "if [ ! -z " + db + " ]; then mkdir " + db + " -p; fi" } diff --git a/modules/annotation/module.nf b/modules/annotation/module.nf index 2120839c..d398aad2 100644 --- a/modules/annotation/module.nf +++ b/modules/annotation/module.nf @@ -59,7 +59,7 @@ process pMMseqs2 { // Another mount flag is used to get a key file (aws format) into the docker-container. // This file is then used by s5cmd. The necessary mount points are generated by “constructParametersObject()”. - containerOptions constructParametersObject("mmseqs2") + " --entrypoint='' " + containerOptions constructParametersObject("mmseqs2") + " --entrypoint='' " + Utils.getDockerNetwork() tag "Sample: $sample, Database: $dbType" @@ -104,7 +104,7 @@ process pMMseqs2_taxonomy { // Therefore this place has to be mounted to the docker container to be accessible during run time. // Another mount flag is used to get a key file (aws format) into the docker-container. // This file is then used by s5cmd. The necessary mount points are generated by “constructParametersObject()”. - containerOptions constructParametersObject("mmseqs2_taxonomy") + " --entrypoint='' " + containerOptions constructParametersObject("mmseqs2_taxonomy") + " --entrypoint='' " + Utils.getDockerNetwork() tag "Sample: $sample, Database_taxonomy: $dbType" @@ -148,7 +148,7 @@ process pResistanceGeneIdentifier { container "${params.rgi_image}" - containerOptions Utils.getDockerMount(params?.steps?.annotation?.rgi?.database, params) + containerOptions Utils.getDockerMount(params?.steps?.annotation?.rgi?.database, params) + Utils.getDockerNetwork() tag "Sample: $sample, BinID: $binID" @@ -296,7 +296,7 @@ process pHmmSearch { // Re-Use the gtdb-tk container for Prodigal to safe space container "${params.gtdbtk_image}" - containerOptions Utils.getDockerMount(params?.steps?.binning?.magscot?.hmmSearch?.database, params) + containerOptions Utils.getDockerMount(params?.steps?.binning?.magscot?.hmmSearch?.database, params) + Utils.getDockerNetwork() tag "Sample: $sample" @@ -370,7 +370,8 @@ process pKEGGFromMMseqs2 { // Another mount flag is used to get a key file (aws format) into the docker-container. // This file is then used by s5cmd. - containerOptions Utils.getDockerMount(params.steps?.annotation?.keggFromMMseqs2?.database, params) + containerOptions Utils.getDockerMount(params.steps?.annotation?.keggFromMMseqs2?.database, params) + Utils.getDockerNetwork() + publishDir params.output, mode: "${params.publishDirMode}", saveAs: { filename -> getOutput("${sample}", params.runid, "keggFromMMseqs2", filename) }, \ pattern: "{**.tsv}" diff --git a/modules/fragmentRecruitment/mashScreen.nf b/modules/fragmentRecruitment/mashScreen.nf index e2f19ceb..990006b7 100644 --- a/modules/fragmentRecruitment/mashScreen.nf +++ b/modules/fragmentRecruitment/mashScreen.nf @@ -38,6 +38,8 @@ process pMashScreen { container "${params.mash_image}" + containerOptions Utils.getDockerNetwork() + when params?.steps?.fragmentRecruitment?.mashScreen != null input: diff --git a/modules/magAttributes/module.nf b/modules/magAttributes/module.nf index 4860eb1f..fd5da698 100644 --- a/modules/magAttributes/module.nf +++ b/modules/magAttributes/module.nf @@ -49,7 +49,7 @@ process pCheckM { when params.steps.containsKey("magAttributes") && params.steps.magAttributes.containsKey("checkm") \ && !params.steps.magAttributes.containsKey("checkm2") - containerOptions Utils.getDockerMount(params.steps?.magAttributes?.checkm?.database, params) + containerOptions Utils.getDockerMount(params.steps?.magAttributes?.checkm?.database, params) + Utils.getDockerNetwork() beforeScript Utils.getCreateDatabaseDirCommand("${params.polished.databases}") @@ -88,7 +88,7 @@ process pCheckM2 { when params.steps.containsKey("magAttributes") && params.steps.magAttributes.containsKey("checkm2") - containerOptions Utils.getDockerMount(params.steps?.magAttributes?.checkm2?.database, params) + containerOptions Utils.getDockerMount(params.steps?.magAttributes?.checkm2?.database, params) + Utils.getDockerNetwork() beforeScript "mkdir -p ${params.polished.databases}" @@ -128,7 +128,7 @@ process pGtdbtk { when params.steps.containsKey("magAttributes") && params.steps.magAttributes.containsKey("gtdb") - containerOptions Utils.getDockerMount(params?.steps?.magAttributes?.gtdb?.database, params) + containerOptions Utils.getDockerMount(params?.steps?.magAttributes?.gtdb?.database, params) + Utils.getDockerNetwork() beforeScript Utils.getCreateDatabaseDirCommand("${params.polished.databases}") diff --git a/modules/plasmids/module.nf b/modules/plasmids/module.nf index 83c3b59c..c89f7cd3 100644 --- a/modules/plasmids/module.nf +++ b/modules/plasmids/module.nf @@ -69,7 +69,7 @@ process pPLSDB { saveAs: { filename -> getOutput("${sample}", params.runid, "PLSDB", filename) }, \ pattern: "{**.tsv}" - containerOptions Utils.getDockerMount(params.steps?.plasmid?.PLSDB?.database, params) + containerOptions Utils.getDockerMount(params.steps?.plasmid?.PLSDB?.database, params) + Utils.getDockerNetwork() when params.steps.containsKey("plasmid") && params.steps.plasmid.containsKey("PLSDB") diff --git a/modules/plasmids/processes.nf b/modules/plasmids/processes.nf index 35ad570f..a65da663 100644 --- a/modules/plasmids/processes.nf +++ b/modules/plasmids/processes.nf @@ -24,7 +24,7 @@ process pViralVerifyPlasmid { shell = ['/bin/bash'] - containerOptions Utils.getDockerMount(params?.steps?.plasmid?.ViralVerifyPlasmid?.database, params) + containerOptions Utils.getDockerMount(params?.steps?.plasmid?.ViralVerifyPlasmid?.database, params) + Utils.getDockerNetwork() container "${params.viralVerify_image}" @@ -121,7 +121,7 @@ process pMobTyper { beforeScript Utils.getCreateDatabaseDirCommand("${params.polished.databases}") - containerOptions Utils.getDockerMount(params.steps?.plasmid?.MobTyper?.database, params) + containerOptions Utils.getDockerMount(params.steps?.plasmid?.MobTyper?.database, params) + Utils.getDockerNetwork() secret { "${S3_MobTyper_ACCESS}"!="" ? ["S3_MobTyper_ACCESS", "S3_MobTyper_SECRET"] : [] } @@ -183,7 +183,7 @@ process pPlaton { tag "Sample: $sample, BinId: $binID" - containerOptions " --user root:root " + Utils.getDockerMount(params.steps?.plasmid?.Platon?.database, params) + containerOptions " --user root:root " + Utils.getDockerMount(params.steps?.plasmid?.Platon?.database, params) + Utils.getDockerNetwork() publishDir params.output, mode: "${params.publishDirMode}", saveAs: { filename -> getOutput("${sample}", params.runid, "Platon", filename) }, \ pattern: "{**.tsv}" diff --git a/modules/qualityControl/ontQC.nf b/modules/qualityControl/ontQC.nf index 444656a4..36558908 100644 --- a/modules/qualityControl/ontQC.nf +++ b/modules/qualityControl/ontQC.nf @@ -59,6 +59,8 @@ process pPorechopDownload { container "${params.porechop_image}" + containerOptions Utils.getDockerNetwork() + input: tuple val(sample), env(readUrl) diff --git a/modules/qualityControl/shortReadQC.nf b/modules/qualityControl/shortReadQC.nf index ab5db308..809f5ea1 100644 --- a/modules/qualityControl/shortReadQC.nf +++ b/modules/qualityControl/shortReadQC.nf @@ -204,6 +204,8 @@ process pFastpSplitDownload { container "${params.fastp_image}" + containerOptions Utils.getDockerNetwork() + input: tuple val(sample), env(read1Url), env(read2Url)