From c2495666de3f8bbbe947f661824938abe6b51f1d Mon Sep 17 00:00:00 2001 From: JoseEspinosa Date: Tue, 21 Jan 2025 14:05:58 +0100 Subject: [PATCH 1/7] Separate mmcif_files from obsolete channels --- conf/dbs.config | 1 + main.nf | 2 + modules/local/download_pdbmmcif/main.nf | 5 --- modules/local/run_alphafold2/main.nf | 7 +-- modules/local/run_alphafold2_msa/main.nf | 1 + modules/local/run_alphafold2_pred/main.nf | 1 + subworkflows/local/prepare_alphafold2_dbs.nf | 45 +++++++++++--------- workflows/alphafold2.nf | 6 ++- 8 files changed, 40 insertions(+), 28 deletions(-) diff --git a/conf/dbs.config b/conf/dbs.config index d4e521a2..c824bf94 100644 --- a/conf/dbs.config +++ b/conf/dbs.config @@ -29,6 +29,7 @@ params { mgnify_path = "${params.alphafold2_db}/mgnify/*" pdb70_path = "${params.alphafold2_db}/pdb70/**" pdb_mmcif_path = "${params.alphafold2_db}/pdb_mmcif/*" + pdb_obsolete_path = "${params.alphafold2_db}/pdb_mmcif/obsolete.dat" uniref30_alphafold2_path = "${params.alphafold2_db}/uniref30/*" uniref90_path = "${params.alphafold2_db}/uniref90/*" pdb_seqres_path = "${params.alphafold2_db}/pdb_seqres/*" diff --git a/main.nf b/main.nf index d1ec1a6b..8d595b37 100644 --- a/main.nf +++ b/main.nf @@ -88,6 +88,7 @@ workflow NFCORE_PROTEINFOLD { params.mgnify_path, params.pdb70_path, params.pdb_mmcif_path, + params.pdb_obsolete_path, params.uniref30_alphafold2_path, params.uniref90_path, params.pdb_seqres_path, @@ -122,6 +123,7 @@ workflow NFCORE_PROTEINFOLD { PREPARE_ALPHAFOLD2_DBS.out.mgnify, PREPARE_ALPHAFOLD2_DBS.out.pdb70, PREPARE_ALPHAFOLD2_DBS.out.pdb_mmcif, + PREPARE_ALPHAFOLD2_DBS.out.pdb_obsolete, PREPARE_ALPHAFOLD2_DBS.out.uniref30, PREPARE_ALPHAFOLD2_DBS.out.uniref90, PREPARE_ALPHAFOLD2_DBS.out.pdb_seqres, diff --git a/modules/local/download_pdbmmcif/main.nf b/modules/local/download_pdbmmcif/main.nf index a8a12963..385b350f 100644 --- a/modules/local/download_pdbmmcif/main.nf +++ b/modules/local/download_pdbmmcif/main.nf @@ -13,7 +13,6 @@ process 
DOWNLOAD_PDBMMCIF { input: val source_url_pdb_mmcif - val source_url_pdb_obsolete output: path ('*') , emit: ch_db @@ -54,14 +53,10 @@ process DOWNLOAD_PDBMMCIF { # Delete empty download directory structure. find ./raw -type d -empty -delete - aria2c \\ - $source_url_pdb_obsolete - cat <<-END_VERSIONS > versions.yml "${task.process}": sed: \$(echo \$(sed --version 2>&1) | head -1 | sed 's/^.*GNU sed) //; s/ .*\$//') rsync: \$(rsync --version | head -1 | sed 's/^rsync version //; s/ protocol version [[:digit:]]*//') - aria2c: \$( aria2c -v | head -1 | sed 's/aria2 version //' ) END_VERSIONS """ diff --git a/modules/local/run_alphafold2/main.nf b/modules/local/run_alphafold2/main.nf index 6ebd3c1d..37feb585 100644 --- a/modules/local/run_alphafold2/main.nf +++ b/modules/local/run_alphafold2/main.nf @@ -21,7 +21,8 @@ process RUN_ALPHAFOLD2 { path ('small_bfd/*') path ('mgnify/*') path ('pdb70/*') - path ('pdb_mmcif/*') + path ('mmcif_files/*') + path ('obsolete_pdb/*') path ('uniref30/*') path ('uniref90/*') path ('pdb_seqres/*') @@ -61,8 +62,8 @@ process RUN_ALPHAFOLD2 { --data_dir=\$PWD \ --uniref90_database_path=./uniref90/uniref90.fasta \ --mgnify_database_path=./mgnify/mgy_clusters_2022_05.fa \ - --template_mmcif_dir=./pdb_mmcif/mmcif_files \ - --obsolete_pdbs_path=./pdb_mmcif/obsolete.dat \ + --template_mmcif_dir=./mmcif_files \ + --obsolete_pdbs_path=./obsolete_pdb/obsolete.dat \ --random_seed=53343 \ $args diff --git a/modules/local/run_alphafold2_msa/main.nf b/modules/local/run_alphafold2_msa/main.nf index 7428eb7f..488b9fb6 100644 --- a/modules/local/run_alphafold2_msa/main.nf +++ b/modules/local/run_alphafold2_msa/main.nf @@ -21,6 +21,7 @@ process RUN_ALPHAFOLD2_MSA { path ('small_bfd/*') path ('mgnify/*') path ('pdb70/*') + path ('pdb_mmcif/mmcif_files/*') path ('pdb_mmcif/*') path ('uniref30/*') path ('uniref90/*') diff --git a/modules/local/run_alphafold2_pred/main.nf b/modules/local/run_alphafold2_pred/main.nf index 13fb15a7..503b2edc 100644 --- 
a/modules/local/run_alphafold2_pred/main.nf +++ b/modules/local/run_alphafold2_pred/main.nf @@ -21,6 +21,7 @@ process RUN_ALPHAFOLD2_PRED { path ('small_bfd/*') path ('mgnify/*') path ('pdb70/*') + path ('pdb_mmcif/mmcif_files/*') path ('pdb_mmcif/*') path ('uniref30/*') path ('uniref90/*') diff --git a/subworkflows/local/prepare_alphafold2_dbs.nf b/subworkflows/local/prepare_alphafold2_dbs.nf index 4621af6b..b9b19558 100644 --- a/subworkflows/local/prepare_alphafold2_dbs.nf +++ b/subworkflows/local/prepare_alphafold2_dbs.nf @@ -8,6 +8,7 @@ include { ARIA2_UNCOMPRESS as ARIA2_SMALL_BFD ARIA2_UNCOMPRESS as ARIA2_MGNIFY ARIA2_UNCOMPRESS as ARIA2_PDB70 + ARIA2_UNCOMPRESS as ARIA2_OBSOLETE ARIA2_UNCOMPRESS as ARIA2_UNIREF30 ARIA2_UNCOMPRESS as ARIA2_UNIREF90 ARIA2_UNCOMPRESS as ARIA2_UNIPROT_SPROT @@ -28,7 +29,8 @@ workflow PREPARE_ALPHAFOLD2_DBS { alphafold2_params_path // directory: /path/to/alphafold2/params/ mgnify_path // directory: /path/to/mgnify/ pdb70_path // directory: /path/to/pdb70/ - pdb_mmcif_path // directory: /path/to/pdb_mmcif/ + pdb_mmcif_path // directory: /path/to/pdb_mmcif/mmcif_files/ + pdb_obsolete_path // file: /path/to/pdb_mmcif/obsolete.dat uniref30_alphafold2_path // directory: /path/to/uniref30/alphafold2/ uniref90_path // directory: /path/to/uniref90/ pdb_seqres_path // directory: /path/to/pdb_seqres/ @@ -65,9 +67,8 @@ workflow PREPARE_ALPHAFOLD2_DBS { ch_params = Channel.value(file(alphafold2_params_path)) ch_mgnify = Channel.value(file(mgnify_path)) ch_pdb70 = Channel.value(file(pdb70_path, type: 'dir' )) - ch_mmcif_files = file(pdb_mmcif_path, type: 'dir') - ch_mmcif_obsolete = file(pdb_mmcif_path, type: 'file') - ch_mmcif = Channel.value(ch_mmcif_files + ch_mmcif_obsolete) + ch_mmcif_files = Channel.value(file(pdb_mmcif_path)) + ch_obsolete = Channel.value(file(pdb_obsolete_path, type: 'file')) ch_uniref30 = Channel.value(file(uniref30_alphafold2_path, type: 'any')) ch_uniref90 = Channel.value(file(uniref90_path)) ch_pdb_seqres =
Channel.value(file(pdb_seqres_path)) @@ -108,10 +109,15 @@ workflow PREPARE_ALPHAFOLD2_DBS { DOWNLOAD_PDBMMCIF( pdb_mmcif_link, + ) + ch_mmcif_files = DOWNLOAD_PDBMMCIF.out.ch_db + ch_versions = ch_versions.mix(DOWNLOAD_PDBMMCIF.out.versions) + + ARIA2_OBSOLETE( pdb_obsolete_link ) - ch_mmcif = DOWNLOAD_PDBMMCIF.out.ch_db - ch_versions = ch_versions.mix(DOWNLOAD_PDBMMCIF.out.versions) + ch_obsolete = ARIA2_OBSOLETE.out.db + ch_versions = ch_versions.mix(ARIA2_OBSOLETE.out.versions) ARIA2_UNIREF30( uniref30_alphafold2_link @@ -146,20 +152,21 @@ workflow PREPARE_ALPHAFOLD2_DBS { ARIA2_UNIPROT_SPROT.out.db, ARIA2_UNIPROT_TREMBL.out.db ) - ch_uniprot = COMBINE_UNIPROT.out.ch_db - ch_version = ch_versions.mix(COMBINE_UNIPROT.out.versions) + ch_uniprot = COMBINE_UNIPROT.out.ch_db + ch_versions = ch_versions.mix(COMBINE_UNIPROT.out.versions) } emit: - bfd = ch_bfd - small_bfd = ch_small_bfd - params = ch_params - mgnify = ch_mgnify - pdb70 = ch_pdb70 - pdb_mmcif = ch_mmcif - uniref30 = ch_uniref30 - uniref90 = ch_uniref90 - pdb_seqres = ch_pdb_seqres - uniprot = ch_uniprot - versions = ch_versions + bfd = ch_bfd + small_bfd = ch_small_bfd + params = ch_params + mgnify = ch_mgnify + pdb70 = ch_pdb70 + pdb_mmcif = ch_mmcif_files + pdb_obsolete = ch_obsolete + uniref30 = ch_uniref30 + uniref90 = ch_uniref90 + pdb_seqres = ch_pdb_seqres + uniprot = ch_uniprot + versions = ch_versions } diff --git a/workflows/alphafold2.nf b/workflows/alphafold2.nf index 1f5344f5..2911fcca 100644 --- a/workflows/alphafold2.nf +++ b/workflows/alphafold2.nf @@ -37,6 +37,7 @@ workflow ALPHAFOLD2 { ch_mgnify // channel: path(mgnify) ch_pdb70 // channel: path(pdb70) ch_pdb_mmcif // channel: path(pdb_mmcif) + ch_pdb_obsolete // channel: path(pdb_obsolete) ch_uniref30 // channel: path(uniref30) ch_uniref90 // channel: path(uniref90) ch_pdb_seqres // channel: path(pdb_seqres) @@ -73,12 +74,13 @@ workflow ALPHAFOLD2 { ch_mgnify, ch_pdb70, ch_pdb_mmcif, + ch_pdb_obsolete, ch_uniref30, ch_uniref90, 
ch_pdb_seqres, ch_uniprot ) - + RUN_ALPHAFOLD2 .out .multiqc @@ -106,6 +108,7 @@ workflow ALPHAFOLD2 { ch_mgnify, ch_pdb70, ch_pdb_mmcif, + ch_pdb_obsolete, ch_uniref30, ch_uniref90, ch_pdb_seqres, @@ -123,6 +126,7 @@ workflow ALPHAFOLD2 { ch_mgnify, ch_pdb70, ch_pdb_mmcif, + ch_pdb_obsolete, ch_uniref30, ch_uniref90, ch_pdb_seqres, From 38ade64bf79f13fcb1d6fd04006b1b4b40a4700b Mon Sep 17 00:00:00 2001 From: JoseEspinosa Date: Tue, 21 Jan 2025 14:16:43 +0100 Subject: [PATCH 2/7] Update nextflow schema --- nextflow_schema.json | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/nextflow_schema.json b/nextflow_schema.json index 073e5d73..87333dcd 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -412,6 +412,11 @@ "description": "Path to the PDB mmCIF database", "fa_icon": "fas fa-folder-open" }, + "pdb_obsolete_path": { + "type": "string", + "description": "Path to the PDB obsolete file", + "fa_icon": "fas fa-folder-open" + }, "uniref30_alphafold2_path": { "type": "string", "description": "Path to the Uniref30 database", From 53a1bcd6491fbd3edb2dad43f4a7fdfca08b96f4 Mon Sep 17 00:00:00 2001 From: JoseEspinosa Date: Tue, 21 Jan 2025 14:21:23 +0100 Subject: [PATCH 3/7] Update changelog and nextflow config --- CHANGELOG.md | 11 +++++++++++ nextflow.config | 1 + 2 files changed, 12 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index dc14ad8d..ef98ba11 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,17 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [[PR #210](https://github.com/nf-core/proteinfold/pull/210)] - Moving post-processing logic to a subworkflow, change wave images pointing to oras to point to https and refactor module to match nf-core folder structure. - [[#214](https://github.com/nf-core/proteinfold/issues/214)] - Fix colabfold image to run in cpus after [#188](https://github.com/nf-core/proteinfold/issues/188) fix. 
- [[#235](https://github.com/nf-core/proteinfold/issues/235)] - Update samplesheet to new version (switch from `sequence` column to `id`). +- [[#240](https://github.com/nf-core/proteinfold/issues/240)] - Separate download and input of pdb `mmcif` files and `obsolete` database. + +### Parameters + +| Old parameter | New parameter | +| --------------------- | ---------------------------------------- | +| | `--pdb_obsolete_path` | + +> **NB:** Parameter has been **updated** if both old and new parameter information is present. +> **NB:** Parameter has been **added** if just the new parameter information is present. +> **NB:** Parameter has been **removed** if new parameter information isn't present. ## [[1.1.1](https://github.com/nf-core/proteinfold/releases/tag/1.1.1)] - 2025-07-30 diff --git a/nextflow.config b/nextflow.config index d3322f34..83c59a75 100644 --- a/nextflow.config +++ b/nextflow.config @@ -43,6 +43,7 @@ params { mgnify_path = null pdb70_path = null pdb_mmcif_path = null + pdb_obsolete_path = null uniref30_alphafold2_path = null uniref90_path = null pdb_seqres_path = null From 500a193652cb6906f5a51bc248bd8e95c153a8a5 Mon Sep 17 00:00:00 2001 From: JoseEspinosa Date: Tue, 21 Jan 2025 14:39:12 +0100 Subject: [PATCH 4/7] Make lint happy --- workflows/alphafold2.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/alphafold2.nf b/workflows/alphafold2.nf index 2911fcca..90ae44fd 100644 --- a/workflows/alphafold2.nf +++ b/workflows/alphafold2.nf @@ -80,7 +80,7 @@ workflow ALPHAFOLD2 { ch_pdb_seqres, ch_uniprot ) - + RUN_ALPHAFOLD2 .out .multiqc From 1abc60f2fad42369b85b130c54470e841c804d57 Mon Sep 17 00:00:00 2001 From: Jose Espinosa-Carrasco Date: Tue, 21 Jan 2025 15:02:42 +0000 Subject: [PATCH 5/7] Fix table --- CHANGELOG.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ef98ba11..e2a93045 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,9 +20,9 @@ and this project adheres to
[Semantic Versioning](https://semver.org/spec/v2.0.0 ### Parameters -| Old parameter | New parameter | -| --------------------- | ---------------------------------------- | -| | `--pdb_obsolete_path` | +| Old parameter | New parameter | +| ------------- | --------------------- | +| | `--pdb_obsolete_path` | > **NB:** Parameter has been **updated** if both old and new parameter information is present. > **NB:** Parameter has been **added** if just the new parameter information is present. From 7a531d560ffee0581d0276d7554b1bb4a7b5d789 Mon Sep 17 00:00:00 2001 From: JoseEspinosa Date: Tue, 21 Jan 2025 16:11:51 +0100 Subject: [PATCH 6/7] Apply patch see here (https://github.com/nf-core/tools/pull/3416) --- nextflow.config | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/nextflow.config b/nextflow.config index 83c59a75..7589904f 100644 --- a/nextflow.config +++ b/nextflow.config @@ -287,13 +287,13 @@ env { } // Set bash options -process.shell = """\ -bash - -set -e # Exit if a tool returns a non-zero status/exit code -set -u # Treat unset variables and parameters as an error -set -o pipefail # Returns the status of the last command to exit with a non-zero status or zero if all successfully execute -set -C # No clobber - prevent output redirection from overwriting files. +process.shell = [ + "bash", + "-C", // No clobber - prevent output redirection from overwriting files. + "-e", // Exit if a tool returns a non-zero status/exit code + "-u", // Treat unset variables and parameters as an error + "-o pipefail" // Returns the status of the last command to exit with a non-zero status or zero if all successfully execute +] """ // Disable process selector warnings by default. Use debug profile to enable warnings. 
From 66c65b9c3bb6b8838a73be277dc9ee767fb89227 Mon Sep 17 00:00:00 2001 From: JoseEspinosa Date: Tue, 21 Jan 2025 16:18:11 +0100 Subject: [PATCH 7/7] Revert last change until patch is stable see (https://github.com/nf-core/tools/pull/3416#issuecomment-2604923831) --- nextflow.config | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/nextflow.config b/nextflow.config index 7589904f..83c59a75 100644 --- a/nextflow.config +++ b/nextflow.config @@ -287,13 +287,13 @@ env { } // Set bash options -process.shell = [ - "bash", - "-C", // No clobber - prevent output redirection from overwriting files. - "-e", // Exit if a tool returns a non-zero status/exit code - "-u", // Treat unset variables and parameters as an error - "-o pipefail" // Returns the status of the last command to exit with a non-zero status or zero if all successfully execute -] +process.shell = """\ +bash + +set -e # Exit if a tool returns a non-zero status/exit code +set -u # Treat unset variables and parameters as an error +set -o pipefail # Returns the status of the last command to exit with a non-zero status or zero if all successfully execute +set -C # No clobber - prevent output redirection from overwriting files. """ // Disable process selector warnings by default. Use debug profile to enable warnings.