diff --git a/CHANGELOG.md b/CHANGELOG.md index dc14ad8d..e2a93045 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,17 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [[PR #210](https://github.com/nf-core/proteinfold/pull/210)] - Moving post-processing logic to a subworkflow, change wave images pointing to oras to point to https and refactor module to match nf-core folder structure. - [[#214](https://github.com/nf-core/proteinfold/issues/214)] - Fix colabfold image to run in cpus after [#188](https://github.com/nf-core/proteinfold/issues/188) fix. - [[#235](https://github.com/nf-core/proteinfold/issues/235)] - Update samplesheet to new version (switch from `sequence` column to `id`). +- [[#240](https://github.com/nf-core/proteinfold/issues/240)] - Separate download and input of pdb `mmcif` files and `obsolete` database. + +### Parameters + +| Old parameter | New parameter | +| ------------- | --------------------- | +| | `--pdb_obsolete_path` | + +> **NB:** Parameter has been **updated** if both old and new parameter information is present. +> **NB:** Parameter has been **added** if just the new parameter information is present. 
+> **NB:** Parameter has been **removed** if new parameter information isn't present. ## [[1.1.1](https://github.com/nf-core/proteinfold/releases/tag/1.1.1)] - 2025-07-30 diff --git a/conf/dbs.config b/conf/dbs.config index d4e521a2..c824bf94 100644 --- a/conf/dbs.config +++ b/conf/dbs.config @@ -29,6 +29,7 @@ params { mgnify_path = "${params.alphafold2_db}/mgnify/*" pdb70_path = "${params.alphafold2_db}/pdb70/**" pdb_mmcif_path = "${params.alphafold2_db}/pdb_mmcif/*" + pdb_obsolete_path = "${params.alphafold2_db}/pdb_mmcif/obsolete.dat" uniref30_alphafold2_path = "${params.alphafold2_db}/uniref30/*" uniref90_path = "${params.alphafold2_db}/uniref90/*" pdb_seqres_path = "${params.alphafold2_db}/pdb_seqres/*" diff --git a/main.nf b/main.nf index d1ec1a6b..8d595b37 100644 --- a/main.nf +++ b/main.nf @@ -88,6 +88,7 @@ workflow NFCORE_PROTEINFOLD { params.mgnify_path, params.pdb70_path, params.pdb_mmcif_path, + params.pdb_obsolete_path, params.uniref30_alphafold2_path, params.uniref90_path, params.pdb_seqres_path, @@ -122,6 +123,7 @@ workflow NFCORE_PROTEINFOLD { PREPARE_ALPHAFOLD2_DBS.out.mgnify, PREPARE_ALPHAFOLD2_DBS.out.pdb70, PREPARE_ALPHAFOLD2_DBS.out.pdb_mmcif, + PREPARE_ALPHAFOLD2_DBS.out.pdb_obsolete, PREPARE_ALPHAFOLD2_DBS.out.uniref30, PREPARE_ALPHAFOLD2_DBS.out.uniref90, PREPARE_ALPHAFOLD2_DBS.out.pdb_seqres, diff --git a/modules/local/download_pdbmmcif/main.nf b/modules/local/download_pdbmmcif/main.nf index a8a12963..385b350f 100644 --- a/modules/local/download_pdbmmcif/main.nf +++ b/modules/local/download_pdbmmcif/main.nf @@ -13,7 +13,6 @@ process DOWNLOAD_PDBMMCIF { input: val source_url_pdb_mmcif - val source_url_pdb_obsolete output: path ('*') , emit: ch_db @@ -54,14 +53,10 @@ process DOWNLOAD_PDBMMCIF { # Delete empty download directory structure. 
find ./raw -type d -empty -delete - aria2c \\ - $source_url_pdb_obsolete - cat <<-END_VERSIONS > versions.yml "${task.process}": sed: \$(echo \$(sed --version 2>&1) | head -1 | sed 's/^.*GNU sed) //; s/ .*\$//') rsync: \$(rsync --version | head -1 | sed 's/^rsync version //; s/ protocol version [[:digit:]]*//') - aria2c: \$( aria2c -v | head -1 | sed 's/aria2 version //' ) END_VERSIONS """ diff --git a/modules/local/run_alphafold2/main.nf b/modules/local/run_alphafold2/main.nf index 6ebd3c1d..37feb585 100644 --- a/modules/local/run_alphafold2/main.nf +++ b/modules/local/run_alphafold2/main.nf @@ -21,7 +21,8 @@ process RUN_ALPHAFOLD2 { path ('small_bfd/*') path ('mgnify/*') path ('pdb70/*') - path ('pdb_mmcif/*') + path ('mmcif_files/*') + path ('obsolete_pdb/*') path ('uniref30/*') path ('uniref90/*') path ('pdb_seqres/*') @@ -61,8 +62,8 @@ process RUN_ALPHAFOLD2 { --data_dir=\$PWD \ --uniref90_database_path=./uniref90/uniref90.fasta \ --mgnify_database_path=./mgnify/mgy_clusters_2022_05.fa \ - --template_mmcif_dir=./pdb_mmcif/mmcif_files \ - --obsolete_pdbs_path=./pdb_mmcif/obsolete.dat \ + --template_mmcif_dir=./mmcif_files \ + --obsolete_pdbs_path=./obsolete_pdb/obsolete.dat \ --random_seed=53343 \ $args diff --git a/modules/local/run_alphafold2_msa/main.nf b/modules/local/run_alphafold2_msa/main.nf index 7428eb7f..488b9fb6 100644 --- a/modules/local/run_alphafold2_msa/main.nf +++ b/modules/local/run_alphafold2_msa/main.nf @@ -21,6 +21,7 @@ process RUN_ALPHAFOLD2_MSA { path ('small_bfd/*') path ('mgnify/*') path ('pdb70/*') + path ('pdb_mmcif/mmcif_files/*') path ('pdb_mmcif/*') path ('uniref30/*') path ('uniref90/*') diff --git a/modules/local/run_alphafold2_pred/main.nf b/modules/local/run_alphafold2_pred/main.nf index 13fb15a7..503b2edc 100644 --- a/modules/local/run_alphafold2_pred/main.nf +++ b/modules/local/run_alphafold2_pred/main.nf @@ -21,6 +21,7 @@ process RUN_ALPHAFOLD2_PRED { path ('small_bfd/*') path ('mgnify/*') path ('pdb70/*') + path 
('pdb_mmcif/mmcif_files/*') path ('pdb_mmcif/*') path ('uniref30/*') path ('uniref90/*') diff --git a/nextflow.config b/nextflow.config index d3322f34..83c59a75 100644 --- a/nextflow.config +++ b/nextflow.config @@ -43,6 +43,7 @@ params { mgnify_path = null pdb70_path = null pdb_mmcif_path = null + pdb_obsolete_path = null uniref30_alphafold2_path = null uniref90_path = null pdb_seqres_path = null diff --git a/nextflow_schema.json b/nextflow_schema.json index 073e5d73..87333dcd 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -412,6 +412,11 @@ "description": "Path to the PDB mmCIF database", "fa_icon": "fas fa-folder-open" }, + "pdb_obsolete_path": { + "type": "string", + "description": "Path to the PDB obsolete file", + "fa_icon": "fas fa-folder-open" + }, "uniref30_alphafold2_path": { "type": "string", "description": "Path to the Uniref30 database", diff --git a/subworkflows/local/prepare_alphafold2_dbs.nf b/subworkflows/local/prepare_alphafold2_dbs.nf index 4621af6b..b9b19558 100644 --- a/subworkflows/local/prepare_alphafold2_dbs.nf +++ b/subworkflows/local/prepare_alphafold2_dbs.nf @@ -8,6 +8,7 @@ include { ARIA2_UNCOMPRESS as ARIA2_SMALL_BFD ARIA2_UNCOMPRESS as ARIA2_MGNIFY ARIA2_UNCOMPRESS as ARIA2_PDB70 + ARIA2_UNCOMPRESS as ARIA2_OBSOLETE ARIA2_UNCOMPRESS as ARIA2_UNIREF30 ARIA2_UNCOMPRESS as ARIA2_UNIREF90 ARIA2_UNCOMPRESS as ARIA2_UNIPROT_SPROT @@ -28,7 +29,8 @@ workflow PREPARE_ALPHAFOLD2_DBS { alphafold2_params_path // directory: /path/to/alphafold2/params/ mgnify_path // directory: /path/to/mgnify/ pdb70_path // directory: /path/to/pdb70/ - pdb_mmcif_path // directory: /path/to/pdb_mmcif/ + pdb_mmcif_path // directory: /path/to/pdb_mmcif/mmcif_files/ + pdb_obsolete_path // file: /path/to/pdb_mmcif/obsolete.dat uniref30_alphafold2_path // directory: /path/to/uniref30/alphafold2/ uniref90_path // directory: /path/to/uniref90/ pdb_seqres_path // directory: /path/to/pdb_seqres/ @@ -65,9 +67,8 @@ workflow PREPARE_ALPHAFOLD2_DBS { 
ch_params = Channel.value(file(alphafold2_params_path)) ch_mgnify = Channel.value(file(mgnify_path)) ch_pdb70 = Channel.value(file(pdb70_path, type: 'dir' )) - ch_mmcif_files = file(pdb_mmcif_path, type: 'dir') - ch_mmcif_obsolete = file(pdb_mmcif_path, type: 'file') - ch_mmcif = Channel.value(ch_mmcif_files + ch_mmcif_obsolete) + ch_mmcif_files = Channel.value(file(pdb_mmcif_path)) + ch_obsolete = Channel.value(file(pdb_obsolete_path, type: 'file')) ch_uniref30 = Channel.value(file(uniref30_alphafold2_path, type: 'any')) ch_uniref90 = Channel.value(file(uniref90_path)) ch_pdb_seqres = Channel.value(file(pdb_seqres_path)) @@ -108,10 +109,15 @@ workflow PREPARE_ALPHAFOLD2_DBS { DOWNLOAD_PDBMMCIF( pdb_mmcif_link, + ) + ch_mmcif_files = DOWNLOAD_PDBMMCIF.out.ch_db + ch_versions = ch_versions.mix(DOWNLOAD_PDBMMCIF.out.versions) + + ARIA2_OBSOLETE( pdb_obsolete_link ) - ch_mmcif = DOWNLOAD_PDBMMCIF.out.ch_db - ch_versions = ch_versions.mix(DOWNLOAD_PDBMMCIF.out.versions) + ch_obsolete = ARIA2_OBSOLETE.out.db + ch_versions = ch_versions.mix(ARIA2_OBSOLETE.out.versions) ARIA2_UNIREF30( uniref30_alphafold2_link @@ -146,20 +152,21 @@ workflow PREPARE_ALPHAFOLD2_DBS { ARIA2_UNIPROT_SPROT.out.db, ARIA2_UNIPROT_TREMBL.out.db ) - ch_uniprot = COMBINE_UNIPROT.out.ch_db - ch_version = ch_versions.mix(COMBINE_UNIPROT.out.versions) + ch_uniprot = COMBINE_UNIPROT.out.ch_db + ch_versions = ch_versions.mix(COMBINE_UNIPROT.out.versions) } emit: - bfd = ch_bfd - small_bfd = ch_small_bfd - params = ch_params - mgnify = ch_mgnify - pdb70 = ch_pdb70 - pdb_mmcif = ch_mmcif - uniref30 = ch_uniref30 - uniref90 = ch_uniref90 - pdb_seqres = ch_pdb_seqres - uniprot = ch_uniprot - versions = ch_versions + bfd = ch_bfd + small_bfd = ch_small_bfd + params = ch_params + mgnify = ch_mgnify + pdb70 = ch_pdb70 + pdb_mmcif = ch_mmcif_files + pdb_obsolete = ch_obsolete + uniref30 = ch_uniref30 + uniref90 = ch_uniref90 + pdb_seqres = ch_pdb_seqres + uniprot = ch_uniprot + versions = ch_versions } diff 
--git a/workflows/alphafold2.nf b/workflows/alphafold2.nf index 1f5344f5..90ae44fd 100644 --- a/workflows/alphafold2.nf +++ b/workflows/alphafold2.nf @@ -37,6 +37,7 @@ workflow ALPHAFOLD2 { ch_mgnify // channel: path(mgnify) ch_pdb70 // channel: path(pdb70) ch_pdb_mmcif // channel: path(pdb_mmcif) + ch_pdb_obsolete // channel: path(pdb_obsolete) ch_uniref30 // channel: path(uniref30) ch_uniref90 // channel: path(uniref90) ch_pdb_seqres // channel: path(pdb_seqres) @@ -73,6 +74,7 @@ workflow ALPHAFOLD2 { ch_mgnify, ch_pdb70, ch_pdb_mmcif, + ch_pdb_obsolete, ch_uniref30, ch_uniref90, ch_pdb_seqres, @@ -106,6 +108,7 @@ workflow ALPHAFOLD2 { ch_mgnify, ch_pdb70, ch_pdb_mmcif, + ch_pdb_obsolete, ch_uniref30, ch_uniref90, ch_pdb_seqres, @@ -123,6 +126,7 @@ workflow ALPHAFOLD2 { ch_mgnify, ch_pdb70, ch_pdb_mmcif, + ch_pdb_obsolete, ch_uniref30, ch_uniref90, ch_pdb_seqres,