Skip to content

Commit

Permalink
Merge pull request #242 from JoseEspinosa/i240
Browse files Browse the repository at this point in the history
Separate mmcif_files from obsolete channels
  • Loading branch information
JoseEspinosa authored Jan 21, 2025
2 parents 41dab96 + 66c65b9 commit 1af71b4
Show file tree
Hide file tree
Showing 11 changed files with 56 additions and 27 deletions.
11 changes: 11 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,17 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- [[PR #210](https://github.com/nf-core/proteinfold/pull/210)] - Moving post-processing logic to a subworkflow, change wave images pointing to oras to point to https and refactor module to match nf-core folder structure.
- [[#214](https://github.com/nf-core/proteinfold/issues/214)] - Fix colabfold image to run in cpus after [#188](https://github.com/nf-core/proteinfold/issues/188) fix.
- [[#235](https://github.com/nf-core/proteinfold/issues/235)] - Update samplesheet to new version (switch from `sequence` column to `id`).
- [[#240](https://github.com/nf-core/proteinfold/issues/240)] - Separate download and input of pdb `mmcif` files and `obsolete` database.

### Parameters

| Old parameter | New parameter |
| ------------- | --------------------- |
| | `--pdb_obsolete_path` |

> **NB:** Parameter has been **updated** if both old and new parameter information is present.
> **NB:** Parameter has been **added** if just the new parameter information is present.
> **NB:** Parameter has been **removed** if new parameter information isn't present.

## [[1.1.1](https://github.com/nf-core/proteinfold/releases/tag/1.1.1)] - 2024-07-30

Expand Down
1 change: 1 addition & 0 deletions conf/dbs.config
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ params {
mgnify_path = "${params.alphafold2_db}/mgnify/*"
pdb70_path = "${params.alphafold2_db}/pdb70/**"
pdb_mmcif_path = "${params.alphafold2_db}/pdb_mmcif/*"
pdb_obsolete_path = "${params.alphafold2_db}/pdb_mmcif/obsolete.dat"
uniref30_alphafold2_path = "${params.alphafold2_db}/uniref30/*"
uniref90_path = "${params.alphafold2_db}/uniref90/*"
pdb_seqres_path = "${params.alphafold2_db}/pdb_seqres/*"
Expand Down
2 changes: 2 additions & 0 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ workflow NFCORE_PROTEINFOLD {
params.mgnify_path,
params.pdb70_path,
params.pdb_mmcif_path,
params.pdb_obsolete_path,
params.uniref30_alphafold2_path,
params.uniref90_path,
params.pdb_seqres_path,
Expand Down Expand Up @@ -122,6 +123,7 @@ workflow NFCORE_PROTEINFOLD {
PREPARE_ALPHAFOLD2_DBS.out.mgnify,
PREPARE_ALPHAFOLD2_DBS.out.pdb70,
PREPARE_ALPHAFOLD2_DBS.out.pdb_mmcif,
PREPARE_ALPHAFOLD2_DBS.out.pdb_obsolete,
PREPARE_ALPHAFOLD2_DBS.out.uniref30,
PREPARE_ALPHAFOLD2_DBS.out.uniref90,
PREPARE_ALPHAFOLD2_DBS.out.pdb_seqres,
Expand Down
5 changes: 0 additions & 5 deletions modules/local/download_pdbmmcif/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ process DOWNLOAD_PDBMMCIF {

input:
val source_url_pdb_mmcif
val source_url_pdb_obsolete

output:
path ('*') , emit: ch_db
Expand Down Expand Up @@ -54,14 +53,10 @@ process DOWNLOAD_PDBMMCIF {
# Delete empty download directory structure.
find ./raw -type d -empty -delete
aria2c \\
$source_url_pdb_obsolete
cat <<-END_VERSIONS > versions.yml
"${task.process}":
sed: \$(echo \$(sed --version 2>&1) | head -1 | sed 's/^.*GNU sed) //; s/ .*\$//')
rsync: \$(rsync --version | head -1 | sed 's/^rsync version //; s/ protocol version [[:digit:]]*//')
aria2c: \$( aria2c -v | head -1 | sed 's/aria2 version //' )
END_VERSIONS
"""

Expand Down
7 changes: 4 additions & 3 deletions modules/local/run_alphafold2/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ process RUN_ALPHAFOLD2 {
path ('small_bfd/*')
path ('mgnify/*')
path ('pdb70/*')
path ('pdb_mmcif/*')
path ('mmcif_files/*')
path ('obsolete_pdb/*')
path ('uniref30/*')
path ('uniref90/*')
path ('pdb_seqres/*')
Expand Down Expand Up @@ -61,8 +62,8 @@ process RUN_ALPHAFOLD2 {
--data_dir=\$PWD \
--uniref90_database_path=./uniref90/uniref90.fasta \
--mgnify_database_path=./mgnify/mgy_clusters_2022_05.fa \
--template_mmcif_dir=./pdb_mmcif/mmcif_files \
--obsolete_pdbs_path=./pdb_mmcif/obsolete.dat \
--template_mmcif_dir=./mmcif_files \
--obsolete_pdbs_path=./obsolete_pdb/obsolete.dat \
--random_seed=53343 \
$args
Expand Down
1 change: 1 addition & 0 deletions modules/local/run_alphafold2_msa/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ process RUN_ALPHAFOLD2_MSA {
path ('small_bfd/*')
path ('mgnify/*')
path ('pdb70/*')
path ('pdb_mmcif/mmcif_files/*')
path ('pdb_mmcif/*')
path ('uniref30/*')
path ('uniref90/*')
Expand Down
1 change: 1 addition & 0 deletions modules/local/run_alphafold2_pred/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ process RUN_ALPHAFOLD2_PRED {
path ('small_bfd/*')
path ('mgnify/*')
path ('pdb70/*')
path ('pdb_mmcif/mmcif_files/*')
path ('pdb_mmcif/*')
path ('uniref30/*')
path ('uniref90/*')
Expand Down
1 change: 1 addition & 0 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ params {
mgnify_path = null
pdb70_path = null
pdb_mmcif_path = null
pdb_obsolete_path = null
uniref30_alphafold2_path = null
uniref90_path = null
pdb_seqres_path = null
Expand Down
5 changes: 5 additions & 0 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -412,6 +412,11 @@
"description": "Path to the PDB mmCIF database",
"fa_icon": "fas fa-folder-open"
},
"pdb_obsolete_path": {
"type": "string",
"description": "Path to the PDB obsolete file",
"fa_icon": "fas fa-folder-open"
},
"uniref30_alphafold2_path": {
"type": "string",
"description": "Path to the Uniref30 database",
Expand Down
45 changes: 26 additions & 19 deletions subworkflows/local/prepare_alphafold2_dbs.nf
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ include {
ARIA2_UNCOMPRESS as ARIA2_SMALL_BFD
ARIA2_UNCOMPRESS as ARIA2_MGNIFY
ARIA2_UNCOMPRESS as ARIA2_PDB70
ARIA2_UNCOMPRESS as ARIA2_OBSOLETE
ARIA2_UNCOMPRESS as ARIA2_UNIREF30
ARIA2_UNCOMPRESS as ARIA2_UNIREF90
ARIA2_UNCOMPRESS as ARIA2_UNIPROT_SPROT
Expand All @@ -28,7 +29,8 @@ workflow PREPARE_ALPHAFOLD2_DBS {
alphafold2_params_path // directory: /path/to/alphafold2/params/
mgnify_path // directory: /path/to/mgnify/
pdb70_path // directory: /path/to/pdb70/
pdb_mmcif_path // directory: /path/to/pdb_mmcif/
pdb_mmcif_path // directory: /path/to/pdb_mmcif/mmcif_files/
pdb_obsolete_path // file: /path/to/pdb_mmcif/obsolete.dat
uniref30_alphafold2_path // directory: /path/to/uniref30/alphafold2/
uniref90_path // directory: /path/to/uniref90/
pdb_seqres_path // directory: /path/to/pdb_seqres/
Expand Down Expand Up @@ -65,9 +67,8 @@ workflow PREPARE_ALPHAFOLD2_DBS {
ch_params = Channel.value(file(alphafold2_params_path))
ch_mgnify = Channel.value(file(mgnify_path))
ch_pdb70 = Channel.value(file(pdb70_path, type: 'dir' ))
ch_mmcif_files = file(pdb_mmcif_path, type: 'dir')
ch_mmcif_obsolete = file(pdb_mmcif_path, type: 'file')
ch_mmcif = Channel.value(ch_mmcif_files + ch_mmcif_obsolete)
ch_mmcif_files = Channel.value(file(pdb_mmcif_path))
ch_obsolete = Channel.value(file(pdb_obsolete_path, type: 'file'))
ch_uniref30 = Channel.value(file(uniref30_alphafold2_path, type: 'any'))
ch_uniref90 = Channel.value(file(uniref90_path))
ch_pdb_seqres = Channel.value(file(pdb_seqres_path))
Expand Down Expand Up @@ -108,10 +109,15 @@ workflow PREPARE_ALPHAFOLD2_DBS {

DOWNLOAD_PDBMMCIF(
pdb_mmcif_link,
)
ch_mmcif_files = DOWNLOAD_PDBMMCIF.out.ch_db
ch_versions = ch_versions.mix(DOWNLOAD_PDBMMCIF.out.versions)

ARIA2_OBSOLETE(
pdb_obsolete_link
)
ch_mmcif = DOWNLOAD_PDBMMCIF.out.ch_db
ch_versions = ch_versions.mix(DOWNLOAD_PDBMMCIF.out.versions)
ch_obsolete = ARIA2_OBSOLETE.out.db
ch_versions = ch_versions.mix(ARIA2_OBSOLETE.out.versions)

ARIA2_UNIREF30(
uniref30_alphafold2_link
Expand Down Expand Up @@ -146,20 +152,21 @@ workflow PREPARE_ALPHAFOLD2_DBS {
ARIA2_UNIPROT_SPROT.out.db,
ARIA2_UNIPROT_TREMBL.out.db
)
ch_uniprot = COMBINE_UNIPROT.out.ch_db
ch_version = ch_versions.mix(COMBINE_UNIPROT.out.versions)
ch_uniprot = COMBINE_UNIPROT.out.ch_db
ch_versions = ch_versions.mix(COMBINE_UNIPROT.out.versions)
}

emit:
bfd = ch_bfd
small_bfd = ch_small_bfd
params = ch_params
mgnify = ch_mgnify
pdb70 = ch_pdb70
pdb_mmcif = ch_mmcif
uniref30 = ch_uniref30
uniref90 = ch_uniref90
pdb_seqres = ch_pdb_seqres
uniprot = ch_uniprot
versions = ch_versions
bfd = ch_bfd
small_bfd = ch_small_bfd
params = ch_params
mgnify = ch_mgnify
pdb70 = ch_pdb70
pdb_mmcif = ch_mmcif_files
pdb_obsolete = ch_obsolete
uniref30 = ch_uniref30
uniref90 = ch_uniref90
pdb_seqres = ch_pdb_seqres
uniprot = ch_uniprot
versions = ch_versions
}
4 changes: 4 additions & 0 deletions workflows/alphafold2.nf
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ workflow ALPHAFOLD2 {
ch_mgnify // channel: path(mgnify)
ch_pdb70 // channel: path(pdb70)
ch_pdb_mmcif // channel: path(pdb_mmcif)
ch_pdb_obsolete // channel: path(pdb_obsolete)
ch_uniref30 // channel: path(uniref30)
ch_uniref90 // channel: path(uniref90)
ch_pdb_seqres // channel: path(pdb_seqres)
Expand Down Expand Up @@ -73,6 +74,7 @@ workflow ALPHAFOLD2 {
ch_mgnify,
ch_pdb70,
ch_pdb_mmcif,
ch_pdb_obsolete,
ch_uniref30,
ch_uniref90,
ch_pdb_seqres,
Expand Down Expand Up @@ -106,6 +108,7 @@ workflow ALPHAFOLD2 {
ch_mgnify,
ch_pdb70,
ch_pdb_mmcif,
ch_pdb_obsolete,
ch_uniref30,
ch_uniref90,
ch_pdb_seqres,
Expand All @@ -123,6 +126,7 @@ workflow ALPHAFOLD2 {
ch_mgnify,
ch_pdb70,
ch_pdb_mmcif,
ch_pdb_obsolete,
ch_uniref30,
ch_uniref90,
ch_pdb_seqres,
Expand Down

0 comments on commit 1af71b4

Please sign in to comment.