-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #4 from gregdenay/main
Relaxed naming for fastq files
- Loading branch information
Showing
13 changed files
with
143 additions
and
9 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
isolate_id sample_id organism isolate_name_alt isolation_org sequencing_org extraction_method library_method sequencing_instrument bioinformatics_org third_party_flag third_party_owner sample_type collection_date collection_municipality collection_country collection_cause collected_by manufacturer designation manufacturer_type sample_description lot_number | ||
2016-0000962-01 2016-0000962 Listeria monocytogenes unknown RRW RRW TRUE BfR unknown 01.01.2016 unknown DE lebensmittel unknown unknown unknown unkown | ||
2016-0000962-02 2016-0000962 Listeria monocytogenes 16-LI00962-0 unknown RRW RRW TRUE BfR unknown 01.01.2016 unknown DE lebensmittel unknown unknown unknown unkown |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
sample fq1 fq2 | ||
2016-0000962-01 /home/debian/NGS/NRW-geuebt/local-assembler/.tests/integration/data/2016-0000962-01_S01_L001_R1_001.fastq.gz /home/debian/NGS/NRW-geuebt/local-assembler/.tests/integration/data/2016-0000962-01_S01_L001_R2_001.fastq.gz | ||
16-LI00962-0 /home/debian/NGS/NRW-geuebt/local-assembler/.tests/integration/data/16-LI00962-0_S01_L001_R1_001.fastq.gz /home/debian/NGS/NRW-geuebt/local-assembler/.tests/integration/data/16-LI00962-0_S01_L001_R2_001.fastq.gz |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
sample fastq_name fq1 fq2 | ||
2016-0000962-01 2016-0000962-01 /home/debian/NGS/NRW-geuebt/local-assembler/.tests/integration/data/2016-0000962-01_S01_L001_R1_001.fastq.gz /home/debian/NGS/NRW-geuebt/local-assembler/.tests/integration/data/2016-0000962-01_S01_L001_R2_001.fastq.gz | ||
2016-0000962-02 16-LI00962-0 /home/debian/NGS/NRW-geuebt/local-assembler/.tests/integration/data/16-LI00962-0_S01_L001_R1_001.fastq.gz /home/debian/NGS/NRW-geuebt/local-assembler/.tests/integration/data/16-LI00962-0_S01_L001_R2_001.fastq.gz |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
import os | ||
import sys | ||
|
||
from tempfile import TemporaryDirectory | ||
import shutil | ||
import filecmp | ||
from pathlib import Path, PurePosixPath | ||
|
||
|
||
sys.path.insert(0, os.path.dirname(__file__)) | ||
|
||
|
||
def test_consolidate_ids():
    """Run `consolidate_ids.main` on the unit-test fixtures and compare output.

    The fixture data and the script under test are copied into a temporary
    working directory, `main` is imported from that copy and executed, and the
    resulting table is compared against the expected sample sheet.
    """
    with TemporaryDirectory() as tmpdir:
        # Modify paths to link to your test data and script
        workdir = os.path.join(Path(tmpdir), "workdir")
        data_path = PurePosixPath(".tests/unit/consolidate_ids/data")
        expected_path = PurePosixPath(".tests/unit/consolidate_ids/expected")
        script_path = PurePosixPath(".tests/../workflow/scripts/consolidate_ids.py")

        # Copy data to the temporary workdir.
        shutil.copytree(data_path, workdir)
        shutil.copy(script_path, workdir)

        result_path = os.path.join(workdir, "result.tsv")

        # Make the copied script importable only for the duration of the run,
        # so that a deleted temporary directory is not left on sys.path.
        sys.path.insert(0, workdir)
        try:
            from consolidate_ids import main  # import main from your script

            main(
                ssheet=os.path.join(workdir, "sample_sheet.tsv"),
                metadata=os.path.join(workdir, "metadata.tsv"),
                sheetout=result_path,
            )
        finally:
            if workdir in sys.path:
                sys.path.remove(workdir)

        # Check that tables are the same. shallow=False forces a byte-by-byte
        # comparison instead of trusting matching os.stat() signatures.
        assert filecmp.cmp(
            result_path,
            os.path.join(expected_path, "sample_sheet.tsv"),
            shallow=False,
        )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1 @@ | ||
0.1.1 | ||
0.2.0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
#!/usr/bin/env python3 | ||
# -*- coding: utf-8 -*- | ||
|
||
|
||
import sys | ||
|
||
|
||
# Redirect stderr to the rule's log file when a `snakemake` object is in
# scope; when the name is undefined (e.g. the module is imported directly),
# leave stderr untouched.
try:
    _log_target = snakemake.log[0]
except NameError:
    pass
else:
    sys.stderr = open(_log_target, "w")
|
||
|
||
import pandas as pd | ||
|
||
|
||
def main(ssheet, metadata, sheetout):
    """Re-index a sample sheet by the `isolate_id` found in the metadata table.

    Each sample name of the sample sheet is looked up in the metadata table,
    first in the `isolate_name_alt` column and, if nothing matches there, in
    the `isolate_id` column. The matching row's `isolate_id` becomes the new
    `sample` index, while the original sample name is kept in a new
    `fastq_name` column.

    Args:
        ssheet: Path to a tab-separated sample sheet with a `sample` column.
        metadata: Path to a tab-separated metadata table with `isolate_id`
            and `isolate_name_alt` columns.
        sheetout: Path the re-indexed, tab-separated sample sheet is written to.

    Raises:
        KeyError: If a sample name matches no metadata row.
        ValueError: If a sample name matches more than one metadata row.
    """
    # load metadata and samplesheet
    metatbl = pd.read_csv(metadata, sep="\t", index_col=False)
    sample_sheet = pd.read_csv(ssheet, sep="\t", index_col="sample")

    new_index = []
    # for each fastq pair, check that there is a corresponding entry in metadata
    for sname in sample_sheet.index.to_list():
        # the alternative name takes precedence over the isolate id
        selection = metatbl.loc[metatbl["isolate_name_alt"] == sname]
        if len(selection) == 0:
            selection = metatbl.loc[metatbl["isolate_id"] == sname]

        if len(selection) == 0:
            # Crash if name not found
            raise KeyError(
                f"There is not information on sample '{sname}' in the metadata table, "
                "althought valid FASTQs were provided. "
                "Ensure the completness of the submitted metadata. "
                "The workflow will expect fastq to be named after either the value "
                "the `isolate_id` field if `isolate_name_alt` is empty."
            )
        if len(selection) > 1:
            # crash if name not unique
            raise ValueError(
                f"Several entries for sample name '{sname}' were found in the metadata table. "
                " Make sure that both the fields `isolate_id` and `isolate_name_alt` "
                "contain unique values"
            )

        # add isolate id to reindexing
        new_index.append(selection.iloc[0]["isolate_id"])

    # reindex: keep the original sample names as the `fastq_name` column and
    # use the isolate ids as the new `sample` index
    flat_sheet = sample_sheet.rename_axis("fastq_name").reset_index()
    reindexed_sheet = flat_sheet.set_index(pd.Index(new_index))
    reindexed_sheet.index.name = "sample"

    # output
    reindexed_sheet.to_csv(sheetout, sep="\t", header=True, index=True)
|
||
|
||
if __name__ == "__main__":
    # Script entry point: paths are taken from the `snakemake` object, which
    # must be defined in this scope (presumably injected by Snakemake).
    main(
        ssheet=snakemake.input["sample_sheet"],
        metadata=snakemake.params["metadata"],
        sheetout=snakemake.output["sample_sheet"],
    )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters