-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathSnakefile
98 lines (65 loc) · 2.37 KB
/
Snakefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import os
import pandas as pd
# NOTE(review): the configfile directive is disabled, so `config` is presumably
# supplied on the command line (--configfile / --config) -- confirm.
#configfile: "config.yaml"

# Path of the sample table; the 'sampletable' config key overrides the default.
sample_table_file = config.get('sampletable', 'samples.tsv')

# Stop early with a readable message instead of a pandas traceback.
if not os.path.exists(sample_table_file):
    logger.critical(
        "Couldn't find sampletable! "
        f"I looked for {sample_table_file} relative to the working directory.\n"
        "Create one with the script prepare_sample_table.py \n"
        "You can also specify the path to the sample table in the config file."
    )
    exit(1)

# The first column of the table is used as index and provides the sample names.
SampleTable = pd.read_table(sample_table_file, index_col=0)
SAMPLES = list(SampleTable.index)

# NOTE(review): these two constants are not used in this file; presumably the
# included rule files read them -- confirm before renaming or removing.
JAVA_MEM_FRACTION = 0.85
CONDAENV = 'envs'

# The data set is treated as paired-end when the sample table has an 'R2' column.
PAIRED_END = 'R2' in SampleTable.columns
FRACTIONS = ['R1', 'R2'] if PAIRED_END else ['R1']
def get_taxonomy_names():
    """Return the names of the databases listed under ``config['idtaxa_dbs']``.

    The names are the keys of the ``idtaxa_dbs`` mapping in the workflow
    ``config``. A missing entry, or one explicitly set to ``None``, yields an
    empty list.

    Returns:
        list: the keys of ``config['idtaxa_dbs']`` in mapping order, or ``[]``.
    """
    databases = config.get('idtaxa_dbs')
    if databases is None:
        return []
    # Materialise the keys so both branches return a list (not a live dict
    # view in one case and a list in the other).
    return list(databases.keys())
# First rule of the file, so Snakemake uses it as the default target.
# It only lists files to request and produces nothing itself.
rule all:
    input:
        # read statistics and the error model
        "stats/Nreads_filtered.txt",
        "model/ErrorRates_R1.rds",
        # sequence table and length distribution figure
        "output/seqtab.tsv",
        "figures/Lengths/Sequence_Length_distribution_abundance.pdf",
        # representative sequences and combined read counts
        "taxonomy/rep_seq.fasta",
        "stats/Nreads.tsv",
        # one taxonomy table per name returned by get_taxonomy_names()
        expand(
            "taxonomy/{ref}.tsv",
            ref=get_taxonomy_names(),
        )
# Target rule: for every name returned by get_taxonomy_names(), request both
# the "<name>_gg.tsv" and the "<name>.tsv" taxonomy table.
rule all_taxonomy:
    input:
        expand(
            "taxonomy/{ref}_gg.tsv",
            ref=get_taxonomy_names(),
        ),
        expand(
            "taxonomy/{ref}.tsv",
            ref=get_taxonomy_names(),
        ),
# Target rule: request the Newick tree file.
rule all_tree:
    input:
        'taxonomy/otu_tree.nwk',
# Target rule: request one quality-profile PDF per sample and read direction.
rule all_profile:
    input:
        # FRACTIONS holds only 'R1' when the sample table has no 'R2' column,
        # so single-end runs no longer request R2 plots (previously the
        # directions were hard-coded to ['R1', 'R2']).
        expand(
            "figures/Quality_profiles/{direction}/{sample}_{direction}.pdf",
            sample=SAMPLES,
            direction=FRACTIONS,
        ),
# Target rule: request only the read counts after filtering.
rule all_filtered:
    input:
        'stats/Nreads_filtered.txt',
# Join the three read-count tables on their first column, write the combined
# table as TSV and draw it as a bar chart.
rule combine_read_counts:
    input:
        'stats/Nreads_filtered.txt',
        'stats/Nreads_dereplicated.txt',
        'stats/Nreads_chimera_removed.txt',
    output:
        'stats/Nreads.tsv',
        plot='stats/Nreads.pdf',
    run:
        import pandas as pd
        import matplotlib
        import matplotlib.pyplot as plt

        # All inputs are read the same way, with the first column as index.
        # The former `squeeze=True` on the third read was removed from
        # pandas 2.0 (TypeError) and is not needed: joining a one-column
        # DataFrame adds the same column as joining the squeezed Series.
        counts = pd.read_table(input[0], index_col=0)
        counts = counts.join(pd.read_table(input[1], index_col=0))
        counts = counts.join(pd.read_table(input[2], index_col=0))
        counts.to_csv(output[0], sep='\t')

        # Font type 42 embeds TrueType fonts in the PDF, so the text stays
        # editable in vector graphics programs.
        matplotlib.rcParams['pdf.fonttype'] = 42

        # Figure width grows with the number of rows in the table.
        counts.plot.bar(width=0.7, figsize=(counts.shape[0] * 0.3, 5))
        plt.ylabel('N reads')
        plt.savefig(output.plot)
include: "rules/dada2.smk"
include: "rules/taxonomy.smk"