forked from FelixKrueger/Bismark
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbam2nuc
executable file
·659 lines (528 loc) · 21.7 KB
/
bam2nuc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
#!/usr/bin/env perl
use warnings;
use strict;
$|++;
use Getopt::Long;
use Cwd;
use Carp;
## This program is Copyright (C) 2010-18, Felix Krueger ([email protected])
## This program is free software: you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation, either version 3 of the License, or
## (at your option) any later version.
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
## You should have received a copy of the GNU General Public License
## along with this program. If not, see <http://www.gnu.org/licenses/>.
### This script bam2nuc reads BAM files and calculates the nucleotide coverage of the reads
### (using the genomic sequence rather than the observed sequence in the reads themselves)
### and compares it to the average genomic sequence composition. Reads harbouring InDels
### are not taken into consideration. Mono- or Dinucleotides containing Ns are ignored as well
my %chromosomes; # storing sequence information of all chromosomes/scaffolds
my %freqs; # keeping a record of which chromosomes have been processed
my %genomic_freqs;
my %processed;
my $bam2nuc_version = 'v0.20.0';
my ($output_dir,$genome_folder,$parent_dir,$samtools_path,$genome_freq_only) = process_commandline();
warn "Summary of parameters for nucleotide coverage report:\n";
warn '='x53,"\n";
# warn "Input BAM file:\t\t\t$coverage_infile\n";
warn "Output directory:\t\t\t>$output_dir<\n";
warn "Parent directory:\t\t\t>$parent_dir<\n";
warn "Genome directory:\t\t\t>$genome_folder<\n";
warn "Samtools installation:\t\t\t>$samtools_path<\n\n";
my $total_number_of_words_counted = 0;
read_genome_into_memory($parent_dir);
warn "Stored sequence information of ",scalar keys %chromosomes," chromosomes/scaffolds in total\n\n";
### Genomic nucleotide frequencies only
if ($genome_freq_only){
get_genomic_frequencies();
warn "Finished processing genomic nucleotide frequencies\n\n";
exit;
}
# Processing data files
foreach my $infile(@ARGV){
generate_nucleotide_report($infile);
}
sub generate_nucleotide_report {
my $infile = shift;
%freqs = ();
%genomic_freqs = ();
warn "="x66,"\n";
warn "Mono- and di-nucleotide coverage will now be written into a report\n";
warn "="x66,"\n\n";
my $number_processed = 0;
get_genomic_frequencies();
warn "\n\n";
warn "Calculating read frequencies from file '$infile'\n";
warn "="x90,"\n";
if ($infile =~ /\.bam$/){
open (IN,"$samtools_path view -h $infile |") or die "Unable to read from BAM file $infile: $!\n";
}
elsif ($infile =~ /\.cram$/){
open (IN,"$samtools_path view -h $infile |") or die "Unable to read from CRAM file $infile: $!. Please note that CRAM files require Samtools version 1.2 or above...\n";
}
else{
open (IN,$infile) or die "Unable to read from $infile: $!\n";
}
my ($single,$paired) = test_file($infile);
# warn "returned $single and $paired\n";
if ($single){
warn "Determined the file to be single-end\n";
}
elsif ($paired){
warn "Determined the file to be paired-end\n";
}
else{
die "Failed to figure out SE or PE...\n";
}
sleep(1);
my $count = 0;
my $skipped = 0;
while (<IN>){
chomp;
if (/^\@/) {
# warn "$_\n";
next;
}
++$count;
if ($count%500000 == 0){
warn "Processed $count lines\n";
}
my ($flag,$chr,$start,$cigar,$sequence) = (split(/\t/))[1,2,3,5,9];
#warn "$flag\t$chr\t$start\t$cigar\t$sequence\n"; sleep(1);
# $chr =~ s/^.*\|//; # just a temporary measure for the Bonasio et al dataset
# warn "chr after replacing: $chr\n";
### checking CIGAR string for insertions or deletions, and chucking the sequence if they are no linear matches
if ($cigar =~ /[ID]/){
# warn "ignoring sequence:\n $_\n"; sleep(1);
++$skipped;
next;
}
### extracting the genomic sequence instead
my $extracted_sequence = substr($chromosomes{$chr},$start - 1,length$sequence);
# warn "Old sequence: $sequence\nExt sequence: $extracted_sequence\n"; sleep(1);
if ($single){
calc_single_end($extracted_sequence,$flag);
}
else{
calc_paired_end($extracted_sequence,$flag);
}
}
warn "\n\n";
### Time to calculate averages and print to a file
my $outfile = $infile;
$outfile =~ s/.*\///; # deleting path information
die "File needs to be in BAM or CRAM format (ending in .bam or .cram). Terminating process...\n" unless ($outfile =~s /(bam|cram)$/nucleotide_stats.txt/);
warn "Printing nucleotide stats to >> ${output_dir}$outfile <<\n";
open (OUT,'>',"${output_dir}$outfile") or die "Failed to write to file $outfile: $!\n\n";
calculate_averages();
close OUT or warn "Failed to close filehandle CLOSE: $!\n\n";
}
sub get_genomic_frequencies{
### GENOMIC NUCLEOTIDE FREQUENCIES
if (-e "${genome_folder}genomic_nucleotide_frequencies.txt"){
open ( GF,"${genome_folder}genomic_nucleotide_frequencies.txt") or die "Couldn't read from file '${genome_folder}genomic_nucleotide_frequencies.txt': $!\n";
warn "Detected file 'genomic_nucleotide_frequencies.txt' in the genome folder $genome_folder already. Using nucleotide frequencies contained therein ...\n";
warn "="x188,"\n";
while (<GF>){
chomp;
my ($element,$freq) = (split /\t/);
$genomic_freqs{$element} = $freq;
# warn "$element\t$freq\n";
}
close GF;
}
else{
warn "Could not find genomic nucleotide frequency table in the genome folder, calculating genomic frequencies (this may take several minutes depending on genome size) ...\n";
warn "="x164,"\n";
foreach my $chr (keys %chromosomes){
warn "Processing chromosome >> $chr <<\n";
process_sequence($chromosomes{$chr});
}
%genomic_freqs = %freqs;
%freqs = (); # resetting to store the read composition now
### Attempting to write a genomic nucleotide frequency table out to the genome folder so we can re-use it next time without the need to re-calculate
### if this fails
if ( open (FREQS,'>',"${genome_folder}genomic_nucleotide_frequencies.txt") ){
warn "Writing genomic nucleotide frequencies to the file >${genome_folder}genomic_nucleotide_frequencies.txt< for future re-use\n";
foreach my $f(sort keys %genomic_freqs){
warn "Writing count of (di-)nucleotide: $f\t$genomic_freqs{$f}\n";
print FREQS "$f\t$genomic_freqs{$f}\n";
}
close FREQS or warn "Failed to close filehandle FREQS: $!\n\n";
}
else{
warn "Failed to write out file ${genome_folder}genomic_nucleotide_frequencies.txt because of: $!\n";
sleep(1);
warn "Attempting to write to the output directory ('$output_dir') instead\n\n";
### Attempting to write a genomic nucleotide frequency table out to the output folder
if ( open (FREQS,'>',"${output_dir}genomic_nucleotide_frequencies.txt") ){
warn "Writing genomic nucleotide frequencies to the file >${output_dir}genomic_nucleotide_frequencies.txt< for future re-use\n";
foreach my $f(sort keys %genomic_freqs){
warn "Writing count of (di-)nucleotide: $f\t$genomic_freqs{$f}\n";
print FREQS "$f\t$genomic_freqs{$f}\n";
}
close FREQS or warn "Failed to close filehandle FREQS: $!\n\n";
}
else{
warn "Failed to write out file ${output_dir}genomic_nucleotide_frequencies.txt because of: $! skipping writing out genomic frequency table\n";
}
}
}
}
sub calc_paired_end{
my ($sequence,$flag) = @_;
# warn "FLAG: $flag\n$sequence\n"; sleep(1);
if ($flag == 99 or $flag == 147){ # OT or CTOT
# warn "flag 99 or 147. Don't need to do anything\n"; # fine, don't need to do anything
}
elsif ($flag == 83 or 163){ # OB or CTOB
# reverse complementing
$sequence = reverse $sequence;
$sequence =~ tr/GATC/CTAG/;
}
else{
die "failed to detect valid Bismark FLAG tag: $flag\n";
}
process_sequence($sequence);
}
sub calc_single_end{
my ($sequence,$flag) = @_;
if ($flag == 0){
# warn "flag 0\n"; # fine, don't need to do anything
}
elsif ($flag == 16){
# reverse complementing
$sequence = reverse $sequence;
$sequence =~ tr/GATC/CTAG/;
}
else{
die "failed to detect valid Bismark FLAG tag: $flag\n";
}
process_sequence($sequence);
}
sub test_file{
my $in = shift;
open (TEST,"samtools view -H $in |") or die $!;
while (<TEST>){
chomp;
if (/^\@PG/) {
if (/\s-1\s+/ and /\s+-2\s/){ # paired-end
close TEST or warn $!;
return (0,1);
}
else{ # single-end
close TEST or warn $!;
return (1,0);
}
}
}
}
sub calculate_averages {
warn "Final Stage: Calculating averages\n";
warn "="x33,"\n\n";
my $total_number_of_words_counted;
my $total_number_of_words_counted_genomic;
warn "(di-)nucleotide\tcount sample\tpercent sample\tcount genomic\tpercent genomic\tcoverage\n";
print OUT "(di-)nucleotide\tcount sample\tpercent sample\tcount genomic\tpercent genomic\tcoverage\n";
foreach my $word ('A','C','G','T') {
$total_number_of_words_counted += $freqs{$word};
$total_number_of_words_counted_genomic += $genomic_freqs{$word};
}
foreach my $word ('A','C','G','T') {
my $percentage = sprintf ("%.2f",100*$freqs{$word}/$total_number_of_words_counted);
my $percentage_genomic = sprintf ("%.2f",100*$genomic_freqs{$word}/$total_number_of_words_counted_genomic);
my $overall_coverage = sprintf ("%.3f",$freqs{$word}/$genomic_freqs{$word});
warn "$word\t$freqs{$word}\t$percentage\t$genomic_freqs{$word}\t$percentage_genomic\t$overall_coverage\n";
print OUT "$word\t$freqs{$word}\t$percentage\t$genomic_freqs{$word}\t$percentage_genomic\t$overall_coverage\n";
}
$total_number_of_words_counted = 0;
$total_number_of_words_counted_genomic = 0;
foreach my $word ('AA','AC','AG','AT','CA','CC','CG','CT','GA','GC','GG','GT','TA','TC','TG','TT') {
$total_number_of_words_counted += $freqs{$word};
$total_number_of_words_counted_genomic += $genomic_freqs{$word};
}
foreach my $word ('AA','AC','AG','AT','CA','CC','CG','CT','GA','GC','GG','GT','TA','TC','TG','TT') {
my $percentage = sprintf ("%.2f",100*$freqs{$word}/$total_number_of_words_counted);
my $percentage_genomic = sprintf ("%.2f",100*$genomic_freqs{$word}/$total_number_of_words_counted_genomic);
my $overall_coverage = sprintf ("%.3f",$freqs{$word}/$genomic_freqs{$word});
warn "$word\t$freqs{$word}\t$percentage\t$genomic_freqs{$word}\t$percentage_genomic\t$overall_coverage\n";
print OUT "$word\t$freqs{$word}\t$percentage\t$genomic_freqs{$word}\t$percentage_genomic\t$overall_coverage\n";
}
}
sub process_sequence{
my $seq = shift;
my $mono;
my $di;
foreach my $index (0..(length$seq)-1){
my $counted = 0;
if ($index%10000000==0){
# warn "Current index number is $index\n";
}
$mono = substr($seq,$index,1);
unless ( $mono eq 'N'){
$freqs{$mono}++;
}
unless ( ($index + 2) > length$seq ){
$di = substr($seq,$index,2);
if (index($di,'N') < 0) {
$freqs{$di}++;
}
}
}
}
sub process_commandline{
my $help;
my $output_dir;
my $genome_folder;
my $parent_dir;
my $samtools_path;
my $genomic_composition_only;
my $version;
my $command_line = GetOptions ('help' => \$help,
'dir=s' => \$output_dir,
'g|genome_folder=s' => \$genome_folder,
'parent_dir=s' => \$parent_dir,
'samtools_path=s' => \$samtools_path,
'genomic_composition_only' => \$genomic_composition_only,
'version' => \$version,
);
### EXIT ON ERROR if there were errors with any of the supplied options
unless ($command_line){
die "Please respecify command line options\n";
}
### HELPFILE
if ($help){
print_helpfile();
exit;
}
if ($version){
print << "VERSION";
Bismark Nucleotide Coverage Module -
bam2nuc
Bismark Version: $bam2nuc_version
Copyright 2010-16 Felix Krueger, Babraham Bioinformatics
www.bioinformatics.babraham.ac.uk/projects/bismark/
VERSION
exit;
}
### no files provided
unless (@ARGV){
unless ($genomic_composition_only){ # don't require data input files for genomic composition only
warn "You need to provide one or more BAM files to continue. Please respecify!\n";
sleep(1);
print_helpfile();
exit;
}
}
unless ($parent_dir){
$parent_dir = getcwd();
}
unless ($parent_dir =~ /\/$/){
$parent_dir =~ s/$/\//;
}
### OUTPUT DIR PATH
if (defined $output_dir){
unless ($output_dir eq ''){ # if the output dir has been passed on by Bismark and is an empty string we don't want to change it
unless ($output_dir =~ /\/$/){
$output_dir =~ s/$/\//;
}
}
}
else{
$output_dir = '';
}
### GENOME folder
if ($genome_folder){
unless ($genome_folder =~/\/$/){
$genome_folder =~ s/$/\//;
}
}
else{
die "Please specify a genome folder to proceed (full path only)\n";
}
## PATH TO SAMTOOLS
if (defined $samtools_path){
# if Samtools was specified as full command
if ($samtools_path =~ /samtools$/){
if (-e $samtools_path){
# Samtools executable found
}
else{
die "Could not find an installation of Samtools at the location $samtools_path. Please respecify\n";
}
}
else{
unless ($samtools_path =~ /\/$/){
$samtools_path =~ s/$/\//;
}
$samtools_path .= 'samtools';
if (-e $samtools_path){
# Samtools executable found
}
else{
die "Could not find an installation of Samtools at the location $samtools_path. Please respecify\n";
}
}
}
# Check whether Samtools is in the PATH if no path was supplied by the user
else{
if (!system "which samtools >/dev/null 2>&1"){ # STDOUT is binned, STDERR is redirected to STDOUT. Returns 0 if Samtools is in the PATH
$samtools_path = `which samtools`;
chomp $samtools_path;
}
}
return ($output_dir,$genome_folder,$parent_dir,$samtools_path,$genomic_composition_only);
}
####
####
sub read_genome_into_memory{
## reading in and storing the specified genome in the %chromosomes hash
chdir ($genome_folder) or die "Can't move to $genome_folder: $!";
warn "Now reading in and storing sequence information of the genome specified in: $genome_folder\n\n";
my @chromosome_filenames = <*.fa>;
### if there aren't any genomic files with the extension .fa we will look for files with the extension .fa.gz
unless (@chromosome_filenames){
@chromosome_filenames = <*.fa.gz>;
}
### if there aren't any genomic files with the extension .fa or .fq.gz we will look for files with the extension .fasta
unless (@chromosome_filenames){
@chromosome_filenames = <*.fasta>;
}
### if there aren't any genomic files with the extension .fa or .fa.gz or .fasta we will look for files with the extension .fasta.gz
unless (@chromosome_filenames){
@chromosome_filenames = <*.fasta.gz>;
}
unless (@chromosome_filenames){
die "The specified genome folder $genome_folder does not contain any sequence files in FastA format (with .fa or .fasta file extensions)\n";
}
foreach my $chromosome_filename (@chromosome_filenames){
# skipping the tophat entire mouse genome fasta file
next if ($chromosome_filename eq 'Mus_musculus.NCBIM37.fa');
if( $chromosome_filename =~ /\.gz$/){
open (CHR_IN,"gunzip -c $chromosome_filename |") or die "Failed to read from sequence file $chromosome_filename $!\n";
}
else{
open (CHR_IN,$chromosome_filename) or die "Failed to read from sequence file $chromosome_filename $!\n";
}
### first line needs to be a fastA header
my $first_line = <CHR_IN>;
chomp $first_line;
$first_line =~ s/\r//; # removing /r carriage returns
### Extracting chromosome name from the FastA header
my $chromosome_name = extract_chromosome_name($first_line);
my $sequence;
while (<CHR_IN>){
chomp;
$_ =~ s/\r//; # removing /r carriage returns
if ($_ =~ /^>/){
### storing the previous chromosome in the %chromosomes hash, only relevant for Multi-Fasta-Files (MFA)
if (exists $chromosomes{$chromosome_name}){
warn "chr $chromosome_name (",length $sequence ," bp)\n";
die "Exiting because chromosome name already exists. Please make sure all chromosomes have a unique name!\n";
}
else {
if (length($sequence) == 0){
warn "Chromosome $chromosome_name in the multi-fasta file $chromosome_filename did not contain any sequence information!\n";
}
warn "chr $chromosome_name (",length $sequence ," bp)\n";
$chromosomes{$chromosome_name} = $sequence;
$processed{$chromosome_name} = 0; # processed chromosomes will be set to 1 later to allow a record of which chromosome has been processed
}
### resetting the sequence variable
$sequence = '';
### setting new chromosome name
$chromosome_name = extract_chromosome_name($_);
}
else{
$sequence .= uc$_;
}
}
if (exists $chromosomes{$chromosome_name}){
warn "chr $chromosome_name (",length $sequence ," bp)\t";
die "Exiting because chromosome name already exists. Please make sure all chromosomes have a unique name.\n";
}
else{
if (length($sequence) == 0){
warn "Chromosome $chromosome_name in the file $chromosome_filename did not contain any sequence information!\n";
}
warn "chr $chromosome_name (",length $sequence ," bp)\n";
$chromosomes{$chromosome_name} = $sequence;
$processed{$chromosome_name} = 0; # processed chromosomes will be set to 1 later to allow a record of which chromosome has been processed
}
}
warn "\n";
chdir $parent_dir or die "Failed to move to directory $parent_dir\n";
}
sub extract_chromosome_name {
## Bowtie extracts the first string after the inition > in the FASTA file, so we are doing this as well
my $fasta_header = shift;
if ($fasta_header =~ s/^>//){
my ($chromosome_name) = split (/\s+/,$fasta_header);
return $chromosome_name;
}
else{
die "The specified chromosome ($fasta_header) file doesn't seem to be in FASTA format as required!\n";
}
}
sub print_helpfile{
warn <<EOF
SYNOPSIS:
This script bam2nuc reads BAM files and calculates the mono- and di-nucleotide coverage of the
reads (using the genomic sequence rather than the observed sequence in the reads themselves)
and compares it to the average genomic sequence composition. Reads harbouring InDels are not
taken into consideration. Mono- or Dinucleotides containing Ns are ignored as well.
bam2nuc handles both Bismark single-end and paired-end files (determined automatically). Both
BAM and CRAM files should work as input, but please note that Samtools version 1.2 or higher is
required for CRAM files.
USAGE: bam2nuc [options] --genome_folder <path> [input.(bam|cram)]
--dir Output directory. Output is written to the current directory if not specified explicitly.
--genome_folder <path> Enter the genome folder you wish to use to extract sequences from (full path only). Accepted
formats are FastA files ending with '.fa' or '.fasta', or their gzipped versions (ending in .gz).
Specifying a genome folder path is mandatory.
--samtools_path The path to your Samtools installation, e.g. /home/user/samtools/. Does not need to be specified
explicitly if Samtools is in the PATH already
--genomic_composition_only Only calculate and extract the genomic sequence composition and exit thereafter. This option will
attempt to write the genomic composition table 'genomic_nucleotide_frequencies.txt' to the genome
folder or to the output directory instead if that doesn't succeed.
--help Displays this help message and exits
GENOMIC composition
===================
Since the calculation of the average genomic (di-)nucleotide composition may take a while bam2nuc attempts to
write out a file called 'genomic_nucleotide_frequencies.txt' to the genome folder if it wasn't there already. The
next time bam2nuc is run it will then use this file instead of calculating the average genome composition again. If
writing to the genome folder fails (e.g. because of permission issues) it will be written out to the output directory
instead.
OUTPUT FORMAT
=============
bam2nuc writes out a file ending in .nucleotide_stats.txt in the following format (tab-delimited):
(di-)nucleotide count sample percent sample count genomic percent genomic coverage
A 14541 30.91 3768086 30.98 0.004
C 8893 18.90 2321832 19.09 0.004
G 9019 19.17 2318192 19.06 0.004
T 14597 31.02 3754886 30.87 0.004
AA 5008 10.86 1321485 10.86 0.004
AC 2355 5.11 639783 5.26 0.004
AG 2692 5.84 709163 5.83 0.004
AT 4191 9.09 1097652 9.02 0.004
CA 2912 6.32 786744 6.47 0.004
CC 1812 3.93 473900 3.90 0.004
CG 1341 2.91 355535 2.92 0.004
CT 2659 5.77 705653 5.80 0.004
GA 2903 6.30 756411 6.22 0.004
GC 1724 3.74 453607 3.73 0.004
GG 1817 3.94 470732 3.87 0.004
GT 2402 5.21 637436 5.24 0.004
TA 3419 7.42 903441 7.43 0.004
TC 2823 6.12 754531 6.20 0.004
TG 2996 6.50 782761 6.44 0.004
TT 5055 10.96 1314144 10.80 0.004
This file is picked up and plotted by bismark2report automatically if found in the folder.
Script last modified: 26 April 2018
EOF
;
exit 1;
}