Skip to content

Commit

Permalink
2.7.0
Browse files Browse the repository at this point in the history
  • Loading branch information
telatin committed Mar 29, 2022
1 parent 9a5cf64 commit cc44e64
Show file tree
Hide file tree
Showing 4 changed files with 49 additions and 27 deletions.
2 changes: 1 addition & 1 deletion bamtocov.nimble
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Package

version = "2.6.1"
version = "2.7.0"
author = "Andrea Telatin, Giovanni Birolo"
description = "BAM to Coverage"
license = "MIT"
Expand Down
6 changes: 6 additions & 0 deletions docs/5_history.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,12 @@ permalink: /history
This project extends [covtobed](https://github.com/telatin/covtobed),
reimplementing the core algorithm in Nim.

* 2.7.0
* **bamtocounts** now supports `--paired` to count fragments
* **bamtocounts** fixed a bug that ignored the user flag to exclude
* [**EXPERIMENTAL**] testing `--extendRead INT` in bamtocov #6
* 2.6.0
* **bamtocounts** algorithm rewritten to accept unsorted and non-indexed files. the requirement for indexed files will be permanently dropped, for consistency across the suite, while we recommend using sorted files, and, in the future, this requirement might be added again to allow for performance gains.
* 2.5.0
* **BamToCounts** rewritten with the target engine of BamToCov
* 2.4.0
Expand Down
57 changes: 33 additions & 24 deletions src/bamtocounts.nim
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ setControlCHook(handler)

var
do_strict = false
do_paired = false
do_norm = false
debug = false
verbose = false
Expand Down Expand Up @@ -68,7 +69,10 @@ proc countsToString(c: stranded_counts, stranded: bool): string =
else:
$(c.fwd + c.rev)

proc alignments_count(table: var OrderedTable[string, stranded_counts], bam:Bam, mapq:uint8, eflag:uint16, regions: target_t, strict = false): float =
type
counts_t* = TableRef[interval_t[string], int]

proc makeCountsTable(table: var OrderedTable[string, stranded_counts], bam:Bam, mapq:uint8, eflag:uint16, regions: target_t, strict = false): float =
var total: float = 0
for read in bam:
total += 1
Expand All @@ -85,6 +89,27 @@ proc alignments_count(table: var OrderedTable[string, stranded_counts], bam:Bam,
table[region.label].inc(read.flag.reverse)
return total / 1000000


proc alignments_count(table: var OrderedTable[string, stranded_counts], bam:Bam, mapq:uint8, eflag:uint16, regions: target_t, strict = false, paired = false): float =
var total: float = 0
for read in bam:
total += 1
if read.tid notin regions:
continue
if read.mapping_quality == 0 or ( (read.flag and eflag) != 0):
continue
if paired and (read.flag.proper_pair == false or read.flag.read2 == true):
continue


for region in regions[read.tid]:

if (read.start < region.start and read.stop > region.stop) or (read.stop > region.start and read.stop < region.stop) or (read.start > region.start and read.start < region.stop):
if strict and ( read.start < region.start or read.stop > region.stop ):
continue
table[region.label].inc(read.flag.reverse)
return total / 1000000

proc add(s: var feature_coords, z: feature_coords) =
# feature_coords = tuple[chrom, starts, stops, name: string]
s.chrom &= ";" & z.chrom
Expand Down Expand Up @@ -113,9 +138,10 @@ Arguments:
Options:
-T, --threads <threads> BAM decompression threads [default: 0]
-r, --fasta <fasta> FASTA file for use with CRAM files [default: $env_fasta].
-r, --fasta <fasta> FASTA file for use with CRAM files [default: $env_fasta]
-F, --flag <FLAG> Exclude reads with any of the bits in FLAG set [default: 1796]
-Q, --mapq <mapq> Mapping quality threshold [default: 0]
--paired Count read pairs rather than single reads
--strict Read must be contained, not just overlap, with feature
--stranded Print strand-specific counts
--coords Also print coordinates of each feature
Expand Down Expand Up @@ -147,6 +173,7 @@ Options:
do_strand = bool(args["--stranded"])
do_coords = bool(args["--coords"])
do_strict = bool(args["--strict"])
do_paired = bool(args["--paired"])
debug = bool(args["--debug"])
verbose = bool(args["--verbose"])
gffIdentifier = $args["--id"]
Expand Down Expand Up @@ -265,7 +292,9 @@ Options:

if debug:
stderr.writeLine("\\/ Target regions: ", len(targetCounts))
let perMillion = targetCounts.alignments_count(bam, uint8(mapq), eflag, cookedTarget, do_strict)

## GATHER THE COUNTS
let perMillion = targetCounts.alignments_count(bam, uint8(mapq), eflag, cookedTarget, do_strict, do_paired)
if debug:
stderr.writeLine("/\\ Counts done: ", perMillion)

Expand All @@ -281,27 +310,7 @@ Options:
else: ""
echo feature, "\t", coords, counts, rpkm, norm

#[
# Print table
var tableKeys = newSeq[target_feature]()
for j in keys(countsTable):
tableKeys.add(j)
#tableKeys.sort(targetSort)
for feat in tableKeys:
var fields = ""
let featCoords = if do_coords: #[feat.cid] & "\t" & $feat.start & "\t" & $feat.stop & "\t"
else: ""
if do_rpkm#:
let
kb : float = (feat.stop - feat.start ) / 1000
RPKM : float = float(counts(countsTable[feat])) / alignmentsPerMillion / kb
fields &= "\t" & $RPKM.formatBiggestFloat(ffDecimal, digitsPrecision)
if do_norm:
let normal = counts(countsTable[feat]) / (feat.stop - feat.start)
fields &= "\t" & $normal.formatBiggestFloat(ffDecimal, digitsPrecision)
echo featCoords, "\t", feat.feature, "\t", counts(countsTable[feat]), fields ]#
return 0
]#


when isMainModule:
var args = commandLineParams()
Expand Down
11 changes: 9 additions & 2 deletions src/bamtocov.nim
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ proc `$`[T](i: genomic_interval_t[T]): string =


type
input_option_t = tuple[min_mapping_quality: uint8, eflag: uint16, physical: bool, target: raw_target_t]
input_option_t = tuple[min_mapping_quality: uint8, eflag: uint16, physical: bool, extendFrag: int, target: raw_target_t]

proc alignment_stream(bam: Bam, opts: input_option_t, target: target_t): iterator (): genomic_interval_t[bool] =
result = iterator(): genomic_interval_t[bool] {.closure.} =
Expand Down Expand Up @@ -244,7 +244,12 @@ proc coverage_iter(bam: Bam, opts: input_option_t, target: target_t): iterator()

# increment coverage with aln starting here
while more_alignments_for_ref and (next_change == aln.start):
coverage_ends.push(covEnd(stop: aln.stop, reverse: aln.label))
# EXP: --extendRead INT
let readEnd = if opts.extendFrag > 0:
if aln.start + opts.extendFrag > reflen: reflen
else: aln.start + opts.extendFrag
else: aln.stop
coverage_ends.push(covEnd(stop: readEnd, reverse: aln.label))
cov.inc(aln.label)
if debug: stderr.writeLine("Added aln: " & $aln)

Expand Down Expand Up @@ -623,6 +628,7 @@ BAM reading options:
-Q, --mapq <mapq> Mapping quality threshold [default: 0]
Other options:
--extendReads INT [Experimental] artificially extend reads by INT bases [default: 0]
--debug Enable diagnostics
-h, --help Show help
""" % ["version", version])
Expand Down Expand Up @@ -685,6 +691,7 @@ Other options:
min_mapping_quality: uint8(parse_int($args["--mapq"])),
eflag: uint16(parse_int($args["--flag"])),
physical: bool(args["--physical"]),
extendFrag: parse_int($args["--extendReads"]),
target: if format_gff: gff_to_table(target_file, gffField, gffSeparator, gffIdentifier)
elif format_gtf: gtf_to_table(target_file, gffField, gffSeparator, gffIdentifier)
else: bed_to_table(target_file)
Expand Down

0 comments on commit cc44e64

Please sign in to comment.