Skip to content

Commit

Permalink
Merge branch 'main' into apiMinimial
Browse files Browse the repository at this point in the history
  • Loading branch information
Koeng101 authored Mar 2, 2024
2 parents ea97474 + d0607bb commit e56178a
Show file tree
Hide file tree
Showing 15 changed files with 301 additions and 182 deletions.
27 changes: 0 additions & 27 deletions .github/workflows/stale.yml

This file was deleted.

1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [Unreleased]
- Updated dual barcodes primer sets to be created without csv files [#67](https://github.com/Koeng101/dnadesign/pull/67)
- Added workers to bio as a way to process data [#62](https://github.com/Koeng101/dnadesign/pull/62)
- Improved megamash efficiency and added []Match JSON conversion [#61](https://github.com/Koeng101/dnadesign/pull/61)
- Added barcoding functionality for sequencing reads [#59](https://github.com/Koeng101/dnadesign/pull/59)
Expand Down
3 changes: 1 addition & 2 deletions external/minimap2/minimap2.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,8 +82,7 @@ func Minimap2(templateFastaInput io.Reader, fastqInput io.Reader, w io.Writer) e
}

// Minimap2Channeled uses channels rather than io.Readers and io.Writers.
func Minimap2Channeled(fastaTemplates io.Reader, fastqChan <-chan fastq.Read, samChan chan<- sam.Alignment) error {
ctx := context.Background()
func Minimap2Channeled(ctx context.Context, fastaTemplates io.Reader, fastqChan <-chan fastq.Read, samChan chan<- sam.Alignment) error {
g, ctx := errgroup.WithContext(ctx)

// Create a pipe for writing fastq reads and reading them as an io.Reader
Expand Down
45 changes: 40 additions & 5 deletions external/samtools/samtools.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"os/exec"
"syscall"

"github.com/koeng101/dnadesign/lib/bio/sam"
"golang.org/x/sync/errgroup"
)

Expand All @@ -22,7 +23,7 @@ import (
// The first samtools view removes unmapped sequences, the sort sorts the
// sequences for piping into pileup, and the final command builds the pileup
// file.
func Pileup(templateFastas io.Reader, samAlignments io.Reader, w io.Writer) error {
func Pileup(ctx context.Context, templateFastas io.Reader, samAlignments io.Reader, w io.Writer) error {
/*
Due to how os/exec works in Go, we can't directly have pipes as if
the whole thing was a script. However, we can attach pipes to each
Expand All @@ -49,7 +50,7 @@ func Pileup(templateFastas io.Reader, samAlignments io.Reader, w io.Writer) erro
}
tmpFile.Close() // Close the file as it's no longer needed

g, ctx := errgroup.WithContext(context.Background())
g, ctx := errgroup.WithContext(ctx)

// Setup pipe connections between commands
viewSortReader, viewSortWriter := io.Pipe()
Expand All @@ -73,7 +74,7 @@ func Pileup(templateFastas io.Reader, samAlignments io.Reader, w io.Writer) erro

select {
case <-ctx.Done():
viewCmd.Process.Signal(syscall.SIGTERM)
_ = viewCmd.Process.Signal(syscall.SIGTERM)
return ctx.Err()
default:
return viewCmd.Wait()
Expand All @@ -93,7 +94,7 @@ func Pileup(templateFastas io.Reader, samAlignments io.Reader, w io.Writer) erro

select {
case <-ctx.Done():
sortCmd.Process.Signal(syscall.SIGTERM)
_ = sortCmd.Process.Signal(syscall.SIGTERM)
return ctx.Err()
default:
return sortCmd.Wait()
Expand All @@ -111,7 +112,7 @@ func Pileup(templateFastas io.Reader, samAlignments io.Reader, w io.Writer) erro

select {
case <-ctx.Done():
mpileupCmd.Process.Signal(syscall.SIGTERM)
_ = mpileupCmd.Process.Signal(syscall.SIGTERM)
return ctx.Err()
default:
return mpileupCmd.Wait()
Expand All @@ -125,3 +126,37 @@ func Pileup(templateFastas io.Reader, samAlignments io.Reader, w io.Writer) erro

return nil
}

// PileupChanneled processes SAM alignments from a channel and writes the
// resulting pileup to w.
//
// Alignments received on samChan are serialized with their WriteTo method
// into an in-memory pipe, and Pileup is run with the read end of that pipe as
// its SAM input. templateFastas and w are handed to Pileup unchanged.
//
// The function returns once samChan has been closed (or ctx is cancelled) and
// Pileup has finished. The returned error is the first non-nil error produced
// by either the serializing goroutine or Pileup.
func PileupChanneled(ctx context.Context, templateFastas io.Reader, samChan <-chan sam.Alignment, w io.Writer) error {
	g, ctx := errgroup.WithContext(ctx)

	// Create a pipe for writing SAM alignments and reading them as an io.Reader
	samPr, samPw := io.Pipe()

	// Goroutine to consume SAM alignments and write them to the PipeWriter
	g.Go(func() (err error) {
		// Hand the outcome to the read side: a nil err reads as a normal EOF,
		// while a failure surfaces as a read error rather than looking like a
		// clean end of input.
		defer func() { _ = samPw.CloseWithError(err) }()
		for {
			select {
			case <-ctx.Done():
				// Pileup failed or the caller cancelled; stop waiting on
				// samChan so g.Wait() can return.
				return ctx.Err()
			case alignment, ok := <-samChan:
				if !ok {
					return nil // channel closed: all alignments were written
				}
				if _, err = alignment.WriteTo(samPw); err != nil {
					return err // return error to be handled by errgroup
				}
			}
		}
	})

	// Run Pileup function in a goroutine
	g.Go(func() error {
		err := Pileup(ctx, templateFastas, samPr, w)
		// Close the read end so a writer blocked in samPw.Write is released
		// even when Pileup stopped before draining the pipe.
		_ = samPr.CloseWithError(err)
		return err
	})

	// Wait returns the first non-nil error from the group, or nil when both
	// goroutines finished successfully.
	return g.Wait()
}
4 changes: 3 additions & 1 deletion external/samtools/samtools_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package samtools_test

import (
"bytes"
"context"
"os"
"testing"

Expand All @@ -28,7 +29,8 @@ func TestPileup(t *testing.T) {
var buf bytes.Buffer

// Execute the pileup function
err = samtools.Pileup(templateFile, samFile, &buf)
ctx := context.Background()
err = samtools.Pileup(ctx, templateFile, samFile, &buf)
if err != nil {
t.Errorf("Pileup returned error: %s", err)
}
Expand Down
2 changes: 1 addition & 1 deletion go.work
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,6 @@ go 1.22.0

use (
./api
./external
./external
./lib
)
4 changes: 3 additions & 1 deletion lib/align/megamash/megamash.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ package megamash
import (
"encoding/json"
"fmt"
"strings"

"github.com/koeng101/dnadesign/lib/bio/fasta"
"github.com/koeng101/dnadesign/lib/transform"
Expand All @@ -19,6 +20,7 @@ import (
// StandardizedDNA returns the alphabetically lesser strand of a double
// stranded DNA molecule.
func StandardizedDNA(sequence string) string {
sequence = strings.ToUpper(sequence)
var deterministicSequence string
reverseComplement := transform.ReverseComplement(sequence)
if sequence > reverseComplement {
Expand All @@ -32,7 +34,7 @@ func StandardizedDNA(sequence string) string {
var (
DefaultKmerSize uint = 16
DefaultMinimalKmerCount uint = 10
DefaultScoreThreshold float64 = 0.2
DefaultScoreThreshold float64 = 0.5
)

type MegamashMap struct {
Expand Down
17 changes: 9 additions & 8 deletions lib/align/megamash/megamash_test.go
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
package megamash
package megamash_test

import (
"testing"

"github.com/koeng101/dnadesign/lib/align/megamash"
"github.com/koeng101/dnadesign/lib/bio/fasta"
)

Expand All @@ -12,7 +13,7 @@ func TestMegamash(t *testing.T) {
oligo3 := "CCGTGCGACAAGATTTCAAGGGTCTCTCTTCTATCGCAGCCAAGGAAGAAGGTGTATCTCTAGAGAAGCGTCGAGTGAGACCCGGATCGAACTTAGGTAGCCCCCTTCGAAGTGGCTCTGTCTGATCCTCCGCGGATGGCGACACCATCGGACTGAGGATATTGGCCACA"

samples := []string{"TTTTGTCTACTTCGTTCCGTTGCGTATTGCTAAGGTTAAGACTACTTTCTGCCTTTGCGAGACGGCGCCTCCGTGCGACGAGATTTCAAGGGTCTCTGTGCTATATTGCCGCTAGTTCCGCTCTAGCTGCTCCAGTTAATACTACTACTGAAGATGAATTGGAGGGTGACTTCGATGTTGCTGTTCTGCCTTTTTCCGCTTCTGAGACCCAGATCGACTTTTAGATTCCTCAGGTGCTGTTCTCGCAAAGGCAGAAAGTAGTCTTAACCTTAGCAATACGTGG", "TGTCCTTTACTTCGTTCAGTTACGTATTGCTAAGGTTAAGACTACTTTCTGCCTTTGCGAGAACAGCACCTCTGCTAGGGGCTACTTATCGGGTCTCTAGTTCCGCTCTAGCTGCTCCAGTTAATACTACTACTGAAGATGAATTGGAGGGTGACTTCGATGTTGCTGTTCTGCCTTTTTCCGCTTCTATCTGAGACCGAAGTGGTTTGCCTAAACGCAGGTGCTGTTGGCAAAGGCAGAAAGTAGTCTTAACCTTGACAATGAGTGGTA", "GTTATTGTCGTCTCCTTTGACTCAGCGTATTGCTAAGGTTAAGACTACTTTCTGCCTTTGCGAGAACAGCACCTCTGCTAGGGGCTGCTGGGTCTCTAGTTCCGCTCTAGCTGCTCCAGTTAATACTACTACTGAAGATGAATTGGAGGGTGACTTCGATGTTGCTGTTCTGCCTTTTCCGCTTCTATCTGAGACCGAAGTGGTTAT", "TGTTCTGTACTTCGTTCAGTTACGTATTGCTAAGGTTAAGACTACTTCTGCCTTAGAGACCACGCCTCCGTGCGACAAGATTCAAGGGTCTCTGTGCTCTGCCGCTAGTTCCGCTCTAGCTGCTCCGGTATGCATCTACTACTGAAGATGAATTGGAGGGTGACTTCGATGTTGCTGCTGTTCTGCCTTTTTCCGCTTCTGAGACCCGGATCGAACTTAGGTAGCCAGGTGCTGTTCTCGCAAAGGCAGAAAGTAGTCTTAACCTTAGCAACTGTTGGTT"}
m, err := NewMegamashMap([]fasta.Record{{Sequence: oligo1, Identifier: "oligo1"}, {Sequence: oligo2, Identifier: "oligo2"}, {Sequence: oligo3, Identifier: "oligo3"}}, DefaultKmerSize, DefaultMinimalKmerCount, DefaultScoreThreshold)
m, err := megamash.NewMegamashMap([]fasta.Record{{Sequence: oligo1, Identifier: "oligo1"}, {Sequence: oligo2, Identifier: "oligo2"}, {Sequence: oligo3, Identifier: "oligo3"}}, megamash.DefaultKmerSize, megamash.DefaultMinimalKmerCount, megamash.DefaultScoreThreshold)
if err != nil {
t.Errorf("Failed to make NewMegamashMap: %s", err)
}
Expand All @@ -31,8 +32,8 @@ func BenchmarkMegamash(b *testing.B) {
oligo3 := "CCGTGCGACAAGATTTCAAGGGTCTCTCTTCTATCGCAGCCAAGGAAGAAGGTGTATCTCTAGAGAAGCGTCGAGTGAGACCCGGATCGAACTTAGGTAGCCCCCTTCGAAGTGGCTCTGTCTGATCCTCCGCGGATGGCGACACCATCGGACTGAGGATATTGGCCACA"

samples := []string{"TTTTGTCTACTTCGTTCCGTTGCGTATTGCTAAGGTTAAGACTACTTTCTGCCTTTGCGAGACGGCGCCTCCGTGCGACGAGATTTCAAGGGTCTCTGTGCTATATTGCCGCTAGTTCCGCTCTAGCTGCTCCAGTTAATACTACTACTGAAGATGAATTGGAGGGTGACTTCGATGTTGCTGTTCTGCCTTTTTCCGCTTCTGAGACCCAGATCGACTTTTAGATTCCTCAGGTGCTGTTCTCGCAAAGGCAGAAAGTAGTCTTAACCTTAGCAATACGTGG", "TGTCCTTTACTTCGTTCAGTTACGTATTGCTAAGGTTAAGACTACTTTCTGCCTTTGCGAGAACAGCACCTCTGCTAGGGGCTACTTATCGGGTCTCTAGTTCCGCTCTAGCTGCTCCAGTTAATACTACTACTGAAGATGAATTGGAGGGTGACTTCGATGTTGCTGTTCTGCCTTTTTCCGCTTCTATCTGAGACCGAAGTGGTTTGCCTAAACGCAGGTGCTGTTGGCAAAGGCAGAAAGTAGTCTTAACCTTGACAATGAGTGGTA", "GTTATTGTCGTCTCCTTTGACTCAGCGTATTGCTAAGGTTAAGACTACTTTCTGCCTTTGCGAGAACAGCACCTCTGCTAGGGGCTGCTGGGTCTCTAGTTCCGCTCTAGCTGCTCCAGTTAATACTACTACTGAAGATGAATTGGAGGGTGACTTCGATGTTGCTGTTCTGCCTTTTCCGCTTCTATCTGAGACCGAAGTGGTTAT", "TGTTCTGTACTTCGTTCAGTTACGTATTGCTAAGGTTAAGACTACTTCTGCCTTAGAGACCACGCCTCCGTGCGACAAGATTCAAGGGTCTCTGTGCTCTGCCGCTAGTTCCGCTCTAGCTGCTCCGGTATGCATCTACTACTGAAGATGAATTGGAGGGTGACTTCGATGTTGCTGCTGTTCTGCCTTTTTCCGCTTCTGAGACCCGGATCGAACTTAGGTAGCCAGGTGCTGTTCTCGCAAAGGCAGAAAGTAGTCTTAACCTTAGCAACTGTTGGTT"}
m, _ := NewMegamashMap([]fasta.Record{{Sequence: oligo1, Identifier: "oligo1"}, {Sequence: oligo2, Identifier: "oligo2"}, {Sequence: oligo3, Identifier: "oligo3"}},
DefaultKmerSize, DefaultMinimalKmerCount, DefaultScoreThreshold)
m, _ := megamash.NewMegamashMap([]fasta.Record{{Sequence: oligo1, Identifier: "oligo1"}, {Sequence: oligo2, Identifier: "oligo2"}, {Sequence: oligo3, Identifier: "oligo3"}},
megamash.DefaultKmerSize, megamash.DefaultMinimalKmerCount, megamash.DefaultScoreThreshold)
for _, sample := range samples {
_ = m.Match(sample)
}
Expand All @@ -41,24 +42,24 @@ func BenchmarkMegamash(b *testing.B) {

func TestMatchesConversion(t *testing.T) {
// Initial slice of Match structs
matches := []Match{
matches := []megamash.Match{
{"match1", 90.1},
{"match2", 85.5},
}
// Convert matches to JSON string
jsonStr, err := MatchesToJSON(matches)
jsonStr, err := megamash.MatchesToJSON(matches)
if err != nil {
t.Fatalf("MatchesToJSON failed with error: %v", err)
}

// Convert JSON string back to slice of Match structs
convertedMatches, err := JSONToMatches(jsonStr)
convertedMatches, err := megamash.JSONToMatches(jsonStr)
if err != nil {
t.Fatalf("JSONToMatches failed with error: %v", err)
}

// Convert the convertedMatches back to JSON to compare strings
convertedJSONStr, err := MatchesToJSON(convertedMatches)
convertedJSONStr, err := megamash.MatchesToJSON(convertedMatches)
if err != nil {
t.Fatalf("MatchesToJSON failed with error: %v", err)
}
Expand Down
35 changes: 26 additions & 9 deletions lib/bio/fastq/fastq.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import (
"errors"
"fmt"
"io"
"sort"
"strings"
)

Expand All @@ -40,6 +41,17 @@ type Read struct {
Quality string `json:"quality"`
}

// DeepCopy returns a copy of the read whose Optionals map is freshly
// allocated. This lets the copy's optionals be modified (for example before
// piping the read elsewhere) without touching the map held by the original.
func (read *Read) DeepCopy() Read {
	// Always start from a new, non-nil map, even when the source has none.
	optionals := make(map[string]string)
	for name, content := range read.Optionals {
		optionals[name] = content
	}
	return Read{
		Identifier: read.Identifier,
		Optionals:  optionals,
		Sequence:   read.Sequence,
		Quality:    read.Quality,
	}
}

// Header is an empty struct with no fields. It exists only for compatibility
// with bio parsers and carries no data.
type Header struct{}

Expand Down Expand Up @@ -161,7 +173,11 @@ func (parser *Parser) Next() (Read, error) {
if len(line) <= 1 { // newline delimiter - actually checking for empty line
return Read{}, fmt.Errorf("empty quality sequence for %q, got to line %d: %w", seqIdentifier, parser.line, err)
}
quality = string(line[:len(line)-1])
if parser.atEOF {
quality = string(line)
} else {
quality = string(line[:len(line)-1])
}

// Parsing ended. Check for inconsistencies.
if lookingForIdentifier {
Expand All @@ -179,12 +195,6 @@ func (parser *Parser) Next() (Read, error) {
return fastq, nil
}

// Reset discards all data in buffer and resets state.
//
// It calls Reset(r) on the parser's underlying reader so that subsequent
// reads come from r, and sets the parser's line counter back to zero.
func (parser *Parser) Reset(r io.Reader) {
// Point the underlying reader at the new input source.
parser.reader.Reset(r)
// Restart line counting; parser.line is the line number reported in parse errors.
parser.line = 0
}

/******************************************************************************
Start of Write functions
Expand All @@ -200,8 +210,15 @@ func (read *Read) WriteTo(w io.Writer) (int64, error) {
if err != nil {
return writtenBytes, err
}
for key, val := range read.Optionals {
newWrittenBytes, err = fmt.Fprintf(w, " %s=%s", key, val)
keys := make([]string, len(read.Optionals))
i := 0
for key := range read.Optionals {
keys[i] = key
i++
}
sort.Strings(keys)
for _, key := range keys {
newWrittenBytes, err = fmt.Fprintf(w, " %s=%s", key, read.Optionals[key])
writtenBytes += int64(newWrittenBytes)
if err != nil {
return writtenBytes, err
Expand Down
1 change: 1 addition & 0 deletions lib/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,6 @@ go 1.22.0

require (
github.com/google/go-cmp v0.6.0
github.com/koeng101/dnadesign/external v0.0.0-20240213205901-f4998ef84117
golang.org/x/sync v0.5.0
)
2 changes: 2 additions & 0 deletions lib/go.sum
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/koeng101/dnadesign/external v0.0.0-20240213205901-f4998ef84117 h1:MLWgADbigSsAmDP3yG93ESlN0Ek9QLtH5uHigmWVXwg=
github.com/koeng101/dnadesign/external v0.0.0-20240213205901-f4998ef84117/go.mod h1:nb80z/jm5HMCxfNZ50cBJa5TffkXxpY9okvqnBj8RrM=
golang.org/x/sync v0.5.0 h1:60k92dhOjHxJkrqnwsfl8KuaHbn/5dl0lUPUklKo3qE=
golang.org/x/sync v0.5.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
38 changes: 38 additions & 0 deletions lib/sequencing/barcoding/barcoding.go
Original file line number Diff line number Diff line change
Expand Up @@ -173,10 +173,48 @@ type DualBarcodePrimerSet struct {

// DualBarcode pairs a forward barcode with a reverse barcode, together with
// the name the pair is known by.
type DualBarcode struct {
	Name             string // name identifying this barcode pair
	Forward, Reverse string // the forward and the reverse barcode
}

// DualBarcodesToPrimerSet converts a list of dual barcodes into a dual
// barcode primer set.
//
// For every input barcode, BarcodeMap maps its Name to a DualBarcode holding
// only the Forward and Reverse values (Name left empty), and
// ReverseBarcodeMap maps that same value back to the Name. ForwardBarcodes
// and ReverseBarcodes hold the distinct forward and reverse barcodes, each
// sorted in ascending string order.
func DualBarcodesToPrimerSet(dualBarcodes []DualBarcode) DualBarcodePrimerSet {
	primerSet := DualBarcodePrimerSet{
		BarcodeMap:        make(map[string]DualBarcode),
		ReverseBarcodeMap: make(map[DualBarcode]string),
	}
	// Sets used to de-duplicate the barcodes seen on each side.
	seenForward := make(map[string]bool)
	seenReverse := make(map[string]bool)

	for _, entry := range dualBarcodes {
		seenForward[entry.Forward] = true
		seenReverse[entry.Reverse] = true

		// The stored pair deliberately omits the name so that it can be used
		// as a lookup key built from the two barcodes alone.
		pair := DualBarcode{Forward: entry.Forward, Reverse: entry.Reverse}
		primerSet.BarcodeMap[entry.Name] = pair
		primerSet.ReverseBarcodeMap[pair] = entry.Name
	}

	// sortedKeys flattens a set into a slice and sorts it, which makes the
	// output independent of map iteration order.
	sortedKeys := func(set map[string]bool) []string {
		keys := make([]string, 0, len(set))
		for key := range set {
			keys = append(keys, key)
		}
		sort.Strings(keys)
		return keys
	}

	primerSet.ForwardBarcodes = sortedKeys(seenForward)
	primerSet.ReverseBarcodes = sortedKeys(seenReverse)
	return primerSet
}

// ParseDualPrimerSet parses a csv file into a DualBarcodePrimerSet.
func ParseDualPrimerSet(csvFile io.Reader) (DualBarcodePrimerSet, error) {
var result DualBarcodePrimerSet
Expand Down
Loading

0 comments on commit e56178a

Please sign in to comment.