Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add --post option to split / split2 subcommands #152

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 15 additions & 3 deletions seqkit/cmd/helper.go
Original file line number Diff line number Diff line change
Expand Up @@ -415,16 +415,28 @@ negative index 0-9-8-7-6-5-4-3-2-1
-12:-1 A C G T N a c g t n
`

func writeSeqs(records []*fastx.Record, file string, lineWidth int, quiet bool, dryRun bool) error {

// openWriter opens file for writing and returns the resulting writer.
//
// When post is empty the file is opened with xopen.Wopen. Otherwise it is
// opened with xopen.WopenPipe, which additionally receives post (the value of
// the --post flag, a post-processing shell command).
//
// Any error from opening is handed to checkError rather than returned to the
// caller; presumably checkError aborts on a non-nil error, so callers treat
// the returned writer as usable — confirm against checkError's definition.
func openWriter(file string, post string) *xopen.Writer {
	// No post-processing command requested: plain file writer.
	if post == "" {
		outfh, err := xopen.Wopen(file)
		checkError(err)
		return outfh
	}

	// A post-processing command was supplied: open the piped variant.
	outfh, err := xopen.WopenPipe(file, post)
	checkError(err)
	return outfh
}

func writeSeqs(records []*fastx.Record, file string, post string, lineWidth int, quiet bool, dryRun bool) error {
if !quiet {
log.Infof("write %d sequences to file: %s\n", len(records), file)
}
if dryRun {
return nil
}

outfh, err := xopen.Wopen(file)
checkError(err)
outfh := openWriter(file, post)
defer outfh.Close()

for _, record := range records {
Expand Down
32 changes: 14 additions & 18 deletions seqkit/cmd/split.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ Examples:
dryRun := getFlagBool(cmd, "dry-run")

outdir := getFlagString(cmd, "out-dir")
post := getFlagString(cmd, "post")
force := getFlagBool(cmd, "force")

file := files[0]
Expand Down Expand Up @@ -171,14 +172,14 @@ Examples:
records = append(records, record.Clone())
if len(records) == size {
outfile = filepath.Join(outdir, fmt.Sprintf("%s.part_%03d%s", filepath.Base(fileName), i, fileExt))
writeSeqs(records, outfile, config.LineWidth, quiet, dryRun)
writeSeqs(records, outfile, post, config.LineWidth, quiet, dryRun)
i++
records = []*fastx.Record{}
}
}
if len(records) > 0 {
outfile = filepath.Join(outdir, fmt.Sprintf("%s.part_%03d%s", filepath.Base(fileName), i, fileExt))
writeSeqs(records, outfile, config.LineWidth, quiet, dryRun)
writeSeqs(records, outfile, post, config.LineWidth, quiet, dryRun)
}

return
Expand Down Expand Up @@ -245,8 +246,7 @@ Examples:
if len(IDs) > 0 {
outfile = filepath.Join(outdir, fmt.Sprintf("%s.part_%03d%s", filepath.Base(fileName), n, fileExt))
if !dryRun {
outfh, err = xopen.Wopen(outfile)
checkError(err)
outfh = openWriter(file, post)
}
}
j := 0
Expand All @@ -273,8 +273,7 @@ Examples:
outfile = filepath.Join(outdir, fmt.Sprintf("%s.part_%03d%s", filepath.Base(fileName), n, fileExt))
if !dryRun {
outfh.Close()
outfh, err = xopen.Wopen(outfile)
checkError(err)
outfh = openWriter(file, post)
}
j = 0
}
Expand Down Expand Up @@ -338,14 +337,14 @@ Examples:
records = append(records, record)
if len(records) == size {
outfile = filepath.Join(outdir, fmt.Sprintf("%s.part_%03d%s", filepath.Base(fileName), i, fileExt))
writeSeqs(records, outfile, config.LineWidth, quiet, dryRun)
writeSeqs(records, outfile, post, config.LineWidth, quiet, dryRun)
i++
records = []*fastx.Record{}
}
}
if len(records) > 0 {
outfile = filepath.Join(outdir, fmt.Sprintf("%s.part_%03d%s", filepath.Base(fileName), i, fileExt))
writeSeqs(records, outfile, config.LineWidth, quiet, dryRun)
writeSeqs(records, outfile, post, config.LineWidth, quiet, dryRun)
}
return
}
Expand Down Expand Up @@ -423,8 +422,7 @@ Examples:
if len(IDs) > 0 {
outfile = filepath.Join(outdir, fmt.Sprintf("%s.part_%03d%s", filepath.Base(fileName), n, fileExt))
if !dryRun {
outfh, err = xopen.Wopen(outfile)
checkError(err)
outfh = openWriter(file, post)
}
}
j := 0
Expand All @@ -451,8 +449,7 @@ Examples:
outfile = filepath.Join(outdir, fmt.Sprintf("%s.part_%03d%s", filepath.Base(fileName), n, fileExt))
if !dryRun {
outfh.Close()
outfh, err = xopen.Wopen(outfile)
checkError(err)
outfh = openWriter(file, post)
}
j = 0
}
Expand Down Expand Up @@ -514,7 +511,7 @@ Examples:
outfile = filepath.Join(outdir, fmt.Sprintf("%s.id_%s%s",
filepath.Base(fileName),
pathutil.RemoveInvalidPathChars(id, "__"), fileExt))
writeSeqs(records, outfile, config.LineWidth, quiet, dryRun)
writeSeqs(records, outfile, post, config.LineWidth, quiet, dryRun)
}
return
}
Expand Down Expand Up @@ -610,8 +607,7 @@ Examples:
pathutil.RemoveInvalidPathChars(id, "__"), fileExt))

if !dryRun {
outfh, err = xopen.Wopen(outfile)
checkError(err)
outfh = openWriter(file, post)
for _, chr := range _IDs {
r, ok := faidx.Index[chr]
if !ok {
Expand Down Expand Up @@ -705,7 +701,7 @@ Examples:
var outfile string
for subseq, records := range recordsBySeqs {
outfile = filepath.Join(outdir, fmt.Sprintf("%s.region_%d:%d_%s%s", filepath.Base(fileName), start, end, subseq, fileExt))
writeSeqs(records, outfile, config.LineWidth, quiet, dryRun)
writeSeqs(records, outfile, post, config.LineWidth, quiet, dryRun)
}
return
}
Expand Down Expand Up @@ -810,8 +806,7 @@ Examples:
outfile := filepath.Join(outdir, fmt.Sprintf("%s.region_%d:%d_%s%s", filepath.Base(fileName), start, end, subseq, fileExt))

if !dryRun {
outfh, err = xopen.Wopen(outfile)
checkError(err)
outfh = openWriter(file, post)

for _, chr := range chrs {
r, ok := faidx.Index[chr]
Expand Down Expand Up @@ -855,6 +850,7 @@ func init() {
splitCmd.Flags().BoolP("dry-run", "d", false, "dry run, just print message and no files will be created.")
splitCmd.Flags().BoolP("keep-temp", "k", false, "keep tempory FASTA and .fai file when using 2-pass mode")
splitCmd.Flags().StringP("out-dir", "O", "", "output directory (default value is $infile.split)")
splitCmd.Flags().StringP("post", "P", "", "postprocess shell command ($FILE for formatted out-dir)")
splitCmd.Flags().BoolP("force", "f", false, "overwrite output directory")
}

Expand Down
11 changes: 5 additions & 6 deletions seqkit/cmd/split2.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ according to the input files.
}

outdir := getFlagString(cmd, "out-dir")
post := getFlagString(cmd, "post")
force := getFlagBool(cmd, "force")

if size == 0 && parts == 0 && length == 0 {
Expand Down Expand Up @@ -244,8 +245,7 @@ according to the input files.
i++
var outfh2 *xopen.Writer
outfile := filepath.Join(outdir, fmt.Sprintf("%s.part_%03d%s", filepath.Base(fileName), i+1, fileExt))
outfh2, err = xopen.Wopen(outfile)
checkError(err)
outfh2 = openWriter(outfile, post)

outfhs = append(outfhs, outfh2)
counts = append(counts, 0)
Expand All @@ -264,8 +264,7 @@ according to the input files.
i++
var outfh2 *xopen.Writer
outfile := filepath.Join(outdir, fmt.Sprintf("%s.part_%03d%s", filepath.Base(fileName), i+1, fileExt))
outfh2, err = xopen.Wopen(outfile)
checkError(err)
outfh2 = openWriter(outfile, post)

outfhs = append(outfhs, outfh2)
counts = append(counts, 0)
Expand All @@ -278,8 +277,7 @@ according to the input files.
if i+1 > len(outfhs) || outfhs[i] == nil {
var outfh2 *xopen.Writer
outfile := filepath.Join(outdir, fmt.Sprintf("%s.part_%03d%s", filepath.Base(fileName), i+1, fileExt))
outfh2, err = xopen.Wopen(outfile)
checkError(err)
outfh2 = openWriter(outfile, post)

outfhs = append(outfhs, outfh2)
counts = append(counts, 0)
Expand Down Expand Up @@ -331,5 +329,6 @@ func init() {
split2Cmd.Flags().IntP("by-part", "p", 0, "split sequences into N parts")
split2Cmd.Flags().StringP("by-length", "l", "", "split sequences into chunks of N bases, supports K/M/G suffix")
split2Cmd.Flags().StringP("out-dir", "O", "", "output directory (default value is $infile.split)")
split2Cmd.Flags().StringP("post", "P", "", "postprocess shell command ($FILE for formatted out-dir)")
split2Cmd.Flags().BoolP("force", "f", false, "overwrite output directory")
}