Skip to content

Commit

Permalink
Merge pull request #194 from hkariti/dedup_quoting
Browse files: browse the repository at this point in the history
Fix corrupt dedup output for inputs with quotes
  • Loading branch information
Phlya authored Jan 3, 2024
2 parents 3d2361d + 8b23b37 commit 3a155ce
Showing 1 changed file with 4 additions and 3 deletions.
7 changes: 4 additions & 3 deletions pairtools/lib/dedup.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -4,6 +4,7 @@
import scipy.spatial
from scipy.sparse import coo_matrix
from scipy.sparse.csgraph import connected_components
from csv import QUOTE_NONE

from . import dedup_cython, pairsam_format
from .stats import PairCounter
Expand Down Expand Up @@ -79,7 +80,7 @@ def streaming_dedup(
# Stream the dups:
if outstream_dups:
df_chunk.loc[mask_mapped & mask_duplicates, :].to_csv(
outstream_dups, index=False, header=False, sep="\t"
outstream_dups, index=False, header=False, sep="\t", quoting=QUOTE_NONE
)

# Drop readID if it was created (not needed for nodup and unmapped pairs):
Expand All @@ -89,12 +90,12 @@ def streaming_dedup(
# Stream unmapped:
if outstream_unmapped:
df_chunk.loc[~mask_mapped, :].to_csv(
outstream_unmapped, index=False, header=False, sep="\t"
outstream_unmapped, index=False, header=False, sep="\t", quoting=QUOTE_NONE
)

# Stream unique pairs:
df_chunk.loc[mask_mapped & (~mask_duplicates), :].to_csv(
outstream, index=False, header=False, sep="\t"
outstream, index=False, header=False, sep="\t", quoting=QUOTE_NONE
)

t1 = time.time()
Expand Down

0 comments on commit 3a155ce

Please sign in to comment.