forked from PapenfussLab/sv_benchmark
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcall_cortex.sh
87 lines (83 loc) · 3.16 KB
/
call_cortex.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
#!/bin/bash
#
#
. common.sh
CALLER=cortex/1.0.5.14
CORTEX_DIR=$BASE_DIR/tools/cortex/CORTEX_release_v1.0.5.21
# Compilation failed for 1.0.5.21 due to our servers using glibc 2.11
#PATH=/usr/local/bioinfsoftware/cortex/CORTEX_release_v1.0.5.14/bin:$PATH
export PATH=$CORTEX_DIR/bin:$PATH
export PATH=$BASE_DIR/tools/cortex/stampy-1.0.28:$PATH
export PATH=$BASE_DIR/tools/cortex/vcftools_0.1.9/bin:$PATH
export PATH=$CORTEX_DIR/scripts/analyse_variants/needleman_wunsch-0.3.0:$PATH
export PERL5LIB=$CORTEX_DIR/scripts/analyse_variants/bioinf-perl/lib:$PERL5LIB
export PERL5LIB=$CORTEX_DIR/scripts/calling:$PERL5LIB
# Reference precprocessing
# cd $(dirname $CX_REFERENCE)
# stampy.py -G hg19 hg19.fa && stampy.py -g hg19 -H hg19
# cd $CX_REFERENCE.split
# ls -1 $CX_REFERENCE.split/*.fa > $CX_REFERENCE).split/file_listing_fasta
# cortex_var_31_c1 --kmer_size 31 --mem_height 27 --mem_width 100 --se_list file_listing_fasta --max_read_len 10000 --dump_binary $(basename $CX_REFERENCE).k31.ctx --sample_id REF
# cortex_var_63_c1 --kmer_size 61 --mem_height 27 --mem_width 100 --se_list file_listing_fasta --max_read_len 10000 --dump_binary $(basename $CX_REFERENCE).k61.ctx --sample_id REF
for FQ1 in $DATA_DIR/*.1.fq ; do
cx_load $FQ1
CX_CALLER=$CALLER
# cortex requires more than 480GB of memory at k=61 on 40x chm data set
#if [[ "$CX_READ_LENGTH" -gt 61 ]] ; then
# KMER_ARGS="--first_kmer 31 --kmer_step 30 --last_kmer 61"
# CX_CALLER_ARGS="31,30,61"
#else
KMER_ARGS="--first_kmer 31"
CX_CALLER_ARGS="31,0,31"
#fi
CX_FQ1=$FQ1
CX_FQ2=${FQ1/.1./.2.}
if [[ $DATA_DIR == *HG002* ]] ; then
CX_REFERENCE=/home/users/allstaff/cameron.d/reference_genomes/human/hg19.fa
fi
cx_save
XC_OUTPUT=$CX.vcf
# http://cortexassembler.sourceforge.net/cortex_var_user_manual.pdf
# build ref binaries
# build stampy hash of genome
REFSIZE=$(stat -c %s $CX_REFERENCE)
if [[ -f "$CX_REFERENCE_FA" ]] ; then
# do we need to adjust for ploidy
REFSIZE=$(stat -c %s $CX_REFERENCE_FA)
fi
XC_SCRIPT="
#rm -rf $CX;
rm -rf $CX/cortex_var/calls $CX/cortex_var/vcfs # don't clobber graph if we have it
mkdir -p $CX 2>/dev/null; cd $CX;
echo \"sample sample.unpaired.list sample.paired1.list sample.paired2.list\" > INDEX
echo -n > sample.unpaired.list
echo $CX_FQ1 > sample.paired1.list
echo $CX_FQ2 > sample.paired2.list
perl $CORTEX_DIR/scripts/calling/run_calls.pl \
$KMER_ARGS \
--fastaq_index INDEX \
--auto_cleaning yes \
--bc yes \
--pd yes \
--outdir cortex_var \
--outvcf cortex.vcf \
--ploidy 2 \
--stampy_hash ${CX_REFERENCE/.fa/} \
--stampy_bin $BASE_DIR/tools/cortex/stampy-1.0.28/stampy.py \
--list_ref_fasta $CX_REFERENCE.split/file_listing_fasta \
--refbindir $(dirname $CX_REFERENCE) \
--genome_size $REFSIZE \
--qthresh 5 \
--mem_height 28 --mem_width 100 \
--vcftools_dir $BASE_DIR/tools/cortex/vcftools_0.1.9/ \
--do_union yes \
--ref CoordinatesAndInCalling \
--workflow independent \
--logfile log.$(date +%Y%m%d%H%M%S).txt \
--max_var_len 65537 \
&& \
cp $CX/cortex_var/vcfs/cortex.vcf_wk_flow_I_RefCC_FINALcombined_BC_calls_at_all_k.decomp.vcf $CX.vcf && \
grep -v '#' $CX/cortex_var/vcfs/cortex.vcf_wk_flow_I_RefCC_FINALcombined_PD_calls_at_all_k.decomp.vcf >> $CX.vcf
"
xc_exec
done