Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Chrombpnet with bigwig #1

Open
wants to merge 10 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ jobs:
build:

runs-on: ubuntu-latest
if: GitHub.ref == 'refs/heads/master'
if: GitHub.ref == 'refs/heads/chrombpnet_with_bigwig'
steps:
- uses: actions/checkout@v2
- name: Build the Docker image
Expand All @@ -21,8 +21,8 @@ jobs:

- name: Build the Docker image

run: docker build . --file dockerfile --tag kundajelab/chrombpnet:latest
run: docker build . --file dockerfile --tag kundajelab/chrombpnet:chrombpnet_with_bigwig

- name: Docker Push
run: docker push kundajelab/chrombpnet:latest
run: docker push kundajelab/chrombpnet:chrombpnet_with_bigwig

2 changes: 1 addition & 1 deletion chrombpnet/CHROMBPNET.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ def main():

modisco_command = "modisco motifs -i {} -n 50000 -o {} -w 500".format(args.h5py,args.output_prefix+"_modisco.h5")
os.system(modisco_command)
modisco_command = "modisco report -i {} -o {} -m {}".format(args.output_prefix+"_modisco.h5",args.output_prefix+"_reports",meme_dir)
modisco_command = "modisco report -i {} -o {} -m {}".format(args.output_prefix+"_modisco.h5",args.output_prefix+"_reports",meme_file)
os.system(modisco_command)

import chrombpnet.evaluation.modisco.convert_html_to_pdf as convert_html_to_pdf
Expand Down
55 changes: 35 additions & 20 deletions chrombpnet/helpers/generate_reports/make_html.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,16 @@ def qc_report(fpx,prefix,data_type):
#table_profile = open(os.path.join(prefix,"evaluation/modisco_profile/motifs.html")).read().replace("./","./modisco_profile/").replace("width=\"240\"","class=\"cover\"").replace("border=\"1\" class=\"dataframe\"","").replace(">pos_patterns.pattern",">pos_").replace(">neg_patterns.pattern",">neg_").replace("modisco_cwm_fwd","cwm_fwd").replace("modisco_cwm_rev","cwm_rev").replace("num_seqlets","NumSeqs").replace("dataframe","new")
#table_counts = open(os.path.join(prefix,"auxiliary/interpret_subsample/modisco_counts/motifs.html")).read().replace("./","./modisco_counts/").replace("width=\"240\"","class=\"cover\"").replace("border=\"1\" class=\"dataframe\"","").replace(">pos_patterns.pattern",">pos_").replace(">neg_patterns.pattern",">neg_").replace("modisco_cwm_fwd","cwm_fwd").replace("modisco_cwm_rev","cwm_rev").replace("num_seqlets","NumSeqs")

html_perf = f'''

<body style="font-size:20px;">
<h3>{chrombpnet_model_perf_hed}</h3>
<p>{chrombpnet_model_perf_text}</p>
{pdf.to_html(classes='mystyle')}
{pdf1.to_html(classes='mystyle')}
</body>
'''

if data_type == "ATAC":
tn5_1 = os.path.join("./","{}chrombpnet_nobias.tn5_1.footprint.png".format(fpx))
tn5_2 = os.path.join("./","{}chrombpnet_nobias.tn5_2.footprint.png".format(fpx))
Expand Down Expand Up @@ -202,6 +212,18 @@ def qc_report(fpx,prefix,data_type):
</table>
</body>
'''
html_motifs = f'''
<body style="font-size:20px;">
<h3>{tf_hed}</h3>
<p>{tf_text_profile}</p>
</body>
<body>
{table_profile}
</body>
'''
html = html_perf+html_table+html_motifs
return html.format(tn5_1=tn5_1,tn5_2=tn5_2,tn5_3=tn5_3,tn5_4=tn5_4,tn5_5=tn5_5)

elif data_type == "DNASE":
dnase_1 = os.path.join("./","{}chrombpnet_nobias.dnase_1.footprint.png".format(fpx))
dnase_2 = os.path.join("./","{}chrombpnet_nobias.dnase_2.footprint.png".format(fpx))
Expand All @@ -227,18 +249,22 @@ def qc_report(fpx,prefix,data_type):
</table>
</body>
'''
html_motifs = f'''
<body style="font-size:20px;">
<h3>{tf_hed}</h3>
<p>{tf_text_profile}</p>
</body>
<body>
{table_profile}
</body>
'''
html = html_perf+html_table+html_motifs
return html.format(dnase_1=dnase_1,dnase_2=dnase_2)

else:
print("Unknown data type: "+data_type)

html_perf = f'''

<body style="font-size:20px;">
<h3>{chrombpnet_model_perf_hed}</h3>
<p>{chrombpnet_model_perf_text}</p>
{pdf.to_html(classes='mystyle')}
{pdf1.to_html(classes='mystyle')}
</body>
'''


# html_motifs = f'''
# <body style="font-size:20px;">
Expand All @@ -255,17 +281,6 @@ def qc_report(fpx,prefix,data_type):
# {table_counts}
# </body>
# '''
html_motifs = f'''
<body style="font-size:20px;">
<h3>{tf_hed}</h3>
<p>{tf_text_profile}</p>
</body>
<body>
{table_profile}
</body>
'''
html = html_perf+html_table+html_motifs
return html.format(tn5_1=tn5_1,tn5_2=tn5_2,tn5_3=tn5_3,tn5_4=tn5_4,tn5_5=tn5_5)

def main(args):

Expand Down
4 changes: 4 additions & 0 deletions chrombpnet/helpers/hyperparameters/find_bias_hyperparams.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,10 @@ def main(args):
print("counts_loss_weight:", counts_loss_weight)
assert(counts_loss_weight != 0)

if counts_loss_weight < 1.0:
counts_loss_weight = 1.0
print("WARNING: you are training on low-read depth data")

# store the parameters being used - in a TSV file
file = open("{}bias_data_params.tsv".format(args.output_prefix),"w")
file.write("\t".join(["counts_sum_min_thresh", str(round(lower_thresh,2))]))
Expand Down
3 changes: 3 additions & 0 deletions chrombpnet/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,10 +46,13 @@ def general_training_args(required_train, optional_train):

required_train.add_argument('-g','--genome', required=True, type=str, help="reference genome fasta file")
required_train.add_argument('-c', '--chrom-sizes', type=str, required=True, help="Chrom sizes file")

group = required_train.add_mutually_exclusive_group(required=True)

group.add_argument('-ibam', '--input-bam-file', type=str, help="Input BAM file")
group.add_argument('-ifrag', '--input-fragment-file', type=str, help="Input fragment file")
group.add_argument('-itag', '--input-tagalign-file', type=str, help="Input tagAlign file")
group.add_argument("-ibw", "--bigwig", type=str, help="Input bigwig file of observed data")
required_train.add_argument('-o', '--output-dir', type=str, required=True, help="Output dir (path/to/output/dir)")
required_train.add_argument('-d', '--data-type', required=True, type=str, choices=['ATAC', 'DNASE'], help="assay type")
required_train.add_argument("-p", "--peaks", type=str, required=True, help="10 column bed file of peaks. Sequences and labels will be extracted centered at start (2nd col) + summit (10th col).")
Expand Down
39 changes: 23 additions & 16 deletions chrombpnet/pipelines.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,17 +12,21 @@ def chrombpnet_train_pipeline(args):
fpx = args.file_prefix+"_"
else:
fpx = ""

# Shift bam and convert to bigwig
import chrombpnet.helpers.preprocessing.reads_to_bigwig as reads_to_bigwig
args.output_prefix = os.path.join(args.output_dir,"auxiliary/{}data".format(fpx))
args.plus_shift = None
args.minus_shift = None
reads_to_bigwig.main(args)

if args.bigwig is not None:
pass
else:
# Shift bam and convert to bigwig
import chrombpnet.helpers.preprocessing.reads_to_bigwig as reads_to_bigwig
args.output_prefix = os.path.join(args.output_dir,"auxiliary/{}data".format(fpx))
args.plus_shift = None
args.minus_shift = None
reads_to_bigwig.main(args)
args.bigwig = os.path.join(args.output_dir,"auxiliary/{}data_unstranded.bw".format(fpx)) # bigwig input

# QC bigwig
import chrombpnet.helpers.preprocessing.analysis.build_pwm_from_bigwig as build_pwm_from_bigwig
args.bigwig = os.path.join(args.output_dir,"auxiliary/{}data_unstranded.bw".format(fpx))

args.output_prefix = os.path.join(args.output_dir,"evaluation/{}bw_shift_qc".format(fpx))
folds = json.load(open(args.chr_fold_path))
assert(len(folds["valid"]) > 0) # validation list of chromosomes is empty
Expand Down Expand Up @@ -265,16 +269,19 @@ def train_bias_pipeline(args):
else:
fpx = ""

# Shift bam and convert to bigwig
import chrombpnet.helpers.preprocessing.reads_to_bigwig as reads_to_bigwig
args.output_prefix = os.path.join(args.output_dir,"auxiliary/{}data".format(fpx))
args.plus_shift = None
args.minus_shift = None
reads_to_bigwig.main(args)

if args.bigwig is not None:
pass
else:
# Shift bam and convert to bigwig
import chrombpnet.helpers.preprocessing.reads_to_bigwig as reads_to_bigwig
args.output_prefix = os.path.join(args.output_dir,"auxiliary/{}data".format(fpx))
args.plus_shift = None
args.minus_shift = None
reads_to_bigwig.main(args)
args.bigwig = os.path.join(args.output_dir,"auxiliary/{}data_unstranded.bw".format(fpx))

# QC bigwig
import chrombpnet.helpers.preprocessing.analysis.build_pwm_from_bigwig as build_pwm_from_bigwig
args.bigwig = os.path.join(args.output_dir,"auxiliary/{}data_unstranded.bw".format(fpx))
args.output_prefix = os.path.join(args.output_dir,"evaluation/{}bw_shift_qc".format(fpx))
folds = json.load(open(args.chr_fold_path))
assert(len(folds["valid"]) > 0) # validation list of chromosomes is empty
Expand Down
5 changes: 5 additions & 0 deletions dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,11 @@ RUN conda install -y -c conda-forge -c bioconda samtools bedtools ucsc-bedgrapht

# Install jq
RUN apt-get install -y jq
RUN apt-get install -y libcairo2
RUN apt-get install -y libpango-1.0-0
RUN apt-get install -y libpangocairo-1.0-0
RUN apt-get install -y libgdk-pixbuf2.0-0
RUN apt-get install -y libffi-dev

# Clean up after apt and conda
RUN apt-get clean && rm -rf /var/lib/apt/lists/*
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ protobuf==3.20
tqdm==4.48.2
deepdish==0.3.7
deeplift==0.6.13.0
modisco==0.5.16.0
modisco==0.5.14.1
modisco-lite==2.0.5
weasyprint==52.5
kundajelab-shap==1
Expand Down