Skip to content

Commit

Permalink
odp now makes plots for both manual and automatically-determined breaks
Browse files Browse the repository at this point in the history
  • Loading branch information
conchoecia committed Apr 10, 2021
1 parent eaf6e8b commit a2786c2
Showing 1 changed file with 93 additions and 17 deletions.
110 changes: 93 additions & 17 deletions scripts/odp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ This script performs mutual-best protein diamond BLAST searches,
from Bio import SeqIO
from itertools import groupby
import matplotlib
import math
from operator import itemgetter
import pandas as pd
import numpy as np
Expand All @@ -13,6 +14,25 @@ import sys

configfile: "config.yaml"

# check for legal config entries. Useful for finding misspelled entries
legal = ["proteins", "prot_to_loc", "genome", "minscafsize", "manual_breaks", "chrom_to_color"]
illegal = set()
for this_axis in ["xaxisspecies", "yaxisspecies"]:
if this_axis in config:
for this_sample in config[this_axis]:
for key in config[this_axis][this_sample]:
if key not in legal:
illegal.add(key)
if len(illegal) > 0:
print("We found some fields in your config file that are not used by this program.")
print("The only fields allowed for individual samples are:")
for key in legal:
print(" - {}".format(key))
print("The keys that we found that are not allowed/in the list above are:")
for key in illegal:
print(" - {}".format(key))
sys.exit()

#make fake breaks
for this_axis in ["xaxisspecies", "yaxisspecies"]:
if this_axis in config:
Expand Down Expand Up @@ -99,20 +119,24 @@ rule all:
expand_avoid_matching_x_and_y(
"synteny_analysis/plots/sample_similarity/{}_and_{}_peridentity_length.pdf",
config["xaxisspecies"], config["yaxisspecies"]),
#make the correlation plots - whole chromosomes
expand_avoid_matching_x_and_y(
"synteny_analysis/plots/significance/wholechr/{}_and_{}_fisher_wholechr.pdf",
config["xaxisspecies"], config["yaxisspecies"]),
expand_avoid_matching_x_and_y(
"synteny_analysis/plots/significance/wholechr_colors/{}_and_{}_fisher_chromcolor.pdf",
"synteny_analysis/dvalue_table_breaks/{}_and_{}_xbreaks_manual.tsv",
config["xaxisspecies"], config["yaxisspecies"]),
# this currently doesn't work - need to trace back where the errors are from
expand_avoid_matching_x_and_y(
"synteny_analysis/plots/significance/breaks/{}_and_{}_fisher_manualbreaks.pdf",
config["xaxisspecies"], config["yaxisspecies"]),
expand_avoid_matching_x_and_y(
"synteny_analysis/plots/significance/breaks/{}_and_{}_fisher_autobreaks.pdf",
config["xaxisspecies"], config["yaxisspecies"])

##make the correlation plots - whole chromosomes
#expand_avoid_matching_x_and_y(
# "synteny_analysis/plots/significance/wholechr/{}_and_{}_fisher_wholechr.pdf",
# config["xaxisspecies"], config["yaxisspecies"]),
#expand_avoid_matching_x_and_y(
# "synteny_analysis/plots/significance/wholechr_colors/{}_and_{}_fisher_chromcolor.pdf",
# config["xaxisspecies"], config["yaxisspecies"]),
## this currently doesn't work - need to trace back where the errors are from
#expand_avoid_matching_x_and_y(
#"synteny_analysis/plots/significance/breaks/{}_and_{}_fisher_manualbreaks.pdf",
# config["xaxisspecies"], config["yaxisspecies"]),
#expand_avoid_matching_x_and_y(
#"synteny_analysis/plots/significance/breaks/{}_and_{}_fisher_autobreaks.pdf",
# config["xaxisspecies"], config["yaxisspecies"])


def filter_fasta_chrom(chrom_file, input_fasta, output_fasta):
Expand Down Expand Up @@ -1550,6 +1574,37 @@ rule gen_fishertable_autobreaks:
input.xbreaks_auto, input.ybreaks_auto,
output.fishersautobreaks, params.style)

def scale_pvalue_to_0_1(vector, reverse = False):
"""
scales a value to between 0 and 1 given the min and max values seen in that vector

we might pass the log, so log(0) = inf. Look for that as special case to define minp

returns a vector
"""
#first find max
maxp = -99999999999999
minp = 99999999999
for entry in vector:
if math.isinf(entry) or math.isnan(entry):
pass
elif entry < minp:
minp = entry
elif entry > maxp:
maxp = entry
# now scale everything to between 0 and 1
outvec = []
for entry in vector:
corrected = entry
if math.isinf(entry) or math.isnan(entry):
corrected = sys.float_info.min
thisval = (corrected-minp)/(maxp-minp)
if reverse:
outvec.append(1-thisval)
else:
outvec.append(thisval)
return outvec

def correlation_plot(plotting_df, fishertsv,
xcoords_file, ycoords_file,
xsample, ysample,
Expand All @@ -1574,6 +1629,8 @@ def correlation_plot(plotting_df, fishertsv,
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
from matplotlib.ticker import StrMethodFormatter, NullFormatter
from matplotlib import cm
from matplotlib.colors import ListedColormap, LinearSegmentedColormap
import numpy as np
# set seaborn stuff
#sns.set(rc={'text.usetex' : True})
Expand Down Expand Up @@ -1608,6 +1665,12 @@ def correlation_plot(plotting_df, fishertsv,
# first make a lookup table
df = pd.read_csv(plotting_df, delimiter="\t", index_col=0)
fisherdf = pd.read_csv(fishertsv, delimiter="\t", index_col=0)
fisherdf["pval01"] = scale_pvalue_to_0_1(fisherdf["pvalue"], reverse = False)
print(fisherdf["pval01"])
fisherdf["pval01_rev"] = scale_pvalue_to_0_1(fisherdf["pvalue"], reverse = True)
fisherdf["pval01_log"] = scale_pvalue_to_0_1(np.log(fisherdf["pvalue"]), reverse = False)
fisherdf["pval01_log_rev"] = scale_pvalue_to_0_1(np.log(fisherdf["pvalue"]), reverse = True)

print("Performed {} tests".format(len(fisherdf)))
print("0.05 equivalent for {} tests is {}".format(len(fisherdf), 0.05/len(fisherdf)))
p_value_dict = {x: x/len(fisherdf) for x in [0.05, 0.00005, 0.00000005]}
Expand Down Expand Up @@ -1682,15 +1745,28 @@ def correlation_plot(plotting_df, fishertsv,
x, y, ec = None, lw = None, color=row["color"])
panel1.add_patch(circ)
elif style in ["whole_colors", "breaks"]:
magma = cm.get_cmap("magma", 8)
#magma = cm.get_cmap('magma', 8)
#newcolors = magma(np.linspace(0, 1, 12))
#white = np.array([256/256, 256/256, 256/256, 1])
#newcolors[:5, :] = white
#newcmp = ListedColormap(newcolors)
for index, row in fisherdf.iterrows():
left = row["plotleft"]
bottom = row["plotbottom"]
width = row["plotwidth"]
height = row["plotheight"]
rectangle = matplotlib.patches.Rectangle((left, bottom), width, height,
linewidth=0,
facecolor=row["color"],
edgecolor="black", zorder = 2)
if autoscale_color:
rgba = magma(row["pval01_log"])
rectangle = matplotlib.patches.Rectangle((left, bottom), width, height,
linewidth=0,
facecolor=rgba,
edgecolor="black", zorder = 2)
else:
rectangle = matplotlib.patches.Rectangle((left, bottom), width, height,
linewidth=0,
facecolor=row["color"],
edgecolor="black", zorder = 2)
panel1.add_patch(rectangle)

# set the panel linewidth thinner
Expand Down Expand Up @@ -1885,7 +1961,7 @@ rule plot_fishers_autobreaks:
"""
input:
dtable = "synteny_analysis/dvalue_table/{xsample}_and_{ysample}_info.tsv",
fishertsv = "synteny_analysis/plots/significance/tables/{xsample}_and_{ysample}_fisher_manualbreaks.tsv",
fishertsv = "synteny_analysis/plots/significance/tables/{xsample}_and_{ysample}_fisher_autobreaks.tsv",
ycoords = "synteny_analysis/genome_coords/y_genome_coords/{ysample}_genomecoords.txt",
xcoords = "synteny_analysis/genome_coords/x_genome_coords/{xsample}_genomecoords.txt",
xbreaks_auto = "synteny_analysis/dvalue_table_breaks/{xsample}_and_{ysample}_xbreaks_auto.tsv",
Expand Down

0 comments on commit a2786c2

Please sign in to comment.