Skip to content

Commit

Permalink
bdt sculpting plots
Browse files Browse the repository at this point in the history
  • Loading branch information
rkansal47 committed Nov 8, 2023
1 parent 0396da5 commit 455bda2
Show file tree
Hide file tree
Showing 5 changed files with 486 additions and 54 deletions.
19 changes: 19 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,14 @@ Search for two boosted (high transverse momentum) Higgs bosons (H) decaying to t
- [BDT Trainings](#bdt-trainings)
- [Post-Processing](#post-processing-1)
- [Control plots with resonant and nonresonant samples](#control-plots-with-resonant-and-nonresonant-samples)
- [BDT sculpting plots](#bdt-sculpting-plots)
- [Making separate background and signal templates for scan and bias tests (resonant)](#making-separate-background-and-signal-templates-for-scan-and-bias-tests-resonant)
- [Create Datacard](#create-datacard)
- [PlotFits](#plotfits)
- [Combine](#combine)
- [CMSSW + Combine Quickstart](#cmssw--combine-quickstart)
- [Run fits and diagnostics locally](#run-fits-and-diagnostics-locally)
- [F-tests locally for non-resonant](#f-tests-locally-for-non-resonant)
- [Run fits on condor](#run-fits-on-condor)
- [Making datacards](#making-datacards)
- [F-tests](#f-tests)
Expand Down Expand Up @@ -276,6 +278,9 @@ for year in 2016 2016APV 2017 2018; do python -u postprocessing.py --templates -

Run `postprocessing/bash_scripts/ControlPlots.sh` from inside `postprocessing folder`.

#### BDT sculpting plots

Run `postprocessing/bash_scripts/BDTPlots.sh` from inside `postprocessing folder`.

#### Making separate background and signal templates for scan and bias tests (resonant)

Expand Down Expand Up @@ -367,6 +372,20 @@ All via the below script, with a bunch of options (see script):
run_blinded.sh --workspace --bfit --limits
```

#### F-tests locally for non-resonant

This will take 5-10 minutes.

```bash
# automatically make workspaces and do the background-only fit for orders 0 - 3
run_ftest_nonres.sh --cardstag 23May14 --templatestag $templatestag # -dl for saving shapes and limits
# run f-test for desired order
run_ftest_nonres.sh --cardstag 23May14 --goftoys --ffits --numtoys 100 --seed 444 --order 0
```

Condor is needed for resonant, see [below](#f-tests).


### Run fits on condor

#### Making datacards
Expand Down
322 changes: 298 additions & 24 deletions src/HHbbVV/postprocessing/PostProcess.ipynb

Large diffs are not rendered by default.

7 changes: 7 additions & 0 deletions src/HHbbVV/postprocessing/bash_scripts/BDTPlots.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
MAIN_DIR="../../.."
TAG=23Nov7BDTSculpting

for year in 2016APV 2016 2017 2018
do
python postprocessing.py --year $year --data-dir "$MAIN_DIR/../data/skimmer/Feb24/" --signal-data-dir "$MAIN_DIR/../data/skimmer/Jun10/" --bdt-preds-dir "$MAIN_DIR/../data/skimmer/Feb24/23_05_12_multiclass_rem_feats_3/inferences" --no-lp-sf-all-years --sig-samples GluGluToHHTobbVV_node_cHHH1 --bg-keys QCD --bdt-plots --plot-dir "$MAIN_DIR/plots/PostProcessing/$TAG"
done
56 changes: 55 additions & 1 deletion src/HHbbVV/postprocessing/plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

from collections import OrderedDict
import numpy as np
from pandas import DataFrame

import matplotlib as mpl
import matplotlib.pyplot as plt
Expand All @@ -16,7 +17,11 @@
hep.style.use("CMS")
formatter = mticker.ScalarFormatter(useMathText=True)
formatter.set_powerlimits((-3, 3))
plt.rcParams.update({"font.size": 20})

# this is needed for some reason to update the font size for the first plot
fig, ax = plt.subplots(1, 1, figsize=(12, 12))
plt.rcParams.update({"font.size": 24})
plt.close()

import hist
from hist import Hist
Expand All @@ -27,6 +32,7 @@

from hh_vars import LUMI, data_key, hbb_bg_keys
import utils
from utils import CUT_MAX_VAL

from copy import deepcopy

Expand Down Expand Up @@ -855,3 +861,51 @@ def ratioTestTrain(
plt.show()
else:
plt.close()


def cutsLinePlot(
events_dict: Dict[str, DataFrame],
bb_masks: Dict[str, DataFrame],
shape_var: utils.ShapeVar,
plot_key: str,
cut_var: str,
cuts: List[float],
year: str,
weight_key: str,
plotdir: str = "",
name: str = "",
show: bool = False,
):
"""Plot line plots of ``shape_var`` for different cuts on ``cut_var``."""
fig, ax = plt.subplots(1, 1, figsize=(12, 12))
plt.rcParams.update({"font.size": 24})

for i, cut in enumerate(cuts):
sel, _ = utils.make_selection({cut_var: [cut, CUT_MAX_VAL]}, events_dict, bb_masks)
h = utils.singleVarHist(
events_dict, shape_var, bb_masks, weight_key=weight_key, selection=sel
)

hep.histplot(
h[plot_key, ...] / np.sum(h[plot_key, ...].values()),
yerr=True,
label=f"BDTScore >= {cut}",
# density=True,
ax=ax,
linewidth=2,
alpha=0.8,
)

ax.set_xlabel(shape_var.label)
ax.set_ylabel("Fraction of Events")
ax.legend()

hep.cms.label(ax=ax, data=False, year=year, lumi=round(LUMI[year] / 1e3))

if len(name):
plt.savefig(f"{plotdir}/{name}.pdf", bbox_inches="tight")

if show:
plt.show()
else:
plt.close()
136 changes: 107 additions & 29 deletions src/HHbbVV/postprocessing/postprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from collections import OrderedDict

import os
from pathlib import Path
import sys
import pickle, json
import itertools
Expand Down Expand Up @@ -376,8 +377,9 @@ def main(args):
# THWW score vs Top (if not already from processor)
derive_variables(events_dict)

if args.plot_dir != "":
cutflow.to_csv(f"{args.plot_dir}/preselection_cutflow.csv")
# check if args has attribute
if hasattr(args, "control_plots_dir"):
cutflow.to_csv(f"{args.control_plots_dir}/preselection_cutflow.csv")

print("\nCutflow", cutflow)

Expand All @@ -401,7 +403,7 @@ def main(args):
bb_masks,
sig_keys,
control_plot_vars,
f"{args.plot_dir}/ControlPlots/",
args.control_plots_dir,
args.year,
bg_keys=args.bg_keys,
sig_scale_dict={"HHbbVV": 1e5, "VBFHHbbVV": 2e6} | {key: 2e4 for key in res_sig_keys},
Expand All @@ -410,6 +412,13 @@ def main(args):
show=False,
)

if args.bdt_plots and not args.resonant and not args.vbf:
print("\nMaking BDT sculpting plots\n")

plot_bdt_sculpting(
events_dict, bb_masks, args.bdt_sculpting_plots_dir, args.year, show=False
)

if args.templates:
for wps in scan_wps: # if not scanning, this will just be a single WP
cutstr = "_".join([f"{cut}_{wp}" for cut, wp in zip(scan_cuts, wps)]) if scan else ""
Expand Down Expand Up @@ -453,7 +462,7 @@ def main(args):
for jshift in jshifts:
print(jshift)
plot_dir = (
f"{args.plot_dir}/templates/{cutstr}/" f"{'jshifts/' if jshift != '' else ''}"
f"{args.templates_plots_dir}/{cutstr}/" f"{'jshifts/' if jshift != '' else ''}"
if args.plot_dir != ""
else ""
)
Expand Down Expand Up @@ -489,8 +498,8 @@ def main(args):


def _init(args):
if not (args.control_plots or args.templates or args.scan):
print("You need to pass at least one of --control-plots, --templates, or --scan")
if not (args.control_plots or args.bdt_plots or args.templates):
print("You need to pass at least one of --control-plots, --bdt-plots, or --templates")
return

if args.resonant:
Expand Down Expand Up @@ -600,36 +609,54 @@ def _process_samples(args, BDT_sample_order: List[str] = None):

def _make_dirs(args, scan, scan_cuts, scan_wps):
if args.plot_dir != "":
args.plot_dir = f"{args.plot_dir}/{args.year}/"
os.system(f"mkdir -p {args.plot_dir}")
args.plot_dir = Path(args.plot_dir)
args.plot_dir.mkdir(parents=True, exist_ok=True)

if args.control_plots:
os.system(f"mkdir -p {args.plot_dir}/ControlPlots/")

os.system(f"mkdir -p {args.plot_dir}/templates/")

if scan:
for wps in scan_wps:
cutstr = "_".join([f"{cut}_{wp}" for cut, wp in zip(scan_cuts, wps)])
os.system(f"mkdir -p {args.plot_dir}/templates/{cutstr}/")
os.system(f"mkdir -p {args.plot_dir}/templates/{cutstr}/wshifts")
os.system(f"mkdir -p {args.plot_dir}/templates/{cutstr}/jshifts")
else:
os.system(f"mkdir -p {args.plot_dir}/templates/wshifts")
os.system(f"mkdir -p {args.plot_dir}/templates/jshifts")

if args.resonant:
os.system(f"mkdir -p {args.plot_dir}/templates/hists2d")
args.control_plots_dir = args.plot_dir / "ControlPlots" / args.year
args.control_plots_dir.mkdir(parents=True, exist_ok=True)

if args.bdt_plots and not args.resonant and not args.vbf:
args.bdt_sculpting_plots_dir = args.plot_dir / "BDTSculpting"
args.bdt_sculpting_plots_dir.mkdir(parents=True, exist_ok=True)

if args.templates:
args.templates_plots_dir = args.plot_dir / "Templates" / args.year
args.templates_plots_dir.mkdir(parents=True, exist_ok=True)

if scan:
for wps in scan_wps:
cutstr = "_".join([f"{cut}_{wp}" for cut, wp in zip(scan_cuts, wps)])
(args.templates_plots_dir / f"{cutstr}/wshifts").mkdir(
parents=True, exist_ok=True
)
(args.templates_plots_dir / f"{cutstr}/jshifts").mkdir(
parents=True, exist_ok=True
)
else:
(args.templates_plots_dir / "wshifts").mkdir(parents=True, exist_ok=True)
(args.templates_plots_dir / "jshifts").mkdir(parents=True, exist_ok=True)
if args.resonant:
(args.templates_plots_dir / "hists2d").mkdir(parents=True, exist_ok=True)

elif args.control_plots or args.bdt_plots:
print(
"You need to pass --plot-dir if you want to make control plots or BDT plots. Exiting."
)
sys.exit()

if args.template_dir != "":
args.template_dir = Path(args.template_dir)
if scan:
for wps in scan_wps:
cutstr = "_".join([f"{cut}_{wp}" for cut, wp in zip(scan_cuts, wps)])
os.system(
f"mkdir -p {args.template_dir}/{cutstr}/{args.templates_name}/cutflows/{args.year}/"
(args.template_dir / f"{cutstr}/{args.templates_name}/cutflows/{args.year}").mkdir(
parents=True, exist_ok=True
)
else:
os.system(f"mkdir -p {args.template_dir}/{args.templates_name}/cutflows/{args.year}/")
(args.template_dir / f"{args.templates_name}/cutflows/{args.year}").mkdir(
parents=True, exist_ok=True
)


def _check_load_systematics(systs_file: str, year: str):
Expand All @@ -652,12 +679,19 @@ def _load_samples(args, samples, sig_samples, cutflow, filters=None):

events_dict = {}
for d in args.signal_data_dirs:
events_dict = {**events_dict, **utils.load_samples(d, sig_samples, args.year, filters)}
events_dict = {
**events_dict,
**utils.load_samples(
d, sig_samples, args.year, filters, hem_cleaning=args.hem_cleaning
),
}

if args.data_dir:
events_dict = {
**events_dict,
**utils.load_samples(args.data_dir, samples, args.year, filters),
**utils.load_samples(
args.data_dir, samples, args.year, filters, hem_cleaning=args.hem_cleaning
),
}

print("Samples: ", list(events_dict.keys()))
Expand Down Expand Up @@ -1167,6 +1201,44 @@ def control_plots(
return hists


def plot_bdt_sculpting(
events_dict: Dict[str, pd.DataFrame],
bb_masks: Dict[str, pd.DataFrame],
plot_dir: str,
year: str,
weight_key: str = "finalWeight",
show: bool = False,
):
"""Plot bb jet mass for different BDT score cuts."""
cuts = [0, 0.1, 0.5, 0.9, 0.95]
# bdtvars = ["", "TT", "VJets"]
bdtvars = [""]
plot_keys = ["QCD", "HHbbVV"]

shape_var = ShapeVar(
var="bbFatJetParticleNetMass", label=r"$m^{bb}_{reg}$ (GeV)", bins=[20, 50, 250]
)

for var in bdtvars:
for key in plot_keys:
ed_key = {key: events_dict[key]}
bbm_key = {key: bb_masks[key]}

plotting.cutsLinePlot(
ed_key,
bbm_key,
shape_var,
key,
f"BDTScore{var}",
cuts,
year,
weight_key,
plot_dir,
f"{year}_BDT{var}Cuts_{shape_var.var}_{key}",
show=show,
)


def check_weights(events_dict):
# Check for 0 weights - would be an issue for weight shifts
print(
Expand Down Expand Up @@ -1525,6 +1597,7 @@ def save_templates(
utils.add_bool_arg(parser, "resonant", "for resonant or nonresonant", default=False)
utils.add_bool_arg(parser, "vbf", "non-resonant VBF or inclusive", default=False)
utils.add_bool_arg(parser, "control-plots", "make control plots", default=False)
utils.add_bool_arg(parser, "bdt-plots", "make bdt sculpting plots", default=False)
utils.add_bool_arg(parser, "templates", "save m_bb templates using bdt cut", default=False)
utils.add_bool_arg(
parser, "overwrite-template", "if template file already exists, overwrite it", default=False
Expand Down Expand Up @@ -1554,6 +1627,7 @@ def save_templates(
)

utils.add_bool_arg(parser, "data", "include data", default=True)
utils.add_bool_arg(parser, "hem-cleaning", "perform hem cleaning for 2018", default=None)
utils.add_bool_arg(
parser, "HEM2d", "fatjet phi v eta plots to check HEM cleaning", default=False
)
Expand Down Expand Up @@ -1641,4 +1715,8 @@ def save_templates(
elif args.resonant:
args.bdt_preds_dir = None

if args.hem_cleaning is None:
# can't do HEM cleaning for non-resonant until BDT is re-inferenced
args.hem_cleaning = True if (args.resonant or args.vbf) else False

main(args)

0 comments on commit 455bda2

Please sign in to comment.