Skip to content

Commit

Permalink
Merge branch 'main' of https://github.com/rkansal47/HHbbVV into main
Browse files Browse the repository at this point in the history
  • Loading branch information
rkansal47 committed Jan 9, 2024
2 parents e54c509 + 73d5ccc commit 1dd4c38
Show file tree
Hide file tree
Showing 4 changed files with 176 additions and 12 deletions.
4 changes: 3 additions & 1 deletion .github/workflows/auto-merge.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@ permissions:
jobs:
dependabot:
runs-on: ubuntu-latest
if: ${{ github.actor == 'pre-commit-ci[bot]' && github.event_name == 'pull_request'}}
if:
${{ github.event_name == 'pull_request' &&
github.event.pull_request.user.login == 'pre-commit-ci[bot]'}}
steps:
- name: Auto-merge PR
run: gh pr merge --auto --merge "$PR_URL"
Expand Down
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
repos:
- repo: https://github.com/psf/black-pre-commit-mirror
rev: "23.10.1"
rev: "23.12.1"
hooks:
- id: black-jupyter
args: [--line-length=100]
156 changes: 150 additions & 6 deletions src/HHbbVV/postprocessing/postprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,12 @@ def get_nonres_vbf_selection_regions(
"VVFatJetPt": pt_cuts,
"bbFatJetParticleNetMD_Txbb": [txbb_cut, CUT_MAX_VAL],
"VVFatJetParTMD_THWWvsT": [thww_wp, CUT_MAX_VAL],
# add more
"vbf_Mass_jj": [500, 10000],
"vbf_dEta_jj": [4, 10000],
"ak8FatJetEta0": [-2.4, 2.4],
"ak8FatJetEta1": [-2.4, 2.4],
"DijetdEta": [0, 2.0],
"DijetdPhi": [2.6, 10000],
},
label="Pass",
),
Expand All @@ -194,7 +199,12 @@ def get_nonres_vbf_selection_regions(
"VVFatJetPt": pt_cuts,
"bbFatJetParticleNetMD_Txbb": [0.8, txbb_cut],
"VVFatJetParTMD_THWWvsT": [thww_wp, CUT_MAX_VAL],
# add more
"vbf_Mass_jj": [500, 10000],
"vbf_dEta_jj": [4, 10000],
"ak8FatJetEta0": [-2.4, 2.4],
"ak8FatJetEta1": [-2.4, 2.4],
"DijetdEta": [0, 2.0],
"DijetdPhi": [2.6, 10000],
},
label="Fail",
),
Expand Down Expand Up @@ -339,8 +349,8 @@ def get_res_selection_regions(
# TODO: check which of these applies to resonant as well
weight_shifts = {
"pileup": Syst(samples=nonres_sig_keys + res_sig_keys + bg_keys, label="Pileup"),
"PDFalphaS": Syst(samples=nonres_sig_keys, label="PDF"),
"QCDscale": Syst(samples=nonres_sig_keys, label="QCDscale"),
# "PDFalphaS": Syst(samples=nonres_sig_keys, label="PDF"),
# "QCDscale": Syst(samples=nonres_sig_keys, label="QCDscale"),
"ISRPartonShower": Syst(samples=nonres_sig_keys_ggf + ["V+Jets"], label="ISR Parton Shower"),
"FSRPartonShower": Syst(samples=nonres_sig_keys_ggf + ["V+Jets"], label="FSR Parton Shower"),
"L1EcalPrefiring": Syst(
Expand Down Expand Up @@ -394,6 +404,27 @@ def main(args):
)
print("Loaded BDT preds\n")

# Load VBF Variables (edits events_dict so that all of the events have the appropriate variables and stuff) TODO:
if args.vbf:
pt_labels = ["JES", "JER"]
m_labels = ["JMS", "JMR"]
for key, df in events_dict.items():
bb_mask = (
df[("ak8FatJetParticleNetMD_Txbb", 0)] > df[("ak8FatJetParticleNetMD_Txbb", 1)]
)
_add_vbf_columns(df, bb_mask, ptlabel="", mlabel="")

if key == "Data":
continue

for var in pt_labels:
for direction in ["down", "up"]:
_add_vbf_columns(df, bb_mask, ptlabel=f"_{var}_{direction}", mlabel="")

for var in m_labels:
for direction in ["down", "up"]:
_add_vbf_columns(df, bb_mask, ptlabel="", mlabel=f"_{var}_{direction}")

# Control plots
if args.control_plots:
print("\nMaking control plots\n")
Expand Down Expand Up @@ -532,10 +563,123 @@ def _init(args):
return shape_vars, scan, scan_cuts, scan_wps


# adds all necessary columns to dataframes from events_dict
def _add_vbf_columns(df, bb_mask, ptlabel, mlabel):
import vector

bbJet = vector.array(
{
"pt": np.where(bb_mask, df[f"ak8FatJetPt{ptlabel}"][0], df[f"ak8FatJetPt{ptlabel}"][1]),
"phi": np.where(bb_mask, df["ak8FatJetPhi"][0], df["ak8FatJetPhi"][1]),
"eta": np.where(bb_mask, df["ak8FatJetEta"][0], df["ak8FatJetEta"][1]),
"M": np.where(
bb_mask,
df[f"ak8FatJetParticleNetMass{mlabel}"][0],
df[f"ak8FatJetParticleNetMass{mlabel}"][1],
),
}
)

VVJet = vector.array(
{
"pt": np.where(
~bb_mask, df[f"ak8FatJetPt{ptlabel}"][0], df[f"ak8FatJetPt{ptlabel}"][1]
),
"phi": np.where(~bb_mask, df["ak8FatJetPhi"][0], df["ak8FatJetPhi"][1]),
"eta": np.where(~bb_mask, df["ak8FatJetEta"][0], df["ak8FatJetEta"][1]),
"M": np.where(
~bb_mask,
df[f"ak8FatJetParticleNetMass{mlabel}"][0],
df[f"ak8FatJetParticleNetMass{mlabel}"][1],
),
}
)

vbf1 = vector.array(
{
"pt": df[(f"VBFJetPt{ptlabel}", 0)],
"phi": df[("VBFJetPhi", 0)],
"eta": df[("VBFJetEta", 0)],
"M": df[("VBFJetMass", 0)],
}
)

vbf2 = vector.array(
{
"pt": df[(f"VBFJetPt{ptlabel}", 1)],
"phi": df[("VBFJetPhi", 1)],
"eta": df[("VBFJetEta", 1)],
"M": df[("VBFJetMass", 1)],
}
)

jj = vbf1 + vbf2

mass_jj_cut_sorted_pt = jj.mass > 500
eta_jj_cut_sorted_pt = np.abs(vbf1.eta - vbf2.eta) > 4.0

# Adapted from HIG-20-005 ggF_Killer 6.2.2
# https://coffeateam.github.io/coffea/api/coffea.nanoevents.methods.vector.PtEtaPhiMLorentzVector.html
# https://coffeateam.github.io/coffea/api/coffea.nanoevents.methods.vector.LorentzVector.html
# Adding variables defined in HIG-20-005 that show strong differentiation for VBF signal events and background

# seperation between both ak8 higgs jets
if "vbf_dR_HH" not in df.columns:
df[f"vbf_dR_HH"] = VVJet.deltaR(bbJet)
if "vbf_dR_j0_HVV" not in df.columns:
df[f"vbf_dR_j0_HVV"] = vbf1.deltaR(VVJet)
if "vbf_dR_j1_HVV" not in df.columns:
df[f"vbf_dR_j1_HVV"] = vbf2.deltaR(VVJet)
if "vbf_dR_j0_Hbb" not in df.columns:
df[f"vbf_dR_j0_Hbb"] = vbf1.deltaR(bbJet)
if "vbf_dR_j1_Hbb" not in df.columns:
df[f"vbf_dR_j1_Hbb"] = vbf2.deltaR(bbJet)
if "vbf_dR_jj" not in df.columns:
df[f"vbf_dR_jj"] = vbf1.deltaR(vbf2)
if "vbf_Mass_jj{ptlabel}" not in df.columns:
df[f"vbf_Mass_jj{ptlabel}"] = jj.M
if "vbf_dEta_jj" not in df.columns:
df[f"vbf_dEta_jj"] = np.abs(vbf1.eta - vbf2.eta)

if "DijetdEta" not in df.columns:
df[f"DijetdEta"] = np.abs(bbJet.eta - VVJet.eta)
if "DijetdPhi" not in df.columns:
df[f"DijetdPhi"] = np.abs(bbJet.phi - VVJet.phi)

# Subleading VBF-jet cos(θ) in the HH+2j center of mass frame:
# https://github.com/scikit-hep/vector/blob/main/src/vector/_methods.py#L916
system_4vec = vbf1 + vbf2 + VVJet + bbJet
j1_CMF = vbf1.boostCM_of_p4(system_4vec)

# Leading VBF-jet cos(θ) in the HH+2j center of mass frame:
thetab1 = 2 * np.arctan(np.exp(-j1_CMF.eta))
thetab1 = np.cos(thetab1) # 12

if f"vbf_cos_j1{ptlabel}{mlabel}" not in df.columns:
df[f"vbf_cos_j1{ptlabel}{mlabel}"] = np.abs(thetab1)

# Subleading VBF-jet cos(θ) in the HH+2j center of mass frame:
j2_CMF = vbf2.boostCM_of_p4(system_4vec)
thetab2 = 2 * np.arctan(np.exp(-j2_CMF.eta))
thetab2 = np.cos(thetab2)
if f"vbf_cos_j2{ptlabel}{mlabel}" not in df.columns:
df[f"vbf_cos_j2{ptlabel}{mlabel}"] = np.abs(thetab2)

if "vbf_prod_centrality" not in df.columns:
# H1-centrality * H2-centrality:
delta_eta = vbf1.eta - vbf2.eta
avg_eta = (vbf1.eta + vbf2.eta) / 2
prod_centrality = np.exp(
-np.power((VVJet.eta - avg_eta) / delta_eta, 2)
- np.power((bbJet.eta - avg_eta) / delta_eta, 2)
)
df[f"vbf_prod_centrality"] = prod_centrality


def _process_samples(args, BDT_sample_order: List[str] = None):
sig_samples = res_samples if args.resonant else nonres_samples

if not args.resonant and BDT_sample_order is None:
if not args.resonant and BDT_sample_order is None and not args.vbf:
with open(f"{args.bdt_preds_dir}/{args.year}/sample_order.txt", "r") as f:
BDT_sample_order = list(eval(f.read()).keys())

Expand Down Expand Up @@ -580,7 +724,7 @@ def _process_samples(args, BDT_sample_order: List[str] = None):
if bg_key not in args.bg_keys and bg_key != data_key:
del bg_samples[bg_key]

if not args.resonant:
if not args.resonant and not args.vbf:
for key in sig_samples.copy():
if key not in BDT_sample_order:
del sig_samples[key]
Expand Down
26 changes: 22 additions & 4 deletions src/HHbbVV/processors/bbVVSkimmer.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,8 @@ class bbVVSkimmer(processor.ProcessorABC):
"VVFatJetParTMD_THWWvsT",
"MET_pt",
"MET_phi",
"nGoodElectrons",
"nGoodElectronsHH",
"nGoodElectronsHbb",
"nGoodMuons",
]

Expand Down Expand Up @@ -545,13 +546,26 @@ def process(self, events: ak.Array):
# selection from https://github.com/jennetd/hbb-coffea/blob/85bc3692be9e0e0a0c82ae3c78e22cdf5b3e4d68/boostedhiggs/vhbbprocessor.py#L283-L307
# https://indico.cern.ch/event/1154430/#b-471403-higgs-meeting-special

goodelectron = (
goodelectronHbb = (
(events.Electron.pt > 20)
& (abs(events.Electron.eta) < 2.5)
& (events.Electron.miniPFRelIso_all < 0.4)
& (events.Electron.cutBased >= events.Electron.LOOSE)
)
nelectrons = ak.sum(goodelectron, axis=1)
nelectronsHbb = ak.sum(goodelectronHbb, axis=1)
# if using HH4b lepton vetoes:
# https://cms.cern.ch/iCMS/user/noteinfo?cmsnoteid=CMS%20AN-2020/231 Section 7.1.2
# In order to be considered in the lepton veto step, a muon (electron) is required to to pass the selections described in Section 5.2, and to have pT > 15 GeV (pT > 20 GeV), and |η| < 2.4 (2.5).
# A muon is also required to pass loose identification criteria as detailed in [35] and mini-isolation
# (miniPFRelIso all < 0.4). An electron is required to pass mvaFall17V2noIso WP90 identification as well as mini-isolation (miniPFRelIso all < 0.4).

goodelectronHH = (
(events.Electron.pt > 20)
& (abs(events.Electron.eta) < 2.5)
& (events.Electron.miniPFRelIso_all < 0.4)
& (events.Electron.mvaFall17V2noIso_WP90)
)
nelectronsHH = ak.sum(goodelectronHH, axis=1)

goodmuon = (
(events.Muon.pt > 15)
Expand All @@ -562,7 +576,8 @@ def process(self, events: ak.Array):
nmuons = ak.sum(goodmuon, axis=1)

skimmed_events["nGoodMuons"] = nmuons.to_numpy()
skimmed_events["nGoodElectrons"] = nelectrons.to_numpy()
skimmed_events["nGoodElectronsHH"] = nelectronsHH.to_numpy()
skimmed_events["nGoodElectronsHbb"] = nelectronsHbb.to_numpy()

######################
# Remove branches
Expand Down Expand Up @@ -629,6 +644,9 @@ def process(self, events: ak.Array):
if self._systematics:
systematics += list(weights.variations)

single_weight_pileup = weights.partial_weight(["single_weight_pileup"])
add_selection("single_weight_pileup", (single_weight_pileup <= 4), *selection_args)

# TODO: need to be careful about the sum of gen weights used for the LHE/QCDScale uncertainties
logger.debug("weights ", weights._weights.keys())
for systematic in systematics:
Expand Down

0 comments on commit 1dd4c38

Please sign in to comment.