diff --git a/src/HHbbVV/processors/bbVVSkimmer.py b/src/HHbbVV/processors/bbVVSkimmer.py index ef3815ae..8b2afa4b 100644 --- a/src/HHbbVV/processors/bbVVSkimmer.py +++ b/src/HHbbVV/processors/bbVVSkimmer.py @@ -336,7 +336,6 @@ def process(self, events: ak.Array): } # VBF ak4 jet vars - ak4_jet_vars = {} jets, _ = get_jec_jets(events, year, isData, self.jecs, fatjets=False) @@ -389,14 +388,6 @@ def process(self, events: ak.Array): skimmed_events["nGoodVBFJets"] = np.array(ak.sum(vbf_jet_mask, axis=1)) - # VBF ak4 Jet vars (pt, eta, phi, M, nGoodJets) - # if self._vbf_search: - # isGen = "VBF_HHTobbVV" in dataset - # ak4JetVars = { - # **self.getVBFVars(events, jets, ak8FatJetVars, bb_mask, isGen=isGen) - # } - # skimmed_events = {**skimmed_events, **ak4JetVars} - otherVars = { key: events[var.split("_")[0]]["_".join(var.split("_")[1:])].to_numpy() for (var, key) in self.skim_vars["other"].items() @@ -809,273 +800,6 @@ def process(self, events: ak.Array): def postprocess(self, accumulator): return accumulator - def getVBFVars( - self, - events: ak.Array, - jets: JetArray, - ak8FatJetVars: Dict, - bb_mask: np.ndarray, - isGen: bool = False, - skim_vars: dict = None, - pt_shift: str = None, - mass_shift: str = None, - ) -> Dict: - """ - Computes selections on VBF jet candidates based on B2G-22-003. - Sorts jets by pt after applying base cuts and fatjet exclusion. - Stores nGoodVBFJets, an int list encoding cuts, and kinematic variables of VBF jet candidates. - - TODO: - - Implement the use of pt_shift, mass_shift, skim_vars. - - Study selection parameters that provide the best sensitivity. - - Decide best way to compute vbf variables (vector or by hand) - - Args: - events (ak.Array): Event array. - jets (JetArray): Array of ak4 jets **with JECs already applied**. - ak8FatJetVars (Dict): AK8 Fat Jet variables. - bb_mask (np.ndarray): BB mask array. - isGen (bool, optional): Flag for generation-level. Defaults to False. - skim_vars (dict, optional): Skim variables. Defaults to None. - pt_shift (str, optional): PT shift. Defaults to None. - mass_shift (str, optional): Mass shift. Defaults to None. - - Returns: - Dict: VBF variables dictionary. - - """ - vbfVars = {} - - # AK4 jets definition: 5.4 B2G-22-003 - ak4_jet_mask = ( - jets.isTight - & (jets.pt > self.ak4_jet_selection["pt"]) - & (np.abs(jets.eta) < 4.7) - # medium puId https://twiki.cern.ch/twiki/bin/viewauth/CMS/PileupJetIDUL - & ((jets.pt > 50) | ((jets.puId & 2) == 2)) - ) - - # VBF selections: 7.1.4 B2G-22-003 - - # Mask for electron/muon overlap - # electrons, muons = events.Electron[events.Electron.pt < 5], events.Muon[events.Muon.pt < 7] - # e_pairs = ak.cartesian([jets, electrons], nested=True) - # e_pairs_mask = np.abs(e_pairs.slot0.delta_r(e_pairs.slot1)) < 0.4 - # m_pairs = ak.cartesian([jets, muons], nested=True) - # m_pairs_mask = np.abs(m_pairs.slot0.delta_r(m_pairs.slot1)) < 0.4 - - # electron_muon_overlap_mask = ~( - # ak.any(e_pairs_mask, axis=-1) | ak.any(m_pairs_mask, axis=-1) - # ) - - # Fatjet Definition for ∆R calculations: same definition as in getDijetVars() (not included so we can study its effects in the output.) - ptlabel = pt_shift if pt_shift is not None else "" - mlabel = mass_shift if mass_shift is not None else "" - bbJet = vector.array( - { - "pt": ak8FatJetVars[f"ak8FatJetPt{ptlabel}"][bb_mask], - "phi": ak8FatJetVars["ak8FatJetPhi"][bb_mask], - "eta": ak8FatJetVars["ak8FatJetEta"][bb_mask], - "M": ak8FatJetVars[f"ak8FatJetParticleNetMass{mlabel}"][bb_mask], - } - ) - - VVJet = vector.array( - { - "pt": ak8FatJetVars[f"ak8FatJetPt{ptlabel}"][~bb_mask], - "phi": ak8FatJetVars["ak8FatJetPhi"][~bb_mask], - "eta": ak8FatJetVars["ak8FatJetEta"][~bb_mask], - "M": ak8FatJetVars[f"ak8FatJetMsd{mlabel}"][~bb_mask], - # TODO: change this to ParticleNetMass for next run - # "M": ak8FatJetVars[f"ak8FatJetParticleNetMass{mlabel}"][~bb_mask], - } - ) - - # this might not work due to types - fatjet_overlap_mask = (np.abs(jets.delta_r(bbJet)) > 1.2) & ( - np.abs(jets.delta_r(VVJet)) > 0.8 - ) - - # Apply base masks, sort, and calculate vbf dijet (jj) cuts - vbfJets_mask = ak4_jet_mask # & electron_muon_overlap_mask & fatjet_overlap_mask - vbfJets = jets[vbfJets_mask] - - vbfJets_sorted_pt = vbfJets[ak.argsort(vbfJets.pt, ascending=False)] - # this is the only which does not guarantee two guys. in the other sorts, the entries are specifically None. - vbfJets_sorted_pt = ak.pad_none(vbfJets_sorted_pt, 2, clip=True) - - # pt sorted eta and dijet mass mask - vbf1 = vector.array( - { - "pt": ak.flatten(pad_val(vbfJets_sorted_pt[:, 0:1].pt, 1, axis=1)), - "phi": ak.flatten(pad_val(vbfJets_sorted_pt[:, 0:1].phi, 1, axis=1)), - "eta": ak.flatten(pad_val(vbfJets_sorted_pt[:, 0:1].eta, 1, axis=1)), - "M": ak.flatten(pad_val(vbfJets_sorted_pt[:, 0:1].mass, 1, axis=1)), - } - ) - - vbf2 = vector.array( - { - "pt": ak.flatten(pad_val(vbfJets_sorted_pt[:, 1:2].pt, 1, axis=1)), - "phi": ak.flatten(pad_val(vbfJets_sorted_pt[:, 1:2].phi, 1, axis=1)), - "eta": ak.flatten(pad_val(vbfJets_sorted_pt[:, 1:2].eta, 1, axis=1)), - "M": ak.flatten(pad_val(vbfJets_sorted_pt[:, 1:2].mass, 1, axis=1)), - } - ) - - jj = vbf1 + vbf2 - - mass_jj_cut_sorted_pt = jj.mass > 500 - eta_jj_cut_sorted_pt = np.abs(vbf1.eta - vbf2.eta) > 4.0 - - # uncomment these last two to include dijet cuts - vbfJets_mask_sorted_pt = vbfJets_mask # * mass_jj_cut_sorted_pt * eta_jj_cut_sorted_pt - n_good_vbf_jets_sorted_pt = ak.fill_none(ak.sum(vbfJets_mask_sorted_pt, axis=1), 0) - - # add vbf gen quark info - if isGen: # add | True when debugging with local files - vbfGenJets = events.GenPart[events.GenPart.hasFlags(["isHardProcess"])][:, 4:6] - - vbfVars[f"vbfptGen"] = pad_val(vbfGenJets.pt, 2, axis=1) - vbfVars[f"vbfetaGen"] = pad_val(vbfGenJets.eta, 2, axis=1) - vbfVars[f"vbfphiGen"] = pad_val(vbfGenJets.phi, 2, axis=1) - vbfVars[f"vbfMGen"] = pad_val(vbfGenJets.mass, 2, axis=1) - - jet_pairs = ak.cartesian({"reco": vbfJets_sorted_pt[:, 0:2], "gen": vbfGenJets[:, 0:2]}) - - # Calculate delta eta and delta phi for each pair - delta_eta = jet_pairs["reco"].eta - jet_pairs["gen"].eta - delta_phi = np.pi - np.abs(np.abs(jet_pairs["reco"].phi - jet_pairs["gen"].phi) - np.pi) - - # Calculate delta R for each pair - delta_R = np.sqrt(delta_eta**2 + delta_phi**2) - - # Apply a mask for a low delta R value - mask_low_delta_R = delta_R < 0.4 - num_per_event = ak.sum(mask_low_delta_R, axis=-1) # miscounts 0's since some are empty - - # Combine masks with logical 'and' operation - total_mask = n_good_vbf_jets_sorted_pt > 1 - - # set event that fail to have 0 for num of events. - num_per_event = ak.where(total_mask, num_per_event, 0) - vbfVars[f"vbfNumMatchedGen"] = num_per_event.to_numpy() - - # adds data about R1 R2 selection efficiencies. - graphingR1R2 = False - if ( - graphingR1R2 == True - ): # compute fatjet mask many times, execute dijet selection and associated cuts. add to datafram label with R1 R2 info in it and ngoodjets for this configuration - for R1 in np.arange(0.3, 2, 0.1): - for R2 in np.arange(0.3, 2, 0.1): - fatjet_overlap_mask = (np.abs(jets.delta_r(bbJet)) > R1) & ( - np.abs(jets.delta_r(VVJet)) > R2 - ) - - # compute n_good_vbf_jets + incorporate eta_jj > 4.0 - vbfJets_mask = ( - ak4_jet_mask # & electron_muon_overlap_mask & fatjet_overlap_mask - ) - - # vbfJets_mask = fatjet_overlap_mask # this is for unflitered events - vbfJets = jets[vbfJets_mask] - vbfJets_sorted_pt = vbfJets[ak.argsort(vbfJets.pt, ascending=False)] - vbfJets_sorted_pt = ak.pad_none(vbfJets_sorted_pt, 2, clip=True) - jj_sorted_pt = vbfJets_sorted_pt[:, 0:1] + vbfJets_sorted_pt[:, 1:2] - mass_jj_cut_sorted_pt = jj_sorted_pt.mass > 500 - eta_jj_cut_sorted_pt = ( - np.abs(vbfJets_sorted_pt[:, 0:1].eta - vbfJets_sorted_pt[:, 1:2].eta) - > 4.0 - ) - vbfJets_mask_sorted_pt = ( - vbfJets_mask * mass_jj_cut_sorted_pt * eta_jj_cut_sorted_pt - ) - num_sorted_pt = ak.fill_none(ak.sum(vbfJets_mask_sorted_pt, axis=1), 0) - - # here we can print some information about the jets so that we can study the selections a bit. - # jet_pairs = ak.cartesian({"reco": vbfJets, "gen": vbfGenJets[:,0:2]}) # this is only for unfiltered events - jet_pairs = ak.cartesian( - {"reco": vbfJets_sorted_pt[:, 0:2], "gen": vbfGenJets[:, 0:2]} - ) - - # Calculate delta eta and delta phi for each pair - delta_eta = jet_pairs["reco"].eta - jet_pairs["gen"].eta - delta_phi = np.pi - np.abs( - np.abs(jet_pairs["reco"].phi - jet_pairs["gen"].phi) - np.pi - ) - - # Calculate delta R for each pair - delta_R = np.sqrt(delta_eta**2 + delta_phi**2) - - # Apply a mask for a low delta R value - mask_low_delta_R = delta_R < 0.4 - num_per_event = ak.sum( - mask_low_delta_R, axis=-1 - ) # miscounts 0's since some are empty - - # Combine masks with logical 'and' operation - total_mask = num_sorted_pt > 1 - - # set event that fail to have 0 for num of events. - num_per_event = ak.where(total_mask, num_per_event, 0) - vbfVars[f"vbfR1{R1:.1f}R2{R2:.1f}"] = num_per_event.to_numpy() - - # note later when we have the pd dataframe, we need to apply mask, then turn data into graph. - # and df = df[selection_mask & (df[('nGoodMuons', 0)] == 0) & (df[('nGoodElectrons', 0)] == 0) & (df[('nGoodVBFJets', 0)] >= 1)& (df[('nGoodJets', 0)] == 0)] - - vbfVars[f"vbfpt"] = pad_val(vbfJets_sorted_pt.pt, 2, axis=1) - vbfVars[f"vbfeta"] = pad_val(vbfJets_sorted_pt.eta, 2, axis=1) - vbfVars[f"vbfphi"] = pad_val(vbfJets_sorted_pt.phi, 2, axis=1) - vbfVars[f"vbfM"] = pad_val(vbfJets_sorted_pt.mass, 2, axis=1) - - # int list representing the number of passing vbf jets per event - vbfVars[f"nGoodVBFJets"] = n_good_vbf_jets_sorted_pt.to_numpy() - - adding_bdt_vars = True - if adding_bdt_vars == True: - # Adapted from HIG-20-005 ggF_Killer 6.2.2 - # https://coffeateam.github.io/coffea/api/coffea.nanoevents.methods.vector.PtEtaPhiMLorentzVector.html - # https://coffeateam.github.io/coffea/api/coffea.nanoevents.methods.vector.LorentzVector.html - # Adding variables defined in HIG-20-005 that show strong differentiation for VBF signal events and background - - # seperation between both ak8 higgs jets - vbfVars[f"vbf_dR_HH"] = VVJet.deltaR(bbJet) - - vbfVars[f"vbf_dR_j0_HVV"] = vbf1.deltaR(VVJet) - vbfVars[f"vbf_dR_j1_HVV"] = vbf2.deltaR(VVJet) - vbfVars[f"vbf_dR_j0_Hbb"] = vbf1.deltaR(bbJet) - vbfVars[f"vbf_dR_j1_Hbb"] = vbf2.deltaR(bbJet) - vbfVars[f"vbf_dR_jj"] = vbf1.deltaR(vbf2) - vbfVars[f"vbf_Mass_jj"] = jj.M - vbfVars[f"vbf_dEta_jj"] = np.abs(vbf1.eta - vbf2.eta) - - # Subleading VBF-jet cos(θ) in the HH+2j center of mass frame: - # https://github.com/scikit-hep/vector/blob/main/src/vector/_methods.py#L916 - system_4vec = vbf1 + vbf2 + VVJet + bbJet - j1_CMF = vbf1.boostCM_of_p4(system_4vec) - - # Leading VBF-jet cos(θ) in the HH+2j center of mass frame: - thetab1 = 2 * np.arctan(np.exp(-j1_CMF.eta)) - thetab1 = np.cos(thetab1) # 12 - vbfVars[f"vbf_cos_j1"] = np.abs(thetab1) - - # Subleading VBF-jet cos(θ) in the HH+2j center of mass frame: - j2_CMF = vbf2.boostCM_of_p4(system_4vec) - thetab2 = 2 * np.arctan(np.exp(-j2_CMF.eta)) - thetab2 = np.cos(thetab2) - vbfVars[f"vbf_cos_j2"] = np.abs(thetab2) - - # H1-centrality * H2-centrality: - delta_eta = vbf1.eta - vbf2.eta - avg_eta = (vbf1.eta + vbf2.eta) / 2 - prod_centrality = np.exp( - -np.power((VVJet.eta - avg_eta) / delta_eta, 2) - - np.power((bbJet.eta - avg_eta) / delta_eta, 2) - ) - vbfVars[f"vbf_prod_centrality"] = prod_centrality - - return vbfVars - def getDijetVars( self, ak8FatJetVars: Dict, bb_mask: np.ndarray, pt_shift: str = None, mass_shift: str = None ):