update pt unc + quark boundary uncertainty
rkansal47 committed Jun 12, 2023
1 parent 9521062 commit f707986
Showing 6 changed files with 210 additions and 107 deletions.
Binary file removed src/HHbbVV/corrections/ratio_feb20.root
Binary file added src/HHbbVV/corrections/ratio_june9.root
4 changes: 4 additions & 0 deletions src/HHbbVV/postprocessing/corrections.py
@@ -316,9 +316,13 @@ def get_lpsf(
# fraction of subjets > 350 * 0.21 measured by CASE
uncs["sj_pt_unc"] = (np.sum(events[f"{jet}_lp_sf_num_sjpt_gt350"][0]) / tot_matched) * 0.21

# TODO: check double counting

if VV:
num_prongs = events["ak8FatJetHVVNumProngs"][0]

# TODO: re-write as an OR over double matched, unmatched, and near jet boundary

sj_matching_unc = np.sum(events[f"{jet}_lp_sf_double_matched_event"][0])
for nump in range(2, 5):
sj_matching_unc += (
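
For orientation, here is a minimal sketch (toy numpy arrays and hypothetical counts, not the analysis code) of the subjet-pT uncertainty computed above and of one way the TODO's OR-combination over double-matched, unmatched, and boundary events could look:

    import numpy as np

    # toy per-event quantities, mimicking the branches referenced above
    num_sjpt_gt350 = np.array([0, 1, 2, 0])       # subjets with pT > 350 GeV per event
    double_matched = np.array([0, 1, 0, 0])       # events with >1 quark matched to one subjet
    unmatched_quarks = np.array([0, 0, 1, 0])     # quarks not matched to any subjet
    boundary_quarks = np.array([False, False, False, True])  # quark near the jet boundary
    tot_matched = 12                              # hypothetical total of matched subjets

    # fraction of subjets above 350 GeV, scaled by the 21% uncertainty measured by CASE
    sj_pt_unc = num_sjpt_gt350.sum() / tot_matched * 0.21

    # OR over the three failure modes per event, so no event is double counted
    bad_event = (double_matched > 0) | (unmatched_quarks > 0) | boundary_quarks
    sj_matching_unc = bad_event.sum() / len(bad_event)
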
34 changes: 24 additions & 10 deletions src/HHbbVV/processors/corrections.py
@@ -624,7 +624,7 @@ def add_trig_effs(weights: Weights, fatjets: FatJetArray, year: str, num_jets: i
# ------------------- Lund plane reweighting ------------------- #


MAX_PT_FPARAMS = 4 # max order (+1) of pt extrapolation functions
MAX_PT_FPARAMS = 3 # max order (+1) of pt extrapolation functions
MAX_PT_BIN = 350 # have to use subjet pt extrapolation for subjet pT > this

(
@@ -647,7 +647,7 @@ def _get_lund_lookups(seed: int = 42, lnN: bool = True, trunc_gauss: bool = Fals
import uproot

# initialize lund plane scale factors lookups
f = uproot.open(package_path + "/corrections/lp_ratio_jan20.root")
f = uproot.open(package_path + "/corrections/ratio_june9.root")

# 3D histogram: [subjet_pt, ln(0.8/Delta), ln(kT/GeV)]
ratio_nom = f["ratio_nom"].to_numpy()
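
The histogram above is typically wrapped in a lookup object for per-splitting queries; a sketch of one plausible way to do that with coffea's dense_lookup (the path and key are taken from the diff, the axis order follows the comment above, and package_path is an assumed placeholder here):

    import uproot
    from coffea.lookup_tools.dense_lookup import dense_lookup

    package_path = "src/HHbbVV"  # assumed package location
    f = uproot.open(package_path + "/corrections/ratio_june9.root")

    # 3D data/MC ratio binned in [subjet pT, ln(0.8/dR), ln(kT/GeV)]
    vals, pt_edges, logd_edges, logkt_edges = f["ratio_nom"].to_numpy()
    ratio_nom_lookup = dense_lookup(vals, (pt_edges, logd_edges, logkt_edges))

    # query the SF for one LP splitting: subjet pT = 250 GeV, ln(0.8/dR) = 2.1, ln(kT) = 1.3
    sf = ratio_nom_lookup(250.0, 2.1, 1.3)
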
@@ -743,7 +743,12 @@ def _np_pad(arr: np.ndarray, target: int = MAX_PT_FPARAMS):
)


def _get_lund_arrays(events: NanoEventsArray, fatjet_idx: Union[int, ak.Array], num_prongs: int):
def _get_lund_arrays(
events: NanoEventsArray,
jec_fatjets: FatJetArray,
fatjet_idx: Union[int, ak.Array],
num_prongs: int,
):
"""
Gets the ``num_prongs`` subjet pTs and Delta and kT per primary LP splitting of fatjets at
``fatjet_idx`` in each event.
@@ -753,6 +758,7 @@ def _get_lund_arrays(events: NanoEventsArray, fatjet_idx: Union[int, ak.Array],
Args:
events (NanoEventsArray): nano events
jec_fatjets (FatJetArray): post-JEC fatjets, used to update subjet pTs.
fatjet_idx (int | ak.Array): fatjet index
num_prongs (int): number of prongs / subjets per jet to reweight
@@ -850,7 +856,7 @@ def _calc_lund_SFs(
high_pt_sel = flat_subjet_pt > max_pt_bin
hpt_logD = flat_logD[high_pt_sel]
hpt_logkt = flat_logkt[high_pt_sel]
hpt_sjpt = flat_subjet_pt[high_pt_sel] # change this to 1/sjpt for next iteration
hpt_sjpt = 1 / flat_subjet_pt[high_pt_sel]
# store polynomial orders for pT extrapolation
sj_pt_orders = np.array([np.power(hpt_sjpt, i) for i in range(max_fparams)]).T
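
Switching hpt_sjpt to 1 / pT means the extrapolation functions are now polynomials in 1/pT rather than in pT. A minimal sketch, with hypothetical fitted coefficients (the real ones come from the pt_extrap lookups), of how sj_pt_orders combines with them:

    import numpy as np

    MAX_PT_FPARAMS = 3                       # polynomial order (+1), as set above
    coeffs = np.array([1.02, -35.0, 4.1e3])  # hypothetical fit: SF(pT) ~ c0 + c1/pT + c2/pT^2

    hpt_sjpt = 1 / np.array([400.0, 520.0, 800.0])  # 1/pT for subjets above MAX_PT_BIN
    sj_pt_orders = np.array([np.power(hpt_sjpt, i) for i in range(MAX_PT_FPARAMS)]).T
    sfs = sj_pt_orders @ coeffs                     # extrapolated per-subjet SFs
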

@@ -885,13 +891,13 @@
)
)

return np.array(
sf_vals
).T # output shape: ``[n_jets, len(ratio_lookups) x len(pt_extrap_lookups)]``
# output shape: ``[n_jets, len(ratio_lookups) x len(pt_extrap_lookups)]``
return np.array(sf_vals).T


def get_lund_SFs(
events: NanoEventsArray,
jec_fatjets: FatJetArray,
fatjet_idx: Union[int, ak.Array],
num_prongs: int,
gen_quarks: GenParticleArray,
@@ -906,6 +912,7 @@
Args:
events (NanoEventsArray): nano events
jec_fatjets (FatJetArray): post-JEC fatjets, used to update subjet pTs.
fatjet_idx (int | ak.Array): fatjet index
num_prongs (int): number of prongs / subjets per jet to reweight
seed (int, optional): seed for random smearings. Defaults to 42.
@@ -930,8 +937,10 @@
pt_extrap_lookups_dict,
) = _get_lund_lookups(seed, lnN, trunc_gauss)

jec_fatjet = jec_fatjets[np.arange(len(jec_fatjets)), fatjet_idx]

flat_logD, flat_logkt, flat_subjet_pt, ld_offsets, kt_subjets_vec = _get_lund_arrays(
events, fatjet_idx, num_prongs
events, jec_fatjet, fatjet_idx, num_prongs
)

sfs = {}
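
The new jec_fatjet line selects, per event, the JEC-corrected fatjet at fatjet_idx using paired fancy indexing. A toy numpy illustration of the pattern (the real objects are awkward/coffea arrays, but the indexing behaves the same way):

    import numpy as np

    pts = np.array([[220.0, 180.0], [310.0, 150.0], [190.0, 400.0]])  # toy fatjet pTs per event
    fatjet_idx = np.array([0, 1, 1])                                  # selected jet index per event
    picked = pts[np.arange(len(pts)), fatjet_idx]                     # -> [220., 150., 400.]
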
@@ -1011,15 +1020,20 @@
sj_matched_idx_mask = np.copy(sj_matched_idx)
sj_matched_idx_mask[~sj_matched] = -1

j_q_dr = gen_quarks.delta_r(jec_fatjet)
q_boundary = (j_q_dr > 0.7) * (j_q_dr < 0.9)
# events with quarks at the boundary of the jet
sfs["lp_sf_boundary_quarks"] = np.any(q_boundary, axis=1, keepdims=True)

# events which have more than one quark matched to the same subjet
sfs["lp_sf_double_matched_event"] = np.any(
[np.sum(sj_matched_idx_mask == i, axis=1) > 1 for i in range(3)], axis=0
[np.sum(sj_matched_idx_mask == i, axis=1) > 1 for i in range(num_prongs)], axis=0
).astype(int)[:, np.newaxis]

# number of quarks per event which aren't matched
sfs["lp_sf_unmatched_quarks"] = np.sum(~sj_matched, axis=1, keepdims=True)

# old pT extrapolation uncertainty
# OLD pT extrapolation uncertainty
sfs["lp_sf_num_sjpt_gt350"] = np.sum(kt_subjets_vec.pt > 350, axis=1, keepdims=True).to_numpy()

return sfs
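
A minimal sketch (toy numpy stand-ins for the awkward arrays, assuming the 0.7-0.9 window brackets the R = 0.8 jet boundary) of how the three per-event flags above behave:

    import numpy as np

    # toy gen-quark dR to the selected fatjet, three prongs per event
    j_q_dr = np.array([[0.05, 0.31, 0.75],   # one quark in the 0.7-0.9 boundary window
                       [0.10, 0.22, 0.40]])  # all quarks well inside the jet
    q_boundary = (j_q_dr > 0.7) & (j_q_dr < 0.9)
    boundary_flag = np.any(q_boundary, axis=1, keepdims=True)          # [[True], [False]]

    # toy subjet index each quark is matched to (-1 = unmatched)
    num_prongs = 3
    sj_matched_idx_mask = np.array([[0, 0, 2],    # two quarks share subjet 0
                                    [0, 1, -1]])  # one quark unmatched
    sj_matched = sj_matched_idx_mask >= 0
    double_matched = np.any(
        [np.sum(sj_matched_idx_mask == i, axis=1) > 1 for i in range(num_prongs)], axis=0
    ).astype(int)[:, np.newaxis]                                       # [[1], [0]]
    unmatched_quarks = np.sum(~sj_matched, axis=1, keepdims=True)      # [[0], [1]]
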
2 changes: 1 addition & 1 deletion src/HHbbVV/scale_factors/corrections.py
@@ -385,7 +385,7 @@ def add_top_pt_weight(weights: Weights, events: NanoEventsArray):

# find corrections path using this file's path
try:
with gzip.open(package_path + "/data/jec_compiled.pkl.gz", "rb") as filehandler:
with open(package_path + "/corrections/jec_compiled.pkl", "rb") as filehandler:
jmestuff = pickle.load(filehandler)

fatjet_factory = jmestuff["fatjet_factory"]
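
Since older workflows may still have the gzipped pickle under /data, a tolerant loader sketch (hypothetical fallback logic, not part of this commit) could try the new plain-pickle location first:

    import gzip
    import os
    import pickle

    package_path = "src/HHbbVV"  # assumed package location
    new_path = package_path + "/corrections/jec_compiled.pkl"
    old_path = package_path + "/data/jec_compiled.pkl.gz"

    if os.path.exists(new_path):
        with open(new_path, "rb") as filehandler:
            jmestuff = pickle.load(filehandler)
    else:
        with gzip.open(old_path, "rb") as filehandler:
            jmestuff = pickle.load(filehandler)

    fatjet_factory = jmestuff["fatjet_factory"]  # per-year JEC factories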
