Skip to content

Commit

Permalink
[pre-commit.ci] auto fixes from pre-commit hooks
Browse files Browse the repository at this point in the history
  • Loading branch information
pre-commit-ci[bot] committed Dec 4, 2024
1 parent bb1acf5 commit 83ba1f3
Show file tree
Hide file tree
Showing 6 changed files with 85 additions and 71 deletions.
2 changes: 1 addition & 1 deletion models/bdt_trained_on_hhh_qcd.json

Large diffs are not rendered by default.

29 changes: 14 additions & 15 deletions src/analysis/boosted.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,25 +99,25 @@ def parse_boosted_w_target(testfile, predfile, num_higgs=3):

for i in range(1, num_higgs + 1):
# Collect target pt, mask, and assignment for each Higgs
bh_pt = np.array(testfile['TARGETS'][f'bh{i}']['pt'])
bh_mask = np.array(testfile['TARGETS'][f'bh{i}']['mask'])
bb_bh_t.append(np.array(testfile['TARGETS'][f'bh{i}']['bb']))
bh_pt = np.array(testfile["TARGETS"][f"bh{i}"]["pt"])
bh_mask = np.array(testfile["TARGETS"][f"bh{i}"]["mask"])
bb_bh_t.append(np.array(testfile["TARGETS"][f"bh{i}"]["bb"]))
bh_masks_list.append(bh_mask.reshape(-1, 1))
bh_pts_list.append(bh_pt.reshape(-1, 1))

try:
# Collect predicted assignment, detection probability, and fatjet assignment probability
bb_bh_p.append(np.array(predfile['TARGETS'][f'bh{i}']['bb']))
dp_bh.append(np.array(predfile['TARGETS'][f'bh{i}']['detection_probability']))
ap_bh.append(np.array(predfile['TARGETS'][f'bh{i}']['assignment_probability']))
bb_bh_p.append(np.array(predfile["TARGETS"][f"bh{i}"]["bb"]))
dp_bh.append(np.array(predfile["TARGETS"][f"bh{i}"]["detection_probability"]))
ap_bh.append(np.array(predfile["TARGETS"][f"bh{i}"]["assignment_probability"]))
except:
# In case of missing prediction, apply fallback logic
bb_bh_p.append(np.array(predfile['TARGETS'][f'bh{i}']['bb']) + 10)
dp_bh.append(np.array(predfile['TARGETS'][f'bh{i}']['mask']).astype('float'))
ap_bh.append(np.array(predfile['TARGETS'][f'bh{i}']['mask']).astype('float'))
bb_bh_p.append(np.array(predfile["TARGETS"][f"bh{i}"]["bb"]) + 10)
dp_bh.append(np.array(predfile["TARGETS"][f"bh{i}"]["mask"]).astype("float"))
ap_bh.append(np.array(predfile["TARGETS"][f"bh{i}"]["mask"]).astype("float"))

# Collect fatjet pt
fj_pt = np.array(testfile['INPUTS']['BoostedJets']['fj_pt'])
fj_pt = np.array(testfile["INPUTS"]["BoostedJets"]["fj_pt"])

# Concatenate detection and assignment probabilities into arrays
dps = np.concatenate([dp.reshape(-1, 1) for dp in dp_bh], axis=1)
Expand Down Expand Up @@ -145,9 +145,9 @@ def parse_boosted_w_target(testfile, predfile, num_higgs=3):
LUT_target = gen_target_bH_LUT(bb_ps_selected, bb_ts_selected, targetH_selected_pts)

# Reconstruct bH to remove overlapped ak4 jets
fj_eta = np.array(testfile['INPUTS']['BoostedJets']['fj_eta'])
fj_phi = np.array(testfile['INPUTS']['BoostedJets']['fj_phi'])
fj_mass = np.array(testfile['INPUTS']['BoostedJets']['fj_mass'])
fj_eta = np.array(testfile["INPUTS"]["BoostedJets"]["fj_eta"])
fj_phi = np.array(testfile["INPUTS"]["BoostedJets"]["fj_phi"])
fj_mass = np.array(testfile["INPUTS"]["BoostedJets"]["fj_mass"])

fjs = ak.zip(
{
Expand All @@ -156,10 +156,9 @@ def parse_boosted_w_target(testfile, predfile, num_higgs=3):
"phi": fj_phi,
"mass": fj_mass,
},
with_name="Momentum4D"
with_name="Momentum4D",
)
fj_reco = fjs[bb_ps_selected - 10]

# Return the predicted and target LUTs and the reconstructed jets
return LUT_pred, LUT_target, fj_reco

42 changes: 25 additions & 17 deletions src/analysis/plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,12 @@ def calc_pur_eff(target_path, pred_path, bins, num_higgs=3):

# generate look up tables
LUT_boosted_pred, LUT_boosted_target, fjs_reco = parse_boosted_w_target(target_h5, pred_h5, num_higgs)
LUT_resolved_pred, LUT_resolved_target, _ = parse_resolved_w_target(target_h5, pred_h5, fjs_reco=None, num_higgs=num_higgs)
LUT_resolved_wOR_pred, LUT_resolved_wOR_target, _ = parse_resolved_w_target(target_h5, pred_h5, fjs_reco=fjs_reco, num_higgs=num_higgs)
LUT_resolved_pred, LUT_resolved_target, _ = parse_resolved_w_target(
target_h5, pred_h5, fjs_reco=None, num_higgs=num_higgs
)
LUT_resolved_wOR_pred, LUT_resolved_wOR_target, _ = parse_resolved_w_target(
target_h5, pred_h5, fjs_reco=fjs_reco, num_higgs=num_higgs
)

LUT_resolved_pred_no_OR = []
for event in LUT_resolved_wOR_pred:
Expand All @@ -43,8 +47,12 @@ def calc_pur_eff(target_path, pred_path, bins, num_higgs=3):

# calculate efficiencies and purities for b+r, b, and r
results = {}
results["pur_m"], results["purerr_m"], avg_pur_m, n_correct_pred_m = calc_pur(LUT_boosted_pred, LUT_resolved_wOR_pred, bins)
results["eff_m"], results["efferr_m"], avg_eff_m, n_reco_target_m = calc_eff(LUT_boosted_target, LUT_resolved_wOR_target, bins)
results["pur_m"], results["purerr_m"], avg_pur_m, n_correct_pred_m = calc_pur(
LUT_boosted_pred, LUT_resolved_wOR_pred, bins
)
results["eff_m"], results["efferr_m"], avg_eff_m, n_reco_target_m = calc_eff(
LUT_boosted_target, LUT_resolved_wOR_target, bins
)

results["pur_b"], results["purerr_b"], avg_pur_b, n_correct_pred_b = calc_pur(LUT_boosted_pred, None, bins)
results["eff_b"], results["efferr_b"], avg_eff_b, n_reco_target_b = calc_eff(LUT_boosted_target, None, bins)
Expand Down Expand Up @@ -139,7 +147,7 @@ def plot_pur_eff_w_dict(plot_dict, target_path, save_path=None, proj_name=None,
# plot purities and efficiencies
for tag, pred_path in plot_dict.items():
print("Processing", tag)

results = calc_pur_eff(target_path, pred_path, bins, num_higgs)

ax_m[0].errorbar(
Expand Down Expand Up @@ -169,29 +177,29 @@ def plot_pur_eff_w_dict(plot_dict, target_path, save_path=None, proj_name=None,

# adjust limits and legends
event_type = "H" * num_higgs
ax_m[0].legend(title=f'{event_type} Boosted+Resolved')
ax_m[1].legend(title=f'{event_type} Boosted+Resolved')
ax_m[0].legend(title=f"{event_type} Boosted+Resolved")
ax_m[1].legend(title=f"{event_type} Boosted+Resolved")
ax_m[0].set_ylim([-0.1, 1.1])
ax_m[1].set_ylim([-0.1, 1.1])
ax_b[0].legend(title=f'{event_type} Boosted')
ax_b[1].legend(title=f'{event_type} Boosted')
ax_b[0].legend(title=f"{event_type} Boosted")
ax_b[1].legend(title=f"{event_type} Boosted")
ax_b[0].set_ylim([-0.1, 1.1])
ax_b[1].set_ylim([-0.1, 1.1])
ax_r[0].legend(title=f'{event_type} Resolved')
ax_r[1].legend(title=f'{event_type} Resolved')
ax_r[0].legend(title=f"{event_type} Resolved")
ax_r[1].legend(title=f"{event_type} Resolved")
ax_r[0].set_ylim([-0.1, 1.1])
ax_r[1].set_ylim([-0.1, 1.1])
ax_r_or[0].legend(title=f'{event_type} Resolved+OR')
ax_r_or[1].legend(title=f'{event_type} Resolved+OR')
ax_r_or[0].legend(title=f"{event_type} Resolved+OR")
ax_r_or[1].legend(title=f"{event_type} Resolved+OR")
ax_r_or[0].set_ylim([-0.1, 1.1])
ax_r_or[1].set_ylim([-0.1, 1.1])

plt.show()

if save_path is not None:
fig_m.savefig(f"{save_path}/{proj_name}_merged.pdf", format='pdf')
fig_b.savefig(f"{save_path}/{proj_name}_boosted.pdf", format='pdf')
fig_r.savefig(f"{save_path}/{proj_name}_resolved.pdf", format='pdf')
fig_r_or.savefig(f"{save_path}/{proj_name}_resolved_wOR.pdf", format='pdf')
fig_m.savefig(f"{save_path}/{proj_name}_merged.pdf", format="pdf")
fig_b.savefig(f"{save_path}/{proj_name}_boosted.pdf", format="pdf")
fig_r.savefig(f"{save_path}/{proj_name}_resolved.pdf", format="pdf")
fig_r_or.savefig(f"{save_path}/{proj_name}_resolved_wOR.pdf", format="pdf")

return
46 changes: 26 additions & 20 deletions src/analysis/resolved.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,21 +133,22 @@ def gen_target_h_LUT(b1_ps_passed, b2_ps_passed, b1_ts_selected, b2_ts_selected,
builder.end_list()
return builder


def parse_resolved_w_target(testfile, predfile, num_higgs=3, fjs_reco=None):
# Lists to store h_pt, h_masks, and bh_masks for each Higgs
h_pts_list = []
h_masks_list = []
bh_masks_list = []

for i in range(1, num_higgs + 1):
# Collect pt and mask for resolved Higgs
h_pt = np.array(testfile['TARGETS'][f'h{i}']['pt'])
h_mask = np.array(testfile['TARGETS'][f'h{i}']['mask'])
h_pt = np.array(testfile["TARGETS"][f"h{i}"]["pt"])
h_mask = np.array(testfile["TARGETS"][f"h{i}"]["mask"])
h_pts_list.append(h_pt.reshape(-1, 1))
h_masks_list.append(h_mask.reshape(-1, 1))

# Collect boosted mask for each Higgs
bh_mask = np.array(testfile['TARGETS'][f'bh{i}']['mask'])
bh_mask = np.array(testfile["TARGETS"][f"bh{i}"]["mask"])
bh_masks_list.append(bh_mask.reshape(-1, 1))

# Combine masks and pt arrays for resolved and boosted Higgs
Expand All @@ -165,41 +166,41 @@ def parse_resolved_w_target(testfile, predfile, num_higgs=3, fjs_reco=None):

for i in range(1, num_higgs + 1):
# Collect target assignments for b1 and b2
b1_h_t = np.array(testfile['TARGETS'][f'h{i}']['b1']).astype('int')
b2_h_t = np.array(testfile['TARGETS'][f'h{i}']['b2']).astype('int')
b1_h_t = np.array(testfile["TARGETS"][f"h{i}"]["b1"]).astype("int")
b2_h_t = np.array(testfile["TARGETS"][f"h{i}"]["b2"]).astype("int")
b1_ts_list.append(b1_h_t.reshape(-1, 1))
b2_ts_list.append(b2_h_t.reshape(-1, 1))

# Collect predicted assignments for b1 and b2
b1_h_p = np.array(predfile['TARGETS'][f'h{i}']['b1']).astype('int')
b2_h_p = np.array(predfile['TARGETS'][f'h{i}']['b2']).astype('int')
b1_h_p = np.array(predfile["TARGETS"][f"h{i}"]["b1"]).astype("int")
b2_h_p = np.array(predfile["TARGETS"][f"h{i}"]["b2"]).astype("int")
b1_ps_list.append(b1_h_p.reshape(-1, 1))
b2_ps_list.append(b2_h_p.reshape(-1, 1))

# Lists for detection and assignment probabilities
dp_list, ap_list = [], []
for i in range(1, num_higgs + 1):
dp_h = np.array(predfile['TARGETS'][f'h{i}']['detection_probability'])
ap_h = np.array(predfile['TARGETS'][f'h{i}']['assignment_probability'])
dp_h = np.array(predfile["TARGETS"][f"h{i}"]["detection_probability"])
ap_h = np.array(predfile["TARGETS"][f"h{i}"]["assignment_probability"])
dp_list.append(dp_h.reshape(-1, 1))
ap_list.append(ap_h.reshape(-1, 1))

# Reconstruct jet 4-momentum objects
j_pt = np.array(testfile['INPUTS']['Jets']['pt'])
j_eta = np.array(testfile['INPUTS']['Jets']['eta'])
j_phi = np.array(testfile['INPUTS']['Jets']['phi'])
j_mass = np.array(testfile['INPUTS']['Jets']['mass'])
j_pt = np.array(testfile["INPUTS"]["Jets"]["pt"])
j_eta = np.array(testfile["INPUTS"]["Jets"]["eta"])
j_phi = np.array(testfile["INPUTS"]["Jets"]["phi"])
j_mass = np.array(testfile["INPUTS"]["Jets"]["mass"])
js = ak.zip(
{
"pt": j_pt,
"eta": j_eta,
"phi": j_phi,
"mass": j_mass,
},
with_name="Momentum4D"
with_name="Momentum4D",
)
if np.max(js.layout.minmax_depth) == 1:
js = [js]
js = [js]

# Concatenate detection and assignment probabilities
dps = np.concatenate(dp_list, axis=1)
Expand Down Expand Up @@ -234,10 +235,15 @@ def parse_resolved_w_target(testfile, predfile, num_higgs=3, fjs_reco=None):
LUT_pred = gen_pred_h_LUT(
b1_ps_selected, b2_ps_selected, b1_ts_selected, b2_ts_selected, js, goodJetIdx, bi_cat_H_selected, ak.ArrayBuilder()
).snapshot()

LUT_target = gen_target_h_LUT(
b1_ps_selected, b2_ps_selected, b1_ts_selected, b2_ts_selected, targetH_selected_pts, bi_cat_H_selected, ak.ArrayBuilder()
b1_ps_selected,
b2_ps_selected,
b1_ts_selected,
b2_ts_selected,
targetH_selected_pts,
bi_cat_H_selected,
ak.ArrayBuilder(),
).snapshot()

return LUT_pred, LUT_target, goodJetIdx

33 changes: 17 additions & 16 deletions src/analysis/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,9 +90,9 @@ def calc_pur(LUT_boosted_pred, LUT_resolved_pred, bins):
- mean_per_bin
)

num_correct_pred = np.sum(predHs[:,0])
mean_pur = num_correct_pred/predHs.shape[0]
num_correct_pred = np.sum(predHs[:, 0])

mean_pur = num_correct_pred / predHs.shape[0]

return mean_per_bin, err_per_bin, mean_pur, num_correct_pred

Expand Down Expand Up @@ -134,12 +134,13 @@ def calc_eff(LUT_boosted_target, LUT_resolved_target, bins):
- mean_per_bin
)

num_reco_target = np.sum(targetHs[:,0])
mean_eff = num_reco_target/targetHs.shape[0]
num_reco_target = np.sum(targetHs[:, 0])

mean_eff = num_reco_target / targetHs.shape[0]

return mean_per_bin, err_per_bin, mean_eff, num_reco_target


# calculate event purity
def calc_event_purity(LUT_boosted_pred, LUT_resolved_pred, bins):
N_OR = 0
Expand Down Expand Up @@ -178,22 +179,23 @@ def calc_event_purity(LUT_boosted_pred, LUT_resolved_pred, bins):
N_correct_event = ak.sum(correct_event)

metrics = {}
metrics['avg_event_purity'] = N_correct_event / N_event
metrics["avg_event_purity"] = N_correct_event / N_event

# for each number of predicted candidates
# calculate purity
N_pred = ak.num(pred_events, axis=1)
N_max_pred = ak.max(N_pred)
for i in range(0, N_max_pred+1):
for i in range(0, N_max_pred + 1):
event_sel = N_pred == i
N_sel_event = ak.sum(event_sel)
N_correct_sel_event = ak.sum(correct_event[event_sel])

metrics[f'{i}_candidate_event_purity'] = N_correct_sel_event / N_sel_event
metrics[f'{i}_candidate_event_ratio'] = N_sel_event / N_event
metrics[f"{i}_candidate_event_purity"] = N_correct_sel_event / N_sel_event
metrics[f"{i}_candidate_event_ratio"] = N_sel_event / N_event

return metrics


# calculate event efficiency
def calc_event_efficiency(LUT_boosted_target, LUT_resolved_target, bins):
Expand Down Expand Up @@ -223,7 +225,6 @@ def calc_event_efficiency(LUT_boosted_target, LUT_resolved_target, bins):
# resolved case
target_events = [[targetH[0] for targetH in event] for event in LUT_resolved_target]


target_events = ak.Array(target_events)

# calculate average purity
Expand All @@ -233,18 +234,18 @@ def calc_event_efficiency(LUT_boosted_target, LUT_resolved_target, bins):
N_retrieved_event = ak.sum(retrieved_event)

metrics = {}
metrics['avg_event_efficiency'] = N_retrieved_event / N_event
metrics["avg_event_efficiency"] = N_retrieved_event / N_event

# for each number of targets
# calculate purity
N_target = ak.num(target_events, axis=1)
N_max_target = ak.max(N_target)
for i in range(0, N_max_target+1):
for i in range(0, N_max_target + 1):
event_sel = N_target == i
N_sel_event = ak.sum(event_sel)
N_retrieved_sel_event = ak.sum(retrieved_event[event_sel])

metrics[f'{i}_target_event_purity'] = N_retrieved_sel_event / N_sel_event
metrics[f'{i}_target_event_ratio'] = N_sel_event / N_event
metrics[f"{i}_target_event_purity"] = N_retrieved_sel_event / N_sel_event
metrics[f"{i}_target_event_ratio"] = N_sel_event / N_event

return metrics
4 changes: 2 additions & 2 deletions src/models/test_baseline.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ def main(test_file, event_file, n_higgs, method):
"eta": eta,
"phi": phi,
# "mass": ak.zeros_like(pt),
'mass': mass,
"mass": mass,
"btag": btag,
},
with_name="Momentum4D",
Expand Down Expand Up @@ -171,7 +171,7 @@ def main(test_file, event_file, n_higgs, method):

num_vectors = np.sum(mask, axis=-1).to_numpy()
lines = 2
print('hello', masks.shape)
print("hello", masks.shape)
results, jet_limits, clusters = evaluate_predictions(predictions, num_vectors, targets, masks, event_file, lines)
display_table(results, jet_limits, clusters)

Expand Down

0 comments on commit 83ba1f3

Please sign in to comment.