From bb3ddb93cc84c9628602286d5455383aeaf0bb1b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 17 Jun 2024 19:18:45 +0000 Subject: [PATCH 1/2] [pre-commit.ci] pre-commit autoupdate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/pycqa/flake8: 7.0.0 → 7.1.0](https://github.com/pycqa/flake8/compare/7.0.0...7.1.0) --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 9166b9c..8efccaf 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -34,7 +34,7 @@ repos: args: [--line-length=125] - repo: https://github.com/pycqa/flake8 - rev: 7.0.0 + rev: 7.1.0 hooks: - id: flake8 # black-compatible flake-8 config From 0ec61131076f7c3d24da64ba6a8195ab550f27bb Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 17 Jun 2024 19:18:55 +0000 Subject: [PATCH 2/2] [pre-commit.ci] auto fixes from pre-commit hooks --- src/analysis/plot.py | 120 +++++++++++++++++++++++++++---------- src/analysis/utils.py | 8 +-- src/models/mix_baseline.py | 2 +- 3 files changed, 92 insertions(+), 38 deletions(-) diff --git a/src/analysis/plot.py b/src/analysis/plot.py index 2b81473..d63b11a 100644 --- a/src/analysis/plot.py +++ b/src/analysis/plot.py @@ -26,7 +26,7 @@ def calc_pur_eff(target_path, pred_path, bins): for event in LUT_resolved_wOR_pred: event_no_OR = [] for predH in event: - if predH[2]==0: + if predH[2] == 0: event_no_OR.append(predH) LUT_resolved_pred_no_OR.append(event_no_OR) @@ -34,39 +34,46 @@ def calc_pur_eff(target_path, pred_path, bins): for event in LUT_resolved_wOR_target: event_no_OR = [] for targetH in event: - if targetH[2]==0: + if targetH[2] == 0: event_no_OR.append(targetH) LUT_resolved_target_no_OR.append(event_no_OR) # calculate efficiencies and purities for b+r, b, and r results = {} - results['pur_m'], results['purerr_m'] = calc_eff(LUT_boosted_pred, LUT_resolved_wOR_pred, bins) - results['eff_m'], results['efferr_m'] = calc_pur(LUT_boosted_target, LUT_resolved_wOR_target, bins) + results["pur_m"], results["purerr_m"] = calc_eff(LUT_boosted_pred, LUT_resolved_wOR_pred, bins) + results["eff_m"], results["efferr_m"] = calc_pur(LUT_boosted_target, LUT_resolved_wOR_target, bins) - results['pur_b'], results['purerr_b'] = calc_eff(LUT_boosted_pred, None, bins) - results['eff_b'], results['efferr_b'] = calc_pur(LUT_boosted_target, None, bins) + results["pur_b"], results["purerr_b"] = calc_eff(LUT_boosted_pred, None, bins) + results["eff_b"], results["efferr_b"] = calc_pur(LUT_boosted_target, None, bins) - results['pur_r'], results['purerr_r'] = calc_eff(None, LUT_resolved_pred, bins) - results['eff_r'], results['efferr_r'] = calc_pur(None, LUT_resolved_target, bins) + results["pur_r"], results["purerr_r"] = calc_eff(None, LUT_resolved_pred, bins) + results["eff_r"], results["efferr_r"] = calc_pur(None, LUT_resolved_target, bins) - results['pur_r_or'], results['purerr_r_or'] = calc_eff(None, LUT_resolved_pred_no_OR, bins) - results['eff_r_or'], results['efferr_r_or'] = calc_pur(None, LUT_resolved_target_no_OR, bins) + results["pur_r_or"], results["purerr_r_or"] = calc_eff(None, LUT_resolved_pred_no_OR, bins) + results["eff_r_or"], results["efferr_r_or"] = calc_pur(None, LUT_resolved_target_no_OR, bins) - print("Number of Boosted Prediction:", np.array([pred for event in LUT_boosted_pred for pred in event]).shape[0] ) - print("Number of Resolved Prediction before OR:", np.array([pred for event in LUT_resolved_pred for pred in event]).shape[0] ) - print("Number of Resolved Prediction after OR:", np.array([pred for event in LUT_resolved_pred_no_OR for pred in event]).shape[0] ) + print("Number of Boosted Prediction:", np.array([pred for event in LUT_boosted_pred for pred in event]).shape[0]) + print( + "Number of Resolved Prediction before OR:", + np.array([pred for event in LUT_resolved_pred for pred in event]).shape[0], + ) + print( + "Number of Resolved Prediction after OR:", + np.array([pred for event in LUT_resolved_pred_no_OR for pred in event]).shape[0], + ) return results + # I started to use "efficiency" for describing how many gen Higgs were reconstructed # and "purity" for desrcribing how many reco Higgs are actually gen Higgs def plot_pur_eff_w_dict(plot_dict, target_path, save_path=None, proj_name=None, bins=None): if bins == None: bins = np.arange(0, 1050, 50) - plot_bins = np.append(bins, 2*bins[-1]-bins[-2]) - bin_centers = [(plot_bins[i]+plot_bins[i+1])/2 for i in range(plot_bins.size-1)] - xerr=(plot_bins[1]-plot_bins[0])/2*np.ones(plot_bins.shape[0]-1) + plot_bins = np.append(bins, 2 * bins[-1] - bins[-2]) + bin_centers = [(plot_bins[i] + plot_bins[i + 1]) / 2 for i in range(plot_bins.size - 1)] + xerr = (plot_bins[1] - plot_bins[0]) / 2 * np.ones(plot_bins.shape[0] - 1) # m: merged (b+r w OR) # b: boosted @@ -77,28 +84,75 @@ def plot_pur_eff_w_dict(plot_dict, target_path, save_path=None, proj_name=None, fig_r_or, ax_r_or = plt.subplots(1, 2, figsize=(12, 5)) # preset figure labels, titles, limits, etc. - ax_m[0].set(xlabel=r"Merged Reco H pT (GeV)", ylabel=r"Reconstruction Purity", title=f"Reconstruction Purity vs. Merged Reco H pT") - ax_m[1].set(xlabel=r"Merged Gen H pT (GeV)", ylabel=r"Reconstruction Efficiency", title=f"Reconstruction Efficiency vs. Merged Gen H pT") - ax_b[0].set(xlabel=r"Reco Boosted H pT (GeV)", ylabel=r"Reconstruction Purity", title=f"Reconstruction Purity vs. Reco Boosted H pT") - ax_b[1].set(xlabel=r"Gen Boosted H pT (GeV)", ylabel=r"Reconstruction Efficiency", title=f"Reconstruction Efficiency vs. Gen Boosted H pT") - ax_r[0].set(xlabel=r"Reco Resolved H pT (GeV)", ylabel=r"Reconstruction Purity", title=f"Reconstruction Purity vs. Reco Resolved H pT") - ax_r[1].set(xlabel=r"Gen Resolved H pT (GeV)", ylabel=r"Reconstruction Efficiency", title=f"Reconstruction Efficiency vs. Gen Resolved H pT") - ax_r_or[0].set(xlabel=r"Reco Resolved H pT (GeV)", ylabel=r"Reconstruction Purity", title=f"Resolved Purity After OR vs. Reco Resolved H pT") - ax_r_or[1].set(xlabel=r"Gen Resolved H pT (GeV)", ylabel=r"Reconstruction Efficiency", title=f"Resolved Efficiency After OR vs. Gen Resolved H pT") - + ax_m[0].set( + xlabel=r"Merged Reco H pT (GeV)", + ylabel=r"Reconstruction Purity", + title=f"Reconstruction Purity vs. Merged Reco H pT", + ) + ax_m[1].set( + xlabel=r"Merged Gen H pT (GeV)", + ylabel=r"Reconstruction Efficiency", + title=f"Reconstruction Efficiency vs. Merged Gen H pT", + ) + ax_b[0].set( + xlabel=r"Reco Boosted H pT (GeV)", + ylabel=r"Reconstruction Purity", + title=f"Reconstruction Purity vs. Reco Boosted H pT", + ) + ax_b[1].set( + xlabel=r"Gen Boosted H pT (GeV)", + ylabel=r"Reconstruction Efficiency", + title=f"Reconstruction Efficiency vs. Gen Boosted H pT", + ) + ax_r[0].set( + xlabel=r"Reco Resolved H pT (GeV)", + ylabel=r"Reconstruction Purity", + title=f"Reconstruction Purity vs. Reco Resolved H pT", + ) + ax_r[1].set( + xlabel=r"Gen Resolved H pT (GeV)", + ylabel=r"Reconstruction Efficiency", + title=f"Reconstruction Efficiency vs. Gen Resolved H pT", + ) + ax_r_or[0].set( + xlabel=r"Reco Resolved H pT (GeV)", + ylabel=r"Reconstruction Purity", + title=f"Resolved Purity After OR vs. Reco Resolved H pT", + ) + ax_r_or[1].set( + xlabel=r"Gen Resolved H pT (GeV)", + ylabel=r"Reconstruction Efficiency", + title=f"Resolved Efficiency After OR vs. Gen Resolved H pT", + ) # plot purities and efficiencies for tag, pred_path in plot_dict.items(): print("Processing", tag) results = calc_pur_eff(target_path, pred_path, bins) - ax_m[0].errorbar(x=bin_centers, y=results['pur_m'], xerr=xerr, yerr=results['purerr_m'], fmt='o', capsize=5, label=tag) - ax_m[1].errorbar(x=bin_centers, y=results['eff_m'], xerr=xerr, yerr=results['efferr_m'], fmt='o', capsize=5, label=tag) - ax_b[0].errorbar(x=bin_centers, y=results['pur_b'], xerr=xerr, yerr=results['purerr_b'], fmt='o', capsize=5, label=tag) - ax_b[1].errorbar(x=bin_centers, y=results['eff_b'], xerr=xerr, yerr=results['efferr_b'], fmt='o', capsize=5, label=tag) - ax_r[0].errorbar(x=bin_centers, y=results['pur_r'], xerr=xerr, yerr=results['purerr_r'], fmt='o', capsize=5, label=tag) - ax_r[1].errorbar(x=bin_centers, y=results['eff_r'], xerr=xerr, yerr=results['efferr_r'], fmt='o', capsize=5, label=tag) - ax_r_or[0].errorbar(x=bin_centers, y=results['pur_r_or'], xerr=xerr, yerr=results['purerr_r_or'], fmt='o', capsize=5, label=tag) - ax_r_or[1].errorbar(x=bin_centers, y=results['eff_r_or'], xerr=xerr, yerr=results['efferr_r_or'], fmt='o', capsize=5, label=tag) + ax_m[0].errorbar( + x=bin_centers, y=results["pur_m"], xerr=xerr, yerr=results["purerr_m"], fmt="o", capsize=5, label=tag + ) + ax_m[1].errorbar( + x=bin_centers, y=results["eff_m"], xerr=xerr, yerr=results["efferr_m"], fmt="o", capsize=5, label=tag + ) + ax_b[0].errorbar( + x=bin_centers, y=results["pur_b"], xerr=xerr, yerr=results["purerr_b"], fmt="o", capsize=5, label=tag + ) + ax_b[1].errorbar( + x=bin_centers, y=results["eff_b"], xerr=xerr, yerr=results["efferr_b"], fmt="o", capsize=5, label=tag + ) + ax_r[0].errorbar( + x=bin_centers, y=results["pur_r"], xerr=xerr, yerr=results["purerr_r"], fmt="o", capsize=5, label=tag + ) + ax_r[1].errorbar( + x=bin_centers, y=results["eff_r"], xerr=xerr, yerr=results["efferr_r"], fmt="o", capsize=5, label=tag + ) + ax_r_or[0].errorbar( + x=bin_centers, y=results["pur_r_or"], xerr=xerr, yerr=results["purerr_r_or"], fmt="o", capsize=5, label=tag + ) + ax_r_or[1].errorbar( + x=bin_centers, y=results["eff_r_or"], xerr=xerr, yerr=results["efferr_r_or"], fmt="o", capsize=5, label=tag + ) # adjust limits and legends ax_m[0].legend() diff --git a/src/analysis/utils.py b/src/analysis/utils.py index f4d5dad..f9276bb 100644 --- a/src/analysis/utils.py +++ b/src/analysis/utils.py @@ -80,8 +80,8 @@ def calc_eff(LUT_boosted_pred, LUT_resolved_pred, bins): predHs_inds = np.digitize(predHs[:, 1], bins) correctTruth_per_bin = [] - for bin_i in range(1, len(bins)+1): - correctTruth_per_bin.append(predHs[:,0][predHs_inds==bin_i]) + for bin_i in range(1, len(bins) + 1): + correctTruth_per_bin.append(predHs[:, 0][predHs_inds == bin_i]) correctTruth_per_bin = ak.Array(correctTruth_per_bin) means = ak.mean(correctTruth_per_bin, axis=-1) @@ -120,8 +120,8 @@ def calc_pur(LUT_boosted_target, LUT_resolved_target, bins): targetHs_inds = np.digitize(targetHs[:, 1], bins) correctTruth_per_bin = [] - for bin_i in range(1, len(bins)+1): - correctTruth_per_bin.append(targetHs[:,0][targetHs_inds==bin_i]) + for bin_i in range(1, len(bins) + 1): + correctTruth_per_bin.append(targetHs[:, 0][targetHs_inds == bin_i]) correctTruth_per_bin = ak.Array(correctTruth_per_bin) means = ak.mean(correctTruth_per_bin, axis=-1) diff --git a/src/models/mix_baseline.py b/src/models/mix_baseline.py index 9be43e9..abc4a44 100644 --- a/src/models/mix_baseline.py +++ b/src/models/mix_baseline.py @@ -128,7 +128,7 @@ def main(test_file, pred_file, n_higgs): # all combinations of input jets # for different numbers of resolved higgs and jets JET_ASSIGNMENTS = {} - for nH in range(0, n_higgs+1): + for nH in range(0, n_higgs + 1): JET_ASSIGNMENTS[nH] = {} for nj in range(0, nH * 2): JET_ASSIGNMENTS[nH][nj] = []