diff --git a/machine_learning_hep/scripts-dhadrons/README.md b/machine_learning_hep/scripts-dhadrons/README.md new file mode 100644 index 0000000000..53b40fb84f --- /dev/null +++ b/machine_learning_hep/scripts-dhadrons/README.md @@ -0,0 +1,11 @@ +# Helper scripts for inclusive hadron analysis + +- adjusting-run2-run3: fix Run 3 plots for comparison with Run 2 results +- debugging: verify different stages of MLHEP processing +- merging: merge results from different MLHEP and cut variation runs +- multitrial: a workflow to perform the multitrial (raw yield) systematics with MLHEP +- preliminary-plots: scripts to plot invariant mass fits and cut variation results for ALICE preliminaries +- run-mlhep: automate MLHEP running +- systematics: obtain various comparison plots, esp. for systematics and final analysis results + +See README files in each subfolder. diff --git a/machine_learning_hep/scripts-dhadrons/adjusting-run2-run3/README.md b/machine_learning_hep/scripts-dhadrons/adjusting-run2-run3/README.md new file mode 100644 index 0000000000..daf5e02ddb --- /dev/null +++ b/machine_learning_hep/scripts-dhadrons/adjusting-run2-run3/README.md @@ -0,0 +1,34 @@ +# Scripts to fix histograms for comparison with Run 2 results + +## Add pT bins to extend the x-axis range on the plots + +File: `add_pt_bins.py`
+Usage: `python add_pt_bins.py in_file.root histname out_file.root` + +ROOT does not offer a nice way to plot a histogram on axes whose x-range extends beyond the histogram's lowest and highest bins. + +This script takes the `histname` histogram from `in_file.root` and creates a new histogram with added bins [0.0, `histname`'s minimum) and [`histname`'s maximum, 24.0). `0.0` and `24.0` can be changed in the Python code. The new histogram has marker and line styles copied from the old one, and is saved in `out_file.root`. + +You can uncomment the "Formula for merging 2 bins" block near the end of `main()` to merge 2 bins; you need to adjust the indices of the bins to merge. This is useful if you want to compare `histname` against less granular results from elsewhere. + +## Restrict the maximum of the x-axis + +File: `remove_high_pt.py`
+Usage: `python remove_high_pt.py in_file.root histname out_file.root maxval` + +This script does the opposite of the previous one: +here, `out_file.root` will contain histograms whose last x-axis bin is the one that contains `maxval`. Higher bins are removed.
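+For example (with hypothetical file names), `python remove_high_pt.py masses.root hmass masses_trimmed.root 12.0` would write a truncated copy of every matching histogram, keeping only the bins up to the one that contains 12.0.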
+`histname` is a pattern (substring) of histogram names. + +## Rescale and merge cross section results + +Files: `modify_crosssec_run2.py`, `modify_crosssec_run3.py`
+Usage: `python modify_crosssec_run2.py in_file.root histname out_histname out_file.root` + +The Run 2 script scales `histname` from `in_file.root` by 1./BR and merges the bins whose indices are provided in the script. The output is saved under the name `out_histname` in `out_file.root`. + +The Run 3 script only rescales the input histogram and saves the result in `out_histname` in `out_file.root`. + +The lines commented out provide more examples of rescaling. + +For the Lc prompt cross section obtained during the March 2025 approvals, only the uncommented lines in both files were used. diff --git a/machine_learning_hep/scripts-dhadrons/adjusting-run2-run3/add_pt_bins.py b/machine_learning_hep/scripts-dhadrons/adjusting-run2-run3/add_pt_bins.py new file mode 100644 index 0000000000..b65b41ce57 --- /dev/null +++ b/machine_learning_hep/scripts-dhadrons/adjusting-run2-run3/add_pt_bins.py @@ -0,0 +1,75 @@ +# pylint: disable=missing-function-docstring +""" +file: add_pt_bins.py +brief: Add 0-1 and 24-25 dummy pT bins to extend x-range of input histogram. +usage: python3 add_pt_bins.py file.root my_histo file_out.root +author: Maja Karwowska , Warsaw University of Technology +""" + +import argparse +from array import array + +from ROOT import TH1F, TFile, gROOT # pylint: disable=import-error,no-name-in-module + + +def main(): + """ + Main function. + """ + gROOT.SetBatch(True) + + parser = argparse.ArgumentParser(description="Arguments to pass") + parser.add_argument("filename", help="input file with histogram") + parser.add_argument("histname", help="histogram name") + parser.add_argument("outname", help="output file for the new histogram") + args = parser.parse_args() + + with TFile(args.filename) as fin, TFile(args.outname, "recreate") as fout: + hist = fin.Get(args.histname) + hist.SetDirectory(0) + first_bin = 1 + # last_bin = hist.GetXaxis().FindBin(12.0) + last_bin = hist.GetNbinsX() + bins = [0.0] + # bins = [] + empty_bins = len(bins) + for binn in range(first_bin, last_bin + 1): + bins.append(hist.GetBinLowEdge(binn)) + # last_bins = [24.0, 25.0] + last_bins = [24.0] + bins += last_bins + print(f"Hist bins {bins}") + hist2 = TH1F(args.histname, "", len(bins) - 1, array('d', bins)) + for binn in range(empty_bins, last_bin + 1): + hist2.SetBinContent(binn + 1, hist.GetBinContent(binn + 1 - empty_bins)) + hist2.SetBinError(binn + 1, hist.GetBinError(binn + 1 - empty_bins)) + print(f"Setting bin {binn + 1} low edge {hist2.GetBinLowEdge(binn + 1)} " \ + f"up edge {hist2.GetXaxis().GetBinUpEdge(binn + 1)} content to content " \ + f"from bin {binn + 1 - empty_bins}: {hist2.GetBinContent(binn + 1)}") + # Formula for merging 2 bins. For example, to compare with less granular Run 2 results.
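+ # The merged content is the bin-width-weighted average (c1*w1 + c2*w2) / (w1 + w2), with the errors propagated in quadrature. + # Note: uncommenting this block also requires adding `import math` to the imports at the top of the file.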
+ # last_bin = hist2.GetNbinsX() + # width_combined = hist.GetBinWidth(hist.GetNbinsX() -1) + hist.GetBinWidth(hist.GetNbinsX()) + # hist2.SetBinContent(last_bin, + # ((hist.GetBinContent(hist.GetNbinsX() - 1) * hist.GetBinWidth(hist.GetNbinsX() - 1) +\ + # hist.GetBinContent(hist.GetNbinsX()) * hist.GetBinWidth(hist.GetNbinsX())) /\ + # width_combined)) + # hist2.SetBinError(last_bin, + # math.sqrt((hist.GetBinError(hist.GetNbinsX() - 1) * hist.GetBinWidth(hist.GetNbinsX() - 1) \ + # / width_combined) **2 +\ + # (hist.GetBinError(hist.GetNbinsX()) * hist.GetBinWidth(hist.GetNbinsX()) /\ + # width_combined) ** 2)) + # print(f"Setting bin {last_bin} low edge {hist2.GetBinLowEdge(last_bin)} " \ + # f"up edge {hist2.GetXaxis().GetBinUpEdge(last_bin)} content to content " \ + # f"from bins {hist.GetNbinsX()-1}, {hist.GetNbinsX()}: {hist2.GetBinContent(last_bin)}") + hist2.SetMarkerSize(hist.GetMarkerSize()) + hist2.SetMarkerColor(hist.GetMarkerColor()) + hist2.SetMarkerStyle(hist.GetMarkerStyle()) + hist2.SetLineWidth(hist.GetLineWidth()) + hist2.SetLineColor(hist.GetLineColor()) + hist2.SetLineStyle(hist.GetLineStyle()) + fout.cd() + hist2.Write() + + +if __name__ == "__main__": + main() diff --git a/machine_learning_hep/scripts-dhadrons/adjusting-run2-run3/modify_crosssec_run2.py b/machine_learning_hep/scripts-dhadrons/adjusting-run2-run3/modify_crosssec_run2.py new file mode 100644 index 0000000000..cac35b6ba4 --- /dev/null +++ b/machine_learning_hep/scripts-dhadrons/adjusting-run2-run3/modify_crosssec_run2.py @@ -0,0 +1,77 @@ +# pylint: disable=missing-function-docstring +""" +file: modify_crosssec_run2.py +brief: Perform adjustments on the input Run 2 cross section histogram. +usage: python3 modify_crosssec_run2.py file.root my_histo out_histo file_out.root +author: Maja Karwowska , Warsaw University of Technology +""" + +import argparse +import math +from array import array + +from ROOT import TH1F, TFile, gROOT # pylint: disable=import-error,no-name-in-module + +OUTPUT_BINS = [0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 24] +BR = 0.0623 + +def main(): + """ + Main function.
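+ Scale the input histogram by 1/BR and, if enabled via merge_bins, merge pairs of adjacent bins with a bin-width-weighted average.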
+ """ + gROOT.SetBatch(True) + + parser = argparse.ArgumentParser(description="Arguments to pass") + parser.add_argument("filename", help="input file with histogram") + parser.add_argument("histname", help="histogram name") + parser.add_argument("outhistname", help="outhistogram name") + parser.add_argument("outname", help="output file for the new histogram") + args = parser.parse_args() + + with TFile(args.filename) as fin, TFile(args.outname, "recreate") as fout: + hist = fin.Get(args.histname) + hist.SetDirectory(0) + #hist.Scale(0.000000001) + hist.Scale(1./BR) + hist2 = TH1F(args.outhistname, "", len(OUTPUT_BINS) - 1, array('d', OUTPUT_BINS)) + merge_bins = [20] # dummy large number so as not to merge + # merge bins = [7, 9] # indices of bins to merge + ind = 0 + for binn in range(1, hist.GetNbinsX() + 1): + print(f"Old hist bin {binn} low edge {hist.GetBinLowEdge(binn)} "\ + f"up edge {hist.GetXaxis().GetBinUpEdge(binn)} "\ + f"content: {hist.GetBinContent(binn)} +/- {hist.GetBinError(binn)}") + for binn in range(1, hist2.GetNbinsX() + 1): + if binn < merge_bins[0]: + hist2.SetBinContent(binn, hist.GetBinContent(binn)) + hist2.SetBinError(binn, hist.GetBinError(binn)) + elif ind >= len(merge_bins) or binn > merge_bins[0] + len(merge_bins) / 2: + hist2.SetBinContent(binn, hist.GetBinContent(binn + ind)) + hist2.SetBinError(binn, hist.GetBinError(binn + ind)) + else: + bin1 = merge_bins[ind] + bin2 = merge_bins[ind] + 1 + weight_sum = hist.GetBinWidth(bin1) + hist.GetBinWidth(bin2) + hist2.SetBinContent(binn, + (hist.GetBinContent(bin1) * hist.GetBinWidth(bin1) +\ + hist.GetBinContent(bin2) * hist.GetBinWidth(bin2)) /\ + weight_sum) + hist2.SetBinError(binn, + math.sqrt(((hist.GetBinWidth(bin1) * hist.GetBinError(bin1)) / weight_sum) ** 2. +\ + ((hist.GetBinWidth(bin2) * hist.GetBinError(bin2)) / weight_sum) ** 2.)) + ind += 1 + print(f"New bin {binn} low edge {hist2.GetBinLowEdge(binn)} "\ + f"up edge {hist2.GetXaxis().GetBinUpEdge(binn)} "\ + f"content: {hist2.GetBinContent(binn)} +/- {hist2.GetBinError(binn)} ind {ind}") + hist2.SetMarkerSize(hist.GetMarkerSize()) + hist2.SetMarkerColor(hist.GetMarkerColor()) + hist2.SetMarkerStyle(hist.GetMarkerStyle()) + hist2.SetLineWidth(hist.GetLineWidth()) + hist2.SetLineColor(hist.GetLineColor()) + hist2.SetLineStyle(hist.GetLineStyle()) + fout.cd() + hist2.Write() + + +if __name__ == "__main__": + main() diff --git a/machine_learning_hep/scripts-dhadrons/adjusting-run2-run3/modify_crosssec_run3.py b/machine_learning_hep/scripts-dhadrons/adjusting-run2-run3/modify_crosssec_run3.py new file mode 100644 index 0000000000..442707dca6 --- /dev/null +++ b/machine_learning_hep/scripts-dhadrons/adjusting-run2-run3/modify_crosssec_run3.py @@ -0,0 +1,70 @@ +# pylint: disable=missing-function-docstring +""" +file: modify_input.py +brief: Perform adjustments on the input Run 3 cross section histogram. +usage: python3 modify_crosssec_run3.py file.root my_histo out_histo file_out.root +author: Maja Karwowska , Warsaw University of Technology +""" + +import argparse + +from ROOT import ( # pylint: disable=import-error,no-name-in-module + TFile, + gROOT, +) + +# 2024 values for LHC22o +MLHEP_EV_SEL = 20430386. +NORM = 47092223769.611162532 +BR = 0.0623 + +# 2025 values for LHC23_pass4_thin +MLHEP_EV_SEL = 258442910841. # 2 x 10^1 +NORM = 3.0077675e+11 + +# 2025 values for multiplicity analysis +EV_SEL_MULT = 290860860000. 
+NORM_MB = 249371059919 +NORM_2 = 37884927886 +EV_FACTOR_2 = 0.85 +NORM_3 = 50023302929 +EV_FACTOR_3 = 0.91 +NORM_4 = 49545723906 +EV_FACTOR_4 = 0.96 +NORM_5 = 49300695562 +EV_FACTOR_5 = 0.98 +NORM_6 = 22192632583 +EV_FACTOR_6 = 0.99 +NORM_7 = 2476292886 +EV_FACTOR_7 = 1.0 + +def main(): + """ + Main function. + """ + gROOT.SetBatch(True) + + parser = argparse.ArgumentParser(description="Arguments to pass") + parser.add_argument("filename", help="input file with histogram") + parser.add_argument("histname", help="histogram name") + parser.add_argument("outhistname", help="outhistogram name") + parser.add_argument("outname", help="output file for the new histogram") + args = parser.parse_args() + + with TFile(args.filename) as fin, TFile(args.outname, "recreate") as fout: + hist = fin.Get(args.histname) + hist2 = hist.Clone(args.outhistname) + hist2.SetDirectory(0) + #hist2.Scale(0.000001 * MLHEP_EV_SEL / NORM) + #hist2.Scale(0.000001) + #hist.Scale(1./59400000000) # luminosity scaling, lumi in pb + #hist.Scale(BR) # BR scaling back + + hist2.Scale(EV_SEL_MULT / NORM_7) + hist2.Scale(EV_FACTOR_7) + fout.cd() + hist2.Write() + + +if __name__ == "__main__": + main() diff --git a/machine_learning_hep/scripts-dhadrons/adjusting-run2-run3/remove_high_pt.py b/machine_learning_hep/scripts-dhadrons/adjusting-run2-run3/remove_high_pt.py new file mode 100644 index 0000000000..7539057c85 --- /dev/null +++ b/machine_learning_hep/scripts-dhadrons/adjusting-run2-run3/remove_high_pt.py @@ -0,0 +1,55 @@ +# pylint: disable=missing-function-docstring +""" +file: remove_high_pt.py +brief: Remove bins with pT > max_pt in all histograms matching my_histos_pattern in the input file.root. +usage: python3 remove_high_pt.py file.root my_histos_pattern file_out.root max_pt +author: Maja Karwowska , Warsaw University of Technology +""" + +import argparse +from array import array + +from ROOT import TH1F, TFile, gROOT # pylint: disable=import-error,no-name-in-module + + +def main(): + """ + Main function. 
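+ For each histogram whose name contains the pattern, copy the bins up to the one containing maxval into a new, truncated histogram.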
+ """ + gROOT.SetBatch(True) + + parser = argparse.ArgumentParser(description="Arguments to pass") + parser.add_argument("filename", help="input file with histogram") + parser.add_argument("histname", help="histogram name pattern") + parser.add_argument("outname", help="output file for the new histogram") + parser.add_argument("maxval", type=float, help="maxval in histogram") + args = parser.parse_args() + + with TFile(args.filename) as fin, TFile(args.outname, "recreate") as fout: + histnames = [key.GetName() for key in fin.GetListOfKeys() if args.histname in key.GetName()] + for histname in histnames: + hist = fin.Get(histname) + hist.SetDirectory(0) + last_bin = hist.GetXaxis().FindBin(args.maxval) + bins = [] + for binn in range(1, last_bin + 1): + bins.append(hist.GetBinLowEdge(binn)) + hist2 = TH1F(histname, "", len(bins) - 1, array('d', bins)) + for binn in range(1, last_bin + 1): + hist2.SetBinContent(binn, hist.GetBinContent(binn)) + hist2.SetBinError(binn, hist.GetBinError(binn)) + print(f"Setting bin {binn} low edge {hist2.GetBinLowEdge(binn)} " \ + f"up edge {hist2.GetXaxis().GetBinUpEdge(binn)} content to content " \ + f"from bin {binn}: {hist2.GetBinContent(binn)}") + hist2.SetMarkerSize(hist.GetMarkerSize()) + hist2.SetMarkerColor(hist.GetMarkerColor()) + hist2.SetMarkerStyle(hist.GetMarkerStyle()) + hist2.SetLineWidth(hist.GetLineWidth()) + hist2.SetLineColor(hist.GetLineColor()) + hist2.SetLineStyle(hist.GetLineStyle()) + fout.cd() + hist2.Write() + + +if __name__ == "__main__": + main() diff --git a/machine_learning_hep/scripts-dhadrons/debugging/README.md b/machine_learning_hep/scripts-dhadrons/debugging/README.md new file mode 100644 index 0000000000..d8a887e311 --- /dev/null +++ b/machine_learning_hep/scripts-dhadrons/debugging/README.md @@ -0,0 +1,26 @@ +# Verify different stages of MLHEP processing + +## Check MLHEP output data files + +File: `check_parquet.py`
+Usage: `python check_parquet.py in_file.parquet` + +The Python script contains some examples of how to read from a parquet file, print some useful information, and plot histograms. + +It can be used to check MLHEP skimming, training, and application outputs by testing individual parquet files. + +## Compare prompt fractions calculated with different inputs or methods + +Files: `plot_prompt_fraction_vs_crosssec_configs.py`, `config_fraction_vs_crosssec_configs.json`
+Usage: `python plot_prompt_fraction_vs_crosssec_configs.py config_fraction_vs_crosssec_configs.json` + +Adjust the JSON config. You can provide as many histogram files in the `hists` dictionary as you want. +By adjusting `histoname`, you can also plot the non-prompt fraction. + +## Plot prompt fraction vs different BDT cuts + +Files: `plot_prompt_fraction_vs_bdt_cuts.py`, `config_fraction_vs_bdt_cuts.json`
+Usage: `python plot_prompt_fraction_vs_bdt_cuts.py config_fraction_vs_bdt_cuts.json` + +Adjust the JSON config. Here, you provide a glob pattern to all files of interest. +By adjusting `histoname`, you can also plot the non-prompt fraction. diff --git a/machine_learning_hep/scripts-dhadrons/debugging/check_parquet.py b/machine_learning_hep/scripts-dhadrons/debugging/check_parquet.py new file mode 100644 index 0000000000..b0c3b4b1db --- /dev/null +++ b/machine_learning_hep/scripts-dhadrons/debugging/check_parquet.py @@ -0,0 +1,66 @@ +""" +file: check_parquet.py +brief: Examples of different checks on any parquet file produced by the MLHEP preprocessing steps. +usage: python check_parquet.py AnalysisResultsReco_fPt1_2.parquet +author: Maja Karwowska , Warsaw University of Technology +""" + +import argparse + +import matplotlib.pyplot as plt # pylint: disable=import-error +import numpy as np +import pandas as pd + + +def plot_parquet(df): + """ + An example of plotting a histogram from parquet. + """ + print(df["fY"]) + print(df["fY"][~np.isinf(df["fY"])]) + + ds_fin = df["fY"][~np.isinf(df["fY"])] + + fig = plt.figure(figsize=(20, 15)) + ax = plt.subplot(1, 1, 1) + plt.hist(ds_fin.values, bins=50) + ax.set_xlabel("fY", fontsize=30) + ax.set_ylabel("Entries", fontsize=30) + fig.savefig("fY.png", bbox_inches='tight') + plt.close(fig) + +def main(): + """ + The main function. + """ + parser = argparse.ArgumentParser() + parser.add_argument("infile", help="file to process") + args = parser.parse_args() + + df = pd.read_parquet(args.infile) + print(f"df columns: {df.columns}") + print(df.size) + + print(f"df mean\n{df.mean()}") + + print(f"df[0]\n{df.iloc[0]}") + + plot_parquet(df) + + df_sel = df[df["y_test_probxgboostbkg"] > 1.0] + print(f"sel df bkg:\n{df_sel}") + df_sel = df[df["y_test_probxgboostnon_prompt"] < 0.00] + print(f"sel df non-prompt:\n{df_sel}") + df_sel = df[df["y_test_probxgboostprompt"] < 0.00] + print(f"sel df prompt:\n{df_sel}") + + # Valid only for data with saved results of ML application on Hyperloop + #print(f'ML columns:\n{df["fMlBkgScore"]}\n{df["fMlPromptScore"]}\n{df["fMlNonPromptScore"]}') + #df_sel = df[df["fMlBkgScore"] > 1.0] + #print(f'df sel ML bkg:\n{df_sel["fMlBkgScore"]}') + #df_sel = df[df["fMlNonPromptScore"] < 0.0] + #print(f'df sel ML non-prompt:\n{df_sel["fMlNonPromptScore"]}') + + +if __name__ == '__main__': + main() diff --git a/machine_learning_hep/scripts-dhadrons/debugging/config_fraction_vs_bdt_cuts.json b/machine_learning_hep/scripts-dhadrons/debugging/config_fraction_vs_bdt_cuts.json new file mode 100644 index 0000000000..5d9a224404 --- /dev/null +++ b/machine_learning_hep/scripts-dhadrons/debugging/config_fraction_vs_bdt_cuts.json @@ -0,0 +1,16 @@ +{ + "file_pattern": "/data8/majak/MLHEP/results-24022025-newtrain_fd_0.[0-9][0-9]0/LHC23pp_pass4/Results/resultsdatatot/finalcrossLcpKpiRun3analysis.root", + "_file_pattern": "glob pattern to all files with different BDT cuts", + "dir_pattern": "results-24022025-newtrain_fd_0.[0-9][0-9]0", + "_dir_pattern": "the base directory prefix from the file pattern above", + "histoname": "gfraction", + "_histoname": "the prompt fraction histogram name", + "pt_bins_min": [0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 16], + "pt_bins_max": [1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 16, 24], + "x_axis": "non-prompt cut", + "y_axis": "#Lambda_{c} prompt fraction", + "outdir": ".", + "_outdir": "output directory", + "outfile": "fraction", + "_outfile": "output file pattern; pdf/png/root suffixes are appended" +} diff --git
a/machine_learning_hep/scripts-dhadrons/debugging/config_fraction_vs_crosssec_configs.json b/machine_learning_hep/scripts-dhadrons/debugging/config_fraction_vs_crosssec_configs.json new file mode 100644 index 0000000000..ee7e1e5071 --- /dev/null +++ b/machine_learning_hep/scripts-dhadrons/debugging/config_fraction_vs_crosssec_configs.json @@ -0,0 +1,36 @@ +{ + "inputdir": "/data8/majak/crosssec/202502/", + "_inputdir": "directory with input files", + "histoname": "gfraction", + "_histoname": "the prompt fraction histogram name", + "default": "#Lambda_{c}^{#plus} #rightarrow pK^{#minus}#pi^{#plus}, pp, #sqrt{#it{s}} = 13.6 TeV", + "_default": "label of the baseline case", + "hists": { + "#Lambda_{c}^{#plus} #rightarrow pK^{#minus}#pi^{#plus}, pp, #sqrt{#it{s}} = 13.6 TeV": { + "file": [ + "finalcrossLcpKpiRun3analysis_dd.root" + ] + }, + "#Lambda_{c}^{#plus} #rightarrow pK^{#minus}#pi^{#plus}, pp, #sqrt{#it{s}} = 13.6 TeV, wide span": { + "file": [ + "finalcrossLcpKpiRun3analysis_dd_wide.root" + ] + }, + "#Lambda_{c}^{#plus} #rightarrow pK^{#minus}#pi^{#plus}, pp, #sqrt{#it{s}} = 13.6 TeV, Nb method": { + "file": [ + "finalcrossLcpKpiRun3analysis_roofit.root" + ] + } + }, + "_hists": "dictionary of input files and the corresponding labels of histograms to plot", + "y_axis": "#Lambda_{c} prompt fraction", + "legend": [0.25, 0.18, 0.70, 0.38], + "_legend": "position of the legend on the main plot", + "legend_ratio": [0.40, 0.10, 0.90, 0.35], + "_legend_ratio": "position of the legend on the ratio plot", + "output": { + "outdir": "/data8/majak/crosssec/202502/", + "file": "graph_frac_Lc_run3_Nb" + }, + "_output": "output directory and file name pattern; pdf/png/root suffixes are appended" +} diff --git a/machine_learning_hep/scripts-dhadrons/debugging/plot_prompt_fraction_vs_bdt_cuts.py b/machine_learning_hep/scripts-dhadrons/debugging/plot_prompt_fraction_vs_bdt_cuts.py new file mode 100644 index 0000000000..45bc981009 --- /dev/null +++ b/machine_learning_hep/scripts-dhadrons/debugging/plot_prompt_fraction_vs_bdt_cuts.py @@ -0,0 +1,75 @@ +""" +file: plot_prompt_fraction_vs_bdt_cuts.py +brief: Plot prompt fraction from cross section calculations for different non-prompt cuts +usage: python3 plot_prompt_fraction_vs_bdt_cuts.py config_fraction_vs_bdt_cuts.json +author: Maja Karwowska , Warsaw University of Technology +""" + +import argparse +import glob +import json +import re + +import matplotlib.pyplot as plt # pylint: disable=import-error +from ROOT import ( # pylint: disable=import-error,no-name-in-module + TFile, + gROOT, +) + + +def get_fractions(cfg): + """ + Read the prompt fractions from files for different non-prompt cuts.
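+ Returns dicts of per-pT-bin fraction values and errors, plus the list of cut values parsed from the matched directory names.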
+ """ + filenames = sorted(glob.glob(cfg["file_pattern"])) + fractions = {} + fractions_err = {} + fd_cuts = [] + for pt_bin_min, pt_bin_max in zip(cfg["pt_bins_min"], cfg["pt_bins_max"], strict=False): + fractions[f"{pt_bin_min}_{pt_bin_max}"] = [] + fractions_err[f"{pt_bin_min}_{pt_bin_max}"] = [] + for filename in filenames: + with TFile.Open(filename) as fin: + hist = fin.Get(cfg["histoname"]) + dirname = re.search(cfg["dir_pattern"], filename).group(0) + fd_cut = re.split("_", dirname)[-1] + fd_cuts.append(fd_cut) + for ind, (pt_bin_min, pt_bin_max) in enumerate(zip(cfg["pt_bins_min"], cfg["pt_bins_max"], strict=False)): + fractions[f"{pt_bin_min}_{pt_bin_max}"].append(hist.GetPointY(ind + 1)) + fractions_err[f"{pt_bin_min}_{pt_bin_max}"].append(hist.GetErrorY(ind + 1)) + print(f"final fractions:\n{fractions}\nfd_cuts:\n{fd_cuts}\nfractions error:\n{fractions_err}") + return fractions, fractions_err, fd_cuts + + +def main(): + """ + The main function. + """ + gROOT.SetBatch(True) + + parser = argparse.ArgumentParser(description="Arguments to pass") + parser.add_argument("config", help="JSON config file") + args = parser.parse_args() + + with open(args.config, encoding="utf8") as fil: + cfg = json.load(fil) + + fractions, fractions_err, fd_cuts = get_fractions(cfg) + + for pt_bin_min, pt_bin_max in zip(cfg["pt_bins_min"], cfg["pt_bins_max"], strict=False): + plt.figure(figsize=(20, 15)) + ax = plt.subplot(1, 1, 1) + ax.set_xlabel(cfg["x_axis"]) + ax.set_ylabel(cfg["y_axis"]) + ax.set_ylim([0.0, 1.0]) + ax.tick_params(labelsize=20) + plt.grid(linestyle="-", linewidth=2) + plt.errorbar(fd_cuts, fractions[f"{pt_bin_min}_{pt_bin_max}"], + yerr=fractions_err[f"{pt_bin_min}_{pt_bin_max}"], + c="b", elinewidth=2.5, linewidth=4.0) + ax.set_xticks(ax.get_xticks()[::10]) + plt.savefig(f'{cfg["outdir"]}/{cfg["outfile"]}_{pt_bin_min}_{pt_bin_max}.png') + + +if __name__ == "__main__": + main() diff --git a/machine_learning_hep/scripts-dhadrons/debugging/plot_prompt_fraction_vs_crosssec_configs.py b/machine_learning_hep/scripts-dhadrons/debugging/plot_prompt_fraction_vs_crosssec_configs.py new file mode 100644 index 0000000000..bf219c9d41 --- /dev/null +++ b/machine_learning_hep/scripts-dhadrons/debugging/plot_prompt_fraction_vs_crosssec_configs.py @@ -0,0 +1,106 @@ +""" +file: plot_prompt_fraction_vs_crosssec_configs.py +brief: Plot prompt fraction for diffent cross section calculations (e.g., different methods or inputs). +usage: python3 plot_prompt_fraction_vs_crosssec_configs.py config_fraction_vs_crosssec_configs.json +author: Maja Karwowska , Warsaw University of Technology +""" + +import argparse +import json +import os + +from compare_fractions import get_legend, prepare_canvas, save_canvas, set_hist_style +from ROOT import ( # pylint: disable=import-error,no-name-in-module + TFile, + gROOT, + gStyle, + kAzure, + kBlack, + kBlue, + kCyan, + kGreen, + kMagenta, + kOrange, + kRed, + kTeal, + kYellow, +) + +COLORS=[kBlack, kRed-3, kAzure-7, kMagenta+1, kGreen+2, kOrange-3, kBlue, kTeal+3, kGreen, kAzure+8, + kYellow+3, kOrange-5, kMagenta+2, kBlue-6, kCyan+1, kGreen-6] + + +def get_hist_limits(hist, miny = 0.0, maxy = 0.0): + """ + Find the minimum and maximum y-value of the histogram. 
+ """ + for binn in range(hist.GetN()): + print(f"bin {binn} [{hist.GetPointX(binn)}, "\ + f"val {hist.GetPointY(binn)} "\ + f"err {hist.GetErrorYlow(binn)}, {hist.GetErrorYhigh(binn)}") + maxval = hist.GetPointY(binn) + hist.GetErrorYhigh(binn) + minval = hist.GetPointY(binn) - hist.GetErrorYlow(binn) + maxy = max(maxval, maxy) + miny = min(minval, miny) + return miny, maxy + + +def main(): + """ + The main function. + """ + gROOT.SetBatch(True) + + gStyle.SetOptStat(0) + gStyle.SetFrameLineWidth(2) + + parser = argparse.ArgumentParser(description="Arguments to pass") + parser.add_argument("config", help="JSON config file") + args = parser.parse_args() + + with open(args.config, encoding="utf8") as fil: + cfg = json.load(fil) + + with TFile(os.path.join(cfg["output"]["outdir"], + f'{cfg["output"]["file"]}.root'), "recreate") as output: + + canv = prepare_canvas(f'c_{cfg["histoname"]}') + leg = get_legend(*cfg["legend"], len(cfg["hists"])) + + maxy = 0. + miny = 1. + hists = [] + for ind, (label, color) in enumerate(zip(cfg["hists"], COLORS, strict=False)): + with TFile.Open(os.path.join(cfg["inputdir"], cfg["hists"][label]["file"][0])) as fin: + hist = fin.Get(cfg["histoname"]) + print(f'hist {cfg["histoname"]}: {hist}') + set_hist_style(hist, color, cfg["y_axis"]) + print(label) + miny, maxy = get_hist_limits(hist, miny, maxy) + + canv.cd() + draw_opt = "same" if ind != 0 else "" + hist.Draw(draw_opt) + leg.AddEntry(hist, label, "p") + + hists.append(hist) + + # margin = 0.1 + print(f"Hist maxy: {maxy} miny: {miny}") + for hist in hists: + #hist.GetYaxis().SetRangeUser(miny - margin * miny, maxy + margin * maxy) + hist.GetYaxis().SetRangeUser(0.0, 0.7) + #hist.GetYaxis().SetRangeUser(0.5, 1.0) + hist.GetXaxis().SetRangeUser(0.0, 25.0) + + leg.Draw() + + output.cd() + canv.Write() + save_canvas(canv, cfg, cfg["output"]["file"]) + for hist in hists: + hist.Write() + + +if __name__ == "__main__": + main() diff --git a/machine_learning_hep/scripts-dhadrons/merging/README.md b/machine_learning_hep/scripts-dhadrons/merging/README.md new file mode 100644 index 0000000000..e7b4f55f96 --- /dev/null +++ b/machine_learning_hep/scripts-dhadrons/merging/README.md @@ -0,0 +1,48 @@ +# Merge results from different MLHEP and cut variation runs + +## Merge multiple histograms from multiple input files + +Files: `merge_histos.py`, `merge-cutvar.sh`, `merge-yields.sh`
+Usage: `python merge_histos.py -o out_file.root -n histName1 -n histName2 -i in_file1.root -i in_file2.root` + +You can provide as many histogram names as you want. All histograms should be 1-dimensional and have the same x-axis. If no histogram name is provided, the script will merge all 1-dimensional histograms from the input files. + +Provide one input file per x-axis bin. File names can be repeated. + +The command above merges histograms `histName1` and `histName2` from the input files and saves them in the output file. +For each histogram name provided, e.g., `histName1`, "merging" means creating a single output histogram with bin 1 content set to the content of bin 1 of `histName1` in `in_file1.root`, bin 2 content set to the content of bin 2 of `histName1` in `in_file2.root`, and so on. +In particular, the x-axis can represent pT, and the script can be used to merge results obtained for different pT bins. + +The bash files `merge-cutvar.sh` and `merge-yields.sh` provide examples of using this Python script for merging cut variation results and O2Physics D2H fitter results, respectively. + +## Merge the outputs of the MLHEP histomass step + +Files: `merge_histomass.py`, `merge-mlhep.sh`
+Usage: `python merge_histomass.py -o out_file.root -n histName1 -n histName2 -i in_file1.root -i in_file2.root` + +This script is different from the previous one as it is adjusted to the layout of MLHEP `masshisto.root` files, which contain 1 invariant mass histogram per pT bin. + +Histogram names `histName1`, `histName2` are treated as patterns (substrings) of histograms to merge. For `masshisto.root` files, the pattern can be `hmassfPt`, which matches all histograms like `hmassfPt0_1_0.010.000.000`, `hmassfPt1_2_0.020.400.000`, and so on. + +You can provide as many histogram name patterns as you want. +Provide one input file per pT bin. Each file should contain one matching histogram per pT bin. File names can be repeated. + +The merging creates a single output file with the histogram for the 1st pT bin taken from `in_file1.root`, the histogram for the 2nd pT bin from `in_file2.root`, and so on. + +`merge-mlhep.sh` is an example that uses `merge_histomass.py` to obtain a single invariant mass file for the O2Physics D2H mass fitter. The script also makes use of `merge_histos.py` to get a single efficiencies file to be used in the cut variation macro. + +## Gather MLHEP efficiencies and mass fits for cut variation + +File: `gather-inputs-cutvar.sh`
+Usage: `./gather-inputs-cutvar.sh` + +To get MLHEP results for different non-prompt cuts, different output directories must be set. Otherwise, the results get overwritten. However, the cut variation script requires the input efficiency and mass fit files to be in a single directory. + +This script takes all `efficienciesLcpKpiRun3analysis.root` and `yields_LcpKpi_Run3analysis.root` MLHEP output files from the directories that match `RESDIR_PATTERN`, and puts them in the `OUTPUT_DIR`. To differentiate the files, a suffix, formed from the corresponding directory name with `RESDIR_PATTERN` removed, is appended to each file name.
+`PERM_PATTERN` is also used to match directories, but it is not removed from the suffix. + +For example, given `RESDIR_PATTERN`: `/data/MLHEP/results-today_`, `PERM_PATTERN`: `non-prompt_`, and directories like: `/data/MLHEP/results-today_non-prompt_0.1`, `/data/MLHEP/results-today_non-prompt_0.2`, the resulting efficiency file names are: `efficienciesLcpKpiRun3analysis_non-prompt_0.1.root`, `efficienciesLcpKpiRun3analysis_non-prompt_0.2.root`. + +Adjust `MLHEP_DIR`, `OUTPUT_DIR`, `RESDIR_PATTERN` and `PERM_PATTERN` in the script. + +You might also need to adjust the regular expression in line 12 and file paths in the for loop. diff --git a/machine_learning_hep/scripts-dhadrons/merging/gather-inputs-cutvar.sh b/machine_learning_hep/scripts-dhadrons/merging/gather-inputs-cutvar.sh new file mode 100755 index 0000000000..249d5b3286 --- /dev/null +++ b/machine_learning_hep/scripts-dhadrons/merging/gather-inputs-cutvar.sh @@ -0,0 +1,20 @@ +#!/bin/bash + +# Gather MLHEP efficiencies and mass fits for all chosen non-prompt cuts into a single directory. +# The cut variation script requires files from a single directory. + +MLHEP_DIR="/data8/majak/MLHEP" +OUTPUT_DIR="${MLHEP_DIR}/input-fd-012025" + +RESDIR_PATTERN="${MLHEP_DIR}/results-24012025-hyp-ml-luigi-cuts_" +PERM_PATTERN="fd_" + +for dir in "${RESDIR_PATTERN}${PERM_PATTERN}"0.[0-9][0-9][0-9]* ; do + suffix=${dir##"${RESDIR_PATTERN}"} + echo "$suffix" + + cp "${dir}/LHC24pp_mc/Results/resultsmctot/efficienciesLcpKpiRun3analysis.root" \ + "${OUTPUT_DIR}/efficienciesLcpKpiRun3analysis_${suffix}.root" + cp "${dir}/LHC23pp_pass4/Results/resultsdatatot/yields_LcpKpi_Run3analysis.root" \ + "${OUTPUT_DIR}/yieldsLcpKpiRun3analysis-${suffix}" +done diff --git a/machine_learning_hep/scripts-dhadrons/merging/merge-cutvar.sh b/machine_learning_hep/scripts-dhadrons/merging/merge-cutvar.sh new file mode 100755 index 0000000000..2f6bc61c29 --- /dev/null +++ b/machine_learning_hep/scripts-dhadrons/merging/merge-cutvar.sh @@ -0,0 +1,16 @@ +#!/bin/bash + +python merge_histos.py -o /data8/majak/systematics/032025/bdt/CutVarLc_pp13TeV_LHC23_pass4_wide_both.root \ + -n hCorrYieldsPrompt -n hCorrYieldsNonPrompt -n hCorrFracPrompt -n hCorrFracNonPrompt \ + -n hCovPromptPrompt -n hCovPromptNonPrompt -n hCovNonPromptNonPrompt \ + -i /data8/majak/fdd-results/012025/fdd-results-1-2-wide-both/CutVarLc_pp13TeV_LHC23_pass4.root \ + -i /data8/majak/fdd-results/012025/fdd-results-2-3-cheb-wide-both/CutVarLc_pp13TeV_LHC23_pass4.root \ + -i /data8/majak/fdd-results/012025/fdd-results-3-4-cheb-wide-both/CutVarLc_pp13TeV_LHC23_pass4.root \ + -i /data8/majak/fdd-results/012025/fdd-results-4-5-cheb-wide-both/CutVarLc_pp13TeV_LHC23_pass4.root \ + -i /data8/majak/fdd-results/012025/fdd-results-5-6-cheb-wide-both/CutVarLc_pp13TeV_LHC23_pass4.root \ + -i /data8/majak/fdd-results/012025/fdd-results-4-5-cheb-wide-both/CutVarLc_pp13TeV_LHC23_pass4.root \ + -i /data8/majak/fdd-results/012025/fdd-results-4-5-cheb-wide-both/CutVarLc_pp13TeV_LHC23_pass4.root \ + -i /data8/majak/fdd-results/012025/fdd-results-4-5-cheb-wide-both/CutVarLc_pp13TeV_LHC23_pass4.root \ + -i /data8/majak/fdd-results/012025/fdd-results-10-12-cheb-wide-both/CutVarLc_pp13TeV_LHC23_pass4.root \ + -i /data8/majak/fdd-results/012025/fdd-results-10-12-cheb-wide-both/CutVarLc_pp13TeV_LHC23_pass4.root \ + -i /data8/majak/fdd-results/012025/fdd-results-16-24-cheb-wide-both/CutVarLc_pp13TeV_LHC23_pass4.root diff --git a/machine_learning_hep/scripts-dhadrons/merging/merge-mlhep.sh
b/machine_learning_hep/scripts-dhadrons/merging/merge-mlhep.sh new file mode 100755 index 0000000000..ef0df01089 --- /dev/null +++ b/machine_learning_hep/scripts-dhadrons/merging/merge-mlhep.sh @@ -0,0 +1,70 @@ +#!/bin/bash + +MLHEP_DIR="/data8/majak/MLHEP" +OUTPUT_DIR="/data8/majak/MLHEP/input-d2h-fitter-012025" +OUTPUT_DIR_EFF="/data8/majak/MLHEP/input-fd-012025" + +RESDIR_PATTERN="${MLHEP_DIR}/results-24012025-hyp-ml-luigi-cuts_fd_" +PERM_PATTERN="fd_precise_" + +FD_12=(0.000 0.200 0.250 0.300 0.350 0.380 0.415 0.430 0.470 0.500 0.520 0.550 0.570 0.590 0.610 0.630 0.650 0.670 0.690) +FD_23=(0.000 0.290 0.320 0.350 0.380 0.410 0.430 0.450 0.470 0.490 0.510 0.530 0.550 0.570 0.590 0.610 0.630 0.650 0.670) +FD_34=(0.000 0.290 0.320 0.350 0.370 0.390 0.410 0.425 0.450 0.470 0.490 0.510 0.530 0.550 0.570 0.590 0.610 0.630 0.650) +FD_45=(0.000 0.130 0.150 0.170 0.190 0.210 0.230 0.250 0.270 0.290 0.320 0.350 0.370 0.390 0.410 0.430 0.450 0.470 0.490) +FD_56=(0.000 0.110 0.130 0.150 0.170 0.190 0.210 0.230 0.250 0.270 0.290 0.310 0.330 0.350 0.370 0.390 0.410 0.430 0.450) +FD_67=(0.000 0.130 0.150 0.170 0.190 0.210 0.230 0.250 0.270 0.290 0.320 0.350 0.370 0.390 0.410 0.430 0.450 0.470 0.490) +FD_78=(0.000 0.130 0.150 0.170 0.190 0.210 0.230 0.250 0.270 0.290 0.320 0.350 0.370 0.390 0.410 0.430 0.450 0.470 0.490) +FD_810=(0.000 0.130 0.150 0.170 0.190 0.210 0.230 0.250 0.270 0.290 0.320 0.350 0.370 0.390 0.410 0.430 0.450 0.470 0.490) +FD_1012=(0.000 0.210 0.230 0.250 0.270 0.290 0.310 0.330 0.350 0.370 0.390 0.410 0.430 0.450 0.470 0.490 0.510 0.530 0.550) +FD_1216=(0.000 0.210 0.230 0.250 0.270 0.290 0.310 0.330 0.350 0.370 0.390 0.410 0.430 0.450 0.470 0.490 0.510 0.530 0.550) +FD_1624=(0.000 0.090 0.110 0.130 0.150 0.170 0.190 0.210 0.230 0.250 0.270 0.290 0.310 0.330 0.350 0.370 0.390 0.410 0.430) + +for i in "${!FD_12[@]}" ; do + fd12=${FD_12[i]} + fd23=${FD_23[i]} + fd34=${FD_34[i]} + fd45=${FD_45[i]} + fd56=${FD_56[i]} + fd67=${FD_67[i]} + fd78=${FD_78[i]} + fd810=${FD_810[i]} + fd1012=${FD_1012[i]} + fd1216=${FD_1216[i]} + fd1624=${FD_1624[i]} + echo "${i} fd ${fd12} ${fd23} ${fd34} ${fd45} ${fd56} ${fd67} ${fd78} ${fd810} ${fd1012} ${fd1216} ${fd1624}" + + RESPATH="${OUTPUT_DIR}/projections_${PERM_PATTERN}${fd12}_${fd23}_${fd34}_${fd45}_${fd56}_${fd67}_${fd78}_${fd810}_${fd1012}_${fd1216}_${fd1624}.root" + + python merge_histomass.py \ + -n hmassfPt \ + -o "${RESPATH}" \ + -i "${RESDIR_PATTERN}${fd12}/LHC23pp_pass4/Results/resultsdatatot/masshisto.root" \ + -i "${RESDIR_PATTERN}${fd23}/LHC23pp_pass4/Results/resultsdatatot/masshisto.root" \ + -i "${RESDIR_PATTERN}${fd34}/LHC23pp_pass4/Results/resultsdatatot/masshisto.root" \ + -i "${RESDIR_PATTERN}${fd45}/LHC23pp_pass4/Results/resultsdatatot/masshisto.root" \ + -i "${RESDIR_PATTERN}${fd56}/LHC23pp_pass4/Results/resultsdatatot/masshisto.root" \ + -i "${RESDIR_PATTERN}${fd67}/LHC23pp_pass4/Results/resultsdatatot/masshisto.root" \ + -i "${RESDIR_PATTERN}${fd78}/LHC23pp_pass4/Results/resultsdatatot/masshisto.root" \ + -i "${RESDIR_PATTERN}${fd810}/LHC23pp_pass4/Results/resultsdatatot/masshisto.root" \ + -i "${RESDIR_PATTERN}${fd1012}/LHC23pp_pass4/Results/resultsdatatot/masshisto.root" \ + -i "${RESDIR_PATTERN}${fd1216}/LHC23pp_pass4/Results/resultsdatatot/masshisto.root" \ + -i "${RESDIR_PATTERN}${fd1624}/LHC23pp_pass4/Results/resultsdatatot/masshisto.root" + + RESPATH="${OUTPUT_DIR_EFF}/eff_${PERM_PATTERN}${fd12}_${fd23}_${fd34}_${fd45}_${fd56}_${fd67}_${fd78}_${fd810}_${fd1012}_${fd1216}_${fd1624}.root" + + python merge_histos.py \ + 
-n eff \ + -n eff_fd \ + -o "${RESPATH}" \ + -i "${RESDIR_PATTERN}${fd12}/LHC24pp_mc/Results/resultsmctot/efficienciesLcpKpiRun3analysis.root" \ + -i "${RESDIR_PATTERN}${fd23}/LHC24pp_mc/Results/resultsmctot/efficienciesLcpKpiRun3analysis.root" \ + -i "${RESDIR_PATTERN}${fd34}/LHC24pp_mc/Results/resultsmctot/efficienciesLcpKpiRun3analysis.root" \ + -i "${RESDIR_PATTERN}${fd45}/LHC24pp_mc/Results/resultsmctot/efficienciesLcpKpiRun3analysis.root" \ + -i "${RESDIR_PATTERN}${fd56}/LHC24pp_mc/Results/resultsmctot/efficienciesLcpKpiRun3analysis.root" \ + -i "${RESDIR_PATTERN}${fd67}/LHC24pp_mc/Results/resultsmctot/efficienciesLcpKpiRun3analysis.root" \ + -i "${RESDIR_PATTERN}${fd78}/LHC24pp_mc/Results/resultsmctot/efficienciesLcpKpiRun3analysis.root" \ + -i "${RESDIR_PATTERN}${fd810}/LHC24pp_mc/Results/resultsmctot/efficienciesLcpKpiRun3analysis.root" \ + -i "${RESDIR_PATTERN}${fd1012}/LHC24pp_mc/Results/resultsmctot/efficienciesLcpKpiRun3analysis.root" \ + -i "${RESDIR_PATTERN}${fd1216}/LHC24pp_mc/Results/resultsmctot/efficienciesLcpKpiRun3analysis.root" \ + -i "${RESDIR_PATTERN}${fd1624}/LHC24pp_mc/Results/resultsmctot/efficienciesLcpKpiRun3analysis.root" +done diff --git a/machine_learning_hep/scripts-dhadrons/merging/merge-yields.sh b/machine_learning_hep/scripts-dhadrons/merging/merge-yields.sh new file mode 100755 index 0000000000..640e24c30b --- /dev/null +++ b/machine_learning_hep/scripts-dhadrons/merging/merge-yields.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +# Merge yields at 0.0 for prompt cross section +python merge_histos.py -o "/data8/majak/crosssec/202502/yieldsLcpKpiRun3analysis_fd_0.000.root" \ + -i "/data8/majak/MLHEP/input-fd-012025/yields-fd_0.000-poly-fixed-sigma.root" \ + -i "/data8/majak/MLHEP/input-fd-012025/yields-fd_0.000-cheb-fixed-sigma-120-190.root" \ + -i "/data8/majak/MLHEP/input-fd-012025/yields-fd_0.000-cheb-fixed-sigma-120-190.root" \ + -i "/data8/majak/MLHEP/input-fd-012025/yields-fd_0.000-cheb-fixed-sigma-120-190.root" \ + -i "/data8/majak/MLHEP/input-fd-012025/yields-fd_0.000-cheb-fixed-sigma-120-190.root" \ + -i "/data8/majak/MLHEP/input-fd-012025/yields-fd_0.000-cheb-fixed-sigma-120-190.root" \ + -i "/data8/majak/MLHEP/input-fd-012025/yields-fd_0.000-cheb-fixed-sigma-120-190.root" \ + -i "/data8/majak/MLHEP/input-fd-012025/yields-fd_0.000-cheb-fixed-sigma-120-190.root" \ + -i "/data8/majak/MLHEP/input-fd-012025/yields-fd_0.000-cheb-fixed-sigma-120-190.root" \ + -i "/data8/majak/MLHEP/input-fd-012025/yields-fd_0.000-cheb-fixed-sigma-120-190.root" \ + -i "/data8/majak/MLHEP/input-fd-012025/yields-fd_0.000-cheb-fixed-sigma-120-190.root" diff --git a/machine_learning_hep/scripts-dhadrons/merging/merge_histomass.py b/machine_learning_hep/scripts-dhadrons/merging/merge_histomass.py new file mode 100644 index 0000000000..eb172d1039 --- /dev/null +++ b/machine_learning_hep/scripts-dhadrons/merging/merge_histomass.py @@ -0,0 +1,42 @@ +""" +Merge MLHEP histomass root files for the PWGHF mass fitter. +One file per pt bin. Each file contains one histogram per pt bin. 
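+From the i-th input file, the i-th histogram matching the name pattern (in key order) is taken.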
+""" + +import argparse + +from ROOT import TFile, gROOT # pylint: disable=import-error + + +def main(): + """ + Main + """ + + gROOT.SetBatch(True) + + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("-n", "--histname", action="append", type=str, + help="Name pattern of histograms to merge") + parser.add_argument("-o", "--outfile", action="append", type=str, help="Output file") + parser.add_argument("-i", "--infile", action="append", type=str, help="Input file") + args = parser.parse_args() + + if len(args.outfile) != 1: + raise ValueError("Provide exactly 1 output file") + + print(f"infile {args.infile}") + + with TFile(args.outfile[0], "RECREATE") as fout: + for name in args.histname: + for ind, filename in enumerate(args.infile): + fin = TFile(filename) + list_hists = [key.GetName() for key in fin.GetListOfKeys() \ + if name in key.GetName()] + print(f"File {filename} hist list {list_hists} selected {list_hists[ind]}") + hist = fin.Get(list_hists[ind]) + fout.cd() + hist.Write() + +if __name__ == "__main__": + main() diff --git a/machine_learning_hep/scripts-dhadrons/merging/merge_histos.py b/machine_learning_hep/scripts-dhadrons/merging/merge_histos.py new file mode 100644 index 0000000000..d32b39e66a --- /dev/null +++ b/machine_learning_hep/scripts-dhadrons/merging/merge_histos.py @@ -0,0 +1,52 @@ +""" +Merge histograms from different ROOT files. One file per x-axis bin. +A single histogram contains all x-axis bins. +""" + +import argparse + +from ROOT import TFile, gROOT # pylint: disable=import-error + + +def main(): + """ + Main + """ + gROOT.SetBatch(True) + + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("-n", "--histname", action="append", type=str, + help="Name of histograms to merge") + parser.add_argument("-o", "--outfile", action="append", type=str, help="Output file") + parser.add_argument("-i", "--infile", action="append", type=str, help="Input file") + args = parser.parse_args() + + if len(args.outfile) != 1: + raise ValueError("Provide exactly 1 output file") + + with TFile(args.outfile[0], "RECREATE") as fout: + fins = [TFile(filename) for filename in args.infile] + + histname = args.histname + if args.histname is None: + histname = [key.GetName() for key in fins[0].GetListOfKeys()] + + def get_hist(fin, histname): + fin.cd() + return fin.Get(histname) + + for name in histname: + hist_list = [get_hist(fin, name) for fin in fins] + if any(cls in hist_list[0].ClassName() for cls in ("TH1", "TGraph")): + hist = hist_list[-1].Clone() + for ind, hist_tmp in enumerate(hist_list): + print(f"hist {name} bin {ind+1} pt [{hist.GetBinLowEdge(ind + 1)}, " \ + f"{hist.GetBinLowEdge(ind + 2)}) " \ + f"content {hist_tmp.GetBinContent(ind + 1)}") + hist.SetBinContent(ind+1, hist_tmp.GetBinContent(ind+1)) + hist.SetBinError(ind+1, hist_tmp.GetBinError(ind+1)) + fout.cd() + hist.Write() + +if __name__ == "__main__": + main() diff --git a/machine_learning_hep/multitrial/README.md b/machine_learning_hep/scripts-dhadrons/multitrial/README.md similarity index 97% rename from machine_learning_hep/multitrial/README.md rename to machine_learning_hep/scripts-dhadrons/multitrial/README.md index 8437924336..54d84c7299 100644 --- a/machine_learning_hep/multitrial/README.md +++ b/machine_learning_hep/scripts-dhadrons/multitrial/README.md @@ -1,4 +1,4 @@ -# Multitrial systematics with MLHEP +# Multitrial (raw yield) systematics with MLHEP ## Generate configurations (MLHEP yml databases) for each trial @@ -9,7 +9,7 @@ Arguments: - 
`database_file`: filename of the template database without the .yml extension, e.g., `database_ml_parameters_LcToPKPi` - `in_db_dir`: path to the directory containing the database, e.g., `data/data_run3` - `out_db_dir`: path to the directory for output multitrial databases, e.g., `multitrial_db` -- `mlhep_results_dir_pattern`: prefix of output directory name for fit results; for each trial, the trial name is appended to the directory name, and the resulting directory name is written under `Run3analysis/{data,mc}/prefix_dir_res` in the database file +- `mlhep_results_dir_pattern`: prefix of output directory name for fit results; for each trial, the trial name is appended to the directory name, and the resulting directory name is written under `Run3analysis/{data,mc}/prefix_dir_res` in the database file Adjust `DIR_PATH` in the script. It is the path to the base directory where you store directories with MLHEP results. diff --git a/machine_learning_hep/multitrial/config_multitrial.json b/machine_learning_hep/scripts-dhadrons/multitrial/config_multitrial.json similarity index 100% rename from machine_learning_hep/multitrial/config_multitrial.json rename to machine_learning_hep/scripts-dhadrons/multitrial/config_multitrial.json diff --git a/machine_learning_hep/multitrial/multitrial.py b/machine_learning_hep/scripts-dhadrons/multitrial/multitrial.py similarity index 91% rename from machine_learning_hep/multitrial/multitrial.py rename to machine_learning_hep/scripts-dhadrons/multitrial/multitrial.py index 593a99dd31..5f8fc0b684 100644 --- a/machine_learning_hep/multitrial/multitrial.py +++ b/machine_learning_hep/scripts-dhadrons/multitrial/multitrial.py @@ -9,10 +9,10 @@ import glob import json import re -import numpy as np -import matplotlib.pyplot as plt -from matplotlib.ticker import MultipleLocator, AutoMinorLocator +import matplotlib.pyplot as plt # pylint: disable=import-error +import numpy as np +from matplotlib.ticker import AutoMinorLocator, MultipleLocator # pylint: disable=import-error from ROOT import ( # pylint: disable=import-error,no-name-in-module TFile, gROOT, @@ -33,7 +33,7 @@ def get_yields(cfg): yields_err = {} trials = {} chis = {} - for pt_bin_min, pt_bin_max in zip(cfg["pt_bins_min"], cfg["pt_bins_max"]): + for pt_bin_min, pt_bin_max in zip(cfg["pt_bins_min"], cfg["pt_bins_max"], strict=False): yields[f"{pt_bin_min}_{pt_bin_max}"] = [] yields_err[f"{pt_bin_min}_{pt_bin_max}"] = [] trials[f"{pt_bin_min}_{pt_bin_max}"] = [] @@ -50,7 +50,7 @@ def get_yields(cfg): dirname = re.split("/", filename)[4] # [-2] for D2H fitter trial_name = dirname.replace(cfg["dir_pattern"], "") for ind, (pt_bin_min, pt_bin_max) in enumerate(zip(cfg["pt_bins_min"], - cfg["pt_bins_max"])): + cfg["pt_bins_max"], strict=False)): if eval(cfg["selection"])(hist_sel.GetBinContent(ind + 1)) \ and hist.GetBinContent(ind + 1) > 1.0 : # pylint: disable=eval-used yields[f"{pt_bin_min}_{pt_bin_max}"].append(hist.GetBinContent(ind + 1)) @@ -109,8 +109,8 @@ def plot_yields_trials(yields, yields_err, trials, cfg, pt_string, plot_pt_strin facecolor="orange", edgecolor="none", alpha=0.3) plot_trial_line(ax, central_trial_ind) plot_text_box(ax, plot_pt_string) - fig.savefig(f'{cfg["outdir"]}/{cfg["outfile"]}_yields_trials_{pt_string}.png', - bbox_inches='tight') + fig.savefig(f"{cfg["outdir"]}/{cfg["outfile"]}_yields_trials_{pt_string}.png", + bbox_inches="tight") plt.close() @@ -120,8 +120,8 @@ def plot_chis(chis, cfg, pt_string, plot_pt_string): ax.scatter(x_axis, chis, c="b", marker="o") set_ax_limits(ax, 
pt_string, chis) plot_text_box(ax, plot_pt_string) - fig.savefig(f'{cfg["outdir"]}/{cfg["outfile"]}_chis_{pt_string}.png', - bbox_inches='tight') + fig.savefig(f"{cfg["outdir"]}/{cfg["outfile"]}_chis_{pt_string}.png", + bbox_inches="tight") plt.close() @@ -145,7 +145,7 @@ def plot_yields_distr(yields, cfg, pt_string, plot_pt_string, central_trial_ind, f"std dev: {std_dev:.2f}\n"\ f"RMSE: {rmse:.2f}\n"\ f"#trials: {len(yields)}") - plt.savefig(f'{cfg["outdir"]}/{cfg["outfile"]}_distr_{pt_string}.png', bbox_inches='tight') + plt.savefig(f"{cfg["outdir"]}/{cfg["outfile"]}_distr_{pt_string}.png", bbox_inches="tight") plt.close() @@ -161,7 +161,7 @@ def main(): yields, yields_err, trials, chis = get_yields(cfg) - for pt_bin_min, pt_bin_max in zip(cfg["pt_bins_min"], cfg["pt_bins_max"]): + for pt_bin_min, pt_bin_max in zip(cfg["pt_bins_min"], cfg["pt_bins_max"], strict=False): plot_pt_string = f"${pt_bin_min} < p_\\mathrm{{T}}/(\\mathrm{{GeV}}/c) < {pt_bin_max}$" pt_string = f"{pt_bin_min}_{pt_bin_max}" @@ -177,7 +177,7 @@ def main(): except: # pylint: disable=bare-except pass - with open(f'{cfg["outdir"]}/{cfg["outfile"]}_trials_{pt_string}.txt', + with open(f"{cfg["outdir"]}/{cfg["outfile"]}_trials_{pt_string}.txt", "w", encoding="utf-8") as ftext: for trial in trials[pt_string]: ftext.write(f"{trial}\n") diff --git a/machine_learning_hep/multitrial/run-mlhep-fitter-multitrial.py b/machine_learning_hep/scripts-dhadrons/multitrial/run-mlhep-fitter-multitrial.py similarity index 97% rename from machine_learning_hep/multitrial/run-mlhep-fitter-multitrial.py rename to machine_learning_hep/scripts-dhadrons/multitrial/run-mlhep-fitter-multitrial.py index d8307def4e..b547900b95 100644 --- a/machine_learning_hep/multitrial/run-mlhep-fitter-multitrial.py +++ b/machine_learning_hep/scripts-dhadrons/multitrial/run-mlhep-fitter-multitrial.py @@ -9,6 +9,7 @@ import argparse import re import shutil + import yaml SIGMA02="0.007, 0.007, 0.013" @@ -85,7 +86,7 @@ def process_trial(trial, ana_cfg, data_cfg, mc_cfg): ana_cfg["n_rebin"] = [rebin + 1 for rebin in ana_cfg["n_rebin"]] elif "free-sigma" in trial: print("Processing free-sigma") - for pt_cfg, free_sigma in zip(mc_cfg, FREE_SIGMAS): + for pt_cfg, free_sigma in zip(mc_cfg, FREE_SIGMAS, strict=False): sig_fn = pt_cfg["components"]["sig"]["fn"] pt_cfg["components"]["sig"]["fn"] = re.sub(r"sigma_g1\[(.*?)\]", f"sigma_g1[{free_sigma}]", sig_fn) @@ -133,7 +134,7 @@ def main(db, db_dir, out_db_dir, resdir_pattern): fit_cfg = ana_cfg["mass_roofit"] mc_cfg = [fit_params for fit_params in fit_cfg \ if "level" in fit_params and fit_params["level"] == "mc"] - data_cfg = [fit_params for fit_params in fit_cfg if not "level" in fit_params] + data_cfg = [fit_params for fit_params in fit_cfg if "level" not in fit_params] resdir = f"{resdir_pattern}{comb}" respath = f"{DIR_PATH}/{resdir}/" diff --git a/machine_learning_hep/multitrial/run-mlhep-fitter-multitrial.sh b/machine_learning_hep/scripts-dhadrons/multitrial/run-mlhep-fitter-multitrial.sh similarity index 100% rename from machine_learning_hep/multitrial/run-mlhep-fitter-multitrial.sh rename to machine_learning_hep/scripts-dhadrons/multitrial/run-mlhep-fitter-multitrial.sh diff --git a/machine_learning_hep/scripts-dhadrons/preliminary-plots/DrawCutVarFit.C b/machine_learning_hep/scripts-dhadrons/preliminary-plots/DrawCutVarFit.C new file mode 100644 index 0000000000..509f8caf1c --- /dev/null +++ b/machine_learning_hep/scripts-dhadrons/preliminary-plots/DrawCutVarFit.C @@ -0,0 +1,304 @@ +#include "TCanvas.h" +#include 
"TFile.h" +#include "TGaxis.h" +#include "TGraphAsymmErrors.h" +#include "TH1.h" +#include "TLatex.h" +#include "TLegend.h" +#include "TPad.h" +#include "TStyle.h" +#include +#include + +using namespace std; + +void SetStyle(); +void SetStyleHisto(TH1D* h); +void SetStyleHisto(TH1F* h); +void NormaliseHist1d(TH1* h); + +// const Int_t colors[] = {kGreen + 2, kBlue - 4, kRed, kOrange + 7}; +// const Int_t markers[] = {20, 21, 33, 34}; +// const Int_t npoints[] = {5, 3, 4, 4, 4, 4, 4}; +// const Int_t nPtBins = 11; +// const Double_t ptlimsmiddle[11] = {1.5, 2.5, 3.5, 4.5, 5.5, 6.5, +// 7.5, 9, 11, 14, 20}; +// const Int_t nPtBinsCoarse = 11; +// Double_t ptlimsCoarse[nPtBinsCoarse + 1] = {1., 2., 3., 4., 5., 6., +// 7., 8., 10., 12., 16., 24.}; +// Double_t ptbinwidthCoarse[nPtBinsCoarse] = {1., 1., 1., 1., 1., 1., +// 1., 2., 2., 4., 8.}; +// const Double_t ptlimsmiddlePrompt[21] = { +// 0.5, 1.25, 1.75, 2.25, 2.75, 3.25, 3.75, 4.25, 4.75, 5.25, 5.75, +// 6.25, 6.75, 7.25, 7.75, 8.5, 9.5, 11., 14., 20., 30.}; +// Double_t yvaluncPrompt[21] = {0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., +// 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.}; + +std::vector bdtScoreCuts_1_2 = {0.21, 0.24, 0.27, 0.30, 0.33, 0.35, 0.37, 0.39, 0.41, 0.44, 0.46, 0.48, 0.50, 0.52, 0.54, 0.55, 0.58}; +std::vector bdtScoreCuts_2_3 = {0.20, 0.22, 0.24, 0.26, 0.28, 0.30, 0.32, 0.34, 0.36, 0.38, 0.40, 0.42, 0.44, 0.46, 0.48, 0.50, 0.52}; +std::vector bdtScoreCuts_3_4 = {0.26, 0.28, 0.30, 0.32, 0.34, 0.36, 0.38, 0.40, 0.42, 0.44, 0.46, 0.48, 0.50, 0.52, 0.55, 0.58, 0.60}; +std::vector bdtScoreCuts_4_5 = {0.17, 0.19, 0.21, 0.23, 0.25, 0.27, 0.29, 0.31, 0.33, 0.35, 0.38, 0.40, 0.43, 0.47, 0.50, 0.54, 0.58}; +std::vector bdtScoreCuts_5_6 = {0.10, 0.12, 0.14, 0.16, 0.18, 0.21, 0.24, 0.26, 0.28, 0.30, 0.33, 0.36, 0.39, 0.42, 0.45, 0.50, 0.52}; +std::vector bdtScoreCuts_6_8 = {0.15, 0.17, 0.19, 0.21, 0.23, 0.25, 0.27, 0.29, 0.31, 0.33, 0.36, 0.39, 0.41, 0.43, 0.46, 0.49, 0.52}; +std::vector bdtScoreCuts_8_12 = {0.08, 0.11, 0.14, 0.16, 0.18, 0.20, 0.22, 0.25, 0.28, 0.30, 0.33, 0.35, 0.38, 0.41, 0.43, 0.46, 0.49}; +// std::vector bdtScoreCuts = {0.29, 0.33, 0.37, 0.41, 0.45, 0.49, +// 0.53, 0.57, 0.61, 0.65, 0.69, 0.73, +// 0.77, 0.81, 0.85, 0.89, 0.93}; +std::vector bdtScoreCuts_toPlot = {0.29, 0.45, 0.61, 0.77, 0.93}; +std::vector bdtScoreCuts_toPlot_ind = {0, 4, 8, 12, 16}; + +const Int_t binMin = 4; +const Int_t binMax = 5; +std::vector bdtScoreCuts = bdtScoreCuts_4_5; + +bool DrawAllPoints = false; + +void DrawCutVarFit(bool isPreliminary = kTRUE) +{ + + // TGaxis::SetMaxDigits(1); + gStyle->SetOptTitle(0); + gStyle->SetOptStat(0); + + TFile* CutVarFile = nullptr; + + // D + TH1F* hRawYieldsVsCutPt = nullptr; + TH1F* hRawYieldPromptVsCut = nullptr; + TH1F* hRawYieldFDVsCut = nullptr; + TH1F* hRawYieldsVsCutReSum = nullptr; + + CutVarFile = + new TFile("/data8/majak/systematics/230824/CutVarLc_pp13TeV_LHC24d3_default.root", + "read"); + hRawYieldsVsCutPt = + std::reinterpret_cast CutVarFile->Get(Form("hRawYieldVsCut_pt%d_%d", binMin, binMax)); + hRawYieldPromptVsCut = + std::reinterpret_cast CutVarFile->Get(Form("hRawYieldPromptVsCut_pt%d_%d", binMin, binMax)); + hRawYieldFDVsCut = + std::reinterpret_cast CutVarFile->Get(Form("hRawYieldNonPromptVsCut_pt%d_%d", binMin, binMax)); + hRawYieldsVsCutReSum = + std::reinterpret_cast CutVarFile->Get(Form("hRawYieldSumVsCut_pt%d_%d", binMin, binMax)); + + SetStyleHisto(hRawYieldsVsCutPt); + SetStyleHisto(hRawYieldPromptVsCut); + SetStyleHisto(hRawYieldFDVsCut); + 
SetStyleHisto(hRawYieldsVsCutReSum); + + hRawYieldsVsCutPt->SetMarkerStyle(20); + hRawYieldsVsCutPt->SetMarkerSize(1); + hRawYieldsVsCutPt->SetMarkerColor(kBlack); + hRawYieldsVsCutPt->SetLineColor(kBlack); + + hRawYieldPromptVsCut->SetMarkerStyle(33); + hRawYieldPromptVsCut->SetMarkerSize(1); + hRawYieldPromptVsCut->SetMarkerColor(kRed + 1); + hRawYieldPromptVsCut->SetLineColor(kRed + 1); + + hRawYieldFDVsCut->SetMarkerStyle(33); + hRawYieldFDVsCut->SetMarkerSize(1); + hRawYieldFDVsCut->SetMarkerColor(kAzure + 4); + hRawYieldFDVsCut->SetLineColor(kAzure + 4); + + hRawYieldsVsCutReSum->SetMarkerStyle(33); + hRawYieldsVsCutReSum->SetMarkerSize(1); + hRawYieldsVsCutReSum->SetMarkerColor(kGreen + 2); + hRawYieldsVsCutReSum->SetLineColor(kGreen + 2); + + hRawYieldsVsCutPt->GetYaxis()->SetTitle("Raw yield"); + hRawYieldsVsCutPt->GetYaxis()->SetTitleSize(0.05); + hRawYieldsVsCutPt->GetYaxis()->SetMaxDigits(3); + hRawYieldsVsCutPt->GetXaxis()->SetTitle("Minimum BDT score for non-prompt #Lambda_{c}^{#plus}"); + hRawYieldsVsCutPt->GetXaxis()->SetTitleSize(0.05); + hRawYieldsVsCutPt->SetMinimum(0.1); + hRawYieldsVsCutPt->SetMaximum(35000); + hRawYieldsVsCutPt->SetLineWidth(2); + hRawYieldsVsCutPt->GetYaxis()->SetTitleOffset(1.1); + // Set custom labels + for (size_t i = 0; i < bdtScoreCuts.size(); ++i) { + hRawYieldsVsCutPt->GetXaxis()->SetBinLabel(i + 1, Form("")); + for (size_t j = 0; j < bdtScoreCuts_toPlot_ind.size(); ++j) + // if (bdtScoreCuts[i] == bdtScoreCuts_toPlot[j]) { + if (i == bdtScoreCuts_toPlot_ind[j]) { + std::cout << "bdtScoreCuts[i] " << bdtScoreCuts[i] << " bdtScoreCuts_toPlot " << bdtScoreCuts_toPlot_ind[j] << std::endl; + hRawYieldsVsCutPt->GetXaxis()->SetBinLabel(i + 1, Form("%.2f", bdtScoreCuts[i])); + } + } + + TCanvas* c1 = new TCanvas("c1", "c1", 0, 0, 750, 750); + gStyle->SetOptStat(0); + c1->SetTickx(); + c1->SetTicky(); + c1->SetBottomMargin(0.13); + c1->SetLeftMargin(0.17); + c1->SetTopMargin(0.06); + c1->SetRightMargin(0.06); + c1->cd(); + + hRawYieldsVsCutPt->Draw(); + hRawYieldPromptVsCut->Draw("HISTsame"); + hRawYieldPromptVsCut->SetFillStyle(3154); + hRawYieldPromptVsCut->SetFillColor(kRed + 1); + hRawYieldFDVsCut->Draw("HISTsame"); + hRawYieldFDVsCut->SetFillStyle(3145); + hRawYieldFDVsCut->SetFillColor(kAzure + 4); + hRawYieldsVsCutReSum->Draw("HISTsame"); + + TLatex info; + info.SetNDC(); + info.SetTextFont(43); + info.SetTextSize(40); + info.DrawLatex(0.21, 0.86, "ALICE Preliminary"); + + TLatex infos; + infos.SetNDC(); + infos.SetTextFont(43); + infos.SetTextSize(30); + infos.DrawLatex(0.21, 0.80, + "#Lambda_{c}^{#plus} and charge conj., pp, #sqrt{#it{s}} = 13.6 TeV"); + // infos.DrawLatex(0.21, 0.74, "|#it{y}| < 0.5"); + + TLatex infoPt; + infoPt.SetNDC(); + infoPt.SetTextFont(43); + infoPt.SetTextSize(30); + + infoPt.DrawLatex(0.62, 0.70, Form("%d < #it{p}_{T} < %d GeV/#it{c}", binMin, binMax)); + // TLatex info5; + // info5.SetNDC(); + // info5.SetTextFont(43); + // info5.SetTextSize(15); + // info5.DrawLatex(0.48, 0.66, "#it{f} (b #rightarrow B^{0}, b #rightarrow + // B^{+})_{LHCb}, BR (H_{b} #rightarrow D^{0}+X)_{PYTHIA 8}");//, + // info1.DrawLatex(0.5, 0.74-0.02, "average of"); + // info.DrawLatex(0.20, 0.70, "#Lambda_{c}^{+} #rightarrow pK^{0}_{S}"); + // if (isPreliminary){ + // info.DrawLatex(0.28, 0.85, "ALICE"); + // info.DrawLatex(0.28, 0.85, "ALICE"); + // info.DrawLatex(0.22, 0.2-0.06, "Preliminary"); + // } + + // TLatex info2; + // info2.SetNDC(); + // info2.SetTextFont(43); + // info2.SetTextSize(15); + // info2.DrawLatex(0.21, 0.17, "#pm
3.7% lumi. unc. not shown"); + // info2.DrawLatex(0.21, 0.22, "#pm 0.76% BR unc. not shown"); + + TLegend* leg = new TLegend(0.62, 0.48, 0.70, 0.68); + leg->SetFillColor(0); + leg->SetFillStyle(0); + leg->SetBorderSize(0); + leg->SetMargin(0.46); + leg->SetTextSize(28); + leg->SetTextFont(43); + leg->AddEntry(hRawYieldsVsCutPt, "Data", "p"); + leg->AddEntry(hRawYieldPromptVsCut, "Prompt", "F"); + leg->AddEntry(hRawYieldFDVsCut, "Non-prompt", "F"); + leg->AddEntry(hRawYieldsVsCutReSum, "Total", "l"); + leg->Draw(); + + c1->SaveAs(Form("./CutVarFitLcFD_%d-%d.pdf", binMin, binMax)); + c1->SaveAs(Form("./CutVarFitLcFD_%d-%d.png", binMin, binMax)); + c1->SaveAs(Form("./CutVarFitLcFD_%d-%d.eps", binMin, binMax)); +} + +void SetStyle() +{ + cout << "Setting style!" << endl; + + gStyle->Reset("Plain"); + gStyle->SetOptTitle(0); + gStyle->SetOptStat(0); + gStyle->SetPalette(1); + gStyle->SetCanvasColor(10); + gStyle->SetCanvasBorderMode(0); + gStyle->SetFrameLineWidth(1); + gStyle->SetFrameFillColor(kWhite); + gStyle->SetPadColor(10); + gStyle->SetPadTickX(1); + gStyle->SetPadTickY(1); + gStyle->SetPadBottomMargin(0.13); + gStyle->SetPadLeftMargin(0.13); + gStyle->SetPadTopMargin(0.07); + gStyle->SetPadRightMargin(0.07); + gStyle->SetHistLineWidth(1); + gStyle->SetHistLineColor(kRed); + gStyle->SetFuncWidth(2); + gStyle->SetFuncColor(kGreen); + gStyle->SetLineWidth(2); + gStyle->SetLabelSize(0.055, "xyz"); + gStyle->SetLabelOffset(0.01, "y"); + gStyle->SetLabelOffset(0.01, "x"); + gStyle->SetLabelColor(kBlack, "xyz"); + // gStyle->SetTitleSize(0.055,"xyz"); + // gStyle->SetTitleOffset(1.5,"y"); + // gStyle->SetTitleOffset(1.15,"x"); + gStyle->SetTitleFillColor(kWhite); + gStyle->SetTextSizePixels(30); + gStyle->SetTextFont(42); + gStyle->SetLegendBorderSize(0); + gStyle->SetLegendFillColor(kWhite); + gStyle->SetLegendFont(42); + gStyle->SetMarkerStyle(20); + gStyle->SetMarkerSize(0.7); + gStyle->SetMarkerColor(kBlack); +} + +void SetStyleHisto(TH1D* h) +{ + + h->SetLineColor(kBlack); + h->SetLineWidth(2); + h->GetYaxis()->SetLabelFont(42); + h->GetYaxis()->SetTitleFont(42); + h->GetYaxis()->SetTitleSize(0.06); + h->GetYaxis()->SetTitleOffset(1.7); + h->GetYaxis()->SetLabelSize(0.05); + h->GetYaxis()->SetDecimals(kTRUE); + // h->GetYaxis()->SetNdivisions(507); + h->GetXaxis()->SetTitleFont(42); + h->GetXaxis()->SetLabelFont(42); + h->GetXaxis()->SetTitleSize(0.06); + h->GetXaxis()->SetTitleOffset(1.2); + h->GetXaxis()->SetLabelSize(0.07); + h->GetXaxis()->SetNdivisions(510); +} + +void SetStyleHisto(TH1F* h) +{ + + h->SetLineColor(kBlack); + h->SetLineWidth(2); + h->GetYaxis()->SetLabelFont(42); + h->GetYaxis()->SetTitleFont(42); + h->GetYaxis()->SetTitleSize(0.06); + h->GetYaxis()->SetTitleOffset(1.7); + h->GetYaxis()->SetLabelSize(0.05); + h->GetYaxis()->SetDecimals(kTRUE); + // h->GetYaxis()->SetNdivisions(507); + h->GetXaxis()->SetTitleFont(42); + h->GetXaxis()->SetLabelFont(42); + h->GetXaxis()->SetTitleSize(0.06); + h->GetXaxis()->SetTitleOffset(1.3); + h->GetXaxis()->SetLabelSize(0.07); + h->GetXaxis()->SetLabelOffset(0.01); + // h->GetXaxis()->SetNdivisions(505); + // h->GetXaxis()->SetNdivisions(510); +} + +void NormaliseHist1d(TH1* h) +{ + if (h) { + // dN/dpt + for (Int_t i = 1; i <= h->GetNbinsX(); i++) { + h->SetBinContent(i, + h->GetBinContent(i) / (h->GetXaxis()->GetBinWidth(i))); + // hnew->SetBinError(i,hnew->GetBinContent(i)/(hnew->GetBinWidth(i) + // * TMath::Sqrt(hnew->GetBinContent(i)))); // may need to look at again + h->SetBinError(i, h->GetBinError(i) / 
(h->GetXaxis()->GetBinWidth(i))); + } + } else { + cout << "can't normalise hist - not found" << endl; + } +} diff --git a/machine_learning_hep/scripts-dhadrons/preliminary-plots/README.md b/machine_learning_hep/scripts-dhadrons/preliminary-plots/README.md new file mode 100644 index 0000000000..3696a7d539 --- /dev/null +++ b/machine_learning_hep/scripts-dhadrons/preliminary-plots/README.md @@ -0,0 +1,35 @@ +# Scripts for ALICE preliminary plots + +## Invariant mass fits + +File: `plot_invmass_fit_dzero_dplus_lambdac.py`
+Usage: `python plot_invmass_fit_dzero_dplus_lambdac.py config_invmass_preliminary.yml`
+
+An example config is provided in `config_invmass_preliminary.yml`; it was used to draw these plots:
+-
+-
+-
+
+Different versions of this script circulate among the D2H people. This copy contains a few of my improvements, e.g., a configurable multiplicity label.<br>
+I also commented out lines related to non-prompt particles as we had results only for the prompt case. + +You still need to adjust the script in several places: +- comment/uncomment the lines related to prompt/non-prompt particles +- adjust the output directory in line 177 +- input filename in `get_name_infile()` +- histogram names in `main()` + +## Cut variation results + +File: `DrawCutVarFit.C`
+Usage: `root -x DrawCutVarFit.C` in the ROOT / O2 shell
+
+Used to draw the plot .
+
+Adjust the script:
+- set the `bdtScoreCuts_...` variables to your final BDT cuts
+- set `binMin` and `binMax` to the pT bin you want to plot
+- set `bdtScoreCuts` to the proper `bdtScoreCuts_...` variable
+- adjust `bdtScoreCuts_toPlot` and the corresponding indices in `bdtScoreCuts_toPlot_ind`; they are the cuts to label on the x-axis
+- adjust the input file name and histogram names in `DrawCutVarFit()`
+- adjust the x-axis title, if needed
diff --git a/machine_learning_hep/scripts-dhadrons/preliminary-plots/config_invmass_preliminary.yml b/machine_learning_hep/scripts-dhadrons/preliminary-plots/config_invmass_preliminary.yml
new file mode 100644
index 0000000000..f4045e4d65
--- /dev/null
+++ b/machine_learning_hep/scripts-dhadrons/preliminary-plots/config_invmass_preliminary.yml
@@ -0,0 +1,15 @@
+---
+pp13.6TeVFD:
+  Particle: 'LAMBDAC_TO_PKPI'
+  _Particle: 'D0, DPLUS, LAMBDAC_TO_PKPI, LAMBDAC_TO_PK0S'
+  PtMin: [1., 4., 8.]
+  PtMax: [2., 5., 10.]
+  MassMin: [2.21, 2.19, 2.1]
+  _MassMin: 'min masses to display'
+  MassMax: [2.356, 2.38, 2.456]
+  _MassMax: 'max masses to display'
+  Rebin: [2, 2, 4]
+  Mult: [null, "01", "7085"]
+  _Mult: 'multiplicity label contained in the output file name'
+  MultLatex: ["Minimum Bias", " = 20.07", " = 4.34"]
+  _MultLatex: 'TLatex text describing multiplicity on the plot'
diff --git a/machine_learning_hep/scripts-dhadrons/preliminary-plots/plot_invmass_fit_dzero_dplus_lambdac.py b/machine_learning_hep/scripts-dhadrons/preliminary-plots/plot_invmass_fit_dzero_dplus_lambdac.py
new file mode 100644
index 0000000000..936880d6d4
--- /dev/null
+++ b/machine_learning_hep/scripts-dhadrons/preliminary-plots/plot_invmass_fit_dzero_dplus_lambdac.py
@@ -0,0 +1,412 @@
+#!/usr/bin/env python3
+
+# Copyright 2019-2020 CERN and copyright holders of ALICE O2.
+# See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
+# All rights not expressly granted are reserved.
+
+# This software is distributed under the terms of the GNU General Public
+# License v3 (GPL Version 3), copied verbatim in the file "COPYING".
+
+# In applying this license CERN does not waive the privileges and immunities
+# granted to it by virtue of its status as an Intergovernmental Organization
+# or submit itself to any jurisdiction.
+ +""" +file: plot_invmass_fit_dzero_dplus_lambdac.py +brief: script to produce invariant mass fit plot for article, the CFG file should be the one used for inv mass fit +usage: python3 plot_invmass_fit_dzero_dplus_lambdac.py CFG +author: Alexandre Bigot , Strasbourg University +""" + +import argparse + +import yaml +from ROOT import ( + TCanvas, + TFile, + TLatex, + TLegend, + TPad, + kAzure, + kBlack, + kBlue, + kGreen, + kRed, +) + +# import or copy the file O2Physics/PWGHF/D2H/Macros/style_formatter.py +from style_formatter import set_global_style, set_object_style + +# enumerator +D0, DPLUS, LAMBDAC_TO_PKPI, LAMBDAC_TO_PK0S = 0, 1, 2, 3 + +# colours +RED = kRed + 1 +BLUE = kBlue + 1 +AZURE = kAzure + 4 +Green = kGreen + 1 + +# conversion +GEV2MEV = 1000 + +# canvas dimensions +WIDTH = 520 +HEIGHT = 500 + +# text size +SIZE_TEXT_LAT_ALICE = 28 +SIZE_TEXT_LAT_LABEL_FOR_COLL_SYSTEM = 24 +SIZE_TEXT_LAT_LABEL = 20 +SIZE_TEXT_LEGEND = 19 + + +def get_name_infile(particle, suffix): + """ + Helper method to get the name of the input file according to the particle + + Parameters + ---------- + - particle (int): particle ID + + Returns + ---------- + - name_infile (string): name of the input file + """ + + name_infile_promptEnhanced = "" + name_infile_FDEnhanced = "" + if particle == D0: + name_infile_promptEnhanced = "../RawYieldResult/CentralValue/RawYieldsData_D0_pPb5TeV_FD_pos00.root" + name_infile_FDEnhanced = "../RawYieldResult/CentralValue/RawYieldsData_D0_pPb5TeV_FD_pos13.root" + elif particle == DPLUS: + pass + elif particle == LAMBDAC_TO_PKPI: + name_infile_promptEnhanced = f"/data8/majak/invmass-plots/massesmasshisto{suffix}.root" + name_infile_FDEnhanced = "fits_non_prompt.root" + elif particle == LAMBDAC_TO_PK0S: + pass + + return name_infile_promptEnhanced, name_infile_FDEnhanced + + +def get_title_xaxis(particle): + """ + Helper method to get the title of x axis according to the particle + + Parameters + ---------- + - particle (int): particle ID + + Returns + ---------- + - title_xaxis (string): title of x axis + """ + + title_xaxis = "" + if particle == D0: + title_xaxis = "#it{M}(K#pi) (GeV/#it{c}^{2})" + elif particle == DPLUS: + title_xaxis = "#it{M}(#piK#pi) (GeV/#it{c}^{2})" + elif particle == LAMBDAC_TO_PKPI: + title_xaxis = "#it{M}(pK#pi) (GeV/#it{c}^{2})" + elif particle == LAMBDAC_TO_PK0S: + title_xaxis = "#it{M}(pK^{0}_{S}) (GeV/#it{c}^{2})" + + return title_xaxis + + +def get_h_value_err(h, i_bin, convert_to_mev=False): + """ + Helper method to get bin content and error of an histogram + + Parameters + ---------- + - h (TH1): histogram + - i_bin (int): bin number + - convert_to_mev (int): apply conversion from GeV to MeV + + Returns + ---------- + - value (float): bin content of h + - error (float): bin error of h + """ + + value = h.GetBinContent(i_bin) + error = h.GetBinError(i_bin) + + print(f"i_bin: {i_bin} value {value} error {error} first bin value: {h.GetBinContent(1)}") + + if convert_to_mev: + value *= GEV2MEV + error *= GEV2MEV + + return value, error + + +def draw_info(lat_label, particle): + """ + Helper method to draw particle-dependent information on canvas + + Parameters + ---------- + - lat_label (TLatex): TLatex instance + - particle (int): particle ID + """ + + info = "" + fnonprompt = "" + if particle == D0: + info = "D^{0} #rightarrow K^{#font[122]{-}}#pi^{+} and charge conj." + fnonprompt = "#it{f}_{ non-prompt}^{ raw} = 0.750 #pm 0.016 (stat.) 
#pm 0.008 (syst.)" + # fnonprompt = "#it{f}_{ non-prompt}^{ raw} = 0.531" + elif particle == DPLUS: + info = "D^{+} #rightarrow #pi^{+}K^{#font[122]{-}}#pi^{+} and charge conj." + fnonprompt = "#it{f}_{ non-prompt}^{ raw} = 0.787 #pm 0.022 (stat.) #pm 0.016 (syst.)" + elif particle == LAMBDAC_TO_PKPI: + info = "#Lambda_{c}^{+} #rightarrow pK^{#font[122]{-}}#pi^{+} and charge conj." + fnonprompt = "#it{f}_{ non-prompt}^{ raw} = 0.630 #pm 0.056 (stat.) #pm 0.050 (syst.)" + elif particle == LAMBDAC_TO_PK0S: + info = "#Lambda_{c}^{+} #rightarrow pK^{0}_{S} and charge conj." + fnonprompt = "#it{f}_{ non-prompt}^{ raw} = 0.549 #pm 0.138 (stat.) #pm 0.055 (syst.)" + + lat_label.DrawLatex(0.19, 0.85, info) + lat_label.DrawLatex(0.19, 0.16, fnonprompt) + + +def save_canvas(canvas, particle, pt_mins, pt_maxs, ind_pt, mult): + """ + Helper method to save canvas according to particle + + Parameters + ---------- + - canvas (TCanvas): a canvas + - particle (int): particle ID + """ + + out_dir = "/data8/majak/invmass-plots/" + name = "" + if particle == D0: + name = "Dzero" + elif particle == DPLUS: + name = "Dplus" + elif particle == LAMBDAC_TO_PKPI: + name = "LambdacToPKPi" + elif particle == LAMBDAC_TO_PK0S: + name = "LambdacToPKzeroShort" + + mult = f"{mult[ind_pt]}_" if mult[ind_pt] else "" + for ext in ["pdf", "png", "eps"]: + canvas.SaveAs(f"{out_dir}InvMassFit{name}_{mult}Pt_{pt_mins[ind_pt]:.0f}_{pt_maxs[ind_pt]:.0f}.{ext}") + + +# pylint: disable=too-many-locals,too-many-statements +def main(particle, i_pt, cfg, batch): + """ + Main method for a single bin (for article plots) + + Parameters + ---------- + - particle (int): particle ID + - i_pt (int): pT bin number + """ + + set_global_style(padtopmargin=0.07, padleftmargin=0.14, padbottommargin=0.125, + titleoffsety=1.3, titleoffsetx=1., maxdigits=3) + + # import configurables + pt_mins = cfg["pp13.6TeVFD"]["PtMin"] + pt_maxs = cfg["pp13.6TeVFD"]["PtMax"] + mass_mins = cfg["pp13.6TeVFD"]["MassMin"] + mass_maxs = cfg["pp13.6TeVFD"]["MassMax"] + rebin = cfg["pp13.6TeVFD"]["Rebin"] + mult = cfg["pp13.6TeVFD"]["Mult"] + mult_latex = cfg["pp13.6TeVFD"]["MultLatex"] + + print(f"Plotting for {pt_mins[i_pt]}-{pt_maxs[i_pt]}") + + name_infile_promptEnhanced, name_infile_FDEnhanced = \ + get_name_infile(particle, f"{pt_mins[i_pt]:.0f}{pt_maxs[i_pt]:.0f}") + + file_promptEnhanced = TFile.Open(name_infile_promptEnhanced) + # file_FDEnhanced = TFile.Open(name_infile_FDEnhanced) + + # hmean_promptEnhanced = file_promptEnhanced.Get("hist_means_lc") + # hsigma_promptEnhanced = file_promptEnhanced.Get("hist_sigmas_lc") + + # hmean_FDEnhanced = file_FDEnhanced.Get("hRawYieldsMean") + # hsigma_FDEnhanced = file_FDEnhanced.Get("hRawYieldsSigma") + + # hsignal_promptEnhanced = file_promptEnhanced.Get("hist_rawyields_lc") + # hsignal_FDEnhanced = file_FDEnhanced.Get("hRawYields") + + mult_suffix = f"_{mult[i_pt]}" if mult[i_pt] else "" + name_hmass = f"hmass{pt_mins[i_pt]:.0f}{pt_maxs[i_pt]:.0f}{mult_suffix}" + print(f"file {name_infile_promptEnhanced} hist {name_hmass}") + hmass_promptEnhanced = file_promptEnhanced.Get(name_hmass) + # hmass_FDEnhanced = file_FDEnhanced.Get(name_hmass) + hmass_promptEnhanced.Rebin(rebin[i_pt]) + # hmass_FDEnhanced.Rebin(rebin[i_pt]) + + title_xaxis = get_title_xaxis(particle) + width_bin = hmass_promptEnhanced.GetBinWidth(i_pt+1) + bin_max = hmass_promptEnhanced.GetMaximumBin() + bin_min = hmass_promptEnhanced.GetMinimumBin() + + ymax_promptEnhanced = 1.2*(hmass_promptEnhanced.GetMaximum() + 
hmass_promptEnhanced.GetBinError(bin_max)) + ymin_promptEnhanced = 0.8*(hmass_promptEnhanced.GetMinimum() - hmass_promptEnhanced.GetBinError(bin_min)) + # ymin_FDEnhanced, ymax_FDEnhanced = 0., 1.2*(hmass_FDEnhanced.GetMaximum() + hmass_FDEnhanced.GetBinError(bin_max)) + + title = f"{pt_mins[i_pt]:.0f} < #it{{p}}_{{T}} < {pt_maxs[i_pt]:.0f} GeV/#it{{c}};{title_xaxis};" \ + f"Counts per {width_bin*GEV2MEV:.0f} MeV/#it{{c}}^{{2}}" # pylint: disable=unnecessary-semicolon + + # fit_tot_promptEnhanced = file_promptEnhanced.Get(f"totalTF_{pt_mins[i_pt]:.0f}_{pt_maxs[i_pt]:.0f}") + fit_tot_promptEnhanced = file_promptEnhanced.Get(f"total_func_lc_pt{pt_mins[i_pt]:.0f}_{pt_maxs[i_pt]:.0f}") + # fit_bkg_promptEnhanced = file_promptEnhanced.Get(f"bkgTF_{pt_mins[i_pt]:.0f}_{pt_maxs[i_pt]:.0f}") + fit_bkg_promptEnhanced = file_promptEnhanced.Get(f"bkg_0_lc_pt{pt_mins[i_pt]:.0f}_{pt_maxs[i_pt]:.0f}") + #fit_refl_promptEnhanced = file_promptEnhanced.Get(f"freflect;13") + + + + # fit_tot_FDEnhanced = file_FDEnhanced.Get(f"totalTF_{pt_mins[i_pt]:.0f}.0_{pt_maxs[i_pt]:.0f}.0") + # fit_bkg_FDEnhanced = file_FDEnhanced.Get(f"bkgTF_{pt_mins[i_pt]:.0f}.0_{pt_maxs[i_pt]:.0f}.0") + # fit_refl_FDEnhanced = file_FDEnhanced.Get(f"freflect;13") + + # mean_promptEnhanced, err_mean_promptEnhanced = get_h_value_err(hmean_promptEnhanced, i_pt + 1, True) + # mean_FDEnhanced, err_mean_FDEnhanced = get_h_value_err(hmean_FDEnhanced, 13, True) + # sigma_promptEnhanced, _ = get_h_value_err(hsigma_promptEnhanced, i_pt + 1, True) + # sigma_FDEnhanced, _ = get_h_value_err(hsigma_FDEnhanced, 13, True) + # signal_promptEnhanced, err_signal_promptEnhanced = get_h_value_err(hsignal_promptEnhanced, i_pt + 1) + # signal_FDEnhanced, err_signal_FDEnhanced = get_h_value_err(hsignal_FDEnhanced, 13) + + lat_alice = TLatex() + lat_alice.SetNDC() + lat_alice.SetTextSize(SIZE_TEXT_LAT_ALICE) + lat_alice.SetTextFont(43) + lat_alice.SetTextColor(kBlack) + + lat_label = TLatex() + lat_label.SetNDC() + lat_label.SetTextFont(43) + lat_label.SetTextColor(kBlack) + + # str_mu = f"#it{{#mu}} = ({mean:.0f} #pm {err_mean:.0f}) MeV/#it{{c}}^{{2}}" + # str_sigma = f"#it{{#sigma}} = {sigma:.0f} MeV/#it{{c}}^{{2}}" + # str_sig_promptEnhanced = f'#it{{S}} = {signal_promptEnhanced:.0f} #pm {err_signal_promptEnhanced:.0f}' + # str_sig_FDEnhanced = f"#it{{S}} = {signal_FDEnhanced:.0f} #pm {err_signal_FDEnhanced:.0f}" + + legend = TLegend(0.6, 0.54, 0.87, 0.75) if particle == D0 else TLegend(0.62, 0.58, 0.85, 0.72) + legend.SetBorderSize(0) + legend.SetFillStyle(0) + legend.SetTextFont(43) + legend.SetTextSize(SIZE_TEXT_LEGEND) + legend.AddEntry(fit_tot_promptEnhanced, "Total fit function", "l") + legend.AddEntry(fit_bkg_promptEnhanced, "#splitline{Combinatorial}{background}", "l") + # if particle == D0: + # legend.AddEntry(fit_refl_promptEnhanced, "K#minus#pi reflected", "l") + + c = TCanvas("c", "", WIDTH, HEIGHT) + # Create the first pad + pad1 = TPad("promptEnhanced", "Prompt Enhanced", 0., 0., 1., 1.) + if not pad1: + raise RuntimeError("Failed to create pad1") + pad1.Draw() + pad1.cd() # Switch to pad1 + frame_promptEnhanced = pad1.DrawFrame(mass_mins[i_pt], ymin_promptEnhanced, + mass_maxs[i_pt], ymax_promptEnhanced, title) + frame_promptEnhanced.GetYaxis().SetDecimals() + + # c.cd() + # Create the second pad + # pad2 = TPad("NonPromptEnhanced", "Non-prompt enhanced", 0.5, 0., 1., 1.) 
+ # if not pad2: + # raise RuntimeError("Failed to create pad2") + # pad2.Draw() + # pad2.cd() # Switch to pad2 + # frame_FDEnhanced = pad2.DrawFrame(mass_mins[i_pt], ymin_FDEnhanced, mass_maxs[i_pt], ymax_FDEnhanced, title) + # frame_FDEnhanced.GetYaxis().SetDecimals() + + # c.cd() + # pad1.cd() + set_object_style(hmass_promptEnhanced, linewidth=3, linecolor=kBlack, markersize=0.5) + set_object_style(fit_tot_promptEnhanced, linewidth=3, linecolor=kBlue) + set_object_style(fit_bkg_promptEnhanced, linewidth=3, linecolor=kRed, linestyle=2) + # set_object_style(fit_refl_promptEnhanced, linewidth=3, linecolor=kGreen+2, linestyle=9) + + hmass_promptEnhanced.Draw("sameE") + fit_bkg_promptEnhanced.Draw("same") + fit_tot_promptEnhanced.Draw("same") + # fit_refl_promptEnhanced.Draw("same") + + lat_alice.DrawLatex(0.19, 0.85, "ALICE Preliminary") + lat_label.SetTextSize(SIZE_TEXT_LAT_LABEL_FOR_COLL_SYSTEM) + lat_label.DrawLatex(0.19, 0.79, "pp,#kern[-0.08]{ #sqrt{#it{s}} = 13.6 TeV,}" \ + "#kern[-0.08]{ #it{L}_{int} = 5 pb^{#minus1}}") + lat_label.SetTextSize(SIZE_TEXT_LAT_LABEL) + # draw_info(lat_label, particle) + lat_label.DrawLatex(0.19, 0.73, f"{pt_mins[i_pt]:.0f} < #it{{p}}_{{T}} < {pt_maxs[i_pt]:.0f} GeV/#it{{c}}") + # lat_label.DrawLatex(0.19, 0.3, "Prompt enhanced") + # lat_label.DrawLatex(0.7, 0.85, "|#it{y}| < 0.5") + # fnonprompt_promptEnhanced = "#it{f}_{ non-prompt}^{ raw} = 0.246 #pm 0.007 (stat.)" # (4, 5) GeV + # fnonprompt_promptEnhanced = "#it{f}_{ non-prompt}^{ raw} = 0.30 #pm 0.02 (stat.)" # (0, 1) GeV + # lat_label.DrawLatex(0.19, 0.18, fnonprompt_promptEnhanced) + + # lat_label.DrawLatex(0.19, 0.64, str_mu) + # lat_label.DrawLatex(0.19, 0.58, str_sigma) + # lat_label.DrawLatex(0.19, 0.24, str_sig_promptEnhanced) + if mult_latex[i_pt]: + lat_label.DrawLatex(0.19, 0.24, mult_latex[i_pt]) + lat_label.DrawLatex(0.19, 0.18, "#Lambda_{c}^{#plus} #rightarrow pK^{#minus}#pi^{#plus} and charge conj.") + # lat_label.DrawLatex(0.19, 0.16, "#it{L}_{int} = 5 pb^{-1}") + + legend.Draw() + + # c.cd() + # pad2.cd() + # set_object_style(hmass_FDEnhanced, linewidth=3, linecolor=kBlack) + # set_object_style(fit_tot_FDEnhanced, linewidth=3, linecolor=kBlue) + # set_object_style(fit_bkg_FDEnhanced, linewidth=3, linecolor=kRed, linestyle=2) + # set_object_style(fit_refl_FDEnhanced, linewidth=3, linecolor=kGreen+2, linestyle=9) + # hmass_FDEnhanced.Draw("same") + # fit_bkg_FDEnhanced.Draw("same") + # fit_tot_FDEnhanced.Draw("same") + # fit_refl_FDEnhanced.Draw("same") + + # lat_alice.DrawLatex(0.19, 0.85, "ALICE Preliminary") + # lat_label.SetTextSize(SIZE_TEXT_LAT_LABEL_FOR_COLL_SYSTEM) + # lat_label.DrawLatex(0.19, 0.79, "pp, #sqrt{#it{s}} = 13.6 TeV") + # lat_label.SetTextSize(SIZE_TEXT_LAT_LABEL) + # draw_info(lat_label, particle) + # lat_label.DrawLatex(0.19, 0.3, "Non-prompt enhanced") + # lat_label.DrawLatex(0.7, 0.85, "|#it{y}| < 0.5") + # fnonprompt_FDEnhanced = "#it{f}_{ non-prompt}^{ raw} = 0.690 #pm 0.008 (stat.)" # (4, 5) GeV + # fnonprompt_FDEnhanced = "#it{f}_{ non-prompt}^{ raw} = 0.70 #pm 0.02 (stat.)" # (0, 1) GeV + # lat_label.DrawLatex(0.19, 0.18, fnonprompt_FDEnhanced) + + # lat_label.DrawLatex(0.19, 0.64, str_mu) + # lat_label.DrawLatex(0.19, 0.58, str_sigma) + # lat_label.DrawLatex(0.19, 0.24, str_sig_FDEnhanced) + + # legend.Draw() + + # c.Update() + c.cd() + + save_canvas(c, particle, pt_mins, pt_maxs, i_pt, mult) + + if not batch: + input("Press enter to exit") + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Arguments") + 
parser.add_argument("config", metavar="text", default="config.yml", help="config file name for ml") + parser.add_argument("--batch", help="suppress video output", action="store_true") + args = parser.parse_args() + + print("Loading analysis configuration: ...", end="\r") + with open(args.config, encoding="utf-8") as yml_cfg: + configuration = yaml.load(yml_cfg, yaml.FullLoader) + print("Loading analysis configuration: Done!") + + for ipt in range(len(configuration["pp13.6TeVFD"]["PtMin"])): + main(particle=LAMBDAC_TO_PKPI, i_pt=ipt, cfg=configuration, batch=args.batch) diff --git a/machine_learning_hep/scripts-dhadrons/run-mlhep/README.md b/machine_learning_hep/scripts-dhadrons/run-mlhep/README.md new file mode 100644 index 0000000000..41ad64955d --- /dev/null +++ b/machine_learning_hep/scripts-dhadrons/run-mlhep/README.md @@ -0,0 +1,30 @@ +# Automate MLHEP running + +## A simple shortcut with default running options + +File: `run-mlhep.sh`
+Usage: `./run-mlhep.sh my_database.yml my_run_config.yml logfile.log`
+
+It calls:
+```bash
+mlhep --log-file logfile.log \
+    -a Run3analysis \
+    --run-config my_run_config.yml \
+    --database-analysis my_database.yml
+```
+
+## Run MLHEP in batch for various BDT cuts
+
+File: `run-fdd-batch.sh`<br>
+Usage: `./run-fdd-batch.sh`
+
+The script requires an MLHEP database with %resdir%, %bkg...%, and %fd% placeholders.
+Examples of these placeholders can be found in the `data/data_run3/database_ml_parameters_LcToPKPi_multiclass_fdd.yml` database; the substitution mechanism is sketched at the end of this section.
+
+The script loops over different cuts defined with `seq`. By default, they are non-prompt cuts. To run for prompt cuts, simply put the %fd% placeholders in place of the prompt cuts in the `probcutoptimal` variables in the database. Background cuts are set to the `bkg` value.
+
+For each non-prompt cut, the MLHEP workflow is launched with the %resdir% placeholder replaced by `${RESDIR_PATTERN}${suffix}`, where `RESDIR_PATTERN` is the prefix of the output directory name and `suffix` is `fd_${fd}`, with `${fd}` the current cut value.
+
+The MLHEP workflow is defined by the `submission/analyzer.yml` file. Usually, you would enable the `histomass` and `efficiency` steps for data and MC, and the `fit` and `efficiency` steps in the "Inclusive hadrons" section.
+
+Adjust the script variables and the `submission/analyzer.yml` file to your needs.
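+
+For illustration, this is roughly what the script does to a placeholder (the database line below is hypothetical; see the real database linked above for the actual keys):
+
+```bash
+# Hypothetical database line: probcutoptimal: [%bkg01%, %fd%]
+# The batch script rewrites it in a copy of the database with sed, e.g.:
+echo "probcutoptimal: [%bkg01%, %fd%]" | sed -e "s/%bkg01%/0.00/g" -e "s/%fd%/0.005/g"
+# prints: probcutoptimal: [0.00, 0.005]
+```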
diff --git a/machine_learning_hep/scripts-dhadrons/run-mlhep/run-fdd-batch.sh b/machine_learning_hep/scripts-dhadrons/run-mlhep/run-fdd-batch.sh
new file mode 100755
index 0000000000..0f6eb05da9
--- /dev/null
+++ b/machine_learning_hep/scripts-dhadrons/run-mlhep/run-fdd-batch.sh
@@ -0,0 +1,65 @@
+#!/bin/bash
+
+# Run MLHEP in batch for various BDT cuts.
+# You need an MLHEP database with %resdir%, %bkg...%, and %fd% placeholders.
+
+# Print an error message and exit.
+function ErrExit {
+  echo "Error: $*" >&2; exit 1;
+}
+
+WORKDIR="${HOME}/MachineLearningHEP/machine_learning_hep/"
+
+# Base database.
+DATABASE="database_ml_parameters_LcToPKPi_multiclass_fdd"
+DATABASE_EXT="${DATABASE}.yml"
+DATABASE_PATH="${WORKDIR}/data/data_run3/${DATABASE_EXT}"
+
+# Prefix of the output directory names.
+#RESDIR_PATTERN="results-24022025-prompt"
+RESDIR_PATTERN="results-24022025-newtrain-ptshape-prompt"
+
+# Bkg cut. You can rewrite this to have different cuts in different pT bins.
+bkg=0.00
+
+# Loop over all non-prompt cuts (adjust the seq range; as written it yields only 0.000).
+for fd in $(seq 0.000 0.005 0.000) ; do
+  echo "fd ${fd}"
+
+  # Variable suffix to append to the output directory name.
+  suffix="fd_${fd}"
+
+  RESDIR="${RESDIR_PATTERN}${suffix}"
+
+  CUR_DB="${DATABASE}_edit_fd${fd}.yml"
+  cp "${DATABASE_PATH}" "${CUR_DB}" || ErrExit "Could not copy database"
+
+  # Adjust the output directory
+  sed -i "s/%resdir%/${RESDIR}/g" "${CUR_DB}" || ErrExit "Could not edit database"
+
+  # Set bkg BDT cuts
+  sed -i "s/%bkg01%/${bkg}/g" "${CUR_DB}" || ErrExit "Could not edit database"
+  sed -i "s/%bkg12%/${bkg}/g" "${CUR_DB}" || ErrExit "Could not edit database"
+  sed -i "s/%bkg23%/${bkg}/g" "${CUR_DB}" || ErrExit "Could not edit database"
+  sed -i "s/%bkg34%/${bkg}/g" "${CUR_DB}" || ErrExit "Could not edit database"
+  sed -i "s/%bkg45%/${bkg}/g" "${CUR_DB}" || ErrExit "Could not edit database"
+  sed -i "s/%bkg56%/${bkg}/g" "${CUR_DB}" || ErrExit "Could not edit database"
+  sed -i "s/%bkg67%/${bkg}/g" "${CUR_DB}" || ErrExit "Could not edit database"
+  sed -i "s/%bkg78%/${bkg}/g" "${CUR_DB}" || ErrExit "Could not edit database"
+  sed -i "s/%bkg810%/${bkg}/g" "${CUR_DB}" || ErrExit "Could not edit database"
+  sed -i "s/%bkg1012%/${bkg}/g" "${CUR_DB}" || ErrExit "Could not edit database"
+  sed -i "s/%bkg1216%/${bkg}/g" "${CUR_DB}" || ErrExit "Could not edit database"
+  sed -i "s/%bkg1624%/${bkg}/g" "${CUR_DB}" || ErrExit "Could not edit database"
+
+  # Set non-prompt BDT cuts
+  sed -i "s/%fd%/${fd}/g" "${CUR_DB}" || ErrExit "Could not edit database"
+
+  # `yes` is a program that says `y` to all interactive console prompts.
+  # In this way, we skip all MLHEP questions about deleting old results.
+  yes | mlhep --log-file "logfile_${suffix}.log" \
+    -a Run3analysis \
+    --run-config submission/analyzer.yml \
+    --database-analysis "${CUR_DB}" \
+    --delete \
+    > "debug_${suffix}.txt" 2>&1 || ErrExit "Analysis failed"
+done
diff --git a/machine_learning_hep/scripts-dhadrons/run-mlhep/run-mlhep.sh b/machine_learning_hep/scripts-dhadrons/run-mlhep/run-mlhep.sh
new file mode 100755
index 0000000000..3bdcef5c10
--- /dev/null
+++ b/machine_learning_hep/scripts-dhadrons/run-mlhep/run-mlhep.sh
@@ -0,0 +1,18 @@
+#!/bin/bash
+
+# Shortcut to run MLHEP
+# Usage: ./run-mlhep.sh database_Lc.yml submission/analysis.yml logfile.log
+
+if [ "$#" -ne 3 ]; then
+  echo "Wrong number of parameters"
+  exit 1
+fi
+
+DB=$1
+CONFIG=$2
+LOGFILE=$3
+
+mlhep --log-file "${LOGFILE}" \
+  -a Run3analysis \
+  --run-config "${CONFIG}" \
+  --database-analysis "${DB}"
diff --git a/machine_learning_hep/scripts-dhadrons/systematics/README.md b/machine_learning_hep/scripts-dhadrons/systematics/README.md
new file mode 100644
index 0000000000..cb2124b10c
--- /dev/null
+++ b/machine_learning_hep/scripts-dhadrons/systematics/README.md
@@ -0,0 +1,16 @@
+# Obtain various comparison plots, esp. for systematics and final analysis results
+
+File: `compare_fractions.py`<br>
+Usage: `python compare_fractions.py config.json`
+
+All JSON files in this directory provide configuration examples for different use cases; a minimal sketch of the expected structure is shown at the end of this section.
+
+The script can read both model histograms and analysis result histograms, as specified in the JSON configuration, and plots them all on a single canvas with different colours. If no systematics are provided, it calculates the systematic errors and prints them to the console. Systematic errors can also be provided in the JSON config; they are then drawn as boxes around the central points.
+
+The script also produces a separate plot with the ratios of the other histograms to the central histogram. The central histogram is the one specified as "default" in the JSON.
+
+The histogram labels in the legend are taken from the dictionary keys in the JSON and can use TLatex syntax.
+
+It is also possible to specify the `y_axis` title and an additional description under the "ALICE Preliminary" header (the `alice_text` variable in the config). The header itself and its position can be adjusted in the `get_alice_text` function in the Python script.
+
+Colors and markers can be adjusted at the beginning of the script.
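+
+A minimal sketch of the expected JSON structure (paths and file names below are hypothetical; the JSON files in this directory are the authoritative examples). Each `systematics` entry holds one list of fractional errors per pT bin, combined in quadrature by the script:
+
+```json
+{
+  "inputdir": "/path/to/results",
+  "histoname": "hCorrFracNonPrompt",
+  "default": "Default",
+  "hists": {
+    "Default": { "file": ["CutVar_default.root"] },
+    "Variation": {
+      "file": ["CutVar_variation.root"],
+      "systematics": [[0.05], [0.04], [0.06]]
+    }
+  },
+  "y_axis": "Non-prompt #Lambda_{c}^{#plus} fraction",
+  "legend": [0.50, 0.18, 0.90, 0.38],
+  "legend_ratio": [0.50, 0.70, 0.90, 0.90],
+  "output": { "outdir": "/path/to/output", "file": "comparison" }
+}
+```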
diff --git a/machine_learning_hep/scripts-dhadrons/systematics/compare_fractions.py b/machine_learning_hep/scripts-dhadrons/systematics/compare_fractions.py
new file mode 100644
index 0000000000..cd86bb3a51
--- /dev/null
+++ b/machine_learning_hep/scripts-dhadrons/systematics/compare_fractions.py
@@ -0,0 +1,547 @@
+# pylint: disable=missing-function-docstring, too-many-locals
+"""
+file: compare_fractions.py
+brief: Compare non-prompt corrected fractions for the systematic uncertainties analysis.
+usage: python3 compare_fractions.py config_compare_fractions.json
+author: Maja Karwowska , Warsaw University of Technology
+"""
+
+import argparse
+import json
+import math
+import os
+from array import array
+
+from ROOT import (  # pylint: disable=import-error,no-name-in-module
+    TH1F,
+    MakeNullPointer,
+    TCanvas,
+    TFile,
+    TGraphAsymmErrors,
+    TLegend,
+    TLine,
+    TObject,
+    TPaveText,
+    gROOT,
+    gStyle,
+    kAzure,
+    kBlack,
+    kBlue,
+    kCyan,
+    kDashed,
+    kGray,
+    kGreen,
+    kMagenta,
+    kOrange,
+    kRed,
+    kTeal,
+    kYellow,
+)
+
+COLORS=[kBlack, kRed-3, kAzure-7, kGreen+2, kOrange-3, kBlue, kMagenta+2,
+        kTeal+3, kGreen, kAzure+8,
+        kYellow+3, kOrange-5, kMagenta+2, kBlue-6, kCyan+1, kGreen-6]
+MODELS_COLORS=[kGray+1, kOrange-3, kCyan-2, kRed-9, kAzure-9, kBlue-6, kGreen-6, kOrange-5]
+MODELS_STYLES=[3001, 3004, 3245, 3250, 3244, 3254, 3209, 3245, 3250, 3244, 3254, 3209]
+LEGEND_PER_COLUMN=4
+
+def get_alice_text(cfg):
+    if "alice_text" not in cfg:
+        return None
+
+    alice_text = TPaveText(0.17, 0.62, 0.50, 0.86, "brNDC")
+    alice_text.SetTextFont(42)
+    alice_text.SetTextSize(0.04)
+    alice_text.SetBorderSize(0)
+    alice_text.SetFillStyle(0)
+    alice_text.SetTextAlign(11)
+
+    alice_text_config = cfg["alice_text"]
+    alice_text.AddText("#scale[1.35]{ALICE Preliminary}")
+    alice_text.AddText("#scale[1.05]{pp,#kern[-0.05]{ #sqrt{#it{s}} = 13.6 TeV, |#it{y}| < 0.5}}")
+    alice_text.AddText(f"#scale[1.20]{{{alice_text_config}}}")
+
+    alice_text.Draw("same")
+
+    return alice_text
+
+
+def get_legend(x_1, y_1, x_2, y_2, num_hists, header=None):
+    leg = TLegend(x_1, y_1, x_2, y_2)
+    if num_hists > LEGEND_PER_COLUMN:
+        leg.SetNColumns(2)
+    if header:
+        leg.SetHeader(header)
+    leg.SetTextAlign(12)
+    leg.SetTextSize(0.04)
+    leg.SetMargin(0.3)
+    leg.SetBorderSize(0)
+    leg.SetFillStyle(0)
+    return leg
+
+def prepare_canvas(cname):
+    canv = 
TCanvas(cname, "") + canv.SetCanvasSize(900, 600) + #canv.SetTickx() + #canv.SetTicky() + canv.SetLeftMargin(0.15) + canv.SetBottomMargin(0.15) + return canv + + +def save_canvas(canv, cfg, filename): + for ext in ("png", "pdf"): + canv.SaveAs(os.path.join(cfg["output"]["outdir"], f"{filename}.{ext}")) + + +def combine_syst_errors(syst_errors, value): + err = 0.0 + err_perc = 0.0 + for syst in syst_errors: + err += syst * syst + err_perc += (100 * syst) * (100 * syst) + err_perc = math.sqrt(err_perc) + print(f"Combined percentage error: {err_perc:0.0f}") + return math.sqrt(err) * value + + +def get_hist_limits(hist, graph_syst = None, miny = 0.0, maxy = 0.0): + for binn in range(0, hist.GetNbinsX()): + print(f"bin {binn + 1} [{hist.GetXaxis().GetBinLowEdge(binn + 1)}, "\ + f"{hist.GetXaxis().GetBinLowEdge(binn + 2)}) val {hist.GetBinContent(binn + 1)} "\ + f"err {hist.GetBinError(binn + 1)}") + maxval = hist.GetBinContent(binn + 1) + hist.GetBinError(binn + 1) + minval = hist.GetBinContent(binn + 1) - hist.GetBinError(binn + 1) + if graph_syst: + maxval = max(maxval, hist.GetBinContent(binn + 1) + graph_syst.GetErrorY(binn)) + minval = min(minval, hist.GetBinContent(binn + 1) - graph_syst.GetErrorY(binn)) + maxy = max(maxval, maxy) + miny = min(minval, miny) + return miny, maxy + + +def merge_fractions(inputdir, histname, filenames): + with TFile.Open(os.path.join(inputdir, filenames[0])) as fin: + reshist = fin.Get(histname).Clone() + reshist.SetDirectory(0) + + for ind, file in enumerate(filenames[1:]): + binn = ind + 1 + with TFile.Open(os.path.join(inputdir, file)) as fin: + hist = fin.Get(histname) + reshist.SetBinContent(binn + 1, hist.GetBinContent(binn + 1)) + reshist.SetBinError(binn + 1, hist.GetBinError(binn + 1)) + + return reshist + + +def set_hist_style(hist, color, y_axis, style=None): + for axis in (hist.GetXaxis(), hist.GetYaxis()): + axis.SetLabelFont(42) + axis.SetLabelSize(0.05) + axis.SetLabelOffset(0.02) + axis.SetTitleFont(42) + axis.SetTitleSize(0.06) + axis.SetTitleOffset(1.3) + hist.GetXaxis().SetTitle("#it{p}_{T}(GeV/#it{c})") + hist.GetYaxis().SetTitle(y_axis) + hist.GetYaxis().SetTitleSize(0.05) + hist.GetXaxis().SetTitleOffset(1.1) + + hist.SetLineColor(color) + hist.SetLineWidth(2) + if style: + hist.SetFillColor(color) + hist.SetFillStyle(style) + #hist.SetTitle("") + else: + hist.SetMarkerColor(color) + hist.SetMarkerSize(1) + hist.SetMarkerStyle(21) + + +def get_hist_for_label(label, color, cfg): + if len(cfg["hists"][label]["file"]) == 1: + with TFile.Open(os.path.join(cfg["inputdir"], cfg["hists"][label]["file"][0])) as fin: + hist = fin.Get(cfg["histoname"]) + hist.SetDirectory(0) + else: + print(f"Merging histograms for {label}") + hist = merge_fractions(cfg["inputdir"], cfg["histoname"], cfg["hists"][label]["file"]) + + set_hist_style(hist, color, cfg["y_axis"]) + return hist + + +def get_graph_systematics(hist, label, color, cfg): + if isinstance(cfg["hists"][label]["systematics"][0], str): + with TFile.Open(os.path.join(cfg["inputdir"], \ + cfg["hists"][label]["systematics"][0])) as fin: + graph_syst = fin.Get(cfg["hists"][label]["systematics"][1]) + else: + graph_syst = TGraphAsymmErrors() + graph_syst.SetName(f"graph_{label}_syst") + for binn in range(hist.GetNbinsX()): + syst_err = combine_syst_errors(cfg["hists"][label]["systematics"][binn], + hist.GetBinContent(binn + 1)) + print(f"Syst error {label} bin {binn + 1} {syst_err}") + x_point = hist.GetBinCenter(binn + 1) + y_point = hist.GetBinContent(binn + 1) + x_width = hist.GetBinWidth(binn + 
1) / 4.0 # We want syst boxes to be of half-bin width + if y_point != 0: + graph_syst.SetPoint(binn, x_point, y_point) + graph_syst.SetPointError(binn, x_width, x_width, syst_err, syst_err) + set_hist_style(graph_syst, color, cfg["y_axis"]) + graph_syst.SetFillStyle(0) + return graph_syst + + +def get_hist_model(label, color, style, cfg): + with TFile.Open(os.path.join(cfg["inputdir"], cfg["models"][label]["file"])) as fin: + hist = fin.Get(cfg["models"][label]["histoname"]) + hist.SetDirectory(0) + + set_hist_style(hist, color, cfg["y_axis"], style) + #hist.SetTitle("") + + return hist + + +def plot_models(cfg, canv): + maxy = 0. + miny = 1000000. + if cfg.get("models", None): + hists_models = {} + leg_models = get_legend(*cfg["legend_models"], len(cfg["models"])) + leg_models.SetMargin(0.9) + for ind, (label, color, style) in \ + enumerate(zip(cfg["models"], MODELS_COLORS, MODELS_STYLES, strict=False)): + hist = get_hist_model(label, color, style, cfg) + print(f"hist model for {label}: {hist.GetName()}") + miny, maxy = get_hist_limits(hist, None, miny, maxy) + + canv.cd() + draw_opt = "sameE3" if ind != 0 else "E3" + hist.Draw(draw_opt) + leg_models.AddEntry(hist, label, "f") + + hists_models[label] = hist + else: + leg_models = None + hists_models = None + + return canv, hists_models, leg_models, miny, maxy + + +def set_figs_limits(miny, maxy, hists, graphs, hists_models): + margin = 0.1 + #k = 1.0 - 2 * margin + #rangey = maxy - miny + #miny = miny - margin / k * rangey + #maxy = maxy + margin / k * rangey + print(f"Hist maxy: {maxy} miny: {miny}") + #miny = min(miny - margin * miny, 0) + miny = miny - margin * miny + if miny <= 0: + miny = 0.006 + print(f"Recalculated hist maxy: {maxy + margin * maxy} miny: {miny}") + if hists_models: + for _, hist in hists_models.items(): + hist.GetYaxis().SetRangeUser(miny, maxy + margin * maxy) + for _, hist in hists.items(): + hist.GetYaxis().SetRangeUser(miny, maxy + margin * maxy) + if graphs: + for graph_syst in graphs: + graph_syst.GetYaxis().SetRangeUser(miny, maxy + margin * maxy) + return hists, graphs, hists_models + +def plot_compare(cfg): + canv = prepare_canvas(f'c_{cfg["histoname"]}') + if cfg.get("log_scale", False): + canv.SetLogy() + + canv, hists_models, leg_models, miny, maxy = plot_models(cfg, canv) + leg = get_legend(*cfg["legend"], len(cfg["hists"])) + + hists = {} + central_graph = None + graphs_syst = [] + for ind, (label, color) in enumerate(zip(cfg["hists"], COLORS, strict=False)): + hist = get_hist_for_label(label, color, cfg) + print(label) + miny, maxy = get_hist_limits(hist, None, miny, maxy) + + canv.cd() + draw_opt = "sameE" if ind != 0 or hists_models else "E" + hist.Draw(draw_opt) + leg.AddEntry(hist, label, "p") + + hists[label] = hist + + if cfg["hists"][label].get("systematics", None): + print("Plotting systematic") + graph_syst = get_graph_systematics(hist, label, color, cfg) + miny, maxy = get_hist_limits(hist, graph_syst, miny, maxy) + graph_syst.Draw("sameE2") + graphs_syst.append(graph_syst) + if label == cfg["default"]: + central_graph = graph_syst + + hists, graphs_syst, hists_models = set_figs_limits(miny, maxy, hists, graphs_syst, hists_models) + + leg.Draw() + if leg_models: + leg_models.Draw() + + alice_text = get_alice_text(cfg) + + return canv, hists, graphs_syst, hists_models, leg, leg_models, alice_text, central_graph + + +def get_average(hist, graph_syst): + width_combined = hist.GetBinWidth(hist.GetNbinsX() -1) + hist.GetBinWidth(hist.GetNbinsX()) + val = ((hist.GetBinContent(hist.GetNbinsX() - 
1) * hist.GetBinWidth(hist.GetNbinsX() - 1) +\ + hist.GetBinContent(hist.GetNbinsX()) * hist.GetBinWidth(hist.GetNbinsX())) /\ + width_combined) + err = math.sqrt((hist.GetBinError(hist.GetNbinsX() - 1) *\ + hist.GetBinWidth(hist.GetNbinsX() - 1) /\ + width_combined) **2 +\ + (hist.GetBinError(hist.GetNbinsX()) *\ + hist.GetBinWidth(hist.GetNbinsX()) /\ + width_combined) ** 2) + syst_err = math.sqrt((graph_syst.GetErrorYlow(hist.GetNbinsX() - 2) *\ + hist.GetBinWidth(hist.GetNbinsX() - 1) /\ + width_combined) **2 +\ + (graph_syst.GetErrorYlow(hist.GetNbinsX() - 1) *\ + hist.GetBinWidth(hist.GetNbinsX()) /\ + width_combined) ** 2) + return val, err, syst_err + + +def hist_for_ratio(hist, graph, central_hist): + hist2 = TH1F(hist.GetName(), "", central_hist.GetNbinsX(), + array('d', central_hist.GetXaxis().GetXbins())) + graph2 = TGraphAsymmErrors() + for binn in range(central_hist.GetNbinsX() - 1): + hist2.SetBinContent(binn + 1, hist.GetBinContent(binn + 1)) + hist2.SetBinError(binn + 1, hist.GetBinError(binn + 1)) + graph2.SetPoint(binn, graph.GetPointX(binn), graph.GetPointY(binn)) + graph2.SetPointError(binn, graph.GetErrorX(binn), graph.GetErrorY(binn)) + val, err, syst_err = get_average(hist, graph) + hist2.SetBinContent(hist2.GetNbinsX(), val) + hist2.SetBinError(hist2.GetNbinsX(), err) + graph2.SetPoint(hist2.GetNbinsX() - 1, + hist2.GetBinCenter(hist2.GetNbinsX()), val) + graph2.SetPointError(hist2.GetNbinsX() - 1, + hist2.GetBinWidth(hist2.GetNbinsX()) / 4.0, + hist2.GetBinWidth(hist2.GetNbinsX()) / 4.0, + syst_err, syst_err) + return hist2, graph2 + + +def divide_syst_error(val, val1, val2, err1, err2): + return val * math.sqrt((err1 / val1) **2 + (err2 / val2) **2) + + +def get_figs_ratio(central_graph, central_hist, hist_ratio, graph_ratio, label): + histr = hist_ratio.Clone() + histr.SetName(f"h_ratio_{label}") + histr.Divide(hist_ratio, central_hist, 1., 1., "B") + histr.GetXaxis().SetTitleOffset(1.10) + for binn in range(1, histr.GetNbinsX() + 1): + print(f"Ratio {binn}: {histr.GetBinContent(binn)}") + + graphr = None + if central_graph: + graphr = central_graph.Clone() + graphr.SetName(f"g_ratio_{label}") + for binn in range(1, central_hist.GetNbinsX() + 1): + x_err = histr.GetBinWidth(binn) / 4.0 + y_low = divide_syst_error(histr.GetBinContent(binn), + central_hist.GetBinContent(binn), + hist_ratio.GetBinContent(binn), + central_graph.GetErrorYlow(binn - 1), + graph_ratio.GetErrorYlow(binn - 1)) + y_high = divide_syst_error(histr.GetBinContent(binn), + central_hist.GetBinContent(binn), + hist_ratio.GetBinContent(binn), + central_graph.GetErrorYhigh(binn - 1), + graph_ratio.GetErrorYhigh(binn - 1)) + graphr.SetPoint(binn - 1, histr.GetBinCenter(binn), histr.GetBinContent(binn)) + graphr.SetPointError(binn - 1, x_err, x_err, y_low, y_high) + print(f"Central graph bin {binn-1} low {central_graph.GetErrorYlow(binn-1)} "\ + f"{label} low: {graph_ratio.GetErrorYlow(binn-1)} "\ + f"up {central_graph.GetErrorYhigh(binn-1)} "\ + f"{label} up: {graph_ratio.GetErrorYhigh(binn-1)}") + return histr, graphr + + +def plot_ratio_histos(canvr, legr, hists, graphs, central_hist, + central_label, central_graph, styles, y_axis): + maxx = 0.0 + miny = 0.05 + maxy = 300 + histsr = [] + graphsr = [] + + for ind, (label, color, style) in enumerate(zip(hists, COLORS, styles, strict=False)): + print(f"central hist bins: {central_hist.GetNbinsX()} "\ + f"{label} bins: {hists[label].GetNbinsX()}") + if label != central_label and hists[label].GetNbinsX() == central_hist.GetNbinsX(): + graph = 
graphs[ind] if graphs else None + #hist_ratio, graph_ratio = hist_for_ratio(hists[label], graph, central_hist) + hist_ratio = hists[label] + graph_ratio = graph + + histr, graphr = get_figs_ratio(central_graph, central_hist, + hist_ratio, graph_ratio, label) + #set_hist_style(histr, color, "Ratio to INEL > 0") + histr.GetYaxis().SetTitle(y_axis) + + if style: + set_hist_style(histr, color, y_axis, style) + draw_opt = "sameE3" if ind != 0 else "E3" + else: + draw_opt = "sameE" + histr.SetMaximum(maxy) + histr.SetMinimum(miny) + canvr.cd() + histr.Draw(draw_opt) + if style: + histr2 = histr.Clone() + histr2.SetFillStyle(0) + histr2.SetFillColor(0) + histr2.SetMarkerStyle(0) + histr2.Draw("hist same L") + histr2.SetFillStyle(style) + histsr.append(histr2) + histsr.append(histr) + if graphr: + set_hist_style(graphr, color, y_axis) + graphr.Draw("sameE2") + graphsr.append(graphr) + if style and ind == 1: + entry = legr.AddEntry(MakeNullPointer(TObject), "PYTHIA 8.243 Monash", "f") + entry.SetFillColor(kBlack) + entry.SetFillStyle(style) + elif not style: + legr.AddEntry(histr, label, "p") + maxx = max(maxx, histr.GetBinLowEdge(histr.GetNbinsX() + 1)) + return canvr, legr, histsr, graphsr, maxx + + +def plot_ratio(cfg, hists, graphs_syst, central_graph, hists_models): + canvr = prepare_canvas(f'c_ratio_{cfg["histoname"]}') + canvr.SetLogy() + + if hists_models: + leg_models = get_legend(*cfg["legend_ratio_models"], len(cfg["models"])) + leg_models.SetMargin(0.5) + central_hist = hists_models[cfg["model_default"]] + canvr, leg_models, histsr_models, _, maxx =\ + plot_ratio_histos(canvr, leg_models, hists_models, None, + central_hist, cfg["model_default"], None, + [3001] * len(cfg["models"]), cfg["y_axis"]) + leg_models.Draw() + else: + histsr_models = [] + leg_models = None + + legr = get_legend(*cfg["legend_ratio"], len(cfg["hists"]), + ":") + central_hist = hists[cfg["default"]] + canvr, legr, histsr, graphsr, maxx =\ + plot_ratio_histos(canvr, legr, hists, graphs_syst, + central_hist, cfg["default"], central_graph, + [None] * len(cfg["hists"]), cfg["y_axis"]) + + legr.Draw() + + line = TLine(histsr[0].GetBinLowEdge(1), 1.0, maxx, 1.0) + line.SetLineColor(COLORS[len(histsr)]) + line.SetLineWidth(3) + line.SetLineStyle(kDashed) + line.Draw() + + alice_text = get_alice_text(cfg) + + return canvr, histsr, graphsr, histsr_models, legr, line, alice_text, leg_models + + +def calc_systematics(cfg, hists): + syst_errors = [] + central_hist = hists[cfg["default"]] + + for binn in range(central_hist.GetNbinsX()): + syst_err_bin = 0.00 + count = 0 + for label in hists: + if label != cfg["default"] and hists[label].GetNbinsX() == central_hist.GetNbinsX(): + syst_err = float("inf") if central_hist.GetBinContent(binn + 1) == 0 else \ + (hists[label].GetBinContent(binn + 1) - \ + central_hist.GetBinContent(binn + 1)) / \ + central_hist.GetBinContent(binn + 1) + syst_err_bin += syst_err * syst_err + count += 1 + if count == 0: + return + syst_err_bin = 100 * (math.sqrt(syst_err_bin / count)) + syst_errors.append(syst_err_bin) + + str_err = "Systematic errors:" + for err in syst_errors: + str_err = f"{str_err} {err:0.2f}" + print(str_err) + + +def main(): + """ + Main function. 
+ """ + gROOT.SetBatch(True) + + gStyle.SetOptStat(0) + gStyle.SetOptTitle(0) + gStyle.SetFrameLineWidth(2) + + parser = argparse.ArgumentParser(description="Arguments to pass") + parser.add_argument("config", help="JSON config file") + args = parser.parse_args() + + with open(args.config, encoding="utf8") as fil: + cfg = json.load(fil) + + with TFile(os.path.join(cfg["output"]["outdir"], + f'{cfg["output"]["file"]}.root'), "recreate") as output: + + (canv, hists, graphs_syst, hists_models, + leg, leg_models, alice_text, central_graph) = plot_compare(cfg) # pylint: disable=unused-variable + output.cd() + canv.Write() + save_canvas(canv, cfg, cfg["output"]["file"]) + for _, hist in hists.items(): + hist.Write() + if graphs_syst: + for graph in graphs_syst: + graph.Write() + if hists_models: + for _, hist in hists_models.items(): + hist.Write() + + canvr, histr, graphr, histr_models, legr, line, alice_text, leg_models =\ + plot_ratio(cfg, hists, graphs_syst, central_graph, hists_models) # pylint: disable=unused-variable + output.cd() + canvr.Write() + save_canvas(canvr, cfg, f'{cfg["output"]["file"]}_ratio') + for hist in histr: + hist.Write() + for graph in graphr: + graph.Write() + for hist in histr_models: + hist.Write() + + calc_systematics(cfg, hists) + + +if __name__ == "__main__": + main() diff --git a/machine_learning_hep/scripts-dhadrons/systematics/config_bdt_cuts.json b/machine_learning_hep/scripts-dhadrons/systematics/config_bdt_cuts.json new file mode 100644 index 0000000000..bbac88dbdd --- /dev/null +++ b/machine_learning_hep/scripts-dhadrons/systematics/config_bdt_cuts.json @@ -0,0 +1,55 @@ +{ + "inputdir": "/data8/majak/systematics/032025/bdt", + "histoname": "hCorrFracNonPrompt", + "default": "Default", + "hists": { + "Narrow left": { + "file": [ + "CutVarLc_pp13TeV_LHC23_pass4_narrow_left.root" + ] + }, + "Narrow right": { + "file": [ + "CutVarLc_pp13TeV_LHC23_pass4_narrow_right.root" + ] + }, + "Narrow both": { + "file": [ + "CutVarLc_pp13TeV_LHC23_pass4_narrow_both.root" + ] + }, + "Wide left": { + "file": [ + "CutVarLc_pp13TeV_LHC23_pass4_wide.root" + ] + }, + "Wide right": { + "file": [ + "CutVarLc_pp13TeV_LHC23_pass4_wide_right.root" + ] + }, + "Wide both": { + "file": [ + "CutVarLc_pp13TeV_LHC23_pass4_wide_both.root" + ] + }, + "Default": { + "file": [ + "CutVarLc_pp13TeV_LHC23_pass4_default.root" + ] + } + }, + "bin_min": [1,2,3,4,5,6,7,8,10,12,16], + "bin_max": [2,3,4,5,6,7,8,10,12,16,24], + "y_axis": "Non-prompt #Lambda_{c}^{#plus} fraction", + "legend": [0.50, 0.18, 0.90, 0.38], + "legend_p": [0.20, 0.18, 0.60, 0.38], + "legend_np": [0.50, 0.18, 0.90, 0.38], + "legend_ratio": [0.50, 0.70, 0.90, 0.90], + "legend_ratio_p": [0.20, 0.20, 0.60, 0.40], + "legend_ratio_np": [0.50, 0.70, 0.90, 0.90], + "output": { + "outdir": "/data8/majak/systematics/032025/bdt", + "file": "NP_Frac_pp13TeV_bdt_1-24" + } +} diff --git a/machine_learning_hep/scripts-dhadrons/systematics/config_compare_fractions.json b/machine_learning_hep/scripts-dhadrons/systematics/config_compare_fractions.json new file mode 100644 index 0000000000..42bd8c562c --- /dev/null +++ b/machine_learning_hep/scripts-dhadrons/systematics/config_compare_fractions.json @@ -0,0 +1,87 @@ +{ + "inputdir": "/data8/majak/systematics/230824/fitting", + "histoname": "hCorrFracNonPrompt", + "default": "Default", + "hists": { + "Poly3 bkg": { + "file": [ + "CutVarLc_pp13TeV_LHC24d3_poly3_1-2_12.root", + "CutVarLc_pp13TeV_LHC24d3_poly3_12.root", + "CutVarLc_pp13TeV_LHC24d3_poly3_12.root", + 
"CutVarLc_pp13TeV_LHC24d3_poly3_4-5_12.root", + "CutVarLc_pp13TeV_LHC24d3_poly3_5-6_12.root", + "CutVarLc_pp13TeV_LHC24d3_poly3_6-8_12.root", + "CutVarLc_pp13TeV_LHC24d3_poly3_8-12_12.root" + ] + }, + "Double gauss signal": { + "file": [ + "CutVarLc_pp13TeV_LHC24d3_double_gauss_1-2_12.root", + "CutVarLc_pp13TeV_LHC24d3_double_gauss_2-3_12.root", + "CutVarLc_pp13TeV_LHC24d3_double_gauss_12.root", + "CutVarLc_pp13TeV_LHC24d3_double_gauss_4-5_12.root", + "CutVarLc_pp13TeV_LHC24d3_double_gauss_5-6_12.root", + "CutVarLc_pp13TeV_LHC24d3_double_gauss_6-8_12.root", + "CutVarLc_pp13TeV_LHC24d3_double_gauss_8-12_12.root" + ] + }, + "Rebin +1": { + "file": [ + "CutVarLc_pp13TeV_LHC24d3_rebin+1_1-2_12.root", + "CutVarLc_pp13TeV_LHC24d3_rebin+1_12.root", + "CutVarLc_pp13TeV_LHC24d3_rebin+1_12.root", + "CutVarLc_pp13TeV_LHC24d3_rebin+1_4-5_12.root", + "CutVarLc_pp13TeV_LHC24d3_rebin+1_5-6_12.root", + "CutVarLc_pp13TeV_LHC24d3_rebin+1_6-8_12.root", + "CutVarLc_pp13TeV_LHC24d3_rebin+1_8-12_12.root" + ] + }, + "Range wider by 0.04": { + "file": [ + "CutVarLc_pp13TeV_LHC24d3_wide4_1-2_12.root", + "CutVarLc_pp13TeV_LHC24d3_wide4_12.root", + "CutVarLc_pp13TeV_LHC24d3_wide4_12.root", + "CutVarLc_pp13TeV_LHC24d3_wide4_12.root", + "CutVarLc_pp13TeV_LHC24d3_wide4_5-6_12.root", + "CutVarLc_pp13TeV_LHC24d3_wide4_6-8_12.root", + "CutVarLc_pp13TeV_LHC24d3_wide4_8-12_12.root" + ] + }, + "Rebin -1": { + "file": [ + "CutVarLc_pp13TeV_LHC24d3_rebin+1_1-2_12.root", + "CutVarLc_pp13TeV_LHC24d3_rebin+1_12.root", + "CutVarLc_pp13TeV_LHC24d3_rebin+1_12.root", + "CutVarLc_pp13TeV_LHC24d3_rebin+1_4-5_12.root", + "CutVarLc_pp13TeV_LHC24d3_rebin+1_5-6_12.root", + "CutVarLc_pp13TeV_LHC24d3_rebin+1_6-8_12.root", + "CutVarLc_pp13TeV_LHC24d3_rebin+1_8-12_12.root" + ] + }, + "Range wider by 0.02": { + "file": [ + "CutVarLc_pp13TeV_LHC24d3_wide2_1-2_12.root", + "CutVarLc_pp13TeV_LHC24d3_wide2_12.root", + "CutVarLc_pp13TeV_LHC24d3_wide2_12.root", + "CutVarLc_pp13TeV_LHC24d3_wide2_4-5_12.root", + "CutVarLc_pp13TeV_LHC24d3_wide2_5-6_12.root", + "CutVarLc_pp13TeV_LHC24d3_wide2_6-8_12.root", + "CutVarLc_pp13TeV_LHC24d3_wide2_8-12_12.root" + ] + }, + "Default": { + "file": [ + "CutVarLc_pp13TeV_LHC24d3_default_12.root" + ] + } + }, + "bin_min": [1,2,3,4,5,6,8,12], + "bin_max": [2,3,4,5,6,8,12,24], + "y_axis": "Non-prompt #Lambda_{c}^{#plus} fraction", + "legend": [0.50, 0.18, 0.90, 0.38], + "legend_ratio": [0.50, 0.70, 0.90, 0.90], + "output": { + "outdir": "/data8/majak/systematics/230824/fitting", + "file": "NP_Frac_pp13TeV_fitting" + } +} diff --git a/machine_learning_hep/scripts-dhadrons/systematics/config_crosssec_run2_run3.json b/machine_learning_hep/scripts-dhadrons/systematics/config_crosssec_run2_run3.json new file mode 100644 index 0000000000..7bd4b99da3 --- /dev/null +++ b/machine_learning_hep/scripts-dhadrons/systematics/config_crosssec_run2_run3.json @@ -0,0 +1,45 @@ +{ + "inputdir": "/data8/majak/crosssec/032025", + "histoname": "hptspectrum", + "default": "#sqrt{#it{s}} = 13 TeV", + "hists": { + "#sqrt{#it{s}} = 13 TeV": { + "file": [ + "LcpKpi_generated_Run2.root" + ], + "systematics": [ + "LcpKpi_generated_Run2.root", + "graph_syst" + ] + }, + "#sqrt{#it{s}} = 13.6 TeV": { + "file": [ + "finalcrossLcpKpiRun3analysis_0-1_scaled_6.root" + ], + "systematics": [ + [0.0], + [0.19], + [0.16], + [0.16], + [0.16], + [0.16], + [0.16], + [0.16], + [0.16], + [0.16], + [0.16], + [0.24] + ] + } + }, + "y_axis": "d^{2}#it{#sigma}/(d#it{y}d#it{p}_{T}) (#mub GeV^{-1}#it{c})", + "alice_text": "pp, |#kern[0.06]{#it{y}| < 0.5}", + 
"_alice_text": "#splitline{#Lambda_{c}^{#plus} (and charge conj.)}{|#it{y}| < 0.5}", + "legend": [0.52, 0.50, 0.89, 0.77], + "legend_ratio": [0.40, 0.60, 0.90, 0.90], + "log_scale": true, + "output": { + "outdir": "/data8/majak/crosssec/032025", + "file": "crosssec_Lc_run2_run3_review" + } +} diff --git a/machine_learning_hep/scripts-dhadrons/systematics/config_efficiency.json b/machine_learning_hep/scripts-dhadrons/systematics/config_efficiency.json new file mode 100644 index 0000000000..ccb6495275 --- /dev/null +++ b/machine_learning_hep/scripts-dhadrons/systematics/config_efficiency.json @@ -0,0 +1,28 @@ +{ + "inputdir": "/data8/majak/crosssec/032025", + "histoname": "eff", + "default": "Prompt", + "hists": { + "Prompt": { + "file": [ + "eff.root" + ] + }, + "Non-prompt": { + "file": [ + "eff_fd.root" + ] + } + }, + "bin_min": [1,2,3,4,5,6,7,8,10,12,16], + "bin_max": [2,3,4,5,6,7,8,10,12,16,24], + "y_axis": "Acceptance x efficiency", + "alice_text": "#Lambda_{c}^{#plus} #kern[-0.05]{#rightarrow pK^{#minus}#pi^{#plus} (and charge conj.})", + "legend": [0.45, 0.20, 0.65, 0.30], + "legend_ratio": [0.40, 0.60, 0.90, 0.90], + "log_scale": true, + "output": { + "outdir": "/data8/majak/crosssec/032025", + "file": "efficiencies" + } +} diff --git a/machine_learning_hep/scripts-dhadrons/systematics/config_fitting.json b/machine_learning_hep/scripts-dhadrons/systematics/config_fitting.json new file mode 100644 index 0000000000..2cbef9d998 --- /dev/null +++ b/machine_learning_hep/scripts-dhadrons/systematics/config_fitting.json @@ -0,0 +1,114 @@ +{ + "inputdir": "/data8/majak/systematics/230824/fitting", + "histoname": "hCorrFracNonPrompt", + "default": "Default", + "hists": { + "Poly3 bkg": { + "file": [ + "CutVarLc_pp13TeV_LHC24d3_poly3_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_poly3_1-2_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_poly3_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_poly3_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_poly3_4-5_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_poly3_5-6_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_poly3_6-8_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_poly3_8-12_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_poly3_12-16_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_poly3_16-24_0-24.root" + ] + }, + "Double gauss signal": { + "file": [ + "CutVarLc_pp13TeV_LHC24d3_double_gauss_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_double_gauss_1-2_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_double_gauss_2-3_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_double_gauss_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_double_gauss_4-5_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_double_gauss_5-6_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_double_gauss_6-8_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_double_gauss_8-12_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_double_gauss_12-16_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_double_gauss_16-24_0-24.root" + ] + }, + "Rebin +1": { + "file": [ + "CutVarLc_pp13TeV_LHC24d3_rebin+1_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_rebin+1_1-2_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_rebin+1_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_rebin+1_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_rebin+1_4-5_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_rebin+1_5-6_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_rebin+1_6-8_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_rebin+1_8-12_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_rebin+1_12-16_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_rebin+1_16-24_0-24.root" + ] + }, + "Range changed by 0.04": { + "file": [ + "CutVarLc_pp13TeV_LHC24d3_wide4_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_wide4_1-2_0-24.root", + 
"CutVarLc_pp13TeV_LHC24d3_wide4_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_wide4_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_wide4_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_wide4_5-6_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_wide4_6-8_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_wide4_8-12_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_narrow4_12-16_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_narrow4_16-24_0-24.root" + ] + }, + "Rebin -1": { + "file": [ + "CutVarLc_pp13TeV_LHC24d3_rebin-1_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_rebin-1_1-2_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_rebin-1_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_rebin-1_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_rebin-1_4-5_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_rebin-1_5-6_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_rebin-1_6-8_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_rebin-1_8-12_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_rebin-1_12-16_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_rebin-1_16-24_0-24.root" + ] + }, + "Range changed by 0.02": { + "file": [ + "CutVarLc_pp13TeV_LHC24d3_wide2_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_wide2_1-2_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_wide2_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_wide2_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_wide2_4-5_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_wide2_5-6_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_wide2_6-8_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_wide2_8-12_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_narrow2_12-16_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_narrow2_16-24_0-24.root" + ] + }, + "Default": { + "file": [ + "CutVarLc_pp13TeV_LHC24d3_default_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_default_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_default_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_default_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_default_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_default_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_default_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_default_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_default_1-24_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_default_1-24_0-24.root" + ] + } + }, + "bin_min": [1,2,3,4,5,6,8,12,16], + "bin_max": [2,3,4,5,6,8,12,16,24], + "y_axis": "Non-prompt #Lambda_{c}^{#plus} fraction", + "legend": [0.50, 0.18, 0.90, 0.38], + "legend_ratio": [0.50, 0.70, 0.90, 0.90], + "output": { + "outdir": "/data8/majak/systematics/230824/fitting", + "file": "NP_Frac_pp13TeV_fitting_1-24" + } +} diff --git a/machine_learning_hep/scripts-dhadrons/systematics/config_mult_ratios.json b/machine_learning_hep/scripts-dhadrons/systematics/config_mult_ratios.json new file mode 100644 index 0000000000..eaaec0b238 --- /dev/null +++ b/machine_learning_hep/scripts-dhadrons/systematics/config_mult_ratios.json @@ -0,0 +1,168 @@ +{ + "inputdir": "/data8/majak/mult-ratios", + "histoname": "hptspectrum", + "default": "MB", + "model_default": "Monash MB", + "hists": { + "MB": { + "file": [ + "spectrum_MB_scaled.root" + ], + "systematics": [ + [0.07], + [0.06], + [0.06], + [0.05], + [0.05], + [0.05], + [0.05], + [0.06], + [0.12], + [0.20] + ] + }, + "4.34": { + "file": [ + "spectrum_2_scaled.root" + ], + "systematics": [ + [0.10], + [0.05], + [0.05], + [0.05], + [0.06], + [0.07], + [0.10], + [0.10], + [0.16], + [0.20] + ] + }, + "5.58": { + "file": [ + "spectrum_3_scaled.root" + ], + "systematics": [ + [0.09], + [0.05], + [0.05], + [0.05], + [0.05], + [0.06], + [0.07], + [0.07], + [0.11], + [0.20] + ] + }, + "7.93": { + "file": [ + "spectrum_4_scaled.root" + ], + "systematics": [ + [0.10], + [0.05], + [0.05], + [0.05], + [0.05], + [0.05], + [0.06], + [0.10], + [0.10], + [0.20] + ] + }, + 
"11.42": { + "file": [ + "spectrum_5_scaled.root" + ], + "systematics": [ + [0.10], + [0.06], + [0.06], + [0.06], + [0.06], + [0.06], + [0.06], + [0.09], + [0.10], + [0.12] + ] + }, + "15.94": { + "file": [ + "spectrum_6_scaled.root" + ], + "systematics": [ + [0.15], + [0.10], + [0.06], + [0.06], + [0.06], + [0.06], + [0.06], + [0.09], + [0.10], + [0.12] + ] + }, + "20.07": { + "file": [ + "spectrum_7_scaled.root" + ], + "systematics": [ + [0.22], + [0.12], + [0.11], + [0.06], + [0.06], + [0.10], + [0.10], + [0.10], + [0.14], + [0.20] + ] + } + }, + "models": { + "Monash MB": { + "file": "new_MBratio_Monash.root", + "histoname": "hpythia_prompt" + }, + "Monash 7085": { + "file": "new_MBratio_Monash.root", + "histoname": "hpythia_prompt_7085" + }, + "Monash 5070": { + "file": "new_MBratio_Monash.root", + "histoname": "hpythia_prompt_5070" + }, + "Monash 3050": { + "file": "new_MBratio_Monash.root", + "histoname": "hpythia_prompt_3050" + }, + "Monash 1030": { + "file": "new_MBratio_Monash.root", + "histoname": "hpythia_prompt_1030" + }, + "Monash 110": { + "file": "new_MBratio_Monash.root", + "histoname": "hpythia_prompt_110" + }, + "Monash 0-1": { + "file": "new_MBratio_Monash.root", + "histoname": "hpythia_prompt_01" + } + }, + "y_axis": "d^{2}#it{N}/(d#it{y}d#it{p}_{T})#left|_{mult.} / d^{2}#it{N}/(d#it{y}d#it{p}_{T})#right|_{INEL > 0}", + "alice_text": "Prompt#kern[-0.3]{ #Lambda_{c}^{#plus}}", + "legend": [0.50, 0.65, 0.90, 0.93], + "legend_models": [0.50, 0.65, 0.90, 0.93], + "legend_ratio": [0.60, 0.63, 0.90, 0.88], + "legend_ratio_models": [0.18, 0.18, 0.40, 0.23], + "log_scale": true, + "output": { + "outdir": "/data8/majak/mult-ratios", + "file": "mult_ratios_review" + } +} diff --git a/machine_learning_hep/scripts-dhadrons/systematics/config_pt_weight.json b/machine_learning_hep/scripts-dhadrons/systematics/config_pt_weight.json new file mode 100644 index 0000000000..967b72a65b --- /dev/null +++ b/machine_learning_hep/scripts-dhadrons/systematics/config_pt_weight.json @@ -0,0 +1,26 @@ +{ + "inputdir": "/data8/majak/systematics/032025/pt-weight", + "histoname": "hptspectrum", + "default": "Default", + "hists": { + "Weighted #it{p}_{T}": { + "file": [ + "finalcrossLcpKpiRun3analysis_ptshape.root" + ] + }, + "Default": { + "file": [ + "finalcrossLcpKpiRun3analysis.root" + ] + } + }, + "bin_min": [1,2,3,4,5,6,7,8,10,12,16], + "bin_max": [2,3,4,5,6,7,8,10,12,16,24], + "y_axis": "#Lambda_{c} cross section (pb)", + "legend": [0.50, 0.18, 0.90, 0.38], + "legend_ratio": [0.50, 0.70, 0.90, 0.90], + "output": { + "outdir": "/data8/majak/systematics/032025/pt-weight", + "file": "crosssec_pt_weight_1-24" + } +} diff --git a/machine_learning_hep/scripts-dhadrons/systematics/config_run2.json b/machine_learning_hep/scripts-dhadrons/systematics/config_run2.json new file mode 100644 index 0000000000..43ac089d78 --- /dev/null +++ b/machine_learning_hep/scripts-dhadrons/systematics/config_run2.json @@ -0,0 +1,27 @@ +{ + "inputdir": "/data8/majak/systematics/220724/run2-run3d0", + "histoname": "hCorrFracNonPrompt", + "default": "Run 2 #Lambda_{c}^{#plus} #rightarrow pK_{S}^{0}", + "hists": { + "Run 2 #Lambda_{c}^{#plus} #rightarrow pK_{S}^{0}": { + "file": [ + "CutVarLc2pK0s_pp13TeV.root" + ], + "systematics": [ + [0.04, 0.09, 0.09, 0.04, 0.03, 0.016, 0.05], + [0.04, 0.09, 0.05, 0.04, 0.01, 0.016, 0.05], + [0.06, 0.09, 0.08, 0.04, 0.01, 0.016, 0.05], + [0.06, 0.09, 0.08, 0.04, 0.01, 0.016, 0.05] + ] + } + }, + "bin_min": [1,2,3,4,5,6,8,12], + "bin_max": [2,3,4,5,6,8,12,24], + "y_axis": "Non-prompt 
#Lambda_{c} fraction", + "legend": [0.50, 0.18, 0.90, 0.38], + "legend_ratio": [0.50, 0.70, 0.90, 0.90], + "output": { + "outdir": "/data8/majak/systematics/220724/run2-run3d0", + "file": "NP_Frac_pp13TeV_run2_only" + } +} diff --git a/machine_learning_hep/scripts-dhadrons/systematics/config_run3.json b/machine_learning_hep/scripts-dhadrons/systematics/config_run3.json new file mode 100644 index 0000000000..b695a82906 --- /dev/null +++ b/machine_learning_hep/scripts-dhadrons/systematics/config_run3.json @@ -0,0 +1,52 @@ +{ + "inputdir": "/data8/majak/systematics/230824/run3", + "histoname": "hCorrFracNonPrompt", + "default": "#splitline{#Lambda_{c}^{#plus} #rightarrow pK^{#minus}#pi^{#plus}, pp,}{#sqrt{#it{s}} = 13.6 TeV}", + "hists": { + "#splitline{#Lambda_{c}^{#plus} #rightarrow pK^{#minus}#pi^{#plus}, pp,}{#sqrt{#it{s}} = 13.6 TeV}": { + "file": [ + "CutVarLc_pp13TeV_LHC24d3_default_0-25.root", + "CutVarLc_pp13TeV_LHC24d3_default_0-25.root", + "CutVarLc_pp13TeV_LHC24d3_default_0-25.root", + "CutVarLc_pp13TeV_LHC24d3_default_0-25.root", + "CutVarLc_pp13TeV_LHC24d3_default_0-25.root", + "CutVarLc_pp13TeV_LHC24d3_default_0-25.root", + "CutVarLc_pp13TeV_LHC24d3_default_0-25.root", + "CutVarLc_pp13TeV_LHC24d3_default_0-25.root", + "CutVarLc_pp13TeV_LHC24d3_default_1-24_0-25.root", + "CutVarLc_pp13TeV_LHC24d3_default_1-24_0-25.root" + ], + "systematics": [ + [0.0], + [0.17], + [0.16], + [0.16], + [0.16], + [0.16], + [0.16], + [0.18], + [0.20], + [0.24], + [0.0] + ] + } + }, + "models": { + "Monash": { + "file": "NonPromptLcFraction_Monash_1B.root", + "histoname": "h_nonprompt_fraction" + } + }, + "bin_min": [1,2,3,4,5,6,8,12,16], + "bin_max": [2,3,4,5,6,8,12,16,24], + "y_axis": "#it{f}_{non-prompt}", + "alice_text": "#Lambda_{c}^{#plus} baryon, |#it{y}| < 0.5", + "legend": [0.50, 0.65, 0.90, 0.93], + "legend_models": [0.50, 0.65, 0.90, 0.93], + "legend_ratio": [0.60, 0.63, 0.90, 0.88], + "legend_ratio_models": [0.18, 0.18, 0.40, 0.23], + "output": { + "outdir": "/data8/majak/systematics/230824/run3", + "file": "NP_Frac_pp13TeV_run3_1-24" + } +} diff --git a/machine_learning_hep/scripts-dhadrons/systematics/config_run3_run2.json b/machine_learning_hep/scripts-dhadrons/systematics/config_run3_run2.json new file mode 100644 index 0000000000..d13a4c31b8 --- /dev/null +++ b/machine_learning_hep/scripts-dhadrons/systematics/config_run3_run2.json @@ -0,0 +1,58 @@ +{ + "inputdir": "/data8/majak/systematics/230824/run3-run2", + "histoname": "hCorrFracNonPrompt", + "default": "#Lambda_{c}^{#plus}#rightarrowpK^{#minus}#pi^{#plus}, pp, #sqrt{#it{s}} = 13.6 TeV", + "hists": { + "#Lambda_{c}^{#plus}#rightarrowpK^{#minus}#pi^{#plus}, pK_{S}^{0}, pp, #sqrt{#it{s}} = 13 TeV": { + "file": [ + "CutVarLcMerged_pp13TeV_0-25.root" + ], + "systematics": [ + [0.0], + [0.08], + [0.08], + [0.08], + [0.08], + [0.13], + [0.0] + ] + }, + "#Lambda_{c}^{#plus}#rightarrowpK^{#minus}#pi^{#plus}, pp, #sqrt{#it{s}} = 13.6 TeV": { + "file": [ + "CutVarLc_pp13TeV_LHC24d3_default_0-25.root", + "CutVarLc_pp13TeV_LHC24d3_default_0-25.root", + "CutVarLc_pp13TeV_LHC24d3_default_0-25.root", + "CutVarLc_pp13TeV_LHC24d3_default_0-25.root", + "CutVarLc_pp13TeV_LHC24d3_default_0-25.root", + "CutVarLc_pp13TeV_LHC24d3_default_0-25.root", + "CutVarLc_pp13TeV_LHC24d3_default_0-25.root", + "CutVarLc_pp13TeV_LHC24d3_default_0-25.root", + "CutVarLc_pp13TeV_LHC24d3_default_1-24_0-25.root", + "CutVarLc_pp13TeV_LHC24d3_default_1-24_0-25.root" + ], + "systematics": [ + [0.0], + [0.17], + [0.16], + [0.16], + [0.16], + [0.16], + [0.16], + [0.18], + 
[0.20], + [0.24], + [0.0] + ] + } + }, + "bin_min": [1,2,3,4,5,6,8,12,16], + "bin_max": [2,3,4,5,6,8,12,16,24], + "y_axis": "#it{f}_{non-prompt}", + "alice_text": "#Lambda_{c}^{#plus} and charge conj., |#it{y}| < 0.5", + "legend": [0.50, 0.65, 0.90, 0.93], + "legend_ratio": [0.60, 0.63, 0.90, 0.88], + "output": { + "outdir": "/data8/majak/systematics/230824/run3-run2", + "file": "NP_Frac_pp13TeV_run3_run2_1-24" + } +} diff --git a/machine_learning_hep/scripts-dhadrons/systematics/config_run3d0.json b/machine_learning_hep/scripts-dhadrons/systematics/config_run3d0.json new file mode 100644 index 0000000000..5f96cc77e4 --- /dev/null +++ b/machine_learning_hep/scripts-dhadrons/systematics/config_run3d0.json @@ -0,0 +1,66 @@ +{ + "inputdir": "/data8/majak/systematics/230824/run3d0", + "histoname": "hCorrFracNonPrompt", + "default": "#Lambda_{c}^{#plus} #rightarrow pK^{#minus}#pi^{#plus}, pp, #sqrt{#it{s}} = 13.6 TeV", + "hists": { + "D^{0} #rightarrow K^{#minus}#pi^{#plus}, pp, #sqrt{#it{s}} = 13.6 TeV": { + "file": [ + "CutVarD0_pp136TeV_final_0-25.root" + ], + "systematics": [ + [0.10, 0.04, 0.08, 0.08], + [0.08, 0.04, 0.03, 0.09], + [0.03, 0.04, 0.03, 0.08], + [0.04, 0.04, 0.03, 0.07], + [0.05, 0.03, 0.04, 0.06], + [0.07, 0.02, 0.06, 0.04], + [0.04, 0.01, 0.08, 0.05], + [0.05, 0.01, 0.09, 0.04], + [0.08, 0.01, 0.15, 0.08], + [0.08, 0.01, 0.15, 0.08], + [0.08, 0.01, 0.15, 0.08], + [0.08, 0.01, 0.15, 0.08], + [0.03, 0.01, 0.15, 0.08], + [0.03, 0.01, 0.15, 0.08], + [0.0] + ] + }, + "#Lambda_{c}^{#plus} #rightarrow pK^{#minus}#pi^{#plus}, pp, #sqrt{#it{s}} = 13.6 TeV": { + "file": [ + "CutVarLc_pp13TeV_LHC24d3_default_0-25.root", + "CutVarLc_pp13TeV_LHC24d3_default_0-25.root", + "CutVarLc_pp13TeV_LHC24d3_default_0-25.root", + "CutVarLc_pp13TeV_LHC24d3_default_0-25.root", + "CutVarLc_pp13TeV_LHC24d3_default_0-25.root", + "CutVarLc_pp13TeV_LHC24d3_default_0-25.root", + "CutVarLc_pp13TeV_LHC24d3_default_0-25.root", + "CutVarLc_pp13TeV_LHC24d3_default_0-25.root", + "CutVarLc_pp13TeV_LHC24d3_default_1-24_0-25.root", + "CutVarLc_pp13TeV_LHC24d3_default_1-24_0-25.root" + ], + "systematics": [ + [0.0], + [0.17], + [0.16], + [0.16], + [0.16], + [0.16], + [0.16], + [0.18], + [0.20], + [0.24], + [0.0] + ] + } + }, + "bin_min": [1,2,3,4,5,6,8,12,16], + "bin_max": [2,3,4,5,6,8,12,16,24], + "y_axis": "#it{f}_{non-prompt}", + "alice_text": "|#it{y}| < 0.5", + "legend": [0.50, 0.18, 0.90, 0.38], + "legend_ratio": [0.50, 0.70, 0.90, 0.90], + "output": { + "outdir": "/data8/majak/systematics/230824/run3d0", + "file": "NP_Frac_pp13TeV_run3d0_1-24" + } +} diff --git a/machine_learning_hep/scripts-dhadrons/systematics/config_spans_cutvar.json b/machine_learning_hep/scripts-dhadrons/systematics/config_spans_cutvar.json new file mode 100644 index 0000000000..efe1c1fc92 --- /dev/null +++ b/machine_learning_hep/scripts-dhadrons/systematics/config_spans_cutvar.json @@ -0,0 +1,26 @@ +{ + "inputdir": "/data8/majak/crosssec/202502/fractions", + "histoname": "hCorrFracNonPrompt", + "default": "#splitline{#Lambda_{c}^{#plus} #rightarrow pK^{#minus}#pi^{#plus}, pp,}{#sqrt{#it{s}} = 13.6 TeV}", + "hists": { + "#splitline{#Lambda_{c}^{#plus} #rightarrow pK^{#minus}#pi^{#plus}, pp,}{#sqrt{#it{s}} = 13.6 TeV}": { + "file": [ + "CutVarLc_pp13TeV_LHC23_pass4_default.root" + ] + }, + "#splitline{#Lambda_{c}^{#plus} #rightarrow pK^{#minus}#pi^{#plus}, pp,}{#sqrt{#it{s}} = 13.6 TeV, wide span}": { + "file": [ + "CutVarLc_pp13TeV_LHC23_pass4_wide.root" + ] + } + }, + "bin_min": [1,2,3,4,5,6,7,8,10,12,16], + "bin_max": 
[2,3,4,5,6,7,8,10,12,16,24], + "y_axis": "#it{f}_{non-prompt}", + "legend": [0.50, 0.15, 0.90, 0.45], + "legend_ratio": [0.40, 0.10, 0.90, 0.35], + "output": { + "outdir": "/data8/majak/crosssec/202502/fractions", + "file": "NP_Frac_pp13.6TeV_run3_spans" + } +} diff --git a/machine_learning_hep/scripts-dhadrons/systematics/config_track_tuner.json b/machine_learning_hep/scripts-dhadrons/systematics/config_track_tuner.json new file mode 100644 index 0000000000..c1f63fa343 --- /dev/null +++ b/machine_learning_hep/scripts-dhadrons/systematics/config_track_tuner.json @@ -0,0 +1,58 @@ +{ + "inputdir": "/data8/majak/systematics/230824/track-tuner", + "histoname": "hCorrFracNonPrompt", + "default": "Default (with #it{p}_{T} smearing)", + "hists": { + "No #it{p}_{T} smearing": { + "file": [ + "CutVarLc_pp13TeV_LHC24d3_no_pt_smearing_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_no_pt_smearing_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_no_pt_smearing_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_no_pt_smearing_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_no_pt_smearing_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_no_pt_smearing_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_no_pt_smearing_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_no_pt_smearing_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_no_pt_smearing_1-24_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_no_pt_smearing_16-24_0-24.root" + ] + }, + "Reso p1": { + "file": [ + "CutVarLc_pp13TeV_LHC24d3_resop1_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_resop1_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_resop1_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_resop1_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_resop1_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_resop1_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_resop1_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_resop1_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_resop1_1-24_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_resop1_16-24_0-24.root" + ] + }, + "Default (with #it{p}_{T} smearing)": { + "file": [ + "CutVarLc_pp13TeV_LHC24d3_default_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_default_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_default_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_default_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_default_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_default_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_default_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_default_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_default_1-24_0-24.root", + "CutVarLc_pp13TeV_LHC24d3_default_1-24_0-24.root" + ] + } + }, + "bin_min": [1,2,3,4,5,6,8,12,16], + "bin_max": [2,3,4,5,6,8,12,16,24], + "y_axis": "Non-prompt #Lambda_{c}^{#plus} fraction", + "legend": [0.50, 0.18, 0.90, 0.38], + "legend_ratio": [0.50, 0.70, 0.90, 0.90], + "output": { + "outdir": "/data8/majak/systematics/230824/track-tuner", + "file": "NP_Frac_pp13TeV_track_tuner_1-24" + } +}