From 3f30eb00fe50120f2e8567e2aabbda2b0248a9cb Mon Sep 17 00:00:00 2001 From: Kati Lassila-Perini Date: Tue, 24 Oct 2023 10:47:17 +0200 Subject: [PATCH] cms-2016-simulated-datasets: updates as in #182 --- cms-2016-simulated-datasets/README.md | 2 +- .../code/config_store.py | 7 +-- .../code/das_json_store.py | 4 +- .../code/dataset_records.py | 52 ++++++++++++++++--- cms-2016-simulated-datasets/code/mcm_store.py | 8 ++- .../inputs/CMS-2016-mc-datasets.txt | 3 ++ .../inputs/recid_info.py | 7 ++- 7 files changed, 68 insertions(+), 15 deletions(-) diff --git a/cms-2016-simulated-datasets/README.md b/cms-2016-simulated-datasets/README.md index 13f1c5666..35cf33e97 100644 --- a/cms-2016-simulated-datasets/README.md +++ b/cms-2016-simulated-datasets/README.md @@ -45,7 +45,7 @@ To build sample records (with a limited number of datasets in the input file) do $ python3 ./code/interface.py --create-das-json-store --ignore-eos-store DATASET_LIST $ auth-get-sso-cookie -u https://cms-pdmv.cern.ch/mcm -o cookies.txt -$ python3 ./code/interface.py --create-mcm-json-store --ignore-eos-store DATASET_LIST +$ python3 ./code/interface.py --create-mcm-store --ignore-eos-store DATASET_LIST $ openssl pkcs12 -in myCert.p12 -nocerts -nodes -out userkey.nodes.pem # if not present $ python3 ./code/interface.py --get-conf-files --ignore-eos-store DATASET_LIST diff --git a/cms-2016-simulated-datasets/code/config_store.py b/cms-2016-simulated-datasets/code/config_store.py index 671b161e0..010314ba4 100644 --- a/cms-2016-simulated-datasets/code/config_store.py +++ b/cms-2016-simulated-datasets/code/config_store.py @@ -37,9 +37,10 @@ def main(eos_dir, conffile_ids = [] for dataset_full_name in eos_datasets: - for conffile_id in get_conffile_ids_all_chain_steps(dataset_full_name, mcm_dir): - if conffile_id not in conffile_ids: - conffile_ids.append(conffile_id) + if dataset_full_name.endswith('MINIAODSIM') == 0: + for conffile_id in get_conffile_ids_all_chain_steps(dataset_full_name, mcm_dir): + if 
conffile_id not in conffile_ids: + conffile_ids.append(conffile_id) if not os.path.exists(conf_dir): os.makedirs(conf_dir, exist_ok=True) diff --git a/cms-2016-simulated-datasets/code/das_json_store.py b/cms-2016-simulated-datasets/code/das_json_store.py index 4c3508147..391cbd05c 100644 --- a/cms-2016-simulated-datasets/code/das_json_store.py +++ b/cms-2016-simulated-datasets/code/das_json_store.py @@ -10,7 +10,7 @@ def get_parent_dataset(dataset, das_dir): - "Return parent dataset to the given dataset or an empty string if no parent found. Not used for 2016" + "Return parent dataset to the given dataset or an empty string if no parent found." parent_dataset = '' filepath = das_dir + '/parent/' + dataset.replace('/', '@') + '.json' @@ -74,6 +74,7 @@ def create(dataset, das_dir): result_file = dataset.replace('/', '@') + ".json" mydasgoclient(dataset, "dataset", das_dir, result_file) + mydasgoclient(dataset, "parent", das_dir, result_file) mydasgoclient(dataset, "config", das_dir, result_file) mydasgoclient(dataset, "release", das_dir, result_file) @@ -86,6 +87,7 @@ def main(das_dir, # create dirs for dataset and release for path in [das_dir + '/dataset', + das_dir + '/parent', das_dir + '/config', das_dir + '/release']: if not os.path.exists(path): diff --git a/cms-2016-simulated-datasets/code/dataset_records.py b/cms-2016-simulated-datasets/code/dataset_records.py index 8e4a12d36..388307450 100644 --- a/cms-2016-simulated-datasets/code/dataset_records.py +++ b/cms-2016-simulated-datasets/code/dataset_records.py @@ -158,9 +158,25 @@ def get_globaltag_from_conffile(afile, conf_dir): return globaltag -def get_all_generator_text(dataset, das_dir, mcm_dir, conf_dir, recid): +#def get_all_generator_text(dataset, das_dir, mcm_dir, conf_dir, recid): +def get_all_generator_text(dataset, das_dir, mcm_dir, conf_dir, recid_info): """Return DICT with all information about the generator steps.""" + # For MiniAODSIM, find the corresponding Nano and use that information + # 
Might be best done when querying the McM + + if dataset.endswith('MINIAODSIM'): + nano_found=0 + dataset_first_name = get_from_deep_json(get_mcm_dict(dataset, mcm_dir), 'dataset_name') + for x in os.listdir(mcm_dir + '/chain'): + if x.startswith('@'+dataset_first_name): + dataset = x.replace('@', '/') + nano_found=1 + + if nano_found==0: + print("A corresponding NANOAODSIM was not found for dataset: " + dataset) + + recid = recid_info[dataset] info = {} info["description"] = "

These data were generated in several steps (see also CMS Monte Carlo production overview):

" info["steps"] = [] @@ -341,11 +357,18 @@ def create_record(dataset_full_name, doi_info, recid_info, eos_dir, das_dir, mcm rec['license'] = {} rec['license']['attribution'] = 'CC0' - rec['methodology'] = get_all_generator_text(dataset_full_name, das_dir, mcm_dir, conffiles_dir, recid_info[dataset_full_name]) + rec['methodology'] = get_all_generator_text(dataset_full_name, das_dir, mcm_dir, conffiles_dir, recid_info) + # For Mini, get the pileup from the corresponding Nano + dataset_name_for_nano = dataset_full_name + if dataset_full_name.endswith('MINIAODSIM'): + dataset_first_name = get_from_deep_json(get_mcm_dict(dataset_full_name, mcm_dir), 'dataset_name') + for x in os.listdir(mcm_dir + '/chain'): + if x.startswith('@'+dataset_first_name): + dataset_name_for_nano = x.replace('@', '/') pileup_dataset_name= '' - pileup_dataset_name= get_pileup_from_mcm(dataset_full_name, mcm_dir) + pileup_dataset_name= get_pileup_from_mcm(dataset_name_for_nano, mcm_dir) pileup_dataset_recid = { '/MinBias_TuneZ2_7TeV-pythia6/Summer11Leg-START53_LV4-v1/GEN-SIM': 36, # 2011 @@ -375,9 +398,26 @@ def create_record(dataset_full_name, doi_info, recid_info, eos_dir, das_dir, mcm rec['recid'] = str(recid_info[dataset_full_name]) - # rec['relations'] = [] - # rec['relations']['title'] = '' # FIXME, 2016 Nano are childs of 2016 Mini - # rec['relations']['type'] = 'isChildOf' + if dataset_full_name.endswith('NANOAODSIM'): + # Query from mcm dict fails for an example dataset because Mini is v1 in mcm and v2 in dataset list + # Get it from das instead + #dataset_name_for_mini = get_from_deep_json(get_mcm_dict(dataset_full_name, mcm_dir), 'input_dataset') + dataset_name_for_mini = get_parent_dataset(dataset_full_name, das_dir) + relations_description = 'The corresponding MINIAODSIM dataset:' + relations_recid = str(recid_info[dataset_name_for_mini]) + relations_type = 'isParentOf' + else: + relations_description = 'The corresponding NANOAODSIM dataset:' + relations_recid = 
str(recid_info[dataset_name_for_nano]) + relations_type = 'isChildOf' + + rec['relations'] = [ + { + 'description': relations_description, + 'recid': relations_recid, + 'type': relations_type + } + ] rec['run_period'] = run_period diff --git a/cms-2016-simulated-datasets/code/mcm_store.py b/cms-2016-simulated-datasets/code/mcm_store.py index a920bb983..203087585 100644 --- a/cms-2016-simulated-datasets/code/mcm_store.py +++ b/cms-2016-simulated-datasets/code/mcm_store.py @@ -19,7 +19,8 @@ def mcm_downloader(dataset, mcm_dir): print("==> " + dataset + "\n==> Already exist. Skipping...") return - cmd = "curl -s -k https://cms-pdmv.cern.ch/mcm/public/restapi/requests/" + #cmd = "curl -s -k https://cms-pdmv.cern.ch/mcm/public/restapi/requests/" + cmd = "curl -s -k https://cms-pdmv-prod.web.cern.ch/mcm/public/restapi/requests/" mcm_dict = subprocess.run(cmd + "produces" + dataset, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) @@ -52,6 +53,8 @@ def mcm_downloader(dataset, mcm_dir): ### New 2016 # create a directory with the dataset name under mcm_dir + "/chain" # create dirs + if dataset.endswith('MINIAODSIM'): + return path = mcm_dir + "/chain/" + dataset.replace('/', '@') os.makedirs(path, exist_ok=True) @@ -66,7 +69,8 @@ def mcm_downloader(dataset, mcm_dir): # commands line: curl -L -s -b cookies.txt https://cms-pdmv.cern.ch/mcm/restapi/chained_requests/get/ | jq .results.chain # FIXME: change shell jq to deep json query # REQUIRES: run on command line first: auth-get-sso-cookie -u https://cms-pdmv.cern.ch/mcm -o cookies.txt - chaincmd = "curl -L -s -b cookies.txt https://cms-pdmv.cern.ch/mcm/restapi/chained_requests/" + # chaincmd = "curl -L -s -b cookies.txt https://cms-pdmv.cern.ch/mcm/restapi/chained_requests/" + chaincmd = "curl -L -s -b cookies.txt https://cms-pdmv-prod.web.cern.ch/mcm/restapi/chained_requests/" mcm_chain_prepids = subprocess.run(chaincmd + "get/" + chain_prepid + " | jq .results.chain", shell=True, stdout=subprocess.PIPE, 
stderr=subprocess.PIPE) mcm_chain_prepids_out = str(mcm_chain_prepids.stdout.decode("utf-8")) diff --git a/cms-2016-simulated-datasets/inputs/CMS-2016-mc-datasets.txt b/cms-2016-simulated-datasets/inputs/CMS-2016-mc-datasets.txt index f001f60c8..94b0fa817 100644 --- a/cms-2016-simulated-datasets/inputs/CMS-2016-mc-datasets.txt +++ b/cms-2016-simulated-datasets/inputs/CMS-2016-mc-datasets.txt @@ -1,3 +1,6 @@ /ADDmonoPhoton_MD-1_d-3_TuneCP5_13TeV-pythia8/RunIISummer20UL16NanoAODv9-106X_mcRun2_asymptotic_v17-v2/NANOAODSIM +/BBH_HToJPsiG_JPsiToMuMu_TuneCP5_13TeV-madgraph-pythia8/RunIISummer20UL16NanoAODv9-106X_mcRun2_asymptotic_v17-v1/NANOAODSIM +/WminusJetsToTauNu_TauToMu_TuneCP5_13TeV-powhegMiNNLO-pythia8-photos/RunIISummer20UL16NanoAODv9-106X_mcRun2_asymptotic_v17-v1/NANOAODSIM +/ADDmonoPhoton_MD-1_d-3_TuneCP5_13TeV-pythia8/RunIISummer20UL16MiniAODv2-106X_mcRun2_asymptotic_v17-v2/MINIAODSIM /BBH_HToJPsiG_JPsiToMuMu_TuneCP5_13TeV-madgraph-pythia8/RunIISummer20UL16MiniAODv2-106X_mcRun2_asymptotic_v17-v1/MINIAODSIM /WminusJetsToTauNu_TauToMu_TuneCP5_13TeV-powhegMiNNLO-pythia8-photos/RunIISummer20UL16MiniAODv2-106X_mcRun2_asymptotic_v17-v1/MINIAODSIM diff --git a/cms-2016-simulated-datasets/inputs/recid_info.py b/cms-2016-simulated-datasets/inputs/recid_info.py index d9337cb41..826b2e803 100644 --- a/cms-2016-simulated-datasets/inputs/recid_info.py +++ b/cms-2016-simulated-datasets/inputs/recid_info.py @@ -1,5 +1,8 @@ RECID_INFO ={ "/ADDmonoPhoton_MD-1_d-3_TuneCP5_13TeV-pythia8/RunIISummer20UL16NanoAODv9-106X_mcRun2_asymptotic_v17-v2/NANOAODSIM": 30000, -"/BBH_HToJPsiG_JPsiToMuMu_TuneCP5_13TeV-madgraph-pythia8/RunIISummer20UL16MiniAODv2-106X_mcRun2_asymptotic_v17-v1/MINIAODSIM": 30001, -"/WminusJetsToTauNu_TauToMu_TuneCP5_13TeV-powhegMiNNLO-pythia8-photos/RunIISummer20UL16MiniAODv2-106X_mcRun2_asymptotic_v17-v1/MINIAODSIM": 30002 +"/BBH_HToJPsiG_JPsiToMuMu_TuneCP5_13TeV-madgraph-pythia8/RunIISummer20UL16NanoAODv9-106X_mcRun2_asymptotic_v17-v1/NANOAODSIM": 30001, 
+"/WminusJetsToTauNu_TauToMu_TuneCP5_13TeV-powhegMiNNLO-pythia8-photos/RunIISummer20UL16NanoAODv9-106X_mcRun2_asymptotic_v17-v1/NANOAODSIM": 30002, +"/ADDmonoPhoton_MD-1_d-3_TuneCP5_13TeV-pythia8/RunIISummer20UL16MiniAODv2-106X_mcRun2_asymptotic_v17-v2/MINIAODSIM": 30003, +"/BBH_HToJPsiG_JPsiToMuMu_TuneCP5_13TeV-madgraph-pythia8/RunIISummer20UL16MiniAODv2-106X_mcRun2_asymptotic_v17-v1/MINIAODSIM": 30004, +"/WminusJetsToTauNu_TauToMu_TuneCP5_13TeV-powhegMiNNLO-pythia8-photos/RunIISummer20UL16MiniAODv2-106X_mcRun2_asymptotic_v17-v1/MINIAODSIM": 30005 }