diff --git a/cms-2016-simulated-datasets/README.md b/cms-2016-simulated-datasets/README.md index 13f1c5666..35cf33e97 100644 --- a/cms-2016-simulated-datasets/README.md +++ b/cms-2016-simulated-datasets/README.md @@ -45,7 +45,7 @@ To build sample records (with a limited number of datasets in the input file) do $ python3 ./code/interface.py --create-das-json-store --ignore-eos-store DATASET_LIST $ auth-get-sso-cookie -u https://cms-pdmv.cern.ch/mcm -o cookies.txt -$ python3 ./code/interface.py --create-mcm-json-store --ignore-eos-store DATASET_LIST +$ python3 ./code/interface.py --create-mcm-store --ignore-eos-store DATASET_LIST $ openssl pkcs12 -in myCert.p12 -nocerts -nodes -out userkey.nodes.pem # if not present $ python3 ./code/interface.py --get-conf-files --ignore-eos-store DATASET_LIST diff --git a/cms-2016-simulated-datasets/code/config_store.py b/cms-2016-simulated-datasets/code/config_store.py index 671b161e0..010314ba4 100644 --- a/cms-2016-simulated-datasets/code/config_store.py +++ b/cms-2016-simulated-datasets/code/config_store.py @@ -37,9 +37,10 @@ def main(eos_dir, conffile_ids = [] for dataset_full_name in eos_datasets: - for conffile_id in get_conffile_ids_all_chain_steps(dataset_full_name, mcm_dir): - if conffile_id not in conffile_ids: - conffile_ids.append(conffile_id) + if dataset_full_name.endswith('MINIAODSIM') == 0: + for conffile_id in get_conffile_ids_all_chain_steps(dataset_full_name, mcm_dir): + if conffile_id not in conffile_ids: + conffile_ids.append(conffile_id) if not os.path.exists(conf_dir): os.makedirs(conf_dir, exist_ok=True) diff --git a/cms-2016-simulated-datasets/code/das_json_store.py b/cms-2016-simulated-datasets/code/das_json_store.py index 4c3508147..391cbd05c 100644 --- a/cms-2016-simulated-datasets/code/das_json_store.py +++ b/cms-2016-simulated-datasets/code/das_json_store.py @@ -10,7 +10,7 @@ def get_parent_dataset(dataset, das_dir): - "Return parent dataset to the given dataset or an empty string if no parent found. Not used for 2016" + "Return parent dataset to the given dataset or an empty string if no parent found." parent_dataset = '' filepath = das_dir + '/parent/' + dataset.replace('/', '@') + '.json' @@ -74,6 +74,7 @@ def create(dataset, das_dir): result_file = dataset.replace('/', '@') + ".json" mydasgoclient(dataset, "dataset", das_dir, result_file) + mydasgoclient(dataset, "parent", das_dir, result_file) mydasgoclient(dataset, "config", das_dir, result_file) mydasgoclient(dataset, "release", das_dir, result_file) @@ -86,6 +87,7 @@ def main(das_dir, # create dirs for dataset and release for path in [das_dir + '/dataset', + das_dir + '/parent', das_dir + '/config', das_dir + '/release']: if not os.path.exists(path): diff --git a/cms-2016-simulated-datasets/code/dataset_records.py b/cms-2016-simulated-datasets/code/dataset_records.py index 8e4a12d36..388307450 100644 --- a/cms-2016-simulated-datasets/code/dataset_records.py +++ b/cms-2016-simulated-datasets/code/dataset_records.py @@ -158,9 +158,25 @@ def get_globaltag_from_conffile(afile, conf_dir): return globaltag -def get_all_generator_text(dataset, das_dir, mcm_dir, conf_dir, recid): +#def get_all_generator_text(dataset, das_dir, mcm_dir, conf_dir, recid): +def get_all_generator_text(dataset, das_dir, mcm_dir, conf_dir, recid_info): """Return DICT with all information about the generator steps.""" + # For MiniAODSIM, find the corresponding Nano and use that information + # Might be best done at the when querying the McM + + if dataset.endswith('MINIAODSIM'): + nano_found=0 + dataset_first_name = get_from_deep_json(get_mcm_dict(dataset, mcm_dir), 'dataset_name') + for x in os.listdir(mcm_dir + '/chain'): + if x.startswith('@'+dataset_first_name): + dataset = x.replace('@', '/') + nano_found=1 + + if nano_found==0: + print("A corresponding NANOAODSIM was not found for dataset: " + dataset) + + recid = recid_info[dataset] info = {} info["description"] = "
These data were generated in several steps (see also CMS Monte Carlo production overview):
" info["steps"] = [] @@ -341,11 +357,18 @@ def create_record(dataset_full_name, doi_info, recid_info, eos_dir, das_dir, mcm rec['license'] = {} rec['license']['attribution'] = 'CC0' - rec['methodology'] = get_all_generator_text(dataset_full_name, das_dir, mcm_dir, conffiles_dir, recid_info[dataset_full_name]) + rec['methodology'] = get_all_generator_text(dataset_full_name, das_dir, mcm_dir, conffiles_dir, recid_info) + # For Mini, get the pileup from the corresponding Nano + dataset_name_for_nano = dataset_full_name + if dataset_full_name.endswith('MINIAODSIM'): + dataset_first_name = get_from_deep_json(get_mcm_dict(dataset_full_name, mcm_dir), 'dataset_name') + for x in os.listdir(mcm_dir + '/chain'): + if x.startswith('@'+dataset_first_name): + dataset_name_for_nano = x.replace('@', '/') pileup_dataset_name= '' - pileup_dataset_name= get_pileup_from_mcm(dataset_full_name, mcm_dir) + pileup_dataset_name= get_pileup_from_mcm(dataset_name_for_nano, mcm_dir) pileup_dataset_recid = { '/MinBias_TuneZ2_7TeV-pythia6/Summer11Leg-START53_LV4-v1/GEN-SIM': 36, # 2011 @@ -375,9 +398,26 @@ def create_record(dataset_full_name, doi_info, recid_info, eos_dir, das_dir, mcm rec['recid'] = str(recid_info[dataset_full_name]) - # rec['relations'] = [] - # rec['relations']['title'] = '' # FIXME, 2016 Nano are childs of 2016 Mini - # rec['relations']['type'] = 'isChildOf' + if dataset_full_name.endswith('NANOAODSIM'): + # Query from mcm dict fails for an example dataset because Mini is v1 in mcm and v2 in dataset list + # Get it from das instead + #dataset_name_for_mini = get_from_deep_json(get_mcm_dict(dataset_full_name, mcm_dir), 'input_dataset') + dataset_name_for_mini = get_parent_dataset(dataset_full_name, das_dir) + relations_description = 'The corresponding MINIAODSIM dataset:' + relations_recid = str(recid_info[dataset_name_for_mini]) + relations_type = 'isParentOf' + else: + relations_description = 'The corresponding NANOAODSIM dataset:' + relations_recid = str(recid_info[dataset_name_for_nano]) + relations_type = 'isChildOf' + + rec['relations'] = [ + { + 'description': relations_description, + 'recid': relations_recid, + 'type': relations_type + } + ] rec['run_period'] = run_period diff --git a/cms-2016-simulated-datasets/code/mcm_store.py b/cms-2016-simulated-datasets/code/mcm_store.py index a920bb983..203087585 100644 --- a/cms-2016-simulated-datasets/code/mcm_store.py +++ b/cms-2016-simulated-datasets/code/mcm_store.py @@ -19,7 +19,8 @@ def mcm_downloader(dataset, mcm_dir): print("==> " + dataset + "\n==> Already exist. Skipping...") return - cmd = "curl -s -k https://cms-pdmv.cern.ch/mcm/public/restapi/requests/" + #cmd = "curl -s -k https://cms-pdmv.cern.ch/mcm/public/restapi/requests/" + cmd = "curl -s -k https://cms-pdmv-prod.web.cern.ch/mcm/public/restapi/requests/" mcm_dict = subprocess.run(cmd + "produces" + dataset, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) @@ -52,6 +53,8 @@ def mcm_downloader(dataset, mcm_dir): ### New 2016 # create a directory with the dataset name under mcm_dir + "/chain" # create dirs + if dataset.endswith('MINIAODSIM'): + return path = mcm_dir + "/chain/" + dataset.replace('/', '@') os.makedirs(path, exist_ok=True) @@ -66,7 +69,8 @@ def mcm_downloader(dataset, mcm_dir): # commands line: curl -L -s -b cookies.txt https://cms-pdmv.cern.ch/mcm/restapi/chained_requests/get/