Skip to content

Commit

Permalink
Merge pull request #158 from nikodemas/add_vega_to_hpc_monit
Browse files Browse the repository at this point in the history
Add new site to HPC monitoring
  • Loading branch information
nikodemas authored Mar 7, 2024
2 parents 76feb54 + ae807b8 commit fd28022
Showing 1 changed file with 21 additions and 18 deletions.
39 changes: 21 additions & 18 deletions src/python/CMSSpark/hpc_running_cores_and_corehr.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,11 @@
_BASE_HDFS_CONDOR = '/project/monitoring/archive/condor/raw/metric'

# Bottom-to-top bar stack order; a fixed order keeps the same color for each site in every plot
_HPC_SITES_STACK_ORDER = ['ANL', 'ANVIL', 'BSC', 'CINECA', 'HOREKA', 'NERSC', 'OSG', 'PSC', 'RWTH', 'SDSC', 'TACC']
_HPC_SITES_STACK_ORDER = ['ANL', 'ANVIL', 'BSC', 'CINECA', 'HOREKA', 'NERSC', 'OSG', 'PSC', 'RWTH', 'SDSC', 'TACC',
'VEGA']

# When adding new sites, check that the color palette has at least as many entries as the site list
DISCRETE_COLOR_MAP = {site: px.colors.qualitative.Pastel[i] for i, site in enumerate(_HPC_SITES_STACK_ORDER)}
DISCRETE_COLOR_MAP = {site: px.colors.qualitative.Light24[i] for i, site in enumerate(_HPC_SITES_STACK_ORDER)}

_VALID_DATE_FORMATS = ["%Y/%m/%d", "%Y-%m-%d", "%Y%m%d"]
_CSV_DIR = 'csv'
Expand Down Expand Up @@ -89,18 +90,19 @@ def get_raw_df(spark, start_date, end_date):
(col("RecordTime") >= (start_date.replace(tzinfo=timezone.utc).timestamp() * 1000)) &
(col("RecordTime") < (end_date.replace(tzinfo=timezone.utc).timestamp() * 1000))
).filter(
(col('Site') == 'T3_US_ANL') | # ANL
(col('Site') == 'T3_US_Anvil') | # ANVIL
(col('Site') == 'T3_US_NERSC') | # NERSC
(col('Site') == 'T3_US_OSG') | # OSG
(col('Site') == 'T3_US_PSC') | # PSC
(col('Site') == 'T3_US_SDSC') | # SDSC
(col('Site') == 'T3_US_TACC') | # TACC
((col('Site').endswith('_ES_PIC_BSC')) & (col('MachineAttrCMSSubSiteName0') == 'PIC-BSC')) | # BSC
((col('Site').endswith('_ES_PIC')) & (col('MachineAttrCMSSubSiteName0') == 'PIC-BSC')) | # BSC
((col('Site') == 'T1_IT_CNAF') & (col('MachineAttrCMSSubSiteName0') == 'CNAF-CINECA')) | # CINECA
((col('Site') == 'T1_DE_KIT') & (col('MachineAttrCMSSubSiteName0') == 'KIT-HOREKA')) | # HOREKA
((col('Site') == 'T2_DE_RWTH') & (col('MachineAttrCMSSubSiteName0') == 'RWTH-HPC')) # RWTH
(col('Site') == 'T3_US_ANL') # ANL
| (col('Site') == 'T3_US_Anvil') # ANVIL
| (col('Site') == 'T3_US_NERSC') # NERSC
| (col('Site') == 'T3_US_OSG') # OSG
| (col('Site') == 'T3_US_PSC') # PSC
| (col('Site') == 'T3_US_SDSC') # SDSC
| (col('Site') == 'T3_US_TACC') # TACC
| ((col('Site').endswith('_ES_PIC_BSC')) & (col('MachineAttrCMSSubSiteName0') == 'PIC-BSC')) # BSC
| ((col('Site').endswith('_ES_PIC')) & (col('MachineAttrCMSSubSiteName0') == 'PIC-BSC')) # BSC
| ((col('Site') == 'T1_IT_CNAF') & (col('MachineAttrCMSSubSiteName0') == 'CNAF-CINECA')) # CINECA
| ((col('Site') == 'T1_DE_KIT') & (col('MachineAttrCMSSubSiteName0') == 'KIT-HOREKA')) # HOREKA
| ((col('Site') == 'T2_DE_RWTH') & (col('MachineAttrCMSSubSiteName0') == 'RWTH-HPC')) # RWTH
| ((col('Site') == 'T1_IT_CNAF') & (col('MachineAttrCMSSubSiteName0') == 'CNAF-VEGA')) # VEGA
).filter(
col('Status').isin(['Running', 'Completed'])
).withColumn(
Expand All @@ -119,6 +121,7 @@ def get_raw_df(spark, start_date, end_date):
.when(col('MachineAttrCMSSubSiteName0') == 'CNAF-CINECA', lit("CINECA"))
.when(col('MachineAttrCMSSubSiteName0') == 'KIT-HOREKA', lit("HOREKA"))
.when(col('MachineAttrCMSSubSiteName0') == 'RWTH-HPC', lit("RWTH"))
.when(col('MachineAttrCMSSubSiteName0') == 'CNAF-VEGA', lit("VEGA"))
).withColumn(
"RequestCpus",
when(col("RequestCpus").isNotNull(), col("RequestCpus")).otherwise(lit(1)),
Expand Down Expand Up @@ -573,12 +576,12 @@ def dates_iterative(iterative_ndays_ago):
! In any scenario, it starts from 1st day of start month !
"""
# end date is midnight, 2 days before today
end_date = datetime.today().replace(tzinfo=timezone.utc) \
.replace(hour=0, minute=0, second=0, microsecond=0) - timedelta(days=2)
end_date = datetime.today().replace(tzinfo=timezone.utc).replace(hour=0, minute=0, second=0,
microsecond=0) - timedelta(days=2)

# start date is always the first day of a month
safe_start_month = datetime.today().replace(tzinfo=timezone.utc) \
.replace(day=1) - timedelta(days=iterative_ndays_ago)
safe_start_month = datetime.today().replace(tzinfo=timezone.utc).replace(day=1) - timedelta(
days=iterative_ndays_ago)
start_date = safe_start_month.replace(day=1, hour=0, minute=0, second=0, microsecond=0)

will_update_months_list = get_month_range(start_date=start_date, end_date=datetime.today())
Expand Down

0 comments on commit fd28022

Please sign in to comment.