diff --git a/sdg/inputs/InputOpenDataPlatform.py b/sdg/inputs/InputOpenDataPlatform.py
new file mode 100644
index 00000000..66820371
--- /dev/null
+++ b/sdg/inputs/InputOpenDataPlatform.py
@@ -0,0 +1,110 @@
+from urllib.request import urlopen
+import json
+from sdg.inputs import InputBase
+
+class InputOpenDataPlatform(InputBase):
+    """Input for indicator data served as a JSON payload with a 'data' list."""
+
+
+    def __init__(self, source=None, logging=None, unit_key='unit',
+                 unit_multiplier_key='scale', indicator_key='indicator',
+                 column_map=None, code_map=None):
+        """Store the payload source and the keys used to read each data item.
+
+        Parameters
+        ----------
+        source : string
+            Passed to fetch_file() to retrieve the JSON payload.
+        unit_key : string
+            Item key holding the unit; reused as its disaggregation key.
+        unit_multiplier_key : string
+            Item key holding the unit multiplier; reused the same way.
+        indicator_key : string
+            Item key holding a dict with the indicator 'id' and 'name'.
+        logging, column_map, code_map
+            Passed through unchanged to InputBase.
+        """
+        InputBase.__init__(self, logging=logging, column_map=column_map,
+                           code_map=code_map)
+        self.unit_key = unit_key
+        self.unit_multiplier_key = unit_multiplier_key
+        self.indicator_key = indicator_key
+        self.source = source
+
+
+    def execute(self, indicator_options):
+        """Fetch the payload and add one indicator per distinct indicator id."""
+        payload = self.fetch_file(self.source)
+        parsed = json.loads(payload)
+        indicators = {}
+        # Track the name per indicator; a single shared variable would label
+        # every indicator with the name of the last item in the payload.
+        indicator_names = {}
+        for item in parsed['data']:
+            indicator_id = self.normalize_indicator_id(self.get_indicator_id(item))
+            indicator_names[indicator_id] = self.normalize_indicator_name(
+                self.get_indicator_name(item), indicator_id)
+            rows = indicators.setdefault(indicator_id, [])
+            dimensions = self.get_dimensions(item)
+
+            # Values are positional: one per year from startDate to endDate.
+            for idx, year in enumerate(self.get_years(item)):
+                value = item['values'][idx]
+                if value is None:
+                    continue
+                disaggregations = dimensions.copy()
+                disaggregations[self.unit_key] = self.get_unit(item)
+                disaggregations[self.unit_multiplier_key] = self.get_unit_multiplier(item)
+                rows.append(self.get_row(year, value, disaggregations))
+
+        for indicator_id, rows in indicators.items():
+            df = self.create_dataframe(rows)
+            self.add_indicator(indicator_id, name=indicator_names[indicator_id],
+                               data=df, options=indicator_options)
+
+
+    def get_dimensions(self, row):
+        """Return {property: id} for each dict-valued property with an 'id'.
+
+        The goal, target and indicator properties are never dimensions.
+        """
+        dimensions = {}
+        non_dimension_props = ['goal', 'target', 'indicator']
+        for prop, value in row.items():
+            if prop in non_dimension_props:
+                continue
+            # Scalars, lists and dicts without an 'id' are not dimensions.
+            if isinstance(value, dict) and 'id' in value:
+                dimensions[prop] = value['id']
+        return dimensions
+
+
+    def get_years(self, row):
+        """Return every year from startDate to endDate, inclusive.
+
+        Only the first four characters of each date are read as the year.
+        """
+        start = int(row['startDate'][0:4])
+        end = int(row['endDate'][0:4])
+        # range() already yields a single year when start == end.
+        return list(range(start, end + 1))
+
+
+    def get_unit(self, row):
+        """Return the unit of a data item."""
+        return row[self.unit_key]
+
+
+    def get_unit_multiplier(self, row):
+        """Return the unit multiplier of a data item."""
+        return row[self.unit_multiplier_key]
+
+
+    def get_indicator_id(self, row):
+        """Return the raw indicator id of a data item."""
+        return row[self.indicator_key]['id']
+
+
+    def get_indicator_name(self, row):
+        """Return the raw indicator name of a data item."""
+        return row[self.indicator_key]['name']
diff --git a/sdg/inputs/InputOpenDataPlatformMeta.py b/sdg/inputs/InputOpenDataPlatformMeta.py
new file mode 100644
index 00000000..8eef5c9f
--- /dev/null
+++ b/sdg/inputs/InputOpenDataPlatformMeta.py
@@ -0,0 +1,124 @@
+import json
+import os
+import pandas as pd
+from sdg.inputs import InputBase
+
+class InputOpenDataPlatformMeta(InputBase):
+    """Input for indicator metadata served as a JSON payload with a 'data' list."""
+
+
+    def __init__(self, source=None, logging=None, indicator_key='SDG_INDICATOR',
+                 metadata_mapping=None):
+        """Store the payload source and how to read each metadata item.
+
+        Parameters
+        ----------
+        source : string
+            Passed to fetch_file() to retrieve the JSON payload.
+        logging
+            Passed through unchanged to InputBase.
+        indicator_key : string
+            Metadata key (after mapping) that holds the indicator id.
+        metadata_mapping : dict, string or None
+            Mapping of source keys to the keys to rename them to, either as
+            a dict or as a path to a two-column CSV file without a header.
+        """
+        InputBase.__init__(self, logging=logging)
+        self.indicator_key = indicator_key
+        self.metadata_mapping = metadata_mapping
+        self.source = source
+
+
+    def load_metadata_mapping(self):
+        """Resolve self.metadata_mapping into a dict (None becomes empty).
+
+        Raises
+        ------
+        Exception
+            If the mapping is neither a dict nor a path ending in '.csv'.
+        """
+        mapping = None
+        if self.metadata_mapping is None:
+            mapping = {}
+        elif isinstance(self.metadata_mapping, dict):
+            mapping = self.metadata_mapping
+        # Otherwise assume it is a path to a file.
+        else:
+            extension = os.path.splitext(self.metadata_mapping)[1]
+            if extension.lower() == '.csv':
+                # read_csv(squeeze=True) was removed in pandas 2.0, so squeeze
+                # the single remaining column into a Series explicitly.
+                table = pd.read_csv(self.metadata_mapping, header=None, index_col=0)
+                mapping = table.squeeze('columns').to_dict()
+
+        if mapping is None:
+            raise Exception('Format of metadata_mapping should be a dict or a path to a CSV file.')
+
+        self.metadata_mapping = mapping
+
+
+    def apply_metadata_mapping(self, metadata):
+        """Rename the keys of metadata in place according to the mapping."""
+        for human_key in self.metadata_mapping:
+            machine_key = self.metadata_mapping[human_key]
+            if human_key in metadata and human_key != machine_key:
+                # If it has already been mapped, skip it.
+                if machine_key in metadata and metadata[machine_key] is not None:
+                    continue
+                metadata[machine_key] = metadata[human_key]
+                del metadata[human_key]
+
+
+    def execute(self, indicator_options):
+        """Fetch the payload and add one indicator per parseable metadata item.
+
+        Items that cannot be parsed are reported and skipped.
+        """
+        self.load_metadata_mapping()
+        payload = self.fetch_file(self.source)
+        parsed = json.loads(payload)
+        for item in parsed['data']:
+            meta = item.copy()
+            self.apply_metadata_mapping(meta)
+            try:
+                indicator_id = self.normalize_indicator_id(self.get_indicator_id(meta))
+                # Safety check that we got a real indicator id. This is not an
+                # assert because asserts are stripped when running with -O.
+                if len(indicator_id.split('-')) < 3:
+                    raise ValueError('Invalid indicator id: ' + str(indicator_id))
+                indicator_name = self.normalize_indicator_name(self.get_indicator_name(meta), indicator_id)
+                self.add_indicator(indicator_id, name=indicator_name, meta=meta, options=indicator_options)
+            except Exception as e:
+                print('Unable to parse an indicator in InputOpenDataPlatformMeta. Error below:')
+                print(e)
+
+
+    def normalize_indicator_id(self, indicator_id):
+        """Normalize the id as InputBase does, dropping a spurious fourth part."""
+        normalized = InputBase.normalize_indicator_id(self, indicator_id)
+        # A common issue is when an extra fourth part gets added.
+        parts = normalized.split('-')
+        if len(parts) == 4 and len(parts[3]) > 2:
+            # Assume the part is unneeded.
+            parts.pop()
+            normalized = '-'.join(parts)
+        return normalized
+
+
+    def get_indicator_id(self, row):
+        """Return the indicator id of a metadata item.
+
+        Raises
+        ------
+        Exception
+            If the item has no indicator_key.
+        """
+        if self.indicator_key not in row:
+            print(row)
+            raise Exception('The indicator_key was not found in the metadata shown above.')
+        return row[self.indicator_key]
+
+
+    def get_indicator_name(self, row):
+        """Return the indicator id, which doubles as the indicator name."""
+        return self.get_indicator_id(row)
diff --git a/sdg/inputs/__init__.py b/sdg/inputs/__init__.py
index 2ab5ffb7..38db2a1d 100644
--- a/sdg/inputs/__init__.py
+++ b/sdg/inputs/__init__.py
@@ -16,3 +16,5 @@
 from .InputApi import InputApi
 from .InputCkan import InputCkan
 from .InputSdmxMl_UnitedNationsApi import InputSdmxMl_UnitedNationsApi
+from .InputOpenDataPlatform import InputOpenDataPlatform
+from .InputOpenDataPlatformMeta import InputOpenDataPlatformMeta
diff --git a/sdg/open_sdg.py b/sdg/open_sdg.py
index 97e46221..fb500b39 100644
--- a/sdg/open_sdg.py
+++ b/sdg/open_sdg.py
@@ -448,6 +448,8 @@ def open_sdg_input_from_dict(params, options):
         'InputExcelMeta',
         'InputYamlMeta',
         'InputSdmxMeta',
+        'InputOpenDataPlatform',
+        'InputOpenDataPlatformMeta',
         'InputWordMeta',
     ]
     if input_class not in allowed:
@@ -487,6 +489,10 @@ def open_sdg_input_from_dict(params, options):
         input_instance = sdg.inputs.InputYamlMeta(**params)
     elif input_class == 'InputSdmxMeta':
         input_instance = sdg.inputs.InputSdmxMeta(**params)
+    elif input_class == 'InputOpenDataPlatform':
+        input_instance = sdg.inputs.InputOpenDataPlatform(**params)
+    elif input_class == 'InputOpenDataPlatformMeta':
+        input_instance = sdg.inputs.InputOpenDataPlatformMeta(**params)
     elif input_class == 'InputWordMeta':
         input_instance = sdg.inputs.InputWordMeta(**params)