Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Inputs for data/metadata from AfDB's Open Data Portal #243

Closed
wants to merge 13 commits into from
80 changes: 80 additions & 0 deletions sdg/inputs/InputOpenDataPlatform.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
from urllib.request import urlopen
import json
from sdg.inputs import InputBase

class InputOpenDataPlatform(InputBase):
    """Input that reads indicator data from an Open Data Platform JSON payload.

    The payload is expected to be a JSON object with a ``data`` list. Each
    item in that list is read for: an indicator object (``id`` and ``name``),
    ``startDate``/``endDate`` strings whose first four characters are a year,
    a ``values`` list, a unit, a unit multiplier, and any number of further
    properties that are treated as disaggregation dimensions when they are
    objects carrying an ``id``.
    """

    def __init__(self, source=None, logging=None, unit_key='unit',
                 unit_multiplier_key='scale', indicator_key='indicator',
                 column_map=None, code_map=None):
        """Constructor for InputOpenDataPlatform.

        Parameters
        ----------
        source : string
            Location of the JSON payload, passed as-is to ``fetch_file``.
        logging : passed through to InputBase
        unit_key : string
            Property of each data item holding the unit; also used as the
            name of the unit disaggregation.
        unit_multiplier_key : string
            Property of each data item holding the unit multiplier; also
            used as the name of the unit multiplier disaggregation.
        indicator_key : string
            Property of each data item holding the indicator object.
        column_map : passed through to InputBase
        code_map : passed through to InputBase
        """
        InputBase.__init__(self, logging=logging, column_map=column_map,
                           code_map=code_map)
        self.unit_key = unit_key
        self.unit_multiplier_key = unit_multiplier_key
        self.indicator_key = indicator_key
        self.source = source

    def execute(self, indicator_options):
        """Fetch the source, parse it and add one indicator per indicator id.

        Parameters
        ----------
        indicator_options : passed through to ``add_indicator``
        """
        payload = self.fetch_file(self.source)
        parsed = json.loads(payload)

        rows_by_indicator = {}
        # Names are tracked per indicator id. Relying on a single variable
        # left over from the parsing loop would give every indicator the
        # name of whichever item happened to be parsed last.
        names_by_indicator = {}

        for item in parsed['data']:
            indicator_id = self.normalize_indicator_id(self.get_indicator_id(item))
            if indicator_id not in rows_by_indicator:
                rows_by_indicator[indicator_id] = []
                names_by_indicator[indicator_id] = self.normalize_indicator_name(
                    self.get_indicator_name(item), indicator_id)
            dimensions = self.get_dimensions(item)

            # The n-th value belongs to the n-th year of the item's range.
            # Indexing (rather than zip) keeps the IndexError when there are
            # fewer values than years, so misaligned data is not silently
            # truncated.
            values = item['values']
            for idx, year in enumerate(self.get_years(item)):
                value = values[idx]
                if value is None:
                    continue
                disaggregations = dimensions.copy()
                disaggregations[self.unit_key] = self.get_unit(item)
                disaggregations[self.unit_multiplier_key] = self.get_unit_multiplier(item)
                rows_by_indicator[indicator_id].append(
                    self.get_row(year, value, disaggregations))

        for indicator_id, rows in rows_by_indicator.items():
            df = self.create_dataframe(rows)
            self.add_indicator(indicator_id, name=names_by_indicator[indicator_id],
                               data=df, options=indicator_options)

    def get_dimensions(self, row):
        """Collect the disaggregation dimensions of a data item.

        Every property whose value can be subscripted with ``'id'`` is
        treated as a dimension, except the goal, target and indicator
        properties.

        Parameters
        ----------
        row : dict
            A single item from the payload's ``data`` list.

        Returns
        -------
        dict
            Property name mapped to the ``id`` of its value.
        """
        # The configured indicator key is excluded alongside the literal
        # 'indicator' so that a customised key is not mistaken for a
        # dimension.
        non_dimension_props = {'goal', 'target', 'indicator', self.indicator_key}
        dimensions = {}
        for prop, value in row.items():
            if prop in non_dimension_props:
                continue
            try:
                dimensions[prop] = value['id']
            except (TypeError, KeyError, IndexError):
                # Scalars, lists, None and objects without an 'id' are not
                # dimensions; skip them.
                continue
        return dimensions

    def get_years(self, row):
        """Return every year from the item's start year to its end year, inclusive.

        Parameters
        ----------
        row : dict
            A data item with ``startDate`` and ``endDate`` strings that begin
            with a four-digit year.

        Returns
        -------
        list of int
            Empty when the end year precedes the start year.
        """
        start = int(row['startDate'][0:4])
        end = int(row['endDate'][0:4])
        return list(range(start, end + 1))

    def get_unit(self, row):
        """Return the value stored under ``unit_key`` in the data item."""
        return row[self.unit_key]

    def get_unit_multiplier(self, row):
        """Return the value stored under ``unit_multiplier_key`` in the data item."""
        return row[self.unit_multiplier_key]

    def get_indicator_id(self, row):
        """Return the ``id`` of the data item's indicator object."""
        return row[self.indicator_key]['id']

    def get_indicator_name(self, row):
        """Return the ``name`` of the data item's indicator object."""
        return row[self.indicator_key]['name']
83 changes: 83 additions & 0 deletions sdg/inputs/InputOpenDataPlatformMeta.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
import json
import os
import pandas as pd
from sdg.inputs import InputBase

class InputOpenDataPlatformMeta(InputBase):
    """Input that reads indicator metadata from an Open Data Platform JSON payload.

    The payload is expected to be a JSON object with a ``data`` list. Each
    item of that list is treated as the metadata of one indicator, after its
    keys have optionally been renamed through ``metadata_mapping``.
    """

    def __init__(self, source=None, logging=None, indicator_key='SDG_INDICATOR',
                 metadata_mapping=None):
        """Constructor for InputOpenDataPlatformMeta.

        Parameters
        ----------
        source : string
            Location of the JSON payload, passed as-is to ``fetch_file``.
        logging : passed through to InputBase
        indicator_key : string
            Metadata key (looked up after the mapping has been applied) that
            holds the indicator id.
        metadata_mapping : dict, string or None
            Either a dict of source key -> target key, or a path to a
            header-less CSV file whose first column is the source key.
            None means no renaming.
        """
        InputBase.__init__(self, logging=logging)
        self.indicator_key = indicator_key
        self.metadata_mapping = metadata_mapping
        self.source = source

    def load_metadata_mapping(self):
        """Resolve ``self.metadata_mapping`` into a dict, in place.

        Raises
        ------
        Exception
            If the mapping is neither None, a dict, nor a path ending
            in ``.csv``.
        """
        mapping = None
        if self.metadata_mapping is None:
            mapping = {}
        elif isinstance(self.metadata_mapping, dict):
            mapping = self.metadata_mapping
        # Otherwise assume it is a path to a file.
        else:
            extension = os.path.splitext(self.metadata_mapping)[1]
            if extension.lower() == '.csv':
                # read_csv's "squeeze" argument was deprecated in pandas 1.4
                # and removed in 2.0; squeezing the resulting frame along
                # its columns is the documented replacement.
                frame = pd.read_csv(self.metadata_mapping, header=None, index_col=0)
                mapping = frame.squeeze('columns').to_dict()

        if mapping is None:
            raise Exception('Format of metadata_mapping should be a dict or a path to a CSV file.')

        self.metadata_mapping = mapping

    def apply_metadata_mapping(self, metadata):
        """Rename the keys of ``metadata`` in place according to the mapping.

        A source key is left untouched when its target key is already
        present with a non-None value.

        Parameters
        ----------
        metadata : dict
            The metadata to modify.
        """
        for human_key, machine_key in self.metadata_mapping.items():
            if human_key not in metadata or human_key == machine_key:
                continue
            # If it has already been mapped, skip it.
            if metadata.get(machine_key) is not None:
                continue
            metadata[machine_key] = metadata.pop(human_key)

    def execute(self, indicator_options):
        """Fetch the source, parse it and add one indicator per metadata item.

        Items that cannot be parsed are reported on stdout and skipped, so a
        single bad item does not abort the whole import.

        Parameters
        ----------
        indicator_options : passed through to ``add_indicator``
        """
        self.load_metadata_mapping()
        payload = self.fetch_file(self.source)
        parsed = json.loads(payload)
        for item in parsed['data']:
            meta = item.copy()
            self.apply_metadata_mapping(meta)
            try:
                indicator_id = self.normalize_indicator_id(self.get_indicator_id(meta))
                # Safety check that we got a real indicator id. An explicit
                # raise is used because assert statements are stripped when
                # Python runs with -O, and carry no message.
                if len(indicator_id.split('-')) < 3:
                    raise ValueError(
                        'Indicator id "{}" does not have at least 3 parts.'.format(indicator_id))
                indicator_name = self.normalize_indicator_name(self.get_indicator_name(meta), indicator_id)
                self.add_indicator(indicator_id, name=indicator_name, meta=meta, options=indicator_options)
            except Exception as e:
                print('Unable to parse an indicator in InputOpenDataPlatformMeta. Error below:')
                print(e)

    def normalize_indicator_id(self, indicator_id):
        """Normalize an indicator id, dropping a spurious fourth part.

        Parameters
        ----------
        indicator_id : string
            The raw indicator id.

        Returns
        -------
        string
            The id as normalized by InputBase, minus its fourth dash-separated
            part when that part is longer than two characters.
        """
        normalized = InputBase.normalize_indicator_id(self, indicator_id)
        # A common issue is when an extra fourth part gets added.
        parts = normalized.split('-')
        if len(parts) == 4 and len(parts[3]) > 2:
            # Assume the part is unneeded.
            normalized = '-'.join(parts[:3])
        return normalized

    def get_indicator_id(self, row):
        """Return the indicator id stored under ``indicator_key``.

        Raises
        ------
        Exception
            If ``indicator_key`` is missing; the metadata is printed first
            to help diagnose the mapping.
        """
        if self.indicator_key not in row:
            print(row)
            raise Exception('The indicator_key was not found in the metadata shown above.')
        return row[self.indicator_key]

    def get_indicator_name(self, row):
        """Return the indicator name, which for this input is the indicator id."""
        return self.get_indicator_id(row)
2 changes: 2 additions & 0 deletions sdg/inputs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,5 @@
from .InputApi import InputApi
from .InputCkan import InputCkan
from .InputSdmxMl_UnitedNationsApi import InputSdmxMl_UnitedNationsApi
from .InputOpenDataPlatform import InputOpenDataPlatform
from .InputOpenDataPlatformMeta import InputOpenDataPlatformMeta
6 changes: 6 additions & 0 deletions sdg/open_sdg.py
Original file line number Diff line number Diff line change
Expand Up @@ -448,6 +448,8 @@ def open_sdg_input_from_dict(params, options):
'InputExcelMeta',
'InputYamlMeta',
'InputSdmxMeta',
'InputOpenDataPlatform',
'InputOpenDataPlatformMeta',
'InputWordMeta',
]
if input_class not in allowed:
Expand Down Expand Up @@ -487,6 +489,10 @@ def open_sdg_input_from_dict(params, options):
input_instance = sdg.inputs.InputYamlMeta(**params)
elif input_class == 'InputSdmxMeta':
input_instance = sdg.inputs.InputSdmxMeta(**params)
elif input_class == 'InputOpenDataPlatform':
input_instance = sdg.inputs.InputOpenDataPlatform(**params)
elif input_class == 'InputOpenDataPlatformMeta':
input_instance = sdg.inputs.InputOpenDataPlatformMeta(**params)
elif input_class == 'InputWordMeta':
input_instance = sdg.inputs.InputWordMeta(**params)

Expand Down