Skip to content

Commit

Permalink
fix: fixed a bug where empty rows weren't added to the dataframe
Browse files (browse the repository at this point in the history)
  • Loading branch information
sg-s committed Nov 21, 2024
1 parent bef6e71 commit 6d67538
Showing 1 changed file with 35 additions and 12 deletions.
47 changes: 35 additions & 12 deletions src/data_hub/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -1033,7 +1033,7 @@ def get_dataframe(
data[column["id"]] = []

for row in rows:
# warning: add_row_to_data mutates file_ids
# warning: add_row_to_data mutates data, file_ids
# and reference_ids
add_row_to_data(
data=data,
Expand Down Expand Up @@ -1173,21 +1173,26 @@ def download_files(
pass


@beartype
def add_row_to_data(
*,
data: dict,
row,
row: dict,
columns: list,
file_ids: list,
reference_ids: list,
):
"""utility function to combine data from a row into a dataframe"""
row_data = _row_to_dict(
row_data = row_to_dict(
row,
file_ids=file_ids,
reference_ids=reference_ids,
)
if row_data is None:
for column in columns:
col_id = column["id"]
data[col_id].append(None)

return

data["ID"].append(row_data["ID"])
Expand All @@ -1204,21 +1209,39 @@ def add_row_to_data(
data[col_id].append(None)


def _row_to_dict(
@beartype
def row_to_dict(
row,
*,
file_ids: list,
reference_ids: list,
):
"""utility function to convert a row to a dictionary"""
if "fields" not in row.keys():
return None
file_ids: Optional[list] = None,
reference_ids: Optional[list] = None,
) -> dict:
"""convert a database row (as returned by api.list_database_rows) to a dictionary where keys are column IDs and values are the values in the row
fields = row.fields
Danger: this function mutates its inputs.
If file_ids and reference_ids are provided, they are mutated in place.
Args:
row: database row (as returned by api.list_database_rows)
file_ids: list of file IDs, will be mutated in-place
reference_ids: list of reference IDs, will be mutated in-place
Returns:
dict
"""

if file_ids is None:
file_ids = []
if reference_ids is None:
reference_ids = []

values = {"ID": row.hid, "Validation Status": row.validationStatus}
if fields is None:

if "fields" not in row.keys() or row.fields is None:
return values

fields = row.fields

for field in fields:
if "systemType" in field.keys() and field.systemType == "bodyDocument":
continue
Expand Down

0 comments on commit 6d67538

Please sign in to comment.