From 60a9bdd5e8ce55ee70e20209b2bc383116e0310b Mon Sep 17 00:00:00 2001 From: Victor San Kho Lin Date: Sat, 11 Jan 2025 23:58:13 +1100 Subject: [PATCH] Improved Glue job script spreadsheet_library_tracking_metadata * Added step to strip leading/trailing whitespace and carriage returns from string values --- .../spreadsheet_library_tracking_metadata.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/infra/glue/workspace/spreadsheet_library_tracking_metadata/spreadsheet_library_tracking_metadata.py b/infra/glue/workspace/spreadsheet_library_tracking_metadata/spreadsheet_library_tracking_metadata.py index d4f5719..accabf1 100644 --- a/infra/glue/workspace/spreadsheet_library_tracking_metadata/spreadsheet_library_tracking_metadata.py +++ b/infra/glue/workspace/spreadsheet_library_tracking_metadata/spreadsheet_library_tracking_metadata.py @@ -117,6 +117,9 @@ def transform(): .name.keep() ) + # strip leading/trailing whitespace (incl. carriage returns) from all string columns + df = df.with_columns(pl.col(pl.String).str.strip_chars()) + # drop row iff all values are null # https://docs.pola.rs/api/python/stable/reference/dataframe/api/polars.DataFrame.drop_nulls.html df = df.filter(~pl.all_horizontal(pl.all().is_null()))