From 1f62b552636a22376f766e0a00775f5408ad3404 Mon Sep 17 00:00:00 2001 From: Victor San Kho Lin Date: Sun, 12 Jan 2025 00:16:50 +1100 Subject: [PATCH] Implemented OrcaVault psa schema * Persistent Staging Area (psa) is a layer in the data warehouse where data is kept archived and its change history is tracked. The data tables in this area also act as an intermediate storage location for downstream transformation. * Made use of the spreadsheet_library_tracking_metadata table as another data source to feed into the vault layer for change history and data consolidation. * Typically, psa data tables are processed by dbt with append-only incremental materialization, and the data is sourced from the tsa schema table counterpart. * Added `next.sh` script and `tsa.truncate_tables()` db function to simulate incremental data loading for the tsa to psa transformation in the local dev setup. --- dev/Makefile | 15 ++- dev/src/next.sh | 7 + dev/src/psa.sql | 38 ++++++ dev/src/tsa.sql | 23 ++++ orcavault/Makefile | 9 +- orcavault/dbt_project.yml | 3 + orcavault/models/psa/schema.yml | 60 +++++++++ .../spreadsheet_library_tracking_metadata.sql | 123 ++++++++++++++++++ 8 files changed, 276 insertions(+), 2 deletions(-) create mode 100644 dev/src/next.sh create mode 100644 dev/src/psa.sql create mode 100644 orcavault/models/psa/schema.yml create mode 100644 orcavault/models/psa/spreadsheet_library_tracking_metadata.sql diff --git a/dev/Makefile b/dev/Makefile index d38b67a..e4b214f 100644 --- a/dev/Makefile +++ b/dev/Makefile @@ -31,11 +31,24 @@ tsa: @docker compose cp ./src/tsa.sql postgres:/tmp/tsa.sql @docker compose exec -e PGPASSWORD=dev -it postgres psql -h 0.0.0.0 -d orcavault -U dev -f /tmp/tsa.sql -all: ods tsa +psa: + @docker compose cp ./src/psa.sql postgres:/tmp/psa.sql + @docker compose exec -e PGPASSWORD=dev -it postgres psql -h 0.0.0.0 -d orcavault -U dev -f /tmp/psa.sql + +all: ods tsa psa load: @docker compose cp ./src/load.sh postgres:/tmp/load.sh @docker compose exec -it postgres bash 
/tmp/load.sh +wait: + @sleep 5 + +reload: down up wait all load + +next: + @docker compose cp ./src/next.sh postgres:/tmp/next.sh + @docker compose exec -it postgres bash /tmp/next.sh + sync: @aws s3 sync s3://orcabus-test-data-843407916570-ap-southeast-2/orcavault/data/ data/ diff --git a/dev/src/next.sh b/dev/src/next.sh new file mode 100644 index 0000000..0987968 --- /dev/null +++ b/dev/src/next.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash + +PGPASSWORD=dev psql -h 0.0.0.0 -d orcavault -U dev -c 'SELECT tsa.truncate_tables();' + +PGPASSWORD=dev psql -h 0.0.0.0 -d orcavault -U dev <