Skip to content

Commit

Permalink
Implemented OrcaVault tsa schema
Browse files Browse the repository at this point in the history
* Transient Staging Area (tsa) is the layer of the data warehouse where data
  from multiple sources initially lands. The data tables in this area act as
  an intermediate storage location that feeds downstream transformation.
* Made use of the `spreadsheet_library_tracking_metadata` table as another data
  source to feed into the vault layer as well as the psa layer, for history
  tracking and record archiving purposes.
* Typically, tsa data tables are pre-processed and loaded into the database by a Glue job.
  • Loading branch information
victorskl committed Jan 11, 2025
1 parent dc4229e commit 5931327
Show file tree
Hide file tree
Showing 6 changed files with 63 additions and 2 deletions.
6 changes: 6 additions & 0 deletions dev/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,12 @@ ods:
@docker compose cp ./src/ods.sql postgres:/tmp/ods.sql
@docker compose exec -e PGPASSWORD=dev -it postgres psql -h 0.0.0.0 -d orcavault -U dev -f /tmp/ods.sql

# Create the tsa (Transient Staging Area) schema and its tables in the dev
# Postgres container: copy the DDL script into the container, then run it
# with psql against the orcavault database.
# ON_ERROR_STOP=1 makes psql stop and exit non-zero on the first failing
# statement, so a broken script fails this target instead of passing silently.
tsa:
	@docker compose cp ./src/tsa.sql postgres:/tmp/tsa.sql
	@docker compose exec -e PGPASSWORD=dev -it postgres psql -h 0.0.0.0 -d orcavault -U dev -v ON_ERROR_STOP=1 -f /tmp/tsa.sql

# Convenience target: applies every schema script by building both the ods
# and the tsa targets.
all: ods tsa

load:
@docker compose cp ./src/load.sh postgres:/tmp/load.sh
@docker compose exec -it postgres bash /tmp/load.sh
Expand Down
1 change: 1 addition & 0 deletions dev/src/load.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,5 @@ PGPASSWORD=dev psql -h 0.0.0.0 -d orcavault -U dev <<EOF
\copy ods.metadata_manager_libraryprojectlink from '/data/orcavault_ods_metadata_manager_libraryprojectlink.csv' with (format csv, header true, delimiter ',');
\copy ods.metadata_manager_contact from '/data/orcavault_ods_metadata_manager_contact.csv' with (format csv, header true, delimiter ',');
\copy ods.metadata_manager_projectcontactlink from '/data/orcavault_ods_metadata_manager_projectcontactlink.csv' with (format csv, header true, delimiter ',');
\copy tsa.spreadsheet_library_tracking_metadata from '/data/orcavault_tsa_spreadsheet_library_tracking_metadata.csv' with (format csv, header true, delimiter ',');
EOF
36 changes: 36 additions & 0 deletions dev/src/tsa.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
-- Transient Staging Area (tsa) schema for the orcavault database.
-- Every statement is guarded with IF NOT EXISTS, so the script can be re-run.

-- Echo the database this session is connected to.
SELECT current_database();

-- Create the schema (owned by the dev role) and make it the session default.
CREATE SCHEMA IF NOT EXISTS tsa AUTHORIZATION dev;
SET search_path = tsa;

-- Echo the schema that unqualified names now resolve to.
SELECT current_schema();

-- Landing table for the library tracking metadata spreadsheet.
-- All columns are plain varchar; no keys or constraints are declared.
CREATE TABLE IF NOT EXISTS orcavault.tsa.spreadsheet_library_tracking_metadata (
    assay                 varchar,
    comments              varchar,
    coverage              varchar,
    experiment_id         varchar,
    external_sample_id    varchar,
    external_subject_id   varchar,
    library_id            varchar,
    override_cycles       varchar,
    phenotype             varchar,
    project_name          varchar,
    project_owner         varchar,
    qpcr_id               varchar,
    quality               varchar,
    run                   varchar,
    sample_id             varchar,
    sample_name           varchar,
    samplesheet_sample_id varchar,
    source                varchar,
    subject_id            varchar,
    truseq_index          varchar,
    type                  varchar,
    workflow              varchar,
    r_rna                 varchar,
    study                 varchar,
    sheet_name            varchar
);
8 changes: 7 additions & 1 deletion orcavault/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,16 @@ psql:
# Delegate to the `ods` target of the Makefile in ../dev/.
# Each recipe line already runs in its own shell, so the `cd` does not leak.
ods:
	@cd ../dev/ && $(MAKE) ods

# Delegate to the `tsa` target of the Makefile in ../dev/.
# Each recipe line already runs in its own shell, so the `cd` does not leak.
tsa:
	@cd ../dev/ && $(MAKE) tsa

# Delegate to the `all` target of the Makefile in ../dev/.
# Each recipe line already runs in its own shell, so the `cd` does not leak.
all:
	@cd ../dev/ && $(MAKE) all

# Delegate to the `load` target of the Makefile in ../dev/.
# Each recipe line already runs in its own shell, so the `cd` does not leak.
load:
	@cd ../dev/ && $(MAKE) load

reload: down up ods load
# Runs down, up, ods, tsa and load as prerequisites, listed in the order they
# are meant to execute.
# NOTE(review): that order is only followed in a serial run; under `make -j`
# the prerequisites may run concurrently — confirm this is never run in parallel.
reload: down up ods tsa load

sync:
@(cd ../dev/ && $(MAKE) sync)
6 changes: 5 additions & 1 deletion orcavault/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ OrcaVault is a dbt project. It contains data warehouse models.
```
make up
make ps
make ods
make all
make psql
orcavault=> \l
orcavault=> \dn
Expand All @@ -16,6 +16,10 @@ orcavault=> set search_path to ods;
orcavault=> \dt
orcavault=> \d data_portal_labmetadata
orcavault=> select count(1) from data_portal_labmetadata;
orcavault=> set search_path to tsa;
orcavault=> \dt
orcavault=> \d spreadsheet_library_tracking_metadata
orcavault=> select count(1) from spreadsheet_library_tracking_metadata;
orcavault=> \q
```

Expand Down
8 changes: 8 additions & 0 deletions orcavault/models/tsa/sources.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# dbt source definitions for the Transient Staging Area (tsa) schema.
version: 2

sources:
  # Tables that land in orcavault.tsa and are referenced by downstream models.
  - name: tsa
    database: orcavault
    schema: tsa
    tables:
      - name: spreadsheet_library_tracking_metadata

0 comments on commit 5931327

Please sign in to comment.