Skip to content

Commit

Permalink
Merge pull request #5 from umccr/add-prj-to-cnt-link
Browse files Browse the repository at this point in the history
Added raw schema link between the Project hub and the Contact hub
  • Loading branch information
victorskl authored Jan 4, 2025
2 parents 50b58ef + 59712df commit 39cf240
Show file tree
Hide file tree
Showing 4 changed files with 84 additions and 0 deletions.
1 change: 1 addition & 0 deletions dev/src/load.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,5 @@ PGPASSWORD=dev psql -h 0.0.0.0 -d orcavault -U dev <<EOF
\copy ods.metadata_manager_project from '/data/orcavault_ods_metadata_manager_project.csv' with (format csv, header true, delimiter ',');
\copy ods.metadata_manager_libraryprojectlink from '/data/orcavault_ods_metadata_manager_libraryprojectlink.csv' with (format csv, header true, delimiter ',');
\copy ods.metadata_manager_contact from '/data/orcavault_ods_metadata_manager_contact.csv' with (format csv, header true, delimiter ',');
\copy ods.metadata_manager_projectcontactlink from '/data/orcavault_ods_metadata_manager_projectcontactlink.csv' with (format csv, header true, delimiter ',');
EOF
7 changes: 7 additions & 0 deletions dev/src/ods.sql
Original file line number Diff line number Diff line change
Expand Up @@ -197,3 +197,10 @@ CREATE TABLE IF NOT EXISTS orcavault.ods.metadata_manager_contact
description varchar,
email varchar(254)
);

-- ODS table holding project-to-contact link rows: each row references one
-- contact and one project by their orcabus ids.
CREATE TABLE IF NOT EXISTS orcavault.ods.metadata_manager_projectcontactlink
(
    id                 bigint,
    contact_orcabus_id varchar NOT NULL,
    project_orcabus_id varchar NOT NULL
);
50 changes: 50 additions & 0 deletions orcavault/models/raw/link_project_contact.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
-- Raw link between the project hub and the contact hub.
--
-- Output columns:
--   project_contact_hk  sha256 hex of the concatenated contact_hk and project_hk
--   contact_hk          sha256 hex of the trimmed contact_id
--   project_hk          sha256 hex of the trimmed project_id
--   load_datetime       dbt run start time
--   record_source       constant 'lab'
with source as (

    -- Project / owner pairs from the data portal lab metadata table.
    select
        project_name as project_id,
        project_owner as contact_id
    from {{ source('ods', 'data_portal_labmetadata') }}

    -- "union all" is enough here: the cleaned CTE below applies "distinct"
    -- after trimming, so de-duplicating at each union step is redundant work.
    union all

    -- Project / owner pairs from the data portal LIMS table.
    select
        project_name as project_id,
        project_owner as contact_id
    from {{ source('ods', 'data_portal_limsrow') }}

    union all

    -- Project / contact pairs resolved through the metadata manager link table.
    -- NOTE(review): project_id and contact_id are left unqualified, as in the
    -- original model; presumably they live on the project and contact tables
    -- respectively - confirm and qualify them with prj. / cnt.
    select
        project_id,
        contact_id
    from {{ source('ods', 'metadata_manager_project') }} as prj
    inner join {{ source('ods', 'metadata_manager_projectcontactlink') }} as lnk
        on lnk.project_orcabus_id = prj.orcabus_id
    inner join {{ source('ods', 'metadata_manager_contact') }} as cnt
        on lnk.contact_orcabus_id = cnt.orcabus_id

),

cleaned as (

    -- Filter on the trimmed value, not the raw one: a space-only value is
    -- <> '' before trimming but becomes '' afterwards, and would otherwise be
    -- hashed into the link as an empty business key.
    select distinct
        trim(project_id) as project_id,
        trim(contact_id) as contact_id
    from
        source
    where
        nullif(trim(project_id), '') is not null
        and nullif(trim(contact_id), '') is not null

),

transformed as (

    -- Hash expressions are kept exactly as before so the keys stay comparable
    -- with the ones the hub models produce.
    -- NOTE(review): casting text to bytea goes through bytea input parsing, so
    -- a value containing a backslash may fail or hash differently - confirm
    -- whether convert_to() should be adopted across hubs and links together.
    select
        encode(sha256(cast(contact_id as bytea)), 'hex') as contact_hk,
        encode(sha256(cast(project_id as bytea)), 'hex') as project_hk,
        cast('{{ run_started_at }}' as timestamptz) as load_datetime,
        (select 'lab') as record_source
    from
        cleaned

),

final as (

    -- Both parent hash keys are fixed-length hex strings, so concatenating
    -- them without a delimiter cannot produce ambiguous link keys.
    select
        encode(sha256(cast(concat(contact_hk, project_hk) as bytea)), 'hex') as project_contact_hk,
        contact_hk,
        project_hk,
        load_datetime,
        record_source
    from
        transformed

)

select * from final
26 changes: 26 additions & 0 deletions orcavault/models/raw/link_schema.yml
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,32 @@ models:
- name: record_source
data_type: varchar(255)

# Link model relating hub_contact and hub_project, with an enforced contract.
- name: link_project_contact
  config:
    contract:
      enforced: true
  constraints:
    # The link's own hash key is the primary key.
    - type: primary_key
      columns:
        - project_contact_hk
    # Each parent hash key must exist in its hub.
    - type: foreign_key
      columns:
        - contact_hk
      to: ref('hub_contact')
      to_columns:
        - contact_hk
    - type: foreign_key
      columns:
        - project_hk
      to: ref('hub_project')
      to_columns:
        - project_hk
  columns:
    # The three hash keys are hex-encoded sha256 digests (64 characters).
    - name: project_contact_hk
      data_type: char(64)
    - name: contact_hk
      data_type: char(64)
    - name: project_hk
      data_type: char(64)
    - name: load_datetime
      data_type: timestamptz
    - name: record_source
      data_type: varchar(255)

- name: link_internal_to_external_subject
config:
contract: { enforced: true }
Expand Down

0 comments on commit 39cf240

Please sign in to comment.