diff --git a/orcavault/models/raw/link_internal_to_external_sample.sql b/orcavault/models/raw/link_internal_to_external_sample.sql
new file mode 100644
index 0000000..9abc67c
--- /dev/null
+++ b/orcavault/models/raw/link_internal_to_external_sample.sql
@@ -0,0 +1,55 @@
+-- Link between internal sample ids and external sample ids, collected from
+-- three ODS sources that each expose both columns.
+with source as (
+
+    -- union all: duplicates are removed once, after trimming, in "cleaned"
+    select sample_id, external_sample_id from {{ source('ods', 'data_portal_labmetadata') }}
+    union all
+    select sample_id, external_sample_id from {{ source('ods', 'data_portal_limsrow') }}
+    union all
+    select sample_id, external_sample_id from {{ source('ods', 'metadata_manager_sample') }}
+
+),
+
+cleaned as (
+
+    -- Filter on the trimmed value so whitespace-only external ids are dropped
+    -- instead of being trimmed to '' and hashed into a link row.
+    select
+        distinct sample_id, trim(external_sample_id) as external_sample_id
+    from
+        source
+    where
+        (sample_id is not null and sample_id <> '') and
+        (external_sample_id is not null and trim(external_sample_id) <> '')
+
+),
+
+transformed as (
+
+    -- Hex-encoded SHA-256 of each business key, plus load metadata.
+    select
+        encode(sha256(cast(external_sample_id as bytea)), 'hex') as external_sample_hk,
+        encode(sha256(cast(sample_id as bytea)), 'hex') as sample_hk,
+        cast('{{ run_started_at }}' as timestamptz) as load_datetime,
+        (select 'lab') as record_source
+    from
+        cleaned
+
+),
+
+final as (
+
+    -- Link key: SHA-256 over the two hash keys concatenated (external first).
+    select
+        encode(sha256(concat(external_sample_hk, sample_hk)::bytea), 'hex') as internal_external_sample_hk,
+        external_sample_hk,
+        sample_hk,
+        load_datetime,
+        record_source
+    from
+        transformed
+
+)
+
+select * from final
diff --git a/orcavault/models/raw/link_schema.yml b/orcavault/models/raw/link_schema.yml
index e736824..a8a3f60 100644
--- a/orcavault/models/raw/link_schema.yml
+++ b/orcavault/models/raw/link_schema.yml
@@ -235,3 +235,29 @@ models:
         data_type: timestamptz
       - name: record_source
         data_type: varchar(255)
+
+  - name: link_internal_to_external_sample
+    config:
+      contract: { enforced: true }
+    constraints:
+      - type: primary_key
+        columns: [ internal_external_sample_hk ]
+      - type: foreign_key
+        columns: [ external_sample_hk ]
+        to: ref('hub_external_sample')
+        to_columns: [ external_sample_hk ]
+      - type: foreign_key
+        columns: [ sample_hk ]
+        to: ref('hub_sample')
+        to_columns: [ sample_hk ]
+    columns:
+      - name: internal_external_sample_hk
+        data_type: char(64)
+      - name: external_sample_hk
+        data_type: char(64)
+      - name: sample_hk
+        data_type: char(64)
+      - name: load_datetime
+        data_type: timestamptz
+      - name: record_source
+        data_type: varchar(255)