diff --git a/orcavault/models/raw/link_library_external_sample.sql b/orcavault/models/raw/link_library_external_sample.sql
new file mode 100644
index 0000000..d2740f3
--- /dev/null
+++ b/orcavault/models/raw/link_library_external_sample.sql
@@ -0,0 +1,49 @@
+with source as (
+    -- Every ODS source that pairs a library id with an external sample id.
+    select library_id, external_sample_id from {{ source('ods', 'data_portal_labmetadata') }}
+    union
+    select library_id, external_sample_id from {{ source('ods', 'data_portal_limsrow') }}
+    union
+    select lib.library_id as library_id, smp.external_sample_id as external_sample_id from {{ source('ods', 'metadata_manager_library') }} as lib
+        inner join {{ source('ods', 'metadata_manager_sample') }} as smp on smp.orcabus_id = lib.sample_orcabus_id
+
+),
+
+cleaned as (
+    -- One row per (library_id, trimmed external_sample_id) pair where both ids are present.
+    select
+        distinct library_id, trim(external_sample_id) as external_sample_id
+    from
+        source
+    where
+        (library_id is not null and library_id <> '') and
+        -- Test the trimmed value: a whitespace-only id would otherwise be hashed as an empty string.
+        (external_sample_id is not null and trim(external_sample_id) <> '')
+),
+
+transformed as (
+    -- Hex-encoded SHA-256 of each id; these columns are the foreign keys to hub_external_sample and hub_library.
+    select
+        encode(sha256(cast(external_sample_id as bytea)), 'hex') as external_sample_hk,
+        encode(sha256(cast(library_id as bytea)), 'hex') as library_hk,
+        cast('{{ run_started_at }}' as timestamptz) as load_datetime,
+        (select 'lab') as record_source
+    from
+        cleaned
+
+),
+
+final as (
+    -- Link hash key: SHA-256 over the two 64-character hex hash keys concatenated (external sample first).
+    select
+        encode(sha256(concat(external_sample_hk, library_hk)::bytea), 'hex') as library_external_sample_hk,
+        external_sample_hk,
+        library_hk,
+        load_datetime,
+        record_source
+    from
+        transformed
+
+)
+
+select * from final
diff --git a/orcavault/models/raw/link_schema.yml b/orcavault/models/raw/link_schema.yml
index 4323a94..e736824 100644
--- a/orcavault/models/raw/link_schema.yml
+++ b/orcavault/models/raw/link_schema.yml
@@ -54,6 +54,32 @@ models:
       - name: record_source
         data_type: varchar(255)
 
+  - name: link_library_external_sample
+    config:
+      contract: { enforced: true }
+    constraints:
+      - type: primary_key
+        columns: [ library_external_sample_hk ]
+      - type: foreign_key
+        columns: [ external_sample_hk ]
+        to: ref('hub_external_sample')
+        to_columns: [ external_sample_hk ]
+      - type: foreign_key
+        columns: [ library_hk ]
+        to: ref('hub_library')
+        to_columns: [ library_hk ]
+    columns:
+      - name: library_external_sample_hk
+        data_type: char(64)
+      - name: external_sample_hk
+        data_type: char(64)
+      - name: library_hk
+        data_type: char(64)
+      - name: load_datetime
+        data_type: timestamptz
+      - name: record_source
+        data_type: varchar(255)
+
   - name: link_library_internal_subject
     config:
       contract: { enforced: true }