Skip to content

Commit

Permalink
VDA-1935-1969 Update for agd and death tables (#1115)
Browse files Browse the repository at this point in the history
* Updated for agd-data-availability, deceased attributes

* Updated for deceased and vpc subnet for dataflow

* Updated queries to use SELECT DISTINCT, since the x_poa column could result in duplicate rows

* Updated person to join the death table, and added an sd condition entity to replace omop/condition in the sd underlay

* Updated to add ui-components

* Updated emerge and local
  • Loading branch information
chenchals authored Jan 6, 2025
1 parent 61e4378 commit 7241b8e
Show file tree
Hide file tree
Showing 17 changed files with 1,394 additions and 15 deletions.
1,282 changes: 1,282 additions & 0 deletions tanagra-indexing-1.html

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"attribute": "has_agd_genotype_result"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
"name": "agdGenotypeResult",
"displayName": "AGD Data Availability",
"isEnabledForCohorts": true,
"isEnabledForDataFeatureSets": false,
"display": {
"category": "BioVU",
"tags": null
},
"filterBuilder": "core.PrimaryEntityFilterBuilder",
"plugin": "attribute",
"pluginConfig": null,
"pluginConfigFile": "agdGenotypeResult.json",
"modifiers": null
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"attribute": "is_deceased"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
"name": "tanagra-deceased",
"displayName": "Deceased",
"isEnabledForCohorts": true,
"isEnabledForDataFeatureSets": false,
"display": {
"category": "Demographics",
"tags": null
},
"filterBuilder": "core.PrimaryEntityFilterBuilder",
"plugin": "attribute",
"pluginConfig": null,
"pluginConfigFile": "deceased.json",
"modifiers": null
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
/* Selects the concepts whose domain_id is 'Condition'.
   standard_concept is relabelled for display:
   NULL -> 'Source', 'S' -> 'Standard', anything else -> 'Unknown'. */
SELECT
    c.concept_id,
    c.concept_name,
    c.vocabulary_id,
    c.concept_code,
    CASE
        WHEN c.standard_concept = 'S' THEN 'Standard'
        WHEN c.standard_concept IS NULL THEN 'Source'
        ELSE 'Unknown'
    END AS standard_concept
FROM `${omopDataset}.concept` AS c
WHERE c.domain_id = 'Condition'
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
/* Parent/child concept id pairs taken from 'Subsumes' relationships in which
   both concepts are SNOMED concepts in the Condition domain. */
SELECT
    rel.concept_id_1 AS parent,
    rel.concept_id_2 AS child
FROM `${omopDataset}.concept_relationship` AS rel
INNER JOIN `${omopDataset}.concept` AS parent_concept
    ON parent_concept.concept_id = rel.concept_id_1
INNER JOIN `${omopDataset}.concept` AS child_concept
    ON child_concept.concept_id = rel.concept_id_2
WHERE rel.relationship_id = 'Subsumes'
    AND parent_concept.domain_id = 'Condition'
    AND child_concept.domain_id = 'Condition'
    AND parent_concept.vocabulary_id = 'SNOMED'
    AND child_concept.vocabulary_id = 'SNOMED'
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
{
"name": "condition",
"allInstancesSqlFile": "all.sql",
"attributes": [
{ "name": "id", "dataType": "INT64", "valueFieldName": "concept_id" },
{ "name": "name", "dataType": "STRING", "valueFieldName": "concept_name" },
{ "name": "vocabulary", "dataType": "STRING", "valueFieldName": "vocabulary_id", "isComputeDisplayHint": true },
{ "name": "standard_concept", "dataType": "STRING", "isComputeDisplayHint": true },
{ "name": "concept_code", "dataType": "STRING" }
],
"idAttribute": "id",
"textSearch": {
"attributes": [ "id", "name", "concept_code" ]
},
"hierarchies": [
{
"childParentIdPairsSqlFile": "childParent.sql",
"childIdFieldName": "child",
"parentIdFieldName": "parent",
"rootNodeIds": [ 441840 ],
"maxDepth": 20,
"keepOrphanNodes": false
}
]
}
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
SELECT
SELECT DISTINCT
co.condition_occurrence_id,
co.person_id,
p.person_source_value,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
SELECT
SELECT DISTINCT
mo.measurement_id,
mo.person_id,
p.person_source_value,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
SELECT
SELECT DISTINCT
o.observation_id,
o.person_id,
p.person_source_value,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,11 @@ SELECT
CASE WHEN x.nonshippable_ind = '1' THEN true WHEN x.nonshippable_ind = '0' THEN false ELSE null END AS biovu_sample_is_nonshippable,
CASE WHEN x.plasma_ind = '1' THEN true WHEN x.plasma_ind = '0' THEN false ELSE null END AS biovu_sample_has_plasma,
EXISTS
(SELECT 1 FROM `${omopDataset}.x_agd_queue` aq WHERE p.person_id = aq.person_id) AS has_agd_queue
(SELECT 1 FROM `${omopDataset}.x_agd_queue` aq WHERE p.person_id = aq.person_id) AS has_agd_queue,
EXISTS
(SELECT 1 FROM `${omopDataset}.genotype_result` gr WHERE p.person_id = gr.person_id
AND gr.assay_name = 'agd whole genome sequencing' ) AS has_agd_genotype_result,
/* d is LEFT JOINed from the death table, so a non-NULL death_date means a death
   record was found for this person; a NULL death_date means none was found. */
CASE WHEN d.death_date IS NOT NULL THEN true ELSE false END AS is_deceased

FROM `${omopDataset}.person` p

Expand All @@ -28,6 +32,10 @@ ON rc.concept_id = p.race_source_concept_id
LEFT JOIN `${omopDataset}.concept` ec
ON ec.concept_id = p.ethnicity_concept_id

LEFT JOIN (SELECT person_id, max(death_date) as death_date
FROM `${omopDataset}.death` GROUP BY person_id) d
ON (p.person_id = d.person_id)

LEFT OUTER JOIN
(
/* Get rid of duplicate rows in x_biovu_sample_status. For example, person
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@
{ "name": "biovu_sample_is_compromised", "dataType": "BOOLEAN" },
{ "name": "biovu_sample_is_nonshippable", "dataType": "BOOLEAN" },
{ "name": "biovu_sample_has_plasma", "dataType": "BOOLEAN" },
{ "name": "has_agd_queue", "dataType": "BOOLEAN" }
{ "name": "has_agd_queue", "dataType": "BOOLEAN" },
{ "name": "has_agd_genotype_result", "dataType": "BOOLEAN" },
{ "name": "is_deceased", "dataType": "BOOLEAN" }
],
"idAttribute": "id",
"optimizeGroupByAttributes": [ "gender", "race", "age" ]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
"serviceAccountEmail": "emerge-dev-dataflow-indexer@vumc-emerge-dev.iam.gserviceaccount.com",
"gcsTempDirectory": "gs://dataflow-indexing-emerge-dev/temp/",
"workerMachineType": "n1-standard-4",
"usePublicIps": false
"usePublicIps": false,
"vpcSubnetworkName": "default-dataflow"
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
},
"indexData": {
"projectId": "vumc-sd-dev",
"datasetId": "indexed_sd_20240831_1"
"datasetId": "indexed_sd_20240831_2"
},
"queryProjectId": "vumc-sd-dev",
"dataLocation": "us-central1"
Expand All @@ -20,6 +20,7 @@
"serviceAccountEmail": "[email protected]",
"gcsTempDirectory": "gs://dataflow-indexing-sd-dev/temp/",
"workerMachineType": "n1-standard-4",
"usePublicIps": false
"usePublicIps": false,
"vpcSubnetworkName": "us-central1"
}
}
12 changes: 6 additions & 6 deletions underlay/src/main/resources/config/service/sd/sd_local.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,16 @@
"underlay": "sd",
"bigQuery": {
"sourceData": {
"projectId": "vumc-sd-test",
"datasetId": "sd_20230831",
"projectId": "vumc-sd-dev",
"datasetId": "sd_20240831",
"sqlSubstitutions": {
"omopDataset": "vumc-sd-test.sd_20230831",
"staticTablesDataset": "vumc-sd-test.sd_20230831"
"omopDataset": "vumc-sd-test.sd_20240831",
"staticTablesDataset": "vumc-sd-test.sd_20240831"
}
},
"indexData": {
"projectId": "vumc-sd-test",
"datasetId": "indexed_sd_20230831"
"projectId": "vumc-sd-dev",
"datasetId": "indexed_sd_20240831_2"
},
"queryProjectId": "vumc-sd-test",
"dataLocation": "us-central1"
Expand Down
4 changes: 3 additions & 1 deletion underlay/src/main/resources/config/underlay/sd/underlay.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"entities": [
"sd/person",

"omop/condition",
"sd/condition",
"sd/conditionOccurrence",
"omop/procedure",
"sd/procedureOccurrence",
Expand Down Expand Up @@ -87,9 +87,11 @@
"sd/gender",
"sd/race",
"sd/age",
"sd/deceased",
"sd/bioVU",
"sd/bioVUPlasma",
"sd/agdSubjectQueue",
"sd/agdGenotypeResult",
"sd/genotyping",
"sd/measurement",
"sd/documents",
Expand Down

0 comments on commit 7241b8e

Please sign in to comment.