DataBiosphere · chenchals · Jan 6, 2025 · Jan 2, 2025 · Jan 3, 2025 · Jan 6, 2025
diff --git a/tanagra-indexing-1.html b/tanagra-indexing-1.html
diff --git a/underlay/src/main/resources/config/criteria/sd/criteriaselector/agdGenotypeResult/agdGenotypeResult.json b/underlay/src/main/resources/config/criteria/sd/criteriaselector/agdGenotypeResult/agdGenotypeResult.json
@@ -0,0 +1,3 @@
+{
+  "attribute": "has_agd_genotype_result"
+}
diff --git a/underlay/src/main/resources/config/criteria/sd/criteriaselector/agdGenotypeResult/selector.json b/underlay/src/main/resources/config/criteria/sd/criteriaselector/agdGenotypeResult/selector.json
@@ -0,0 +1,15 @@
+{
+  "name": "agdGenotypeResult",
+  "displayName": "AGD Data Availability",
+  "isEnabledForCohorts": true,
+  "isEnabledForDataFeatureSets": false,
+  "display": {
+    "category": "BioVU",
+    "tags": null
+  },
+  "filterBuilder": "core.PrimaryEntityFilterBuilder",
+  "plugin": "attribute",
+  "pluginConfig": null,
+  "pluginConfigFile": "agdGenotypeResult.json",
+  "modifiers": null
+}
diff --git a/underlay/src/main/resources/config/criteria/sd/criteriaselector/deceased/deceased.json b/underlay/src/main/resources/config/criteria/sd/criteriaselector/deceased/deceased.json
@@ -0,0 +1,3 @@
+{
+  "attribute": "is_deceased"
+}
diff --git a/underlay/src/main/resources/config/criteria/sd/criteriaselector/deceased/selector.json b/underlay/src/main/resources/config/criteria/sd/criteriaselector/deceased/selector.json
@@ -0,0 +1,15 @@
+{
+  "name": "tanagra-deceased",
+  "displayName": "Deceased",
+  "isEnabledForCohorts": true,
+  "isEnabledForDataFeatureSets": false,
+  "display": {
+    "category": "Demographics",
+    "tags": null
+  },
+  "filterBuilder": "core.PrimaryEntityFilterBuilder",
+  "plugin": "attribute",
+  "pluginConfig": null,
+  "pluginConfigFile": "deceased.json",
+  "modifiers": null
+}
diff --git a/underlay/src/main/resources/config/datamapping/sd/entity/condition/all.sql b/underlay/src/main/resources/config/datamapping/sd/entity/condition/all.sql
@@ -0,0 +1,10 @@
+/* One row per OMOP concept in the Condition domain, with standard_concept relabelled as text. */
+SELECT
+    c.concept_id,
+    c.concept_name,
+    c.vocabulary_id,
+    c.concept_code,
+    /* 'S' -> 'Standard', NULL -> 'Source', any other flag -> 'Unknown' */
+    (CASE WHEN c.standard_concept = 'S' THEN 'Standard' WHEN c.standard_concept IS NULL THEN 'Source' ELSE 'Unknown' END) AS standard_concept
+FROM `${omopDataset}.concept` AS c
+WHERE c.domain_id = 'Condition'
diff --git a/underlay/src/main/resources/config/datamapping/sd/entity/condition/childParent.sql b/underlay/src/main/resources/config/datamapping/sd/entity/condition/childParent.sql
@@ -0,0 +1,12 @@
+/* Parent/child id pairs for the condition hierarchy: 'Subsumes' links where both ends are SNOMED concepts in the Condition domain. */
+SELECT
+  rel.concept_id_1 AS parent,
+  rel.concept_id_2 AS child
+FROM `${omopDataset}.concept_relationship` AS rel
+INNER JOIN `${omopDataset}.concept` AS parent_concept
+  ON parent_concept.concept_id = rel.concept_id_1
+INNER JOIN `${omopDataset}.concept` AS child_concept
+  ON child_concept.concept_id = rel.concept_id_2
+WHERE rel.relationship_id = 'Subsumes'
+  AND parent_concept.domain_id = 'Condition' AND child_concept.domain_id = 'Condition'
+  AND parent_concept.vocabulary_id = 'SNOMED' AND child_concept.vocabulary_id = 'SNOMED'
diff --git a/underlay/src/main/resources/config/datamapping/sd/entity/condition/entity.json b/underlay/src/main/resources/config/datamapping/sd/entity/condition/entity.json
@@ -0,0 +1,25 @@
+{
+  "name": "condition",
+  "allInstancesSqlFile": "all.sql",
+  "attributes": [
+    { "name": "id", "dataType": "INT64", "valueFieldName": "concept_id" },
+    { "name": "name", "dataType": "STRING", "valueFieldName": "concept_name" },
+    { "name": "vocabulary", "dataType": "STRING", "valueFieldName": "vocabulary_id", "isComputeDisplayHint": true },
+    { "name": "standard_concept", "dataType": "STRING", "isComputeDisplayHint": true },
+    { "name": "concept_code", "dataType": "STRING" }
+  ],
+  "idAttribute": "id",
+  "textSearch": {
+    "attributes": [ "id", "name", "concept_code" ]
+  },
+  "hierarchies": [
+    {
+      "childParentIdPairsSqlFile": "childParent.sql",
+      "childIdFieldName": "child",
+      "parentIdFieldName": "parent",
+      "rootNodeIds": [ 441840 ],
+      "maxDepth": 20,
+      "keepOrphanNodes": false
+    }
+  ]
+}
diff --git a/underlay/src/main/resources/config/datamapping/sd/entity/conditionOccurrence/all.sql b/underlay/src/main/resources/config/datamapping/sd/entity/conditionOccurrence/all.sql
@@ -1,4 +1,4 @@
-SELECT
+SELECT DISTINCT
   co.condition_occurrence_id,
   co.person_id,
   p.person_source_value,

diff --git a/underlay/src/main/resources/config/datamapping/sd/entity/measurementOccurrence/all.sql b/underlay/src/main/resources/config/datamapping/sd/entity/measurementOccurrence/all.sql
@@ -1,4 +1,4 @@
-SELECT
+SELECT DISTINCT
   mo.measurement_id,
   mo.person_id,
   p.person_source_value,

diff --git a/underlay/src/main/resources/config/datamapping/sd/entity/observationOccurrence/all.sql b/underlay/src/main/resources/config/datamapping/sd/entity/observationOccurrence/all.sql
@@ -1,4 +1,4 @@
-SELECT
+SELECT DISTINCT
   o.observation_id,
   o.person_id,
   p.person_source_value,

diff --git a/underlay/src/main/resources/config/datamapping/sd/entity/person/all.sql b/underlay/src/main/resources/config/datamapping/sd/entity/person/all.sql
@@ -15,7 +15,11 @@ SELECT
     CASE WHEN x.nonshippable_ind = '1' THEN true WHEN x.nonshippable_ind = '0' THEN false ELSE null END AS biovu_sample_is_nonshippable,
     CASE WHEN x.plasma_ind = '1' THEN true WHEN x.plasma_ind = '0' THEN false ELSE null END AS biovu_sample_has_plasma,
     EXISTS
-        (SELECT 1 FROM `${omopDataset}.x_agd_queue` aq WHERE p.person_id = aq.person_id) AS has_agd_queue
+        (SELECT 1 FROM `${omopDataset}.x_agd_queue` aq WHERE p.person_id = aq.person_id) AS has_agd_queue,
+    EXISTS
+        (SELECT 1 FROM `${omopDataset}.genotype_result` gr WHERE p.person_id = gr.person_id
+                  AND gr.assay_name = 'agd whole genome sequencing'  ) AS has_agd_genotype_result,
+    CASE WHEN d.death_date IS NULL THEN false ELSE true END AS is_deceased /* d is LEFT JOINed from death: a NULL date means no death record, so the person is not deceased */
 
 FROM `${omopDataset}.person` p
 
@@ -28,6 +32,10 @@ ON rc.concept_id = p.race_source_concept_id
 LEFT JOIN `${omopDataset}.concept` ec
 ON ec.concept_id = p.ethnicity_concept_id
 
+LEFT JOIN (SELECT person_id, max(death_date) as death_date
+           FROM `${omopDataset}.death` GROUP BY person_id) d
+          ON (p.person_id = d.person_id)
+
 LEFT OUTER JOIN
     (
         /* Get rid of duplicate rows in x_biovu_sample_status. For example, person

diff --git a/underlay/src/main/resources/config/datamapping/sd/entity/person/entity.json b/underlay/src/main/resources/config/datamapping/sd/entity/person/entity.json
@@ -17,7 +17,9 @@
     { "name": "biovu_sample_is_compromised", "dataType": "BOOLEAN" },
     { "name": "biovu_sample_is_nonshippable", "dataType": "BOOLEAN" },
     { "name": "biovu_sample_has_plasma", "dataType": "BOOLEAN" },
-    { "name": "has_agd_queue", "dataType": "BOOLEAN" }
+    { "name": "has_agd_queue", "dataType": "BOOLEAN" },
+    { "name": "has_agd_genotype_result", "dataType": "BOOLEAN" },
+    { "name": "is_deceased", "dataType": "BOOLEAN" }
   ],
   "idAttribute": "id",
   "optimizeGroupByAttributes": [ "gender", "race", "age" ]

diff --git a/underlay/src/main/resources/config/indexer/emerge/emerge_20240603.json b/underlay/src/main/resources/config/indexer/emerge/emerge_20240603.json
@@ -20,6 +20,7 @@
     "serviceAccountEmail": "emerge-dev-dataflow-indexer@vumc-emerge-dev.iam.gserviceaccount.com",
     "gcsTempDirectory": "gs://dataflow-indexing-emerge-dev/temp/",
     "workerMachineType": "n1-standard-4",
-    "usePublicIps": false
+    "usePublicIps": false,
+    "vpcSubnetworkName": "default-dataflow"
   }
 }
diff --git a/underlay/src/main/resources/config/indexer/sd/sd020240831.json b/underlay/src/main/resources/config/indexer/sd/sd020240831.json
@@ -11,7 +11,7 @@
     },
     "indexData": {
       "projectId": "vumc-sd-dev",
-      "datasetId": "indexed_sd_20240831_1"
+      "datasetId": "indexed_sd_20240831_2"
     },
     "queryProjectId": "vumc-sd-dev",
     "dataLocation": "us-central1"
@@ -20,6 +20,7 @@
     "serviceAccountEmail": "[email protected]",
     "gcsTempDirectory": "gs://dataflow-indexing-sd-dev/temp/",
     "workerMachineType": "n1-standard-4",
-    "usePublicIps": false
+    "usePublicIps": false,
+    "vpcSubnetworkName": "us-central1"
   }
 }
diff --git a/underlay/src/main/resources/config/service/sd/sd_local.json b/underlay/src/main/resources/config/service/sd/sd_local.json
@@ -2,16 +2,16 @@
   "underlay": "sd",
   "bigQuery": {
     "sourceData": {
-      "projectId": "vumc-sd-test",
-      "datasetId": "sd_20230831",
+      "projectId": "vumc-sd-dev",
+      "datasetId": "sd_20240831",
       "sqlSubstitutions": {
-        "omopDataset": "vumc-sd-test.sd_20230831",
-        "staticTablesDataset": "vumc-sd-test.sd_20230831"
+        "omopDataset": "vumc-sd-test.sd_20240831",
+        "staticTablesDataset": "vumc-sd-test.sd_20240831"
       }
     },
     "indexData": {
-      "projectId": "vumc-sd-test",
-      "datasetId": "indexed_sd_20230831"
+      "projectId": "vumc-sd-dev",
+      "datasetId": "indexed_sd_20240831_2"
     },
     "queryProjectId": "vumc-sd-test",
     "dataLocation": "us-central1"

diff --git a/underlay/src/main/resources/config/underlay/sd/underlay.json b/underlay/src/main/resources/config/underlay/sd/underlay.json
@@ -4,7 +4,7 @@
   "entities": [
     "sd/person",
 
-    "omop/condition",
+    "sd/condition",
     "sd/conditionOccurrence",
     "omop/procedure",
     "sd/procedureOccurrence",
@@ -87,9 +87,11 @@
     "sd/gender",
     "sd/race",
     "sd/age",
+    "sd/deceased",
     "sd/bioVU",
     "sd/bioVUPlasma",
     "sd/agdSubjectQueue",
+    "sd/agdGenotypeResult",
     "sd/genotyping",
     "sd/measurement",
     "sd/documents",