From 1005c12c21626e7cf99397a1de0f34532ca31e67 Mon Sep 17 00:00:00 2001 From: Timothy Jennison Date: Wed, 27 Nov 2024 16:54:28 +0000 Subject: [PATCH] Add support for rollup instances level hints This allows survey versions to be rolled up to questions (and topics). Currently there is only support for a single hierarchy but that limitation exists elsewhere already. This will require re-indexing once the config has been updated. --- docs/generated/UNDERLAY_CONFIG.md | 5 ++ .../terra/tanagra/indexing/JobSequencer.java | 16 ++++- .../WriteInstanceLevelDisplayHints.java | 61 ++++++++++++++----- .../job/dataflow/WriteRollupCounts.java | 16 ++--- .../job/dataflow/beam/BigQueryBeamUtils.java | 20 ++++++ .../job/dataflow/beam/CountUtils.java | 50 +++++++++++++++ ui/src/tanagra-underlay/underlayConfig.ts | 1 + .../terra/tanagra/underlay/ConfigReader.java | 15 +++-- .../bio/terra/tanagra/underlay/Underlay.java | 7 ++- .../entitygroup/CriteriaOccurrence.java | 39 ++++++++++-- .../serialization/SZCriteriaOccurrence.java | 7 +++ .../resultparsing/BQHintQueryResultsTest.java | 2 +- 12 files changed, 199 insertions(+), 40 deletions(-) diff --git a/docs/generated/UNDERLAY_CONFIG.md b/docs/generated/UNDERLAY_CONFIG.md index a0c93e8c8..27b035c2c 100644 --- a/docs/generated/UNDERLAY_CONFIG.md +++ b/docs/generated/UNDERLAY_CONFIG.md @@ -860,6 +860,11 @@ Names of attributes that we want to calculate instance-level hints for. Instance-level hints are ranges of possible values for a particular criteria instance. They are used to support criteria-specific modifiers (e.g. range of values for measurement code "glucose test"). +### SZOccurrenceEntity.attributesWithRollupInstanceLevelHints +**required** Set [ String ] + +Names of attributes that we want to calculate instance-level hints for which values should be rolled up and included in their ancestors hints as well. 
+ ### SZOccurrenceEntity.criteriaRelationship **required** [SZCriteriaRelationship](#szcriteriarelationship) diff --git a/indexer/src/main/java/bio/terra/tanagra/indexing/JobSequencer.java b/indexer/src/main/java/bio/terra/tanagra/indexing/JobSequencer.java index ecd329b45..dab107c7d 100644 --- a/indexer/src/main/java/bio/terra/tanagra/indexing/JobSequencer.java +++ b/indexer/src/main/java/bio/terra/tanagra/indexing/JobSequencer.java @@ -21,6 +21,7 @@ import bio.terra.tanagra.underlay.Underlay; import bio.terra.tanagra.underlay.entitymodel.Attribute; import bio.terra.tanagra.underlay.entitymodel.Entity; +import bio.terra.tanagra.underlay.entitymodel.Hierarchy; import bio.terra.tanagra.underlay.entitymodel.Relationship; import bio.terra.tanagra.underlay.entitymodel.entitygroup.CriteriaOccurrence; import bio.terra.tanagra.underlay.entitymodel.entitygroup.EntityGroup; @@ -421,6 +422,12 @@ public static SequencedJobSet getJobSetForCriteriaOccurrence( // TODO: Handle >1 occurrence entity. Entity occurrenceEntity = criteriaOccurrence.getOccurrenceEntities().get(0); if (criteriaOccurrence.hasInstanceLevelDisplayHints(occurrenceEntity)) { + // TODO: Handle >1 hierarchy. + Hierarchy hierarchy = + criteriaOccurrence.getCriteriaEntity().hasHierarchies() + ? criteriaOccurrence.getCriteriaEntity().getHierarchies().get(0) + : null; + Relationship occurrenceCriteriaRelationship = criteriaOccurrence.getOccurrenceCriteriaRelationship(occurrenceEntity.getName()); Relationship occurrencePrimaryRelationship = @@ -458,7 +465,14 @@ public static SequencedJobSet getJobSetForCriteriaOccurrence( .getInstanceLevelDisplayHints( criteriaOccurrence.getName(), occurrenceEntity.getName(), - criteriaOccurrence.getCriteriaEntity().getName()))); + criteriaOccurrence.getCriteriaEntity().getName()), + hierarchy, + hierarchy != null + ? 
underlay + .getIndexSchema() + .getHierarchyAncestorDescendant( + criteriaOccurrence.getCriteriaEntity().getName(), hierarchy.getName()) + : null)); } if (criteriaOccurrence.getCriteriaEntity().hasHierarchies()) { diff --git a/indexer/src/main/java/bio/terra/tanagra/indexing/job/dataflow/WriteInstanceLevelDisplayHints.java b/indexer/src/main/java/bio/terra/tanagra/indexing/job/dataflow/WriteInstanceLevelDisplayHints.java index ddd196c6a..cce28e012 100644 --- a/indexer/src/main/java/bio/terra/tanagra/indexing/job/dataflow/WriteInstanceLevelDisplayHints.java +++ b/indexer/src/main/java/bio/terra/tanagra/indexing/job/dataflow/WriteInstanceLevelDisplayHints.java @@ -3,25 +3,30 @@ import bio.terra.tanagra.api.shared.DataType; import bio.terra.tanagra.indexing.job.BigQueryJob; import bio.terra.tanagra.indexing.job.dataflow.beam.BigQueryBeamUtils; +import bio.terra.tanagra.indexing.job.dataflow.beam.CountUtils; import bio.terra.tanagra.indexing.job.dataflow.beam.DataflowUtils; import bio.terra.tanagra.query.sql.SqlField; import bio.terra.tanagra.query.sql.SqlQueryField; import bio.terra.tanagra.underlay.entitymodel.Attribute; import bio.terra.tanagra.underlay.entitymodel.Entity; +import bio.terra.tanagra.underlay.entitymodel.Hierarchy; import bio.terra.tanagra.underlay.entitymodel.Relationship; import bio.terra.tanagra.underlay.entitymodel.entitygroup.CriteriaOccurrence; import bio.terra.tanagra.underlay.indextable.ITEntityMain; +import bio.terra.tanagra.underlay.indextable.ITHierarchyAncestorDescendant; import bio.terra.tanagra.underlay.indextable.ITInstanceLevelDisplayHints; import bio.terra.tanagra.underlay.indextable.ITRelationshipIdPairs; import bio.terra.tanagra.underlay.serialization.SZIndexer; import com.google.api.services.bigquery.model.TableRow; import jakarta.annotation.Nullable; import java.io.Serializable; +import java.util.stream.StreamSupport; import org.apache.beam.sdk.Pipeline; import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO; import 
org.apache.beam.sdk.transforms.Count; import org.apache.beam.sdk.transforms.Distinct; import org.apache.beam.sdk.transforms.Filter; +import org.apache.beam.sdk.transforms.FlatMapElements; import org.apache.beam.sdk.transforms.MapElements; import org.apache.beam.sdk.transforms.Max; import org.apache.beam.sdk.transforms.Min; @@ -48,6 +53,8 @@ public class WriteInstanceLevelDisplayHints extends BigQueryJob { private final @Nullable ITRelationshipIdPairs occurrenceCriteriaRelationshipIdPairsTable; private final @Nullable ITRelationshipIdPairs occurrencePrimaryRelationshipIdPairsTable; private final ITInstanceLevelDisplayHints indexTable; + private final @Nullable Hierarchy hierarchy; + private final @Nullable ITHierarchyAncestorDescendant ancestorDescendantTable; @SuppressWarnings("checkstyle:ParameterNumber") public WriteInstanceLevelDisplayHints( @@ -59,7 +66,9 @@ public WriteInstanceLevelDisplayHints( ITEntityMain primaryEntityIndexTable, @Nullable ITRelationshipIdPairs occurrenceCriteriaRelationshipIdPairsTable, @Nullable ITRelationshipIdPairs occurrencePrimaryRelationshipIdPairsTable, - ITInstanceLevelDisplayHints indexTable) { + ITInstanceLevelDisplayHints indexTable, + @Nullable Hierarchy hierarchy, + @Nullable ITHierarchyAncestorDescendant ancestorDescendantTable) { super(indexerConfig); this.criteriaOccurrence = criteriaOccurrence; this.occurrenceEntity = occurrenceEntity; @@ -69,6 +78,8 @@ public WriteInstanceLevelDisplayHints( this.occurrenceCriteriaRelationshipIdPairsTable = occurrenceCriteriaRelationshipIdPairsTable; this.occurrencePrimaryRelationshipIdPairsTable = occurrencePrimaryRelationshipIdPairsTable; this.indexTable = indexTable; + this.hierarchy = hierarchy; + this.ancestorDescendantTable = ancestorDescendantTable; } @Override @@ -119,8 +130,8 @@ public void run(boolean isDryRun) { readInRelationshipIdPairs( pipeline, occCriIdPairsSql, entityAIdColumnName, entityBIdColumnName); - // Build a query to select all occurrence-criteria id pairs, and the 
pipeline steps to read the - // results and build a (occurrence id, criteria id) KV PCollection. + // Build a query to select all occurrence-primary id pairs, and the pipeline steps to read the + // results and build a (occurrence id, primary id) KV PCollection. String occPriIdPairsSql = getQueryRelationshipIdPairs( entityAIdColumnName, @@ -134,17 +145,32 @@ public void run(boolean isDryRun) { readInRelationshipIdPairs( pipeline, occPriIdPairsSql, entityAIdColumnName, entityBIdColumnName); + PCollection> rollupOccCriIdPairKVs = null; + if (hierarchy != null + && criteriaOccurrence.hasRollupInstanceLevelDisplayHints(occurrenceEntity)) { + PCollection> descendantAncestorRelationshipsPC = + BigQueryBeamUtils.readDescendantAncestorRelationshipsFromBQ( + pipeline, ancestorDescendantTable); + + // Expand the set of occurrences to include a repeat for each ancestor. + rollupOccCriIdPairKVs = + CountUtils.repeatOccurrencesForHints(occCriIdPairKVs, descendantAncestorRelationshipsPC); + } + final PCollection> finalRollupOccCriIdPairKVs = rollupOccCriIdPairKVs; + criteriaOccurrence .getAttributesWithInstanceLevelDisplayHints(occurrenceEntity) .forEach( - attribute -> { + (attribute, rollup) -> { + PCollection> idPairsKVs = + rollup ? finalRollupOccCriIdPairKVs : occCriIdPairKVs; if (attribute.isValueDisplay()) { LOGGER.info("enum val hint: {}", attribute.getName()); - enumValHint(occCriIdPairKVs, occPriIdPairKVs, occIdRowKVs, attribute); + enumValHint(idPairsKVs, occPriIdPairKVs, occIdRowKVs, attribute); } else if (DataType.INT64.equals(attribute.getDataType()) || DataType.DOUBLE.equals(attribute.getDataType())) { LOGGER.info("numeric range hint: {}", attribute.getName()); - numericRangeHint(occCriIdPairKVs, occIdRowKVs, attribute); + numericRangeHint(idPairsKVs, occIdRowKVs, attribute); } // TODO: Calculate display hints for other data types. 
}); @@ -287,13 +313,15 @@ private void numericRangeHint( occIdAndNumValCriId .apply(Filter.by(cogb -> cogb.getValue().getAll(numValTag).iterator().hasNext())) .apply( - MapElements.into( + FlatMapElements.into( TypeDescriptors.kvs(TypeDescriptors.longs(), TypeDescriptors.doubles())) .via( - cogb -> - KV.of( - cogb.getValue().getOnly(criIdTag), - cogb.getValue().getOnly(numValTag)))); + cogb -> { + Iterable criIds = cogb.getValue().getAll(criIdTag); + return StreamSupport.stream(criIds.spliterator(), false) + .map((Long criId) -> KV.of(criId, cogb.getValue().getOnly(numValTag))) + .toList(); + })); // Compute numeric range for each criteriaId. PCollection numericRanges = numericRangeHint(criteriaValuePairs); @@ -361,23 +389,28 @@ private void enumValHint( .and(criIdTag, occCriIdPairs) .and(priIdTag, occPriIdPairs) .apply(CoGroupByKey.create()); + PCollection> criteriaEnumPrimaryPairs = occIdAndAttrsCriIdPriId .apply(Filter.by(cogb -> cogb.getValue().getAll(occAttrsTag).iterator().hasNext())) .apply( - MapElements.into( + FlatMapElements.into( TypeDescriptors.kvs( new TypeDescriptor() {}, TypeDescriptors.longs())) .via( cogb -> { - Long criId = cogb.getValue().getOnly(criIdTag); + Iterable criIds = cogb.getValue().getAll(criIdTag); Long priId = cogb.getValue().getOnly(priIdTag); TableRow occAttrs = cogb.getValue().getOnly(occAttrsTag); String enumValue = (String) occAttrs.get(enumValColName); String enumDisplay = (String) occAttrs.get(enumDisplayColName); - return KV.of(new IdEnumValue(criId, enumValue, enumDisplay), priId); + return StreamSupport.stream(criIds.spliterator(), false) + .map( + (Long criId) -> + KV.of(new IdEnumValue(criId, enumValue, enumDisplay), priId)) + .toList(); })); // Compute enum values and counts for each criteriaId. 
diff --git a/indexer/src/main/java/bio/terra/tanagra/indexing/job/dataflow/WriteRollupCounts.java b/indexer/src/main/java/bio/terra/tanagra/indexing/job/dataflow/WriteRollupCounts.java index 1bf0ed4c9..0a9e840da 100644 --- a/indexer/src/main/java/bio/terra/tanagra/indexing/job/dataflow/WriteRollupCounts.java +++ b/indexer/src/main/java/bio/terra/tanagra/indexing/job/dataflow/WriteRollupCounts.java @@ -263,21 +263,13 @@ private void writeFieldsToTempTable(boolean isDryRun) { // Optionally handle a hierarchy for the rollup entity. if (hierarchy != null) { - // Build a query to select all ancestor-descendant pairs from the ancestor-descendant table, - // and the pipeline step to read the results. - String ancestorDescendantSql = - "SELECT * FROM " + ancestorDescendantTable.getTablePointer().render(); - LOGGER.info("ancestor-descendant query: {}", ancestorDescendantSql); - PCollection> ancestorDescendantRelationshipsPC = - BigQueryBeamUtils.readTwoFieldRowsFromBQ( - pipeline, - ancestorDescendantSql, - ITHierarchyAncestorDescendant.Column.DESCENDANT.getSchema().getColumnName(), - ITHierarchyAncestorDescendant.Column.ANCESTOR.getSchema().getColumnName()); + PCollection> descendantAncestorRelationshipsPC = + BigQueryBeamUtils.readDescendantAncestorRelationshipsFromBQ( + pipeline, ancestorDescendantTable); // Expand the set of occurrences to include a repeat for each ancestor. idPairsPC = - CountUtils.repeatOccurrencesForHierarchy(idPairsPC, ancestorDescendantRelationshipsPC); + CountUtils.repeatOccurrencesForHierarchy(idPairsPC, descendantAncestorRelationshipsPC); } // Count the number of distinct occurrences per entity id. 
diff --git a/indexer/src/main/java/bio/terra/tanagra/indexing/job/dataflow/beam/BigQueryBeamUtils.java b/indexer/src/main/java/bio/terra/tanagra/indexing/job/dataflow/beam/BigQueryBeamUtils.java index bc876907d..b726895d8 100644 --- a/indexer/src/main/java/bio/terra/tanagra/indexing/job/dataflow/beam/BigQueryBeamUtils.java +++ b/indexer/src/main/java/bio/terra/tanagra/indexing/job/dataflow/beam/BigQueryBeamUtils.java @@ -3,6 +3,7 @@ import bio.terra.tanagra.api.shared.DataType; import bio.terra.tanagra.exception.SystemException; import bio.terra.tanagra.underlay.ColumnSchema; +import bio.terra.tanagra.underlay.indextable.ITHierarchyAncestorDescendant; import com.google.api.services.bigquery.model.TableFieldSchema; import com.google.api.services.bigquery.model.TableRow; import com.google.api.services.bigquery.model.TableSchema; @@ -20,8 +21,11 @@ import org.apache.beam.sdk.values.PCollection; import org.apache.beam.sdk.values.TypeDescriptors; import org.apache.commons.text.StringSubstitutor; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; public final class BigQueryBeamUtils { + private static final Logger LOGGER = LoggerFactory.getLogger(BigQueryBeamUtils.class); private BigQueryBeamUtils() {} @@ -65,6 +69,22 @@ public static PCollection> readTwoFieldRowsFromBQ( })); } + /** + * Build a query to select all descendant-ancestor pairs from the ancestor-descendant table, and + * the pipeline step to read the results. 
+ */ + public static PCollection> readDescendantAncestorRelationshipsFromBQ( + Pipeline pipeline, ITHierarchyAncestorDescendant ancestorDescendantTable) { + String descendantAncestorSql = + "SELECT * FROM " + ancestorDescendantTable.getTablePointer().render(); + LOGGER.info("descendant-ancestor query: {}", descendantAncestorSql); + return BigQueryBeamUtils.readTwoFieldRowsFromBQ( + pipeline, + descendantAncestorSql, + ITHierarchyAncestorDescendant.Column.DESCENDANT.getSchema().getColumnName(), + ITHierarchyAncestorDescendant.Column.ANCESTOR.getSchema().getColumnName()); + } + public static String getTableSqlPath(String projectId, String datasetId, String tableName) { final String template = "${projectId}:${datasetId}.${tableName}"; Map params = diff --git a/indexer/src/main/java/bio/terra/tanagra/indexing/job/dataflow/beam/CountUtils.java b/indexer/src/main/java/bio/terra/tanagra/indexing/job/dataflow/beam/CountUtils.java index 6a34d779c..c7e5d930b 100644 --- a/indexer/src/main/java/bio/terra/tanagra/indexing/job/dataflow/beam/CountUtils.java +++ b/indexer/src/main/java/bio/terra/tanagra/indexing/job/dataflow/beam/CountUtils.java @@ -4,6 +4,7 @@ import org.apache.beam.sdk.transforms.Count; import org.apache.beam.sdk.transforms.Distinct; import org.apache.beam.sdk.transforms.Flatten; +import org.apache.beam.sdk.transforms.KvSwap; import org.apache.beam.sdk.transforms.MapElements; import org.apache.beam.sdk.transforms.SimpleFunction; import org.apache.beam.sdk.transforms.Values; @@ -136,4 +137,53 @@ public static PCollection> repeatOccurrencesForHierarchy( .and(ancestorOccurrences) .apply(Flatten.pCollections()); } + + /** + * For each occurrence (occurrence, criteria), generate a new occurrence for each ancestor of the + * criteria node (occurrence, ancestor). + * + *

This is the same concept as repeatOccurrencesForHierarchy but over occurrence ids. + * + * @param occurrences a collection of all occurrences that we want to count and the criteria + * they're associated with + * @param descendantAncestor a collection of (descendant, ancestor) pairs for the criteria nodes + * that we want a count for. note that this is the expanded set of all transitive + * relationships in the hierarchy, not just the parent/child pairs + * @return an expanded collection of occurrences (occurrence, ancestor), where each occurrence has + * been repeated for each ancestor of its criteria node. note for later steps that this will + * contain duplicate keys + */ + public static PCollection> repeatOccurrencesForHints( + PCollection> occurrences, PCollection> descendantAncestor) { + // Remove duplicate occurrences. + PCollection> distinctOccurrences = + occurrences.apply( + "remove duplicate occurrences before repeating for hints", Distinct.create()); + + // Swap (occurrence, criteria) to (criteria, occurrence). Duplicate keys are allowed at this + // point. + PCollection> criteriaOccurrences = + distinctOccurrences.apply( + "swap (occurrence, criteria) to (criteria, occurrence)", KvSwap.create()); + + // JOIN: criteriaOccurrences (criteria, occurrence) INNER JOIN descendantAncestor (descendant, + // ancestor) + // ON criteria=descendant + // RESULT: criteriaToOccurrenceAndAncestor (criteria=descendant, (occurrence, ancestor)) + PCollection>> criteriaToOccurrenceAndAncestor = + Join.innerJoin( + "inner join occurrences with ancestors", criteriaOccurrences, descendantAncestor); + + // Get rid of the descendant node. That was only needed as the innerJoin field. + // RESULT: (occurrence, ancestor) + PCollection> occurrenceAncestors = + criteriaToOccurrenceAndAncestor.apply(Values.create()); + + // The descendant-ancestor pairs don't include a self-reference row (i.e. descendant=ancestor). 
+ // So to get the full set of occurrences, concatenate the original occurrences with the ancestor + // duplicates. + return PCollectionList.of(distinctOccurrences) + .and(occurrenceAncestors) + .apply(Flatten.pCollections()); + } } diff --git a/ui/src/tanagra-underlay/underlayConfig.ts b/ui/src/tanagra-underlay/underlayConfig.ts index c76413d17..194114599 100644 --- a/ui/src/tanagra-underlay/underlayConfig.ts +++ b/ui/src/tanagra-underlay/underlayConfig.ts @@ -152,6 +152,7 @@ export type SZMetadata = { export type SZOccurrenceEntity = { attributesWithInstanceLevelHints: string[]; + attributesWithRollupInstanceLevelHints: string[]; criteriaRelationship: SZCriteriaRelationship; occurrenceEntity: string; primaryRelationship: SZPrimaryRelationship; diff --git a/underlay/src/main/java/bio/terra/tanagra/underlay/ConfigReader.java b/underlay/src/main/java/bio/terra/tanagra/underlay/ConfigReader.java index 346268a48..f15a24828 100644 --- a/underlay/src/main/java/bio/terra/tanagra/underlay/ConfigReader.java +++ b/underlay/src/main/java/bio/terra/tanagra/underlay/ConfigReader.java @@ -321,11 +321,16 @@ private SZCriteriaOccurrence deserializeCriteriaOccurrence(String criteriaOccurr ? new HashSet<>() : szCriteriaOccurrence.occurrenceEntities; szCriteriaOccurrence.occurrenceEntities.forEach( - szOccurrenceEntity -> - szOccurrenceEntity.attributesWithInstanceLevelHints = - szOccurrenceEntity.attributesWithInstanceLevelHints == null - ? new HashSet<>() - : szOccurrenceEntity.attributesWithInstanceLevelHints); + szOccurrenceEntity -> { + szOccurrenceEntity.attributesWithInstanceLevelHints = + szOccurrenceEntity.attributesWithInstanceLevelHints == null + ? new HashSet<>() + : szOccurrenceEntity.attributesWithInstanceLevelHints; + szOccurrenceEntity.attributesWithRollupInstanceLevelHints = + szOccurrenceEntity.attributesWithRollupInstanceLevelHints == null + ? 
new HashSet<>() + : szOccurrenceEntity.attributesWithRollupInstanceLevelHints; + }); return szCriteriaOccurrence; } catch (IOException ioEx) { diff --git a/underlay/src/main/java/bio/terra/tanagra/underlay/Underlay.java b/underlay/src/main/java/bio/terra/tanagra/underlay/Underlay.java index 906436988..4fc190419 100644 --- a/underlay/src/main/java/bio/terra/tanagra/underlay/Underlay.java +++ b/underlay/src/main/java/bio/terra/tanagra/underlay/Underlay.java @@ -474,6 +474,7 @@ private static CriteriaOccurrence fromConfigCriteriaOccurrence( Map occurrenceCriteriaRelationships = new HashMap<>(); Map occurrencePrimaryRelationships = new HashMap<>(); Map> occurrenceAttributesWithInstanceLevelHints = new HashMap<>(); + Map> occurrenceAttributesWithRollupInstanceLevelHints = new HashMap<>(); szCriteriaOccurrence.occurrenceEntities.forEach( szOccurrenceEntity -> { // Get the occurrence entity. @@ -518,6 +519,9 @@ private static CriteriaOccurrence fromConfigCriteriaOccurrence( // Get the attributes with instance-level hints. occurrenceAttributesWithInstanceLevelHints.put( occurrenceEntity.getName(), szOccurrenceEntity.attributesWithInstanceLevelHints); + occurrenceAttributesWithRollupInstanceLevelHints.put( + occurrenceEntity.getName(), + szOccurrenceEntity.attributesWithRollupInstanceLevelHints); }); // Build the primary-criteria relationship. 
@@ -532,7 +536,8 @@ private static CriteriaOccurrence fromConfigCriteriaOccurrence( occurrenceCriteriaRelationships, occurrencePrimaryRelationships, primaryCriteriaRelationship, - occurrenceAttributesWithInstanceLevelHints); + occurrenceAttributesWithInstanceLevelHints, + occurrenceAttributesWithRollupInstanceLevelHints); } @VisibleForTesting diff --git a/underlay/src/main/java/bio/terra/tanagra/underlay/entitymodel/entitygroup/CriteriaOccurrence.java b/underlay/src/main/java/bio/terra/tanagra/underlay/entitymodel/entitygroup/CriteriaOccurrence.java index 0afbb7cac..c4f3078d9 100644 --- a/underlay/src/main/java/bio/terra/tanagra/underlay/entitymodel/entitygroup/CriteriaOccurrence.java +++ b/underlay/src/main/java/bio/terra/tanagra/underlay/entitymodel/entitygroup/CriteriaOccurrence.java @@ -6,6 +6,7 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; +import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; @@ -22,6 +23,8 @@ public class CriteriaOccurrence extends EntityGroup { private final Relationship primaryCriteriaRelationship; private final ImmutableMap> occurrenceAttributesWithInstanceLevelDisplayHints; + private final ImmutableMap> + occurrenceAttributesWithRollupInstanceLevelDisplayHints; @SuppressWarnings("checkstyle:ParameterNumber") public CriteriaOccurrence( @@ -32,7 +35,8 @@ public CriteriaOccurrence( Map occurrenceCriteriaRelationships, Map occurrencePrimaryRelationships, Relationship primaryCriteriaRelationship, - Map> occurrenceAttributesWithInstanceLevelDisplayHints) { + Map> occurrenceAttributesWithInstanceLevelDisplayHints, + Map> occurrenceAttributesWithRollupInstanceLevelDisplayHints) { super(name); this.criteriaEntity = criteriaEntity; this.occurrenceEntities = ImmutableList.copyOf(occurrenceEntities); @@ -46,6 +50,12 @@ public CriteriaOccurrence( .collect( Collectors.toMap( Entry::getKey, entry -> 
ImmutableSet.copyOf(entry.getValue())))); + this.occurrenceAttributesWithRollupInstanceLevelDisplayHints = + ImmutableMap.copyOf( + occurrenceAttributesWithRollupInstanceLevelDisplayHints.entrySet().stream() + .collect( + Collectors.toMap( + Entry::getKey, entry -> ImmutableSet.copyOf(entry.getValue())))); } @Override @@ -102,15 +112,32 @@ public Relationship getPrimaryCriteriaRelationship() { public boolean hasInstanceLevelDisplayHints(Entity occurrenceEntity) { return !occurrenceAttributesWithInstanceLevelDisplayHints + .get(occurrenceEntity.getName()) + .isEmpty() + || !occurrenceAttributesWithRollupInstanceLevelDisplayHints + .get(occurrenceEntity.getName()) + .isEmpty(); + } + + public boolean hasRollupInstanceLevelDisplayHints(Entity occurrenceEntity) { + return !occurrenceAttributesWithRollupInstanceLevelDisplayHints .get(occurrenceEntity.getName()) .isEmpty(); } - public ImmutableSet getAttributesWithInstanceLevelDisplayHints( + public ImmutableMap getAttributesWithInstanceLevelDisplayHints( Entity occurrenceEntity) { - return ImmutableSet.copyOf( - occurrenceAttributesWithInstanceLevelDisplayHints.get(occurrenceEntity.getName()).stream() - .map(occurrenceEntity::getAttribute) - .collect(Collectors.toSet())); + Map merged = + new HashMap<>( + occurrenceAttributesWithInstanceLevelDisplayHints + .get(occurrenceEntity.getName()) + .stream() + .collect(Collectors.toMap(occurrenceEntity::getAttribute, entry -> false))); + merged.putAll( + occurrenceAttributesWithRollupInstanceLevelDisplayHints + .get(occurrenceEntity.getName()) + .stream() + .collect(Collectors.toMap(occurrenceEntity::getAttribute, entry -> true))); + return ImmutableMap.copyOf(merged); } } diff --git a/underlay/src/main/java/bio/terra/tanagra/underlay/serialization/SZCriteriaOccurrence.java b/underlay/src/main/java/bio/terra/tanagra/underlay/serialization/SZCriteriaOccurrence.java index 718e0b013..8a58a4524 100644 --- 
a/underlay/src/main/java/bio/terra/tanagra/underlay/serialization/SZCriteriaOccurrence.java +++ b/underlay/src/main/java/bio/terra/tanagra/underlay/serialization/SZCriteriaOccurrence.java @@ -73,6 +73,13 @@ public static class OccurrenceEntity { + "code \"glucose test\").") public Set attributesWithInstanceLevelHints; + @AnnotatedField( + name = "SZOccurrenceEntity.attributesWithRollupInstanceLevelHints", + markdown = + "Names of attributes that we want to calculate instance-level hints for which values " + + "should be rolled up and included in their ancestors hints as well.") + public Set attributesWithRollupInstanceLevelHints; + @AnnotatedClass( name = "SZCriteriaRelationship", markdown = diff --git a/underlay/src/test/java/bio/terra/tanagra/query/bigquery/resultparsing/BQHintQueryResultsTest.java b/underlay/src/test/java/bio/terra/tanagra/query/bigquery/resultparsing/BQHintQueryResultsTest.java index 74f98ed15..c2f3abc70 100644 --- a/underlay/src/test/java/bio/terra/tanagra/query/bigquery/resultparsing/BQHintQueryResultsTest.java +++ b/underlay/src/test/java/bio/terra/tanagra/query/bigquery/resultparsing/BQHintQueryResultsTest.java @@ -124,7 +124,7 @@ void instanceLevelHint() { assertTrue( criteriaOccurrence .getAttributesWithInstanceLevelDisplayHints(hintedEntity) - .contains(attribute)); + .containsKey(attribute)); if (hintInstance.isRangeHint()) { assertTrue( List.of(DataType.INT64, DataType.DOUBLE)