Skip to content

Commit

Permalink
Fix issue with formatted field names and a missing non-nullable field
Browse files Browse the repository at this point in the history
  • Loading branch information
ramari16 committed Nov 19, 2024
1 parent 87383a4 commit 25710e5
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 17 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@ public class PfbWriter implements ResultWriter {
private final String drsUrlTableName;
private SchemaBuilder.FieldAssembler<Schema> entityFieldAssembler;

private List<String> fields;
private List<String> originalFields;
private List<String> formattedFields;
private DataFileWriter<GenericRecord> dataFileWriter;
private File file;
private Schema entitySchema;
Expand Down Expand Up @@ -81,7 +82,8 @@ public PfbWriter(File tempFile, String queryId, DictionaryService dictionaryServ

@Override
public void writeHeader(String[] data) {
fields = Arrays.stream(data.clone()).map(this::formatFieldName).collect(Collectors.toList());
originalFields = List.of(data);
formattedFields = originalFields.stream().map(this::formatFieldName).collect(Collectors.toList());

drsUriSchema = SchemaBuilder.record(drsUrlTableName)
.fields()
Expand All @@ -91,7 +93,7 @@ public void writeHeader(String[] data) {

SchemaBuilder.FieldAssembler<Schema> patientRecords = SchemaBuilder.record(patientTableName)
.fields();
fields.forEach(field -> {
formattedFields.forEach(field -> {
if (isSingularField(field)) {
patientRecords.nullableString(field, "null");
} else {
Expand Down Expand Up @@ -127,17 +129,17 @@ private void writeDrsUris() {
GenericRecord entityRecord = new GenericData.Record(entitySchema);;
Map<String, Concept> conceptMap = Map.of();
try {
conceptMap = dictionaryService.getConcepts(fields).stream()
conceptMap = dictionaryService.getConcepts(originalFields).stream()
.collect(Collectors.toMap(Concept::conceptPath, Function.identity()));
} catch (RuntimeException e) {
log.error("Error fetching DRS URIs from dictionary service");
}

for (String field : fields) {
for (int i = 0; i < formattedFields.size(); i++) {
GenericRecord drsUriData = new GenericData.Record(drsUriSchema);
drsUriData.put("concept_path", field);
drsUriData.put("concept_path", formattedFields.get(i));

Concept concept = conceptMap.get(field);
Concept concept = conceptMap.get(originalFields.get(i));
List<String> drsUris = List.of();
if (concept != null) {
Map<String, String> meta = concept.meta();
Expand All @@ -149,6 +151,7 @@ private void writeDrsUris() {

entityRecord.put("object", drsUriData);
entityRecord.put("name", drsUrlTableName);
entityRecord.put("id", "null");
entityRecord.put("relations", List.of());

try {
Expand All @@ -157,8 +160,6 @@ private void writeDrsUris() {
throw new UncheckedIOException(e);
}
}


}

private boolean isSingularField(String field) {
Expand All @@ -181,7 +182,7 @@ private void writeMetadata() {
GenericRecord entityRecord = new GenericData.Record(entitySchema);

List<GenericRecord> nodeList = new ArrayList<>();
for (String field : fields) {
for (String field : formattedFields) {
GenericRecord nodeData = new GenericData.Record(nodeSchema);
nodeData.put("name", field);
nodeData.put("ontology_reference", "");
Expand Down Expand Up @@ -213,21 +214,21 @@ public void writeEntity(Collection<String[]> entities) {
@Override
public void writeMultiValueEntity(Collection<List<List<String>>> entities) {
entities.forEach(entity -> {
if (entity.size() != fields.size()) {
if (entity.size() != formattedFields.size()) {
throw new IllegalArgumentException("Entity length much match the number of fields in this document");
}
GenericRecord patientData = new GenericData.Record(patientDataSchema);
String patientId = "";
for(int i = 0; i < fields.size(); i++) {
if ("patient_id".equals(fields.get(i))) {
for(int i = 0; i < formattedFields.size(); i++) {
if ("patient_id".equals(formattedFields.get(i))) {
patientId = (entity.get(i) != null && !entity.get(i).isEmpty()) ? entity.get(i).get(0) : "";
}
if (isSingularField(fields.get(i))) {
if (isSingularField(formattedFields.get(i))) {
String entityValue = (entity.get(i) != null && !entity.get(i).isEmpty()) ? entity.get(i).get(0) : "";
patientData.put(fields.get(i), entityValue);
patientData.put(formattedFields.get(i), entityValue);
} else {
List<String> fieldValue = entity.get(i) != null ? entity.get(i) : List.of();
patientData.put(fields.get(i), fieldValue);
patientData.put(formattedFields.get(i), fieldValue);
}
}

Expand Down
Original file line number Diff line number Diff line change
@@ -1,14 +1,17 @@
package edu.harvard.hms.dbmi.avillach.hpds.processing.io;

import edu.harvard.hms.dbmi.avillach.hpds.processing.dictionary.Concept;
import edu.harvard.hms.dbmi.avillach.hpds.processing.dictionary.DictionaryService;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.mockito.Mock;
import org.mockito.Mockito;
import org.mockito.junit.jupiter.MockitoExtension;

import java.io.File;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.UUID;

import static org.junit.jupiter.api.Assertions.*;
Expand All @@ -24,6 +27,9 @@ public class PfbWriterTest {
public void writeValidPFB() {
PfbWriter pfbWriter = new PfbWriter(new File("target/test-result.avro"), UUID.randomUUID().toString(), dictionaryService);

Mockito.when(dictionaryService.getConcepts(List.of("patient_id", "\\demographics\\age\\", "\\phs123\\stroke\\")))
.thenReturn(List.of(new Concept("\\demographics\\age\\", "age", Map.of("drs_uri", "a-drs.uri"))));

pfbWriter.writeHeader(new String[] {"patient_id", "\\demographics\\age\\", "\\phs123\\stroke\\"});
List<List<String>> nullableList = new ArrayList<>();
nullableList.add(List.of("123"));
Expand All @@ -40,7 +46,6 @@ public void writeValidPFB() {
List.of(List.of(), List.of("75"), List.of())
));
pfbWriter.close();
// todo: validate this programmatically
}

@Test
Expand Down

0 comments on commit 25710e5

Please sign in to comment.