Skip to content

Commit

Permalink
Cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
ramari16 committed Oct 9, 2024
1 parent 44e6337 commit 60f34de
Showing 1 changed file with 10 additions and 87 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -16,20 +16,18 @@

public class VariantStore implements Serializable {
private static final long serialVersionUID = -6970128712587609414L;

// File names (relative to the genomic data directory) used by readInstance/writeInstance.
public static final String VARIANT_STORE_JAVABIN_FILENAME = "variantStore.javabin";
public static final String VARIANT_SPEC_INDEX_JAVABIN_FILENAME = "variantSpecIndex.javabin";

private static final Logger log = LoggerFactory.getLogger(VariantStore.class);

// Divisor used to turn a variant's position into a bucket key.
public static final int BUCKET_SIZE = 1000;

// Same value as VARIANT_SPEC_INDEX_JAVABIN_FILENAME; declared exactly once so the class compiles.
public static final String VARIANT_SPEC_INDEX_FILE = VARIANT_SPEC_INDEX_JAVABIN_FILENAME;

// Cached bitmask returned by emptyBitmask(); null until that method builds it.
private BigInteger emptyBitmask;
// Patient identifiers, exposed through getPatientIds()/setPatientIds().
private String[] patientIds;

// Transient: not serialized with this object. writeInstance() writes it to its own file as a List.
private transient String[] variantSpecIndex;

// Boxed, so it is null until set; getVariantStorageSize() unboxes it.
private Integer variantStorageSize;

// Returned as-is by both getVCFHeaders() and getHeaders().
private String[] vcfHeaders = new String[24];


// Keyed by contig name; each storage maps an Integer bucket key to a map of masks.
// NOTE(review): inner map keys look like variant specs ("contig,offset,...") — confirm against the loader.
private Map<String, FileBackedJsonIndexStorage<Integer, ConcurrentHashMap<String, VariableVariantMasks>>> variantMaskStorage = new TreeMap<>();

public Map<String, FileBackedJsonIndexStorage<Integer, ConcurrentHashMap<String, VariableVariantMasks>>> getVariantMaskStorage() {
Expand All @@ -48,7 +46,7 @@ public void setVariantSpecIndex(String[] variantSpecIndex) {
}

public static VariantStore readInstance(String genomicDataDirectory) throws IOException, ClassNotFoundException {
ObjectInputStream ois = new ObjectInputStream(new GZIPInputStream(new FileInputStream(genomicDataDirectory + "variantStore.javabin")));
ObjectInputStream ois = new ObjectInputStream(new GZIPInputStream(new FileInputStream(genomicDataDirectory + VARIANT_STORE_JAVABIN_FILENAME)));
VariantStore variantStore = (VariantStore) ois.readObject();
ois.close();
variantStore.getVariantMaskStorage().values().forEach(store -> {
Expand All @@ -60,14 +58,14 @@ public static VariantStore readInstance(String genomicDataDirectory) throws IOEx
}

public void writeInstance(String genomicDirectory) {
try (FileOutputStream fos = new FileOutputStream(new File(genomicDirectory, "variantStore.javabin"));
try (FileOutputStream fos = new FileOutputStream(new File(genomicDirectory, VARIANT_STORE_JAVABIN_FILENAME));
GZIPOutputStream gzos = new GZIPOutputStream(fos);
ObjectOutputStream oos = new ObjectOutputStream(gzos);) {
oos.writeObject(this);
} catch (IOException e) {
throw new UncheckedIOException(e);
}
try (FileOutputStream fos = new FileOutputStream(new File(genomicDirectory, "variantSpecIndex.javabin"));
try (FileOutputStream fos = new FileOutputStream(new File(genomicDirectory, VARIANT_SPEC_INDEX_JAVABIN_FILENAME));
GZIPOutputStream gzos = new GZIPOutputStream(fos);
ObjectOutputStream oos = new ObjectOutputStream(gzos);) {
oos.writeObject(Arrays.asList(variantSpecIndex));
Expand All @@ -76,41 +74,6 @@ public void writeInstance(String genomicDirectory) {
}
}

/**
 * Tallies, per contig, how many stored mask entries carry each kind of mask.
 *
 * <p>Each returned array has five slots:
 * <ol start="0">
 *   <li>entries with a non-null {@code heterozygousMask}</li>
 *   <li>entries with a non-null {@code homozygousMask}</li>
 *   <li>entries with a non-null {@code heterozygousNoCallMask}</li>
 *   <li>entries with a non-null {@code homozygousNoCallMask}</li>
 *   <li>entries with at least one of the four masks non-null</li>
 * </ol>
 *
 * @return a map sorted by contig name, from contig to its five counters
 */
public Map<String, int[]> countVariants() {
    TreeMap<String, int[]> counts = new TreeMap<>();
    for (Map.Entry<String, FileBackedJsonIndexStorage<Integer, ConcurrentHashMap<String, VariableVariantMasks>>> contigEntry
            : variantMaskStorage.entrySet()) {
        int[] contigCounts = new int[5];
        counts.put(contigEntry.getKey(), contigCounts);
        FileBackedJsonIndexStorage<Integer, ConcurrentHashMap<String, VariableVariantMasks>> storage = contigEntry.getValue();
        storage.keys().stream().forEach((Integer key) -> {
            // One pass per bucket: all five counters are updated from the same traversal.
            for (VariableVariantMasks masks : storage.get(key).values()) {
                boolean hasHetero = masks.heterozygousMask != null;
                boolean hasHomo = masks.homozygousMask != null;
                boolean hasHeteroNoCall = masks.heterozygousNoCallMask != null;
                boolean hasHomoNoCall = masks.homozygousNoCallMask != null;
                if (hasHetero) {
                    contigCounts[0]++;
                }
                if (hasHomo) {
                    contigCounts[1]++;
                }
                if (hasHeteroNoCall) {
                    contigCounts[2]++;
                }
                if (hasHomoNoCall) {
                    contigCounts[3]++;
                }
                if (hasHetero || hasHomo || hasHeteroNoCall || hasHomoNoCall) {
                    contigCounts[4]++;
                }
            }
        });
    }
    return counts;
}

/**
 * Returns the VCF header array held by this store.
 *
 * @return the internal array itself, not a copy
 */
public String[] getVCFHeaders() {
    return this.vcfHeaders;
}

/**
 * Returns the patient id array held by this store.
 *
 * @return the internal array itself, not a copy
 */
public String[] getPatientIds() {
    return this.patientIds;
}
Expand All @@ -124,11 +87,6 @@ public Optional<VariableVariantMasks> getMasks(String variant, VariantBucketHold
int chrOffset = Integer.parseInt(segments[1]) / BUCKET_SIZE;
String contig = segments[0];

// if (Level.DEBUG.equals(log.getEffectiveLevel())) {
// log.debug("Getting masks for variant " + variant + " Same bucket test: " + (bucketCache.lastValue != null
// && contig.contentEquals(bucketCache.lastContig) && chrOffset == bucketCache.lastChunkOffset));
// }

if (bucketCache.lastValue != null && contig.contentEquals(bucketCache.lastContig)
&& chrOffset == bucketCache.lastChunkOffset) {
// TODO : This is a temporary efficiency hack, NOT THREADSAFE!!!
Expand Down Expand Up @@ -166,10 +124,6 @@ public List<VariableVariantMasks> getMasksForDbSnpSpec(String variant) {
}
}

/**
 * Returns the same header array as {@code getVCFHeaders()}.
 *
 * @return the internal VCF header array itself, not a copy
 */
public String[] getHeaders() {
    return this.vcfHeaders;
}

public void open() {
variantMaskStorage.values().stream().forEach((fbbis -> {
if (fbbis != null) {
Expand All @@ -190,34 +144,6 @@ public void setPatientIds(String[] patientIds) {
this.patientIds = patientIds;
}

/**
 * Returns the variant storage size as a primitive.
 *
 * @return the unboxed value of the backing {@code Integer} field
 * @throws NullPointerException if the backing field is still {@code null}
 */
public int getVariantStorageSize() {
    return variantStorageSize.intValue();
}

/**
 * Stores the variant storage size.
 *
 * @param variantStorageSize the new size; boxed into the backing {@code Integer} field
 */
public void setVariantStorageSize(int variantStorageSize) {
    this.variantStorageSize = Integer.valueOf(variantStorageSize);
}

/**
 * Collects the masks of every variant on a contig whose position falls inside the given ranges.
 *
 * <p>Each offset is mapped to a bucket with {@code offset / BUCKET_SIZE}; only those buckets are read.
 * Within a bucket, a variant's position is taken from the second comma-separated segment of its key.
 *
 * @param contigForGene    key into the per-contig mask storage
 * @param offsetsForGene   offsets used to pick which buckets to read
 * @param rangeSetsForGene ranges a variant's position must fall in to be returned
 * @return the matching masks; empty when the contig has no storage or nothing matches
 * @throws IOException declared for callers; kept so existing catch blocks still compile
 */
@SuppressWarnings({"rawtypes", "unchecked"})
public List<VariantMasks> getMasksForRangesOfChromosome(String contigForGene, List<Integer> offsetsForGene,
        RangeSet<Integer> rangeSetsForGene) throws IOException {
    List<VariantMasks> masks = new ArrayList<VariantMasks>();

    // Raw on purpose: the field declares VariableVariantMasks values while this method exposes
    // VariantMasks, so a parameterized reference would not allow the cast below.
    // NOTE(review): confirm the two mask types are actually compatible at runtime.
    FileBackedJsonIndexStorage masksForChromosome = variantMaskStorage.get(contigForGene);
    if (masksForChromosome == null) {
        // Unknown contig: nothing to return (previously a NullPointerException).
        return masks;
    }

    Set<Integer> bucketsForGene = offsetsForGene.stream()
            .map(offset -> offset / BUCKET_SIZE)
            .collect(Collectors.toSet());

    for (Integer bucket : bucketsForGene) {
        Map<String, VariantMasks> variantMaskBucket = (Map<String, VariantMasks>) masksForChromosome.get(bucket);
        if (variantMaskBucket == null) {
            // Storage returned nothing for this bucket; skip it rather than fail.
            continue;
        }
        for (Map.Entry<String, VariantMasks> specAndMasks : variantMaskBucket.entrySet()) {
            String spec = specAndMasks.getKey();
            int offsetForVariant = Integer.parseInt(spec.split(",")[1]);
            if (rangeSetsForGene.contains(offsetForVariant)) {
                // Was System.out.println(spec); routed through the class logger instead.
                log.debug("Variant spec in requested range: {}", spec);
                masks.add(specAndMasks.getValue());
            }
        }
    }
    return masks;
}

public BigInteger emptyBitmask() {
if (emptyBitmask == null || emptyBitmask.testBit(emptyBitmask.bitLength() / 2)) {
String emptyVariantMask = "";
Expand All @@ -229,19 +155,16 @@ public BigInteger emptyBitmask() {
return emptyBitmask;
}

/**
 * Reads the variant spec index from {@code <genomicDataDirectory>/<VARIANT_SPEC_INDEX_FILE>}.
 *
 * <p>The file is read as a GZIP-compressed Java-serialized {@code List<String>}.
 *
 * @param genomicDataDirectory directory containing the index file
 * @return the variant specs as an array
 * @throws RuntimeException wrapping the underlying {@link IOException} (including a missing file)
 *         or {@link ClassNotFoundException}
 */
@SuppressWarnings("unchecked")
public static String[] loadVariantIndexFromFile(String genomicDataDirectory) {
    String indexPath = genomicDataDirectory + "/" + VARIANT_SPEC_INDEX_FILE;
    try (ObjectInputStream objectInputStream = new ObjectInputStream(new GZIPInputStream(new FileInputStream(indexPath)))) {
        List<String> variants = (List<String>) objectInputStream.readObject();
        return variants.toArray(new String[0]);
    } catch (IOException | ClassNotFoundException e) {
        // FileNotFoundException is an IOException, so one multi-catch covers every failure mode.
        throw new RuntimeException("Unable to load variant spec index from " + indexPath, e);
    }
}
}

}

0 comments on commit 60f34de

Please sign in to comment.