/** Uncompress VCF entry having genotypes in "HO,HE,NA" fields */ public VcfEntry uncompressGenotypes() { // Not compressed? Nothing to do if (!isCompressedGenotypes()) return this; // Get 'sparse' matrix entries String hoStr = getInfo(VCF_INFO_HOMS); String heStr = getInfo(VCF_INFO_HETS); String naStr = getInfo(VCF_INFO_NAS); // Parse 'sparse' entries List<String> sampleNames = getVcfFileIterator().getVcfHeader().getSampleNames(); if (sampleNames == null) throw new RuntimeException( "Cannot find sample names in VCF header. Unable to uncompress genotypes."); int numSamples = sampleNames.size(); byte gt[] = new byte[numSamples]; parseSparseGt(naStr, gt, -1); parseSparseGt(heStr, gt, 1); parseSparseGt(hoStr, gt, 2); // Remove info fields if (hoStr != null) rmInfo(VCF_INFO_HOMS); if (heStr != null) rmInfo(VCF_INFO_HETS); if (naStr != null) rmInfo(VCF_INFO_NAS); setFormat("GT"); // Create output string for (int i = 0; i < gt.length; i++) { String gtStr; switch (gt[i]) { case -1: gtStr = "./."; break; case 0: gtStr = "0/0"; break; case 1: gtStr = "0/1"; break; case 2: gtStr = "1/1"; break; default: throw new RuntimeException("Unknown code '" + gt[i] + "'"); } addGenotype(gtStr); } return this; }
/** Parse GENOTPYE entries */ void parseGenotypes() { if (isCompressedGenotypes()) { uncompressGenotypes(); } else { vcfGenotypes = new ArrayList<>(); // No genotype string? => Nothing to do if (genotypeFieldsStr == null) return; // Split genotypes and parse them genotypeFields = genotypeFieldsStr.split("\t"); for (int i = 0; i < genotypeFields.length; i++) { String gen = genotypeFields[i]; if (gen.equals(VcfFileIterator.MISSING)) gen = ""; addGenotype(gen); } } }