/** Parse a 'line' from a 'vcfFileIterator' */ public void parse() { // Parse line String fields[] = line.split("\t", 10); // Only pare the fist 9 fields (i.e. do not parse genotypes) // Is line OK? if (fields.length >= 4) { // Chromosome and position. VCF files are one-base, so inOffset should be 1. chromosomeName = fields[0].trim(); // Chromosome Chromosome chromo = vcfFileIterator.getChromosome(chromosomeName); parent = chromo; vcfFileIterator.sanityCheckChromo(chromosomeName, chromo); // Sanity check // Start start = vcfFileIterator.parsePosition(vcfFileIterator.readField(fields, 1)); // ID (e.g. might indicate dbSnp) id = vcfFileIterator.readField(fields, 2); // REF ref = vcfFileIterator.readField(fields, 3).toUpperCase(); // Reference and change strandMinus = false; // Strand is always positive (defined in VCF spec.) // ALT altStr = vcfFileIterator.readField(fields, 4).toUpperCase(); parseAlts(altStr); // Quality String qStr = vcfFileIterator.readField(fields, 5); if (!qStr.isEmpty()) quality = Gpr.parseDoubleSafe(qStr); else quality = null; // Filter filter = vcfFileIterator.readField(fields, 6); // Filter parameters // INFO fields infoStr = vcfFileIterator.readField(fields, 7); info = null; // Start & End coordinates are anchored to the reference genome, thus based on REF field (ALT // is not taken into account) parseEnd(altStr); // Genotype format format = null; if (fields.length > 8) format = vcfFileIterator.readField( fields, 8); // This field is optional, So it can be null or EMPTY ('.') // Add genotype fields (lazy parse) if (fields.length > 9) genotypeFieldsStr = fields[9]; } else throw new RuntimeException( "Impropper VCF entry: Not enough fields (missing tab separators?).\n" + line); }
/**
 * Returns the INFO value stored under {@code key} as a {@code double}.
 *
 * <p>The method is called "Float" because that is the name the norm gives to this data type,
 * even though the Java return type is {@code double}. The INFO map is parsed on first use.
 *
 * @param key name of the INFO field to look up
 * @return the value converted by {@code Gpr.parseDoubleSafe}, or {@link Double#NaN} when the
 *     key has no value
 */
public double getInfoFloat(String key) {
  // Lazy parse: build the INFO map only when it is first needed
  if (info == null) {
    parseInfo();
  }

  String rawValue = info.get(key);
  if (rawValue != null) {
    return Gpr.parseDoubleSafe(rawValue);
  }
  return Double.NaN;
}