Ejemplo n.º 1
0
  /**
   * Parse the current 'line' (read by a 'vcfFileIterator') into the fields of this entry.
   *
   * <p>The line is split on tabs into at most ten pieces: the first nine columns plus one remainder
   * string holding every genotype column. The remainder is stored unparsed in 'genotypeFieldsStr'.
   * The INFO column is stored as a raw string in 'infoStr' and 'info' is reset to null.
   *
   * <p>REF and ALT are upper-cased with {@code Locale.ROOT} so the result does not depend on the
   * JVM default locale (e.g. under a Turkish locale 'i' would otherwise become 'İ').
   *
   * @throws RuntimeException if the line has fewer than four tab-separated fields
   */
  public void parse() {
    // Split the line: only the first 9 fields are separated (i.e. genotypes are not parsed here)
    String fields[] = line.split("\t", 10);

    // Is line OK? Reject lines that do not have at least CHROM, POS, ID and REF.
    if (fields.length < 4) {
      throw new RuntimeException(
          "Impropper VCF entry: Not enough fields (missing tab separators?).\n" + line);
    }

    // Chromosome
    chromosomeName = fields[0].trim();
    Chromosome chromo = vcfFileIterator.getChromosome(chromosomeName);
    parent = chromo;
    vcfFileIterator.sanityCheckChromo(chromosomeName, chromo); // Sanity check

    // Start position (conversion of the text coordinate is delegated to the iterator)
    start = vcfFileIterator.parsePosition(vcfFileIterator.readField(fields, 1));

    // ID (e.g. might indicate dbSnp)
    id = vcfFileIterator.readField(fields, 2);

    // REF
    ref = vcfFileIterator.readField(fields, 3).toUpperCase(java.util.Locale.ROOT);
    strandMinus = false; // Strand is always positive (defined in VCF spec.)

    // ALT
    // NOTE(review): only four fields are guaranteed at this point; presumably readField tolerates
    // an index beyond the array length for columns 4-8 -- confirm against VcfFileIterator.
    altStr = vcfFileIterator.readField(fields, 4).toUpperCase(java.util.Locale.ROOT);
    parseAlts(altStr);

    // Quality: an empty value is stored as null
    String qStr = vcfFileIterator.readField(fields, 5);
    if (qStr.isEmpty()) {
      quality = null;
    } else {
      quality = Gpr.parseDoubleSafe(qStr);
    }

    // Filter
    filter = vcfFileIterator.readField(fields, 6);

    // INFO fields: keep the raw string and discard any previously parsed map
    infoStr = vcfFileIterator.readField(fields, 7);
    info = null;

    // End coordinate.
    // NOTE(review): the original comment states that start and end are anchored to the reference
    // genome and therefore based on REF, yet the ALT string is what is passed in -- confirm what
    // parseEnd() uses it for.
    parseEnd(altStr);

    // Genotype format: this column is optional, so it can be null or EMPTY ('.')
    format = null;
    if (fields.length > 8) {
      format = vcfFileIterator.readField(fields, 8);
    }

    // Genotype columns: keep the unsplit remainder of the line (lazy parse)
    if (fields.length > 9) {
      genotypeFieldsStr = fields[9];
    }
  }
Ejemplo n.º 2
0
 /**
  * Look up an INFO entry and return it as a 'double'.
  *
  * <p>The method name says 'Float' because that is how the norm names the data type, even though
  * the value is returned as a 'double'.
  *
  * <p>If 'info' is still null, parseInfo() is invoked before the lookup.
  *
  * @param key name of the INFO entry to read
  * @return the value converted by Gpr.parseDoubleSafe, or Double.NaN when 'key' has no value
  */
 public double getInfoFloat(String key) {
   if (info == null) {
     parseInfo();
   }

   final String rawValue = info.get(key);
   return (rawValue != null) ? Gpr.parseDoubleSafe(rawValue) : Double.NaN;
 }