Beispiel #1
0
  /**
   * Returns the variants described by this VCF entry, building them on the first call.
   *
   * <p>The result is stored in the {@code variants} field and the same list instance is returned
   * by later calls. When the entry is not a variant, the variants are created with a {@code null}
   * ALT and their genotype is set to {@code "."}. Otherwise the variants are created once per
   * entry in {@code alts}; an ALT for which {@code isVariant(alt)} is false is passed on as
   * {@code null}.
   *
   * @return the list of variants for this entry
   */
  public List<Variant> variants() {
    // Already computed: hand back the cached list
    if (variants != null) {
      return variants;
    }

    variants = new LinkedList<>();
    Chromosome chromosome = (Chromosome) parent;

    if (isVariant()) {
      // At least one variant: build the variants for every ALT
      for (String altAllele : alts) {
        String effectiveAlt = isVariant(altAllele) ? altAllele : null;
        variants.addAll(variants(chromosome, start, ref, effectiveAlt, id));
      }
      return variants;
    }

    // Not a variant: create the variants without ALT and record '.' as their genotype
    List<Variant> nonVariants = variants(chromosome, start, ref, null, id);
    for (Variant nonVariant : nonVariants) {
      nonVariant.setGenotype(".");
    }
    variants.addAll(nonVariants);

    return variants;
  }
Beispiel #2
0
  /**
   * Annotates a VCF entry with data from matching database entries.
   *
   * <p>Every variant of {@code vcfEntry} (except huge structural variants) is queried against the
   * database. The query results are then used to collect IDs, the "exists" flag and INFO values,
   * which are finally written to {@code vcfEntry}.
   *
   * @param vcfEntry the entry to annotate
   * @return {@code true} if {@code annotateIds} or {@code annotateInfo} reported a change; the
   *     "exists" annotation does not influence the returned value
   * @throws IOException declared for the database access performed while annotating
   */
  public boolean annotate(VcfEntry vcfEntry) throws IOException {
    // ---
    // Phase 1: collect all matching database entries
    // Note that QueryResult.variantVcfEntry can be 'null'
    // ---
    List<QueryResult> queryResults = new LinkedList<>();
    Set<VcfEntry> uniqueVcfEntries = new HashSet<>();
    for (Variant variant : vcfEntry.variants()) {
      // Huge structural variants are not queried
      if (!variant.isStructuralHuge()) {
        Collection<VariantVcfEntry> dbMatches = query(variant);

        // Remember every database VcfEntry that was found
        for (VariantVcfEntry dbMatch : dbMatches) {
          uniqueVcfEntries.add(dbMatch.getVcfEntry());
        }

        // Keep the query together with its results
        QueryResult queryResult = new QueryResult(variant, dbMatches);
        queryResults.add(queryResult);
        if (debug) {
          Gpr.debug("Adding QueryResult: " + queryResult);
        }
      }
    }

    // Look for INFO fields that might not have been seen before
    if (useAllInfoFields) {
      for (VcfEntry dbVcfEntry : uniqueVcfEntries) {
        discoverInfoFields(dbVcfEntry);
      }
    }

    // INFO fields based on 'REF' data
    Map<String, String> infos = new HashMap<>();
    findDbInfoRef(infos, uniqueVcfEntries);

    // ---
    // Phase 2: gather annotation data from every query result
    // ---
    Set<String> idSet = new HashSet<>();
    boolean exists = false;
    for (QueryResult queryResult : queryResults) {
      if (debug) {
        Gpr.debug("Processing QueryResult: " + queryResult);
      }

      if (useId) {
        findDbId(idSet, queryResult);
      }
      if (existsInfoField != null && findDbExists(queryResult)) {
        exists = true;
      }
      if (useInfoFields) {
        findDbInfo(infos, queryResult);
      }
    }

    // ---
    // Phase 3: write the gathered data to the input entry
    // ---
    boolean idsAnnotated = annotateIds(vcfEntry, idSet);
    boolean infoAnnotated = annotateInfo(vcfEntry, infos);
    if (exists) {
      annotateExists(vcfEntry);
    }

    return idsAnnotated || infoAnnotated;
  }
Beispiel #3
0
  /**
   * Tells whether database entry {@code dbEntry} matches variant {@code var}.
   *
   * <p>Chromosome name, start and end must be equal. When {@code useRefAlt} is set, REF and ALT
   * must additionally be equal ignoring case.
   *
   * @param var the query variant
   * @param dbEntry the database entry to compare against
   * @return {@code true} if the entry matches the variant
   */
  protected boolean match(Variant var, VariantVcfEntry dbEntry) {
    // Coordinates must agree before anything else is looked at
    boolean sameCoordinates =
        var.getChromosomeName().equals(dbEntry.getChromosomeName())
            && var.getStart() == dbEntry.getStart()
            && var.getEnd() == dbEntry.getEnd();
    if (!sameCoordinates) {
      return false;
    }

    // Without REF/ALT comparison, matching coordinates are enough
    if (!useRefAlt) {
      return true;
    }

    // REF and ALT are compared case-insensitively
    return var.getReference().equalsIgnoreCase(dbEntry.getReference())
        && var.getAlt().equalsIgnoreCase(dbEntry.getAlt());
  }
Beispiel #4
0
  /**
   * Adds to {@code results} the VCF entries of node {@code idx} that intersect
   * {@code queryMarker}.
   *
   * <p>Nothing is done when {@code intersectFilePosStart[idx]} is {@code null}. An entry is added
   * at most once, as soon as any of its variants intersects the query. Scanning stops at the
   * first entry that starts after the query's end (this presumably relies on the entries being
   * read in coordinate order; confirm against {@code readEntries}).
   *
   * @param queryMarker interval being queried
   * @param idx index of the node whose entries are read
   * @param results collection receiving the matching entries
   */
  protected void queryIntersects(Interval queryMarker, int idx, Markers results) {
    if (intersectFilePosStart[idx] == null) {
      return;
    }
    if (debug) {
      Gpr.debug("queryIntersects\tidx: " + idx);
    }

    // Entries are read from disk and checked one by one
    for (VcfEntry entry : readEntries(idx)) {
      // Does any variant within this entry intersect the query marker?
      boolean intersects = false;
      for (Variant variant : entry.variants()) {
        if (variant.intersects(queryMarker)) {
          intersects = true;
          break; // One hit is enough, the entry is stored only once
        }
      }

      if (intersects) {
        if (debug) {
          Gpr.debug("\tAdding matching result: " + entry);
        }
        results.add(entry);
      }

      // Past the query's end coordinate: no need to look any further
      if (queryMarker.getEnd() < entry.getStart()) {
        return;
      }
    }
  }
Beispiel #5
0
  /**
   * Creates the variants described by a single REF/ALT pair of this VCF entry.
   *
   * <p>The ALT string is upper-cased (locale-independently) and then classified:
   *
   * <ul>
   *   <li>{@code null}, empty or equal to {@code reference}: non-variant;
   *   <li>symbolic alleles starting with {@code <DEL}, {@code <INV} or {@code <DUP}: structural
   *       variants, which use this entry's {@code end} coordinate; any other symbolic allele
   *       yields an empty list;
   *   <li>ALT containing {@code [} or {@code ]}: translocation (breakend);
   *   <li>ALT with the same length as {@code reference}: SNP, or MNP trimmed to the span between
   *       the first and the last differing base;
   *   <li>anything else: REF and ALT are aligned with {@code VcfRefAltAlign} and handled as a
   *       deletion, an insertion or a mixed variant.
   * </ul>
   *
   * <p>Every variant that is created gets the complete (upper-cased) ALT string as genotype,
   * including mixed variants whose coordinates and alleles are trimmed.
   *
   * @param chromo chromosome the variants are created on
   * @param start start coordinate of the REF allele
   * @param reference REF allele
   * @param alt ALT allele; may be {@code null} for a non-variant
   * @param id identifier passed on to the created variants
   * @return the created variants; never {@code null}, possibly empty
   * @throws RuntimeException if the alignment reports an unsupported variant type, or a
   *     deletion/insertion alignment that does not start with {@code '-'}/{@code '+'}
   */
  List<Variant> variants(Chromosome chromo, int start, String reference, String alt, String id) {
    List<Variant> list = null;

    // Locale.ROOT keeps the result independent of the default locale (e.g. a Turkish locale
    // would turn 'i' into a dotted capital and break the "<INV" / "<INS" style prefix checks)
    if (alt != null) alt = alt.toUpperCase(java.util.Locale.ROOT);

    if (alt == null || alt.isEmpty() || alt.equals(reference)) {
      // Non-variant
      list = Variant.factory(chromo, start, reference, null, id, false);
    } else if (alt.charAt(0) == '<') {
      // Structural variants
      if (alt.startsWith("<DEL")) {
        // Case: Deletion
        // 2 321682    .  T   <DEL>         6     PASS
        // IMPRECISE;SVTYPE=DEL;END=321887;SVLEN=-105;CIPOS=-56,20;CIEND=-10,62
        String deleted = reference;
        int startNew = start;

        if (end > start) {
          // The deleted sequence starts right after REF and runs up to 'end'. Its bases are
          // filled with the leading REF bases and padded with 'N'
          startNew = start + reference.length();
          int size = end - startNew + 1;
          char[] change = new char[size];
          for (int i = 0; i < change.length; i++) {
            change[i] = reference.length() > i ? reference.charAt(i) : 'N';
          }
          deleted = new String(change);
        }
        list = Variant.factory(chromo, startNew, deleted, "", id, false);
      } else if (alt.startsWith("<INV")) {
        // Inversion
        int startNew = start + reference.length();
        Variant var = new Variant(chromo, startNew, end, id);
        var.setVariantType(VariantType.INV);
        list = new LinkedList<>();
        list.add(var);
      } else if (alt.startsWith("<DUP")) {
        // Duplication
        int startNew = start + reference.length();
        Variant var = new Variant(chromo, startNew, end, id);
        var.setVariantType(VariantType.DUP);
        list = new LinkedList<>();
        list.add(var);
      }
      // Any other symbolic allele leaves 'list' unset: an empty list is returned below
    } else if ((alt.indexOf('[') >= 0) || (alt.indexOf(']') >= 0)) {
      // Translocations

      // Parse ALT string: the mate position is enclosed in a pair of ']' or '[', with the
      // ALT bases either before or after it
      boolean left = alt.indexOf(']') >= 0;
      String sep = (left ? "\\]" : "\\[");
      String[] tpos = alt.split(sep);
      String pos = tpos[1];
      boolean before = (alt.indexOf(']') == 0) || (alt.indexOf('[') == 0);
      String altBases = (before ? tpos[2] : tpos[0]);

      // Parse 'chr:start'
      String[] posSplit = pos.split(":");
      String trChrName = posSplit[0];
      Chromosome trChr = chromo.getGenome().getOrCreateChromosome(trChrName);
      // NOTE(review): the '- 1' presumably converts a one-based VCF position to a zero-based
      // coordinate; confirm against the coordinate convention used by VariantBnd
      int trStart = Gpr.parseIntSafe(posSplit[1]) - 1;

      // Use the 'reference' parameter, like every other branch of this method
      VariantBnd var =
          new VariantBnd(chromo, start, reference, altBases, trChr, trStart, left, before);
      list = new LinkedList<>();
      list.add(var);
    } else if (reference.length() == alt.length()) {
      // Case: SNP, MNP
      if (reference.length() == 1) {
        // SNPs
        // 20     3 .         C      G       .   PASS  DP=100
        list = Variant.factory(chromo, start, reference, alt, id, true);
      } else {
        // MNPs
        // 20     3 .         TC     AT      .   PASS  DP=100
        // Sometimes the first and/or last bases are the same and we can trim them.
        // REF and ALT have the same length and are not equal (checked above), so at least one
        // position differs and both scans stop inside the strings
        int firstDiff = 0;
        while (reference.charAt(firstDiff) == alt.charAt(firstDiff)) firstDiff++;

        int lastDiff = reference.length() - 1;
        while (reference.charAt(lastDiff) == alt.charAt(lastDiff)) lastDiff--;

        String newRef = reference.substring(firstDiff, lastDiff + 1);
        String newAlt = alt.substring(firstDiff, lastDiff + 1);
        list = Variant.factory(chromo, start + firstDiff, newRef, newAlt, id, true);
      }
    } else {
      // Short Insertions, Deletions or Mixed Variants (substitutions)
      VcfRefAltAlign align = new VcfRefAltAlign(alt, reference);
      align.align();
      int startDiff = align.getOffset();

      switch (align.getVariantType()) {
        case DEL: {
          // Case: Deletion
          // 20     2 .         TC      T      .   PASS  DP=100
          // 20     2 .         AGAC    AAC    .   PASS  DP=100
          String change = align.getAlignment();
          if (!change.startsWith("-"))
            throw new RuntimeException(
                "Deletion '" + change + "' does not start with '-'. This should never happen!");
          list = Variant.factory(chromo, start + startDiff, "", change, id, true);
          break;
        }

        case INS: {
          // Case: Insertion of A { tC ; tCA } tC is the reference allele
          // 20     2 .         TC      TCA    .   PASS  DP=100
          String change = align.getAlignment();
          if (!change.startsWith("+"))
            throw new RuntimeException(
                "Insertion '" + change + "' does not start with '+'. This should never happen!");
          list = Variant.factory(chromo, start + startDiff, "", change, id, true);
          break;
        }

        case MIXED: {
          // Case: Mixed variant (substitution)
          // The shared prefix is trimmed into locals: 'alt' itself must stay untouched because
          // it is recorded as the genotype below
          String trimmedRef = reference.substring(startDiff);
          String trimmedAlt = alt.substring(startDiff);
          list = Variant.factory(chromo, start + startDiff, trimmedRef, trimmedAlt, id, true);
          break;
        }

        default:
          // Other change type?
          throw new RuntimeException(
              "Unsupported VCF change type '"
                  + align.getVariantType()
                  + "'\n\tRef: "
                  + reference
                  + "'\n\tAlt: '"
                  + alt
                  + "'\n\tVcfEntry: "
                  + this);
      }
    }

    // ---
    // Add original 'ALT' field as genotype
    // ---
    if (list == null) list = new LinkedList<>();
    for (Variant variant : list) variant.setGenotype(alt);

    return list;
  }