/** * Test the reads according to an independently derived context. * * @param view * @param range * @param reads */ @Override protected void testReadsInContext( LocusView view, List<GenomeLoc> range, List<GATKSAMRecord> reads) { AllLocusView allLocusView = (AllLocusView) view; // TODO: Should skip over loci not in the given range. GenomeLoc firstLoc = range.get(0); GenomeLoc lastLoc = range.get(range.size() - 1); GenomeLoc bounds = genomeLocParser.createGenomeLoc( firstLoc.getContig(), firstLoc.getStart(), lastLoc.getStop()); for (int i = bounds.getStart(); i <= bounds.getStop(); i++) { GenomeLoc site = genomeLocParser.createGenomeLoc("chr1", i); AlignmentContext locusContext = allLocusView.next(); Assert.assertEquals(locusContext.getLocation(), site, "Locus context location is incorrect"); int expectedReadsAtSite = 0; for (GATKSAMRecord read : reads) { if (genomeLocParser.createGenomeLoc(read).containsP(locusContext.getLocation())) { Assert.assertTrue( locusContext.getReads().contains(read), "Target locus context does not contain reads"); expectedReadsAtSite++; } } Assert.assertEquals( locusContext.getReads().size(), expectedReadsAtSite, "Found wrong number of reads at site"); } }
/**
 * Adds a GenomeLoc to the collection, merging it with an existing region if the two are
 * contiguous. If it doesn't touch any existing region, it is inserted in sorted order.
 *
 * @param e the GenomeLoc to add to the collection
 * @return true if the GenomeLoc could be added to the collection
 */
public boolean addRegion(GenomeLoc e) {
  if (e == null) {
    return false;
  }
  // have we added it to the collection?
  boolean haveAdded = false;

  // check if the specified element is contiguous with any current location; if so, merge the two
  for (GenomeLoc g : mArray) {
    if (g.contiguousP(e)) {
      GenomeLoc c = g.merge(e);
      mArray.set(mArray.indexOf(g), c);
      haveAdded = true;
    } else if ((g.getContigIndex() == e.getContigIndex())
        && (e.getStart() < g.getStart())
        && !haveAdded) {
      mArray.add(mArray.indexOf(g), e);
      return true;
    } else if (haveAdded
        && ((g.getContigIndex() > e.getContigIndex())
            || (g.getContigIndex() == e.getContigIndex() && e.getStart() > g.getStart()))) {
      // e has already been merged in and we've moved past it, so we're done
      return true;
    }
  }

  // we're at the end and we haven't found a location that should fall after it,
  // so put it at the end
  if (!haveAdded) {
    mArray.add(e);
  }
  return true;
}
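// Usage sketch (illustrative, not part of the original source): how addRegion merges
// contiguous regions versus inserting disjoint ones. Assumes the genomeLocParser and
// contigOneName fixtures used by the unit tests elsewhere in this collection.
GenomeLocSortedSet set = new GenomeLocSortedSet(genomeLocParser);
set.addRegion(genomeLocParser.createGenomeLoc(contigOneName, 1, 50));
set.addRegion(genomeLocParser.createGenomeLoc(contigOneName, 45, 100)); // contiguous with 1-50, merged into 1-100
set.addRegion(genomeLocParser.createGenomeLoc(contigOneName, 200, 300)); // disjoint, appended in sorted order
// the set now holds two regions: 1-100 and 200-300 on contigOne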
/**
 * Return the number of base pairs before loc in the sorted set.
 *
 * @param loc the location before which we are counting bases
 * @return the number of base pairs in the set that fall before loc
 */
public long sizeBeforeLoc(GenomeLoc loc) {
  long s = 0;
  for (GenomeLoc e : this) {
    if (e.isBefore(loc)) {
      s += e.size(); // e lies entirely before loc
    } else if (!e.isPast(loc)) {
      s += loc.getStart() - e.getStart(); // loc is inside of e, count only the leading bases
    }
    // regions past loc contribute nothing
  }
  return s;
}
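// Worked example (illustrative, assuming the genomeLocParser/contigOneName test fixtures):
// with regions 1-10 and 20-30 in the set, sizeBeforeLoc at position 25 counts the full 10
// bases of the first region plus the 5 bases (20..24) of the second, i.e. 15.
GenomeLocSortedSet counted = new GenomeLocSortedSet(genomeLocParser);
counted.add(genomeLocParser.createGenomeLoc(contigOneName, 1, 10));
counted.add(genomeLocParser.createGenomeLoc(contigOneName, 20, 30));
long basesBefore = counted.sizeBeforeLoc(genomeLocParser.createGenomeLoc(contigOneName, 25)); // == 15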
/** * return a deep copy of this collection. * * @return a new GenomeLocSortedSet, identical to the current GenomeLocSortedSet. */ public GenomeLocSortedSet clone() { GenomeLocSortedSet ret = new GenomeLocSortedSet(genomeLocParser); for (GenomeLoc loc : this.mArray) { // ensure a deep copy ret.mArray.add( genomeLocParser.createGenomeLoc(loc.getContig(), loc.getStart(), loc.getStop())); } return ret; }
public GenomeLocSortedSet subtractRegions(GenomeLocSortedSet toRemoveSet) {
  LinkedList<GenomeLoc> good = new LinkedList<GenomeLoc>();
  Stack<GenomeLoc> toProcess = new Stack<GenomeLoc>();
  Stack<GenomeLoc> toExclude = new Stack<GenomeLoc>();

  // initialize the stacks
  toProcess.addAll(mArray);
  Collections.reverse(toProcess);
  toExclude.addAll(toRemoveSet.mArray);
  Collections.reverse(toExclude);

  int i = 0;
  while (!toProcess.empty()) { // while there's still stuff to process
    if (toExclude.empty()) {
      good.addAll(toProcess); // no more excludes, all of the remaining loci are good
      break;
    }

    GenomeLoc p = toProcess.peek();
    GenomeLoc e = toExclude.peek();

    if (p.overlapsP(e)) {
      toProcess.pop();
      for (GenomeLoc newP : p.subtract(e)) toProcess.push(newP);
    } else if (p.compareContigs(e) < 0) {
      good.add(toProcess.pop()); // p is now good
    } else if (p.compareContigs(e) > 0) {
      toExclude.pop(); // e can't affect anything
    } else if (p.getStop() < e.getStart()) {
      good.add(toProcess.pop()); // p stops before e starts, p is good
    } else if (e.getStop() < p.getStart()) {
      toExclude.pop(); // p starts after e stops, e is done
    } else {
      throw new ReviewedStingException("BUG: unexpected condition: p=" + p + ", e=" + e);
    }

    if (i++ % 10000 == 0) logger.debug("subtractRegions operation: i = " + i);
  }

  return createSetFromList(genomeLocParser, good);
}
@Test public void deleteSuperRegion() { GenomeLoc e = genomeLocParser.createGenomeLoc(contigOneName, 10, 20); GenomeLoc g = genomeLocParser.createGenomeLoc(contigOneName, 70, 100); mSortedSet.add(g); mSortedSet.addRegion(e); assertTrue(mSortedSet.size() == 2); // now delete a region GenomeLoc d = genomeLocParser.createGenomeLoc(contigOneName, 15, 75); mSortedSet = mSortedSet.subtractRegions(new GenomeLocSortedSet(genomeLocParser, d)); Iterator<GenomeLoc> iter = mSortedSet.iterator(); GenomeLoc loc = iter.next(); assertTrue(loc.getStart() == 10); assertTrue(loc.getStop() == 14); assertTrue(loc.getContigIndex() == 1); loc = iter.next(); assertTrue(loc.getStart() == 76); assertTrue(loc.getStop() == 100); assertTrue(loc.getContigIndex() == 1); }
/**
 * Determines the position of the read relative to the interval. Note: this function uses the
 * UNCLIPPED ENDS of the read for the comparison.
 *
 * @param read the read
 * @param interval the interval
 * @return the overlap type as described by the ReadAndIntervalOverlap enum (see above)
 */
public static ReadAndIntervalOverlap getReadAndIntervalOverlapType(
    GATKSAMRecord read, GenomeLoc interval) {
  int sStart = read.getSoftStart();
  int sStop = read.getSoftEnd();
  int uStart = read.getUnclippedStart();
  int uStop = read.getUnclippedEnd();

  if (!read.getReferenceName().equals(interval.getContig()))
    return ReadAndIntervalOverlap.NO_OVERLAP_CONTIG;
  else if (uStop < interval.getStart()) return ReadAndIntervalOverlap.NO_OVERLAP_LEFT;
  else if (uStart > interval.getStop()) return ReadAndIntervalOverlap.NO_OVERLAP_RIGHT;
  else if (sStop < interval.getStart()) return ReadAndIntervalOverlap.NO_OVERLAP_HARDCLIPPED_LEFT;
  else if (sStart > interval.getStop()) return ReadAndIntervalOverlap.NO_OVERLAP_HARDCLIPPED_RIGHT;
  else if ((sStart >= interval.getStart()) && (sStop <= interval.getStop()))
    return ReadAndIntervalOverlap.OVERLAP_CONTAINED;
  else if ((sStart < interval.getStart()) && (sStop > interval.getStop()))
    return ReadAndIntervalOverlap.OVERLAP_LEFT_AND_RIGHT;
  else if ((sStart < interval.getStart())) return ReadAndIntervalOverlap.OVERLAP_LEFT;
  else return ReadAndIntervalOverlap.OVERLAP_RIGHT;
}
@Test public void deleteSomeByRegion() { GenomeLoc e = genomeLocParser.createGenomeLoc(contigOneName, 1, 100); mSortedSet.add(e); for (int x = 1; x < 50; x++) { GenomeLoc del = genomeLocParser.createGenomeLoc(contigOneName, x, x); mSortedSet = mSortedSet.subtractRegions(new GenomeLocSortedSet(genomeLocParser, del)); } assertTrue(!mSortedSet.isEmpty()); assertTrue(mSortedSet.size() == 1); GenomeLoc loc = mSortedSet.iterator().next(); assertTrue(loc.getStop() == 100); assertTrue(loc.getStart() == 50); }
public Iterable<Shard> createShardsOverIntervals( final SAMDataSource readsDataSource, final GenomeLocSortedSet intervals, final int maxShardSize) { List<Shard> shards = new ArrayList<Shard>(); for (GenomeLoc interval : intervals) { while (interval.size() > maxShardSize) { shards.add( new LocusShard( intervals.getGenomeLocParser(), readsDataSource, Collections.singletonList( intervals .getGenomeLocParser() .createGenomeLoc( interval.getContig(), interval.getStart(), interval.getStart() + maxShardSize - 1)), null)); interval = intervals .getGenomeLocParser() .createGenomeLoc( interval.getContig(), interval.getStart() + maxShardSize, interval.getStop()); } shards.add( new LocusShard( intervals.getGenomeLocParser(), readsDataSource, Collections.singletonList(interval), null)); } return shards; }
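// Illustrative expectation (assumption, not asserted anywhere in this source): with
// maxShardSize = 100, a single interval spanning 1-250 would be cut into three LocusShards
// covering 1-100, 101-200 and 201-250.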
@Test public void fromSequenceDictionary() { mSortedSet = GenomeLocSortedSet.createSetFromSequenceDictionary(this.header.getSequenceDictionary()); // we should have sequence assertTrue(mSortedSet.size() == GenomeLocSortedSetUnitTest.NUMBER_OF_CHROMOSOMES); int seqNumber = 0; for (GenomeLoc loc : mSortedSet) { assertTrue(loc.getStart() == 1); assertTrue(loc.getStop() == GenomeLocSortedSetUnitTest.CHROMOSOME_SIZE); assertTrue(loc.getContigIndex() == seqNumber); ++seqNumber; } assertTrue(seqNumber == GenomeLocSortedSetUnitTest.NUMBER_OF_CHROMOSOMES); }
@Test public void mergingOverlappingAbove() { GenomeLoc e = genomeLocParser.createGenomeLoc(contigOneName, 0, 50); GenomeLoc g = genomeLocParser.createGenomeLoc(contigOneName, 49, 100); assertTrue(mSortedSet.size() == 0); mSortedSet.add(g); assertTrue(mSortedSet.size() == 1); mSortedSet.addRegion(e); assertTrue(mSortedSet.size() == 1); Iterator<GenomeLoc> iter = mSortedSet.iterator(); GenomeLoc loc = iter.next(); assertEquals(loc.getStart(), 0); assertEquals(loc.getStop(), 100); assertEquals(loc.getContigIndex(), 1); }
private boolean overlapsKnownCNV(VariantContext cnv) { if (knownCNVs != null) { final GenomeLoc loc = getWalker().getToolkit().getGenomeLocParser().createGenomeLoc(cnv, true); IntervalTree<GenomeLoc> intervalTree = knownCNVs.get(loc.getContig()); final Iterator<IntervalTree.Node<GenomeLoc>> nodeIt = intervalTree.overlappers(loc.getStart(), loc.getStop()); while (nodeIt.hasNext()) { final double overlapP = loc.reciprocialOverlapFraction(nodeIt.next().getValue()); if (overlapP > MIN_CNV_OVERLAP) return true; } } return false; }
/**
 * Utility routine that prints out process information (including timing) every N records or
 * every M seconds, for N and M set in global variables.
 *
 * <p>Synchronized to ensure that even with multiple threads calling notifyOfProgress we still
 * get one clean stream of meter logs.
 *
 * <p>Note that this method doesn't actually print progress (unless a must-print has been
 * requested); it just registers the progress itself. A separate printing daemon periodically
 * polls the meter to print out progress.
 *
 * @param loc current location, can be null if you are at the end of the processing unit
 * @param nTotalRecordsProcessed the total number of records we've processed
 */
public synchronized void notifyOfProgress(
    final GenomeLoc loc, final long nTotalRecordsProcessed) {
  if (nTotalRecordsProcessed < 0)
    throw new IllegalArgumentException("nTotalRecordsProcessed must be >= 0");

  // weird comparison to ensure that loc == null (in unmapped reads) is kept before
  // maxGenomeLoc == null (on startup)
  this.maxGenomeLoc = loc == null ? loc : (maxGenomeLoc == null ? loc : loc.max(maxGenomeLoc));
  this.nTotalRecordsProcessed = Math.max(this.nTotalRecordsProcessed, nTotalRecordsProcessed);

  // a pretty name for our position
  this.positionMessage =
      maxGenomeLoc == null
          ? "unmapped reads"
          : String.format("%s:%d", maxGenomeLoc.getContig(), maxGenomeLoc.getStart());
}
public final Map<String, IntervalTree<GenomeLoc>> createIntervalTreeByContig( final IntervalBinding<Feature> intervals) { final Map<String, IntervalTree<GenomeLoc>> byContig = new HashMap<String, IntervalTree<GenomeLoc>>(); final List<GenomeLoc> locs = intervals.getIntervals(getToolkit()); // set up the map from contig -> interval tree for (final String contig : getContigNames()) byContig.put(contig, new IntervalTree<GenomeLoc>()); for (final GenomeLoc loc : locs) { byContig.get(loc.getContig()).put(loc.getStart(), loc.getStop(), loc); } return byContig; }
/**
 * Returns a new GenomeLoc that represents the region between the endpoints of this and that.
 * Requires that this and that GenomeLoc are both mapped.
 */
@Requires({"that != null", "isUnmapped(this) == isUnmapped(that)"})
@Ensures("result != null")
public GenomeLoc endpointSpan(GenomeLoc that) throws ReviewedStingException {
  if (GenomeLoc.isUnmapped(this) || GenomeLoc.isUnmapped(that)) {
    throw new ReviewedStingException("Cannot get endpoint span for unmapped genome locs");
  }

  if (!this.getContig().equals(that.getContig())) {
    throw new ReviewedStingException(
        "Cannot get endpoint span for genome locs on different contigs");
  }

  return new GenomeLoc(
      getContig(),
      this.contigIndex,
      Math.min(getStart(), that.getStart()),
      Math.max(getStop(), that.getStop()));
}
/**
 * Returns a new GenomeLoc that represents the entire span of this and that. Requires that this
 * and that GenomeLoc are contiguous and both mapped.
 */
@Requires({"that != null", "isUnmapped(this) == isUnmapped(that)"})
@Ensures("result != null")
public GenomeLoc merge(GenomeLoc that) throws ReviewedStingException {
  if (GenomeLoc.isUnmapped(this) || GenomeLoc.isUnmapped(that)) {
    if (!GenomeLoc.isUnmapped(this) || !GenomeLoc.isUnmapped(that))
      throw new ReviewedStingException("Tried to merge a mapped and an unmapped genome loc");
    return UNMAPPED;
  }

  if (!(this.contiguousP(that))) {
    throw new ReviewedStingException("The two genome locs need to be contiguous");
  }

  return new GenomeLoc(
      getContig(),
      this.contigIndex,
      Math.min(getStart(), that.getStart()),
      Math.max(getStop(), that.getStop()));
}
@Requires("that != null") @Ensures("result != null") public GenomeLoc intersect(GenomeLoc that) throws ReviewedStingException { if (GenomeLoc.isUnmapped(this) || GenomeLoc.isUnmapped(that)) { if (!GenomeLoc.isUnmapped(this) || !GenomeLoc.isUnmapped(that)) throw new ReviewedStingException("Tried to intersect a mapped and an unmapped genome loc"); return UNMAPPED; } if (!(this.overlapsP(that))) { throw new ReviewedStingException( "GenomeLoc::intersect(): The two genome loc's need to overlap"); } return new GenomeLoc( getContig(), this.contigIndex, Math.max(getStart(), that.getStart()), Math.min(getStop(), that.getStop())); }
@Requires("that != null") @Ensures("result == 0 || result == 1 || result == -1") public int compareTo(GenomeLoc that) { int result = 0; if (this == that) { result = 0; } else if (GenomeLoc.isUnmapped(this)) result = 1; else if (GenomeLoc.isUnmapped(that)) result = -1; else { final int cmpContig = compareContigs(that); if (cmpContig != 0) { result = cmpContig; } else { if (this.getStart() < that.getStart()) result = -1; if (this.getStart() > that.getStart()) result = 1; } } return result; }
private ArrayList<Allele> computeConsensusAlleles( ReferenceContext ref, Map<String, AlignmentContext> contexts, AlignmentContextUtils.ReadOrientation contextType) { Allele refAllele = null, altAllele = null; GenomeLoc loc = ref.getLocus(); ArrayList<Allele> aList = new ArrayList<Allele>(); HashMap<String, Integer> consensusIndelStrings = new HashMap<String, Integer>(); int insCount = 0, delCount = 0; // quick check of total number of indels in pileup for (Map.Entry<String, AlignmentContext> sample : contexts.entrySet()) { AlignmentContext context = AlignmentContextUtils.stratify(sample.getValue(), contextType); final ReadBackedExtendedEventPileup indelPileup = context.getExtendedEventPileup(); insCount += indelPileup.getNumberOfInsertions(); delCount += indelPileup.getNumberOfDeletions(); } if (insCount < minIndelCountForGenotyping && delCount < minIndelCountForGenotyping) return aList; for (Map.Entry<String, AlignmentContext> sample : contexts.entrySet()) { // todo -- warning, can be duplicating expensive partition here AlignmentContext context = AlignmentContextUtils.stratify(sample.getValue(), contextType); final ReadBackedExtendedEventPileup indelPileup = context.getExtendedEventPileup(); for (ExtendedEventPileupElement p : indelPileup.toExtendedIterable()) { // SAMRecord read = p.getRead(); GATKSAMRecord read = ReadUtils.hardClipAdaptorSequence(p.getRead()); if (read == null) continue; if (ReadUtils.is454Read(read)) { continue; } /* if (DEBUG && p.isIndel()) { System.out.format("Read: %s, cigar: %s, aln start: %d, aln end: %d, p.len:%d, Type:%s, EventBases:%s\n", read.getReadName(),read.getCigar().toString(),read.getAlignmentStart(),read.getAlignmentEnd(), p.getEventLength(),p.getType().toString(), p.getEventBases()); } */ String indelString = p.getEventBases(); if (p.isInsertion()) { boolean foundKey = false; if (read.getAlignmentEnd() == loc.getStart()) { // first corner condition: a read has an insertion at the end, and we're right at the // insertion. // In this case, the read could have any of the inserted bases and we need to build a // consensus for (String s : consensusIndelStrings.keySet()) { int cnt = consensusIndelStrings.get(s); if (s.startsWith(indelString)) { // case 1: current insertion is prefix of indel in hash map consensusIndelStrings.put(s, cnt + 1); foundKey = true; break; } else if (indelString.startsWith(s)) { // case 2: indel stored in hash table is prefix of current insertion // In this case, new bases are new key. consensusIndelStrings.remove(s); consensusIndelStrings.put(indelString, cnt + 1); foundKey = true; break; } } if (!foundKey) // none of the above: event bases not supported by previous table, so add new key consensusIndelStrings.put(indelString, 1); } else if (read.getAlignmentStart() == loc.getStart() + 1) { // opposite corner condition: read will start at current locus with an insertion for (String s : consensusIndelStrings.keySet()) { int cnt = consensusIndelStrings.get(s); if (s.endsWith(indelString)) { // case 1: current insertion is suffix of indel in hash map consensusIndelStrings.put(s, cnt + 1); foundKey = true; break; } else if (indelString.endsWith(s)) { // case 2: indel stored in hash table is suffix of current insertion // In this case, new bases are new key. 
consensusIndelStrings.remove(s); consensusIndelStrings.put(indelString, cnt + 1); foundKey = true; break; } } if (!foundKey) // none of the above: event bases not supported by previous table, so add new key consensusIndelStrings.put(indelString, 1); } else { // normal case: insertion somewhere in the middle of a read: add count to hash map int cnt = consensusIndelStrings.containsKey(indelString) ? consensusIndelStrings.get(indelString) : 0; consensusIndelStrings.put(indelString, cnt + 1); } } else if (p.isDeletion()) { indelString = String.format("D%d", p.getEventLength()); int cnt = consensusIndelStrings.containsKey(indelString) ? consensusIndelStrings.get(indelString) : 0; consensusIndelStrings.put(indelString, cnt + 1); } } /* if (DEBUG) { int icount = indelPileup.getNumberOfInsertions(); int dcount = indelPileup.getNumberOfDeletions(); if (icount + dcount > 0) { List<Pair<String,Integer>> eventStrings = indelPileup.getEventStringsWithCounts(ref.getBases()); System.out.format("#ins: %d, #del:%d\n", insCount, delCount); for (int i=0 ; i < eventStrings.size() ; i++ ) { System.out.format("%s:%d,",eventStrings.get(i).first,eventStrings.get(i).second); // int k=0; } System.out.println(); } } */ } int maxAlleleCnt = 0; String bestAltAllele = ""; for (String s : consensusIndelStrings.keySet()) { int curCnt = consensusIndelStrings.get(s); if (curCnt > maxAlleleCnt) { maxAlleleCnt = curCnt; bestAltAllele = s; } // if (DEBUG) // System.out.format("Key:%s, number: %d\n",s,consensusIndelStrings.get(s) ); } // gdebug- if (maxAlleleCnt < minIndelCountForGenotyping) return aList; if (bestAltAllele.startsWith("D")) { // get deletion length int dLen = Integer.valueOf(bestAltAllele.substring(1)); // get ref bases of accurate deletion int startIdxInReference = (int) (1 + loc.getStart() - ref.getWindow().getStart()); // System.out.println(new String(ref.getBases())); byte[] refBases = Arrays.copyOfRange(ref.getBases(), startIdxInReference, startIdxInReference + dLen); if (Allele.acceptableAlleleBases(refBases)) { refAllele = Allele.create(refBases, true); altAllele = Allele.create(Allele.NULL_ALLELE_STRING, false); } } else { // insertion case if (Allele.acceptableAlleleBases(bestAltAllele)) { refAllele = Allele.create(Allele.NULL_ALLELE_STRING, true); altAllele = Allele.create(bestAltAllele, false); } } if (refAllele != null && altAllele != null) { aList.add(0, refAllele); aList.add(1, altAllele); } return aList; }
/** * Read in a list of ExactCall objects from reader, keeping only those with starts in startsToKeep * or all sites (if this is empty) * * @param reader a just-opened reader sitting at the start of the file * @param startsToKeep a list of start position of the calls to keep, or empty if all calls should * be kept * @param parser a genome loc parser to create genome locs * @return a list of ExactCall objects in reader * @throws IOException */ public static List<ExactCall> readExactLog( final BufferedReader reader, final List<Integer> startsToKeep, GenomeLocParser parser) throws IOException { if (reader == null) throw new IllegalArgumentException("reader cannot be null"); if (startsToKeep == null) throw new IllegalArgumentException("startsToKeep cannot be null"); if (parser == null) throw new IllegalArgumentException("GenomeLocParser cannot be null"); List<ExactCall> calls = new LinkedList<ExactCall>(); // skip the header line reader.readLine(); // skip the first "type" line reader.readLine(); while (true) { final VariantContextBuilder builder = new VariantContextBuilder(); final List<Allele> alleles = new ArrayList<Allele>(); final List<Genotype> genotypes = new ArrayList<Genotype>(); final double[] posteriors = new double[2]; final double[] priors = MathUtils.normalizeFromLog10(new double[] {0.5, 0.5}, true); final List<Integer> mle = new ArrayList<Integer>(); final Map<Allele, Double> log10pNonRefByAllele = new HashMap<Allele, Double>(); long runtimeNano = -1; GenomeLoc currentLoc = null; while (true) { final String line = reader.readLine(); if (line == null) return calls; final String[] parts = line.split("\t"); final GenomeLoc lineLoc = parser.parseGenomeLoc(parts[0]); final String variable = parts[1]; final String key = parts[2]; final String value = parts[3]; if (currentLoc == null) currentLoc = lineLoc; if (variable.equals("type")) { if (startsToKeep.isEmpty() || startsToKeep.contains(currentLoc.getStart())) { builder.alleles(alleles); final int stop = currentLoc.getStart() + alleles.get(0).length() - 1; builder.chr(currentLoc.getContig()).start(currentLoc.getStart()).stop(stop); builder.genotypes(genotypes); final int[] mleInts = ArrayUtils.toPrimitive(mle.toArray(new Integer[] {})); final AFCalcResult result = new AFCalcResult(mleInts, 1, alleles, posteriors, priors, log10pNonRefByAllele); calls.add(new ExactCall(builder.make(), runtimeNano, result)); } break; } else if (variable.equals("allele")) { final boolean isRef = key.equals("0"); alleles.add(Allele.create(value, isRef)); } else if (variable.equals("PL")) { final GenotypeBuilder gb = new GenotypeBuilder(key); gb.PL(GenotypeLikelihoods.fromPLField(value).getAsPLs()); genotypes.add(gb.make()); } else if (variable.equals("log10PosteriorOfAFEq0")) { posteriors[0] = Double.valueOf(value); } else if (variable.equals("log10PosteriorOfAFGt0")) { posteriors[1] = Double.valueOf(value); } else if (variable.equals("MLE")) { mle.add(Integer.valueOf(value)); } else if (variable.equals("pNonRefByAllele")) { final Allele a = Allele.create(key); log10pNonRefByAllele.put(a, Double.valueOf(value)); } else if (variable.equals("runtime.nano")) { runtimeNano = Long.valueOf(value); } else { // nothing to do } } } }
public Allele getLikelihoods( RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> contexts, AlignmentContextUtils.ReadOrientation contextType, GenotypePriors priors, Map<String, MultiallelicGenotypeLikelihoods> GLs, Allele alternateAlleleToUse, boolean useBAQedPileup) { if (tracker == null) return null; GenomeLoc loc = ref.getLocus(); Allele refAllele, altAllele; VariantContext vc = null; if (!ref.getLocus().equals(lastSiteVisited)) { // starting a new site: clear allele list alleleList.clear(); lastSiteVisited = ref.getLocus(); indelLikelihoodMap.set(new HashMap<PileupElement, LinkedHashMap<Allele, Double>>()); haplotypeMap.clear(); if (getAlleleListFromVCF) { for (final VariantContext vc_input : tracker.getValues(UAC.alleles, loc)) { if (vc_input != null && allowableTypes.contains(vc_input.getType()) && ref.getLocus().getStart() == vc_input.getStart()) { vc = vc_input; break; } } // ignore places where we don't have a variant if (vc == null) return null; alleleList.clear(); if (ignoreSNPAllelesWhenGenotypingIndels) { // if there's an allele that has same length as the reference (i.e. a SNP or MNP), ignore // it and don't genotype it for (Allele a : vc.getAlleles()) if (a.isNonReference() && a.getBases().length == vc.getReference().getBases().length) continue; else alleleList.add(a); } else { for (Allele a : vc.getAlleles()) alleleList.add(a); } } else { alleleList = computeConsensusAlleles(ref, contexts, contextType); if (alleleList.isEmpty()) return null; } } // protect against having an indel too close to the edge of a contig if (loc.getStart() <= HAPLOTYPE_SIZE) return null; // check if there is enough reference window to create haplotypes (can be an issue at end of // contigs) if (ref.getWindow().getStop() < loc.getStop() + HAPLOTYPE_SIZE) return null; if (!(priors instanceof DiploidIndelGenotypePriors)) throw new StingException( "Only diploid-based Indel priors are supported in the DINDEL GL model"); if (alleleList.isEmpty()) return null; refAllele = alleleList.get(0); altAllele = alleleList.get(1); // look for alt allele that has biggest length distance to ref allele int maxLenDiff = 0; for (Allele a : alleleList) { if (a.isNonReference()) { int lenDiff = Math.abs(a.getBaseString().length() - refAllele.getBaseString().length()); if (lenDiff > maxLenDiff) { maxLenDiff = lenDiff; altAllele = a; } } } final int eventLength = altAllele.getBaseString().length() - refAllele.getBaseString().length(); final int hsize = (int) ref.getWindow().size() - Math.abs(eventLength) - 1; final int numPrefBases = ref.getLocus().getStart() - ref.getWindow().getStart() + 1; haplotypeMap = Haplotype.makeHaplotypeListFromAlleles( alleleList, loc.getStart(), ref, hsize, numPrefBases); // For each sample, get genotype likelihoods based on pileup // compute prior likelihoods on haplotypes, and initialize haplotype likelihood matrix with // them. 
// initialize the GenotypeLikelihoods GLs.clear(); for (Map.Entry<String, AlignmentContext> sample : contexts.entrySet()) { AlignmentContext context = AlignmentContextUtils.stratify(sample.getValue(), contextType); ReadBackedPileup pileup = null; if (context.hasExtendedEventPileup()) pileup = context.getExtendedEventPileup(); else if (context.hasBasePileup()) pileup = context.getBasePileup(); if (pileup != null) { final double[] genotypeLikelihoods = pairModel.computeReadHaplotypeLikelihoods( pileup, haplotypeMap, ref, eventLength, getIndelLikelihoodMap()); GLs.put( sample.getKey(), new MultiallelicGenotypeLikelihoods( sample.getKey(), alleleList, genotypeLikelihoods, getFilteredDepth(pileup))); if (DEBUG) { System.out.format("Sample:%s Alleles:%s GL:", sample.getKey(), alleleList.toString()); for (int k = 0; k < genotypeLikelihoods.length; k++) System.out.format("%1.4f ", genotypeLikelihoods[k]); System.out.println(); } } } return refAllele; }
@Requires("that != null") public final List<GenomeLoc> subtract(final GenomeLoc that) { if (GenomeLoc.isUnmapped(this) || GenomeLoc.isUnmapped(that)) { if (!GenomeLoc.isUnmapped(this) || !GenomeLoc.isUnmapped(that)) throw new ReviewedStingException("Tried to intersect a mapped and an unmapped genome loc"); return Arrays.asList(UNMAPPED); } if (!(this.overlapsP(that))) { throw new ReviewedStingException("GenomeLoc::minus(): The two genome loc's need to overlap"); } if (equals(that)) { return Collections.emptyList(); } else if (containsP(that)) { List<GenomeLoc> l = new ArrayList<GenomeLoc>(2); /** * we have to create two new region, one for the before part, one for the after The old * region: |----------------- old region (g) -------------| |----- to delete (e) ------| * * <p>product (two new regions): |------| + |--------| */ int afterStop = this.getStop(), afterStart = that.getStop() + 1; int beforeStop = that.getStart() - 1, beforeStart = this.getStart(); if (afterStop - afterStart >= 0) { GenomeLoc after = new GenomeLoc(this.getContig(), getContigIndex(), afterStart, afterStop); l.add(after); } if (beforeStop - beforeStart >= 0) { GenomeLoc before = new GenomeLoc(this.getContig(), getContigIndex(), beforeStart, beforeStop); l.add(before); } return l; } else if (that.containsP(this)) { /** * e completely contains g, delete g, but keep looking, there may be more regions i.e.: * |--------------------- e --------------------| |--- g ---| |---- others ----| */ return Collections.emptyList(); // don't need to do anything } else { /** * otherwise e overlaps some part of g * * <p>figure out which region occurs first on the genome. I.e., is it: |------------- g * ----------| |------------- e ----------| * * <p>or: |------------- g ----------| |------------ e -----------| */ GenomeLoc n; if (that.getStart() < this.getStart()) { n = new GenomeLoc(this.getContig(), getContigIndex(), that.getStop() + 1, this.getStop()); } else { n = new GenomeLoc(this.getContig(), getContigIndex(), this.getStart(), that.getStart() - 1); } // replace g with the new region return Arrays.asList(n); } }
@Override public T traverse( final ActiveRegionWalker<M, T> walker, final LocusShardDataProvider dataProvider, T sum) { logger.debug(String.format("TraverseActiveRegion.traverse: Shard is %s", dataProvider)); final LocusView locusView = getLocusView(walker, dataProvider); final GenomeLocSortedSet initialIntervals = engine.getIntervals(); final LocusReferenceView referenceView = new LocusReferenceView(walker, dataProvider); final int activeRegionExtension = walker.getClass().getAnnotation(ActiveRegionExtension.class).extension(); final int maxRegionSize = walker.getClass().getAnnotation(ActiveRegionExtension.class).maxRegion(); if (locusView .hasNext()) { // trivial optimization to avoid unnecessary processing when there's nothing // here at all int minStart = Integer.MAX_VALUE; ActivityProfile profile = new ActivityProfile(engine.getGenomeLocParser(), walker.hasPresetActiveRegions()); ReferenceOrderedView referenceOrderedDataView = getReferenceOrderedView(walker, dataProvider, locusView); // We keep processing while the next reference location is within the interval GenomeLoc prevLoc = null; while (locusView.hasNext()) { final AlignmentContext locus = locusView.next(); GenomeLoc location = locus.getLocation(); if (prevLoc != null) { // fill in the active / inactive labels from the stop of the previous location to the // start of this location // TODO refactor to separate function for (int iii = prevLoc.getStop() + 1; iii < location.getStart(); iii++) { final GenomeLoc fakeLoc = engine.getGenomeLocParser().createGenomeLoc(prevLoc.getContig(), iii, iii); if (initialIntervals == null || initialIntervals.overlaps(fakeLoc)) { profile.add( fakeLoc, new ActivityProfileResult( walker.hasPresetActiveRegions() && walker.presetActiveRegions.overlaps(fakeLoc) ? 1.0 : 0.0)); } } } dataProvider.getShard().getReadMetrics().incrementNumIterations(); // create reference context. Note that if we have a pileup of "extended events", the context // will // hold the (longest) stretch of deleted reference bases (if deletions are present in the // pileup). 
final ReferenceContext refContext = referenceView.getReferenceContext(location); // Iterate forward to get all reference ordered data covering this location final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus( locus.getLocation(), refContext); // Call the walkers isActive function for this locus and add them to the list to be // integrated later if (initialIntervals == null || initialIntervals.overlaps(location)) { profile.add(location, walkerActiveProb(walker, tracker, refContext, locus, location)); } // Grab all the previously unseen reads from this pileup and add them to the massive read // list for (final PileupElement p : locus.getBasePileup()) { final GATKSAMRecord read = p.getRead(); if (!myReads.contains(read)) { myReads.add(read); } // If this is the last pileup for this shard calculate the minimum alignment start so that // we know // which active regions in the work queue are now safe to process minStart = Math.min(minStart, read.getAlignmentStart()); } prevLoc = location; printProgress(locus.getLocation()); } updateCumulativeMetrics(dataProvider.getShard()); // Take the individual isActive calls and integrate them into contiguous active regions and // add these blocks of work to the work queue // band-pass filter the list of isActive probabilities and turn into active regions final ActivityProfile bandPassFiltered = profile.bandPassFilter(); final List<ActiveRegion> activeRegions = bandPassFiltered.createActiveRegions(activeRegionExtension, maxRegionSize); // add active regions to queue of regions to process // first check if can merge active regions over shard boundaries if (!activeRegions.isEmpty()) { if (!workQueue.isEmpty()) { final ActiveRegion last = workQueue.getLast(); final ActiveRegion first = activeRegions.get(0); if (last.isActive == first.isActive && last.getLocation().contiguousP(first.getLocation()) && last.getLocation().size() + first.getLocation().size() <= maxRegionSize) { workQueue.removeLast(); activeRegions.remove(first); workQueue.add( new ActiveRegion( last.getLocation().union(first.getLocation()), first.isActive, this.engine.getGenomeLocParser(), activeRegionExtension)); } } workQueue.addAll(activeRegions); } logger.debug( "Integrated " + profile.size() + " isActive calls into " + activeRegions.size() + " regions."); // now go and process all of the active regions sum = processActiveRegions(walker, sum, minStart, dataProvider.getLocus().getContig()); } return sum; }
@Requires("that != null") @Ensures("result >= 0") public final int distance(final GenomeLoc that) { if (this.onSameContig(that)) return Math.abs(this.getStart() - that.getStart()); else return Integer.MAX_VALUE; }
@Requires("that != null") public final boolean containsP(GenomeLoc that) { return onSameContig(that) && getStart() <= that.getStart() && getStop() >= that.getStop(); }
public void writeBeagleOutput( VariantContext preferredVC, VariantContext otherVC, boolean isValidationSite, double prior) { GenomeLoc currentLoc = VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(), preferredVC); StringBuffer beagleOut = new StringBuffer(); String marker = String.format("%s:%d ", currentLoc.getContig(), currentLoc.getStart()); beagleOut.append(marker); if (markers != null) markers.append(marker).append("\t").append(Integer.toString(markerCounter++)).append("\t"); for (Allele allele : preferredVC.getAlleles()) { String bglPrintString; if (allele.isNoCall() || allele.isNull()) bglPrintString = "-"; else bglPrintString = allele.getBaseString(); // get rid of * in case of reference allele beagleOut.append(String.format("%s ", bglPrintString)); if (markers != null) markers.append(bglPrintString).append("\t"); } if (markers != null) markers.append("\n"); GenotypesContext preferredGenotypes = preferredVC.getGenotypes(); GenotypesContext otherGenotypes = goodSite(otherVC) ? otherVC.getGenotypes() : null; for (String sample : samples) { boolean isMaleOnChrX = CHECK_IS_MALE_ON_CHR_X && getSample(sample).getGender() == Gender.MALE; Genotype genotype; boolean isValidation; // use sample as key into genotypes structure if (preferredGenotypes.containsSample(sample)) { genotype = preferredGenotypes.get(sample); isValidation = isValidationSite; } else if (otherGenotypes != null && otherGenotypes.containsSample(sample)) { genotype = otherGenotypes.get(sample); isValidation = !isValidationSite; } else { // there is magically no genotype for this sample. throw new StingException( "Sample " + sample + " arose with no genotype in variant or validation VCF. This should never happen."); } /* * Use likelihoods if: is validation, prior is negative; or: is not validation, has genotype key */ double[] log10Likelihoods = null; if ((isValidation && prior < 0.0) || genotype.hasLikelihoods()) { log10Likelihoods = genotype.getLikelihoods().getAsVector(); // see if we need to randomly mask out genotype in this position. if (GenomeAnalysisEngine.getRandomGenerator().nextDouble() <= insertedNoCallRate) { // we are masking out this genotype log10Likelihoods = isMaleOnChrX ? HAPLOID_FLAT_LOG10_LIKELIHOODS : DIPLOID_FLAT_LOG10_LIKELIHOODS; } if (isMaleOnChrX) { log10Likelihoods[1] = -255; // todo -- warning this is dangerous for multi-allele case } } /** otherwise, use the prior uniformly */ else if (!isValidation && genotype.isCalled() && !genotype.hasLikelihoods()) { // hack to deal with input VCFs with no genotype likelihoods. Just assume the called // genotype // is confident. This is useful for Hapmap and 1KG release VCFs. double AA = (1.0 - prior) / 2.0; double AB = (1.0 - prior) / 2.0; double BB = (1.0 - prior) / 2.0; if (genotype.isHomRef()) { AA = prior; } else if (genotype.isHet()) { AB = prior; } else if (genotype.isHomVar()) { BB = prior; } log10Likelihoods = MathUtils.toLog10(new double[] {AA, isMaleOnChrX ? 0.0 : AB, BB}); } else { log10Likelihoods = isMaleOnChrX ? HAPLOID_FLAT_LOG10_LIKELIHOODS : DIPLOID_FLAT_LOG10_LIKELIHOODS; } writeSampleLikelihoods(beagleOut, preferredVC, log10Likelihoods); } beagleWriter.println(beagleOut.toString()); }
/**
 * Tests whether this genome loc starts before that one.
 *
 * @param that the other GenomeLoc to test
 * @return true if this loc is on an earlier contig, or on the same contig with an earlier
 *     start, than that loc
 */
@Requires("that != null")
public final boolean startsBefore(final GenomeLoc that) {
  int comparison = this.compareContigs(that);
  return (comparison == -1 || (comparison == 0 && this.getStart() < that.getStart()));
}
@Requires({"locFirst != null", "locSecond != null", "locSecond.isPast(locFirst)"}) @Ensures("result >= 0") private static int distanceFirstStopToSecondStart(GenomeLoc locFirst, GenomeLoc locSecond) { return locSecond.getStart() - locFirst.getStop(); }
/** * Merges VariantContexts into a single hybrid. Takes genotypes for common samples in priority * order, if provided. If uniqifySamples is true, the priority order is ignored and names are * created by concatenating the VC name with the sample name * * @param genomeLocParser loc parser * @param unsortedVCs collection of unsorted VCs * @param priorityListOfVCs priority list detailing the order in which we should grab the VCs * @param filteredRecordMergeType merge type for filtered records * @param genotypeMergeOptions merge option for genotypes * @param annotateOrigin should we annotate the set it came from? * @param printMessages should we print messages? * @param setKey the key name of the set * @param filteredAreUncalled are filtered records uncalled? * @param mergeInfoWithMaxAC should we merge in info from the VC with maximum allele count? * @return new VariantContext representing the merge of unsortedVCs */ public static VariantContext simpleMerge( final GenomeLocParser genomeLocParser, final Collection<VariantContext> unsortedVCs, final List<String> priorityListOfVCs, final FilteredRecordMergeType filteredRecordMergeType, final GenotypeMergeType genotypeMergeOptions, final boolean annotateOrigin, final boolean printMessages, final String setKey, final boolean filteredAreUncalled, final boolean mergeInfoWithMaxAC) { if (unsortedVCs == null || unsortedVCs.size() == 0) return null; if (annotateOrigin && priorityListOfVCs == null) throw new IllegalArgumentException( "Cannot merge calls and annotate their origins without a complete priority list of VariantContexts"); if (genotypeMergeOptions == GenotypeMergeType.REQUIRE_UNIQUE) verifyUniqueSampleNames(unsortedVCs); List<VariantContext> prepaddedVCs = sortVariantContextsByPriority(unsortedVCs, priorityListOfVCs, genotypeMergeOptions); // Make sure all variant contexts are padded with reference base in case of indels if necessary List<VariantContext> VCs = new ArrayList<VariantContext>(); for (VariantContext vc : prepaddedVCs) { // also a reasonable place to remove filtered calls, if needed if (!filteredAreUncalled || vc.isNotFiltered()) VCs.add(createVariantContextWithPaddedAlleles(vc, false)); } if (VCs.size() == 0) // everything is filtered out and we're filteredAreUncalled return null; // establish the baseline info from the first VC final VariantContext first = VCs.get(0); final String name = first.getSource(); final Allele refAllele = determineReferenceAllele(VCs); final Set<Allele> alleles = new LinkedHashSet<Allele>(); final Set<String> filters = new TreeSet<String>(); final Map<String, Object> attributes = new TreeMap<String, Object>(); final Set<String> inconsistentAttributes = new HashSet<String>(); final Set<String> variantSources = new HashSet< String>(); // contains the set of sources we found in our set of VCs that are variant final Set<String> rsIDs = new LinkedHashSet<String>(1); // most of the time there's one id GenomeLoc loc = getLocation(genomeLocParser, first); int depth = 0; int maxAC = -1; final Map<String, Object> attributesWithMaxAC = new TreeMap<String, Object>(); double log10PError = 1; VariantContext vcWithMaxAC = null; GenotypesContext genotypes = GenotypesContext.create(); // counting the number of filtered and variant VCs int nFiltered = 0; boolean remapped = false; // cycle through and add info from the other VCs, making sure the loc/reference matches for (VariantContext vc : VCs) { if (loc.getStart() != vc.getStart()) // || !first.getReference().equals(vc.getReference()) ) throw new ReviewedStingException( 
"BUG: attempting to merge VariantContexts with different start sites: first=" + first.toString() + " second=" + vc.toString()); if (getLocation(genomeLocParser, vc).size() > loc.size()) loc = getLocation(genomeLocParser, vc); // get the longest location nFiltered += vc.isFiltered() ? 1 : 0; if (vc.isVariant()) variantSources.add(vc.getSource()); AlleleMapper alleleMapping = resolveIncompatibleAlleles(refAllele, vc, alleles); remapped = remapped || alleleMapping.needsRemapping(); alleles.addAll(alleleMapping.values()); mergeGenotypes( genotypes, vc, alleleMapping, genotypeMergeOptions == GenotypeMergeType.UNIQUIFY); log10PError = Math.min(log10PError, vc.isVariant() ? vc.getLog10PError() : 1); filters.addAll(vc.getFilters()); // // add attributes // // special case DP (add it up) and ID (just preserve it) // if (vc.hasAttribute(VCFConstants.DEPTH_KEY)) depth += vc.getAttributeAsInt(VCFConstants.DEPTH_KEY, 0); if (vc.hasID()) rsIDs.add(vc.getID()); if (mergeInfoWithMaxAC && vc.hasAttribute(VCFConstants.ALLELE_COUNT_KEY)) { String rawAlleleCounts = vc.getAttributeAsString(VCFConstants.ALLELE_COUNT_KEY, null); // lets see if the string contains a , separator if (rawAlleleCounts.contains(VCFConstants.INFO_FIELD_ARRAY_SEPARATOR)) { List<String> alleleCountArray = Arrays.asList( rawAlleleCounts .substring(1, rawAlleleCounts.length() - 1) .split(VCFConstants.INFO_FIELD_ARRAY_SEPARATOR)); for (String alleleCount : alleleCountArray) { final int ac = Integer.valueOf(alleleCount.trim()); if (ac > maxAC) { maxAC = ac; vcWithMaxAC = vc; } } } else { final int ac = Integer.valueOf(rawAlleleCounts); if (ac > maxAC) { maxAC = ac; vcWithMaxAC = vc; } } } for (Map.Entry<String, Object> p : vc.getAttributes().entrySet()) { String key = p.getKey(); // if we don't like the key already, don't go anywhere if (!inconsistentAttributes.contains(key)) { boolean alreadyFound = attributes.containsKey(key); Object boundValue = attributes.get(key); boolean boundIsMissingValue = alreadyFound && boundValue.equals(VCFConstants.MISSING_VALUE_v4); if (alreadyFound && !boundValue.equals(p.getValue()) && !boundIsMissingValue) { // we found the value but we're inconsistent, put it in the exclude list // System.out.printf("Inconsistent INFO values: %s => %s and %s%n", key, boundValue, // p.getValue()); inconsistentAttributes.add(key); attributes.remove(key); } else if (!alreadyFound || boundIsMissingValue) { // no value // if ( vc != first ) System.out.printf("Adding key %s => %s%n", p.getKey(), // p.getValue()); attributes.put(key, p.getValue()); } } } } // if we have more alternate alleles in the merged VC than in one or more of the // original VCs, we need to strip out the GL/PLs (because they are no longer accurate), as well // as allele-dependent attributes like AC,AF for (VariantContext vc : VCs) { if (vc.alleles.size() == 1) continue; if (hasPLIncompatibleAlleles(alleles, vc.alleles)) { if (!genotypes.isEmpty()) logger.warn( String.format( "Stripping PLs at %s due incompatible alleles merged=%s vs. 
single=%s", genomeLocParser.createGenomeLoc(vc), alleles, vc.alleles)); genotypes = stripPLs(genotypes); // this will remove stale AC,AF attributed from vc calculateChromosomeCounts(vc, attributes, true); break; } } // take the VC with the maxAC and pull the attributes into a modifiable map if (mergeInfoWithMaxAC && vcWithMaxAC != null) { attributesWithMaxAC.putAll(vcWithMaxAC.getAttributes()); } // if at least one record was unfiltered and we want a union, clear all of the filters if ((filteredRecordMergeType == FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED && nFiltered != VCs.size()) || filteredRecordMergeType == FilteredRecordMergeType.KEEP_UNCONDITIONAL) filters.clear(); if (annotateOrigin) { // we care about where the call came from String setValue; if (nFiltered == 0 && variantSources.size() == priorityListOfVCs.size()) // nothing was unfiltered setValue = MERGE_INTERSECTION; else if (nFiltered == VCs.size()) // everything was filtered out setValue = MERGE_FILTER_IN_ALL; else if (variantSources.isEmpty()) // everyone was reference setValue = MERGE_REF_IN_ALL; else { LinkedHashSet<String> s = new LinkedHashSet<String>(); for (VariantContext vc : VCs) if (vc.isVariant()) s.add(vc.isFiltered() ? MERGE_FILTER_PREFIX + vc.getSource() : vc.getSource()); setValue = Utils.join("-", s); } if (setKey != null) { attributes.put(setKey, setValue); if (mergeInfoWithMaxAC && vcWithMaxAC != null) { attributesWithMaxAC.put(setKey, vcWithMaxAC.getSource()); } } } if (depth > 0) attributes.put(VCFConstants.DEPTH_KEY, String.valueOf(depth)); final String ID = rsIDs.isEmpty() ? VCFConstants.EMPTY_ID_FIELD : Utils.join(",", rsIDs); final VariantContextBuilder builder = new VariantContextBuilder().source(name).id(ID); builder.loc(loc.getContig(), loc.getStart(), loc.getStop()); builder.alleles(alleles); builder.genotypes(genotypes); builder.log10PError(log10PError); builder.filters(filters).attributes(mergeInfoWithMaxAC ? attributesWithMaxAC : attributes); // Trim the padded bases of all alleles if necessary VariantContext merged = createVariantContextWithTrimmedAlleles(builder.make()); if (printMessages && remapped) System.out.printf("Remapped => %s%n", merged); return merged; }
private GenomeLoc createIntervalBefore(GenomeLoc interval) { int start = Math.max(interval.getStart() - expandInterval, 0); int stop = Math.max(interval.getStart() - 1, 0); return parser.createGenomeLoc(interval.getContig(), interval.getContigIndex(), start, stop); }