@Override
  public Long map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
    GenomeLoc refLocus = ref.getLocus();
    while (currentInterval == null || currentInterval.isBefore(refLocus)) {
      if (!intervalListIterator.hasNext()) return 0L;

      currentInterval = intervalListIterator.next();
      currentIntervalStatistics = intervalMap.get(currentInterval);
    }

    if (currentInterval.isPast(refLocus)) return 0L;

    byte[] mappingQualities = context.getBasePileup().getMappingQuals();
    byte[] baseQualities = context.getBasePileup().getQuals();
    int coverage =
        context
            .getBasePileup()
            .getBaseAndMappingFilteredPileup(minimumBaseQuality, minimumMappingQuality)
            .depthOfCoverage();
    int rawCoverage = context.size();

    IntervalStatisticLocus locusData =
        new IntervalStatisticLocus(mappingQualities, baseQualities, coverage, rawCoverage);
    currentIntervalStatistics.addLocus(refLocus, locusData);

    return 1L;
  }
예제 #2
0
 /**
  * Determine if the given loc overlaps any loc in the sorted set
  *
  * @param loc the location to test
  * @return
  */
 public boolean overlaps(final GenomeLoc loc) {
   for (final GenomeLoc e : mArray) {
     if (e.overlapsP(loc)) {
       return true;
     }
   }
   return false;
 }
예제 #3
0
 /**
  * return a deep copy of this collection.
  *
  * @return a new GenomeLocSortedSet, identical to the current GenomeLocSortedSet.
  */
 public GenomeLocSortedSet clone() {
   GenomeLocSortedSet ret = new GenomeLocSortedSet(genomeLocParser);
   for (GenomeLoc loc : this.mArray) {
     // ensure a deep copy
     ret.mArray.add(
         genomeLocParser.createGenomeLoc(loc.getContig(), loc.getStart(), loc.getStop()));
   }
   return ret;
 }
예제 #4
0
  public String toString() {
    StringBuilder s = new StringBuilder();
    s.append("[");
    for (GenomeLoc e : this) {
      s.append(" ");
      s.append(e.toString());
    }
    s.append("]");

    return s.toString();
  }
 private GenomeLoc createIntervalAfter(GenomeLoc interval) {
   int contigLimit =
       getToolkit()
           .getSAMFileHeader()
           .getSequenceDictionary()
           .getSequence(interval.getContigIndex())
           .getSequenceLength();
   int start = Math.min(interval.getStop() + 1, contigLimit);
   int stop = Math.min(interval.getStop() + expandInterval, contigLimit);
   return parser.createGenomeLoc(interval.getContig(), interval.getContigIndex(), start, stop);
 }
예제 #6
0
 @Test
 public void deleteSomeByRegion() {
   GenomeLoc e = genomeLocParser.createGenomeLoc(contigOneName, 1, 100);
   mSortedSet.add(e);
   for (int x = 1; x < 50; x++) {
     GenomeLoc del = genomeLocParser.createGenomeLoc(contigOneName, x, x);
     mSortedSet = mSortedSet.subtractRegions(new GenomeLocSortedSet(genomeLocParser, del));
   }
   assertTrue(!mSortedSet.isEmpty());
   assertTrue(mSortedSet.size() == 1);
   GenomeLoc loc = mSortedSet.iterator().next();
   assertTrue(loc.getStop() == 100);
   assertTrue(loc.getStart() == 50);
 }
예제 #7
0
  /**
   * Test the reads according to an independently derived context.
   *
   * @param view
   * @param range
   * @param reads
   */
  @Override
  protected void testReadsInContext(
      LocusView view, List<GenomeLoc> range, List<GATKSAMRecord> reads) {
    AllLocusView allLocusView = (AllLocusView) view;

    // TODO: Should skip over loci not in the given range.
    GenomeLoc firstLoc = range.get(0);
    GenomeLoc lastLoc = range.get(range.size() - 1);
    GenomeLoc bounds =
        genomeLocParser.createGenomeLoc(
            firstLoc.getContig(), firstLoc.getStart(), lastLoc.getStop());

    for (int i = bounds.getStart(); i <= bounds.getStop(); i++) {
      GenomeLoc site = genomeLocParser.createGenomeLoc("chr1", i);
      AlignmentContext locusContext = allLocusView.next();
      Assert.assertEquals(locusContext.getLocation(), site, "Locus context location is incorrect");
      int expectedReadsAtSite = 0;

      for (GATKSAMRecord read : reads) {
        if (genomeLocParser.createGenomeLoc(read).containsP(locusContext.getLocation())) {
          Assert.assertTrue(
              locusContext.getReads().contains(read),
              "Target locus context does not contain reads");
          expectedReadsAtSite++;
        }
      }

      Assert.assertEquals(
          locusContext.getReads().size(),
          expectedReadsAtSite,
          "Found wrong number of reads at site");
    }
  }
예제 #8
0
 @Test
 public void mergingOverlappingAbove() {
   GenomeLoc e = genomeLocParser.createGenomeLoc(contigOneName, 0, 50);
   GenomeLoc g = genomeLocParser.createGenomeLoc(contigOneName, 49, 100);
   assertTrue(mSortedSet.size() == 0);
   mSortedSet.add(g);
   assertTrue(mSortedSet.size() == 1);
   mSortedSet.addRegion(e);
   assertTrue(mSortedSet.size() == 1);
   Iterator<GenomeLoc> iter = mSortedSet.iterator();
   GenomeLoc loc = iter.next();
   assertEquals(loc.getStart(), 0);
   assertEquals(loc.getStop(), 100);
   assertEquals(loc.getContigIndex(), 1);
 }
예제 #9
0
 @Test
 public void fromSequenceDictionary() {
   mSortedSet =
       GenomeLocSortedSet.createSetFromSequenceDictionary(this.header.getSequenceDictionary());
   // we should have sequence
   assertTrue(mSortedSet.size() == GenomeLocSortedSetUnitTest.NUMBER_OF_CHROMOSOMES);
   int seqNumber = 0;
   for (GenomeLoc loc : mSortedSet) {
     assertTrue(loc.getStart() == 1);
     assertTrue(loc.getStop() == GenomeLocSortedSetUnitTest.CHROMOSOME_SIZE);
     assertTrue(loc.getContigIndex() == seqNumber);
     ++seqNumber;
   }
   assertTrue(seqNumber == GenomeLocSortedSetUnitTest.NUMBER_OF_CHROMOSOMES);
 }
예제 #10
0
  private boolean overlapsKnownCNV(VariantContext cnv) {
    if (knownCNVs != null) {
      final GenomeLoc loc =
          getWalker().getToolkit().getGenomeLocParser().createGenomeLoc(cnv, true);
      IntervalTree<GenomeLoc> intervalTree = knownCNVs.get(loc.getContig());

      final Iterator<IntervalTree.Node<GenomeLoc>> nodeIt =
          intervalTree.overlappers(loc.getStart(), loc.getStop());
      while (nodeIt.hasNext()) {
        final double overlapP = loc.reciprocialOverlapFraction(nodeIt.next().getValue());
        if (overlapP > MIN_CNV_OVERLAP) return true;
      }
    }

    return false;
  }
예제 #11
0
  /**
   * Utility routine that prints out process information (including timing) every N records or every
   * M seconds, for N and M set in global variables.
   *
   * <p>Synchronized to ensure that even with multiple threads calling notifyOfProgress we still get
   * one clean stream of meter logs.
   *
   * <p>Note this thread doesn't actually print progress, unless must print is true, but just
   * registers the progress itself. A separate printing daemon periodically polls the meter to print
   * out progress
   *
   * @param loc Current location, can be null if you are at the end of the processing unit
   * @param nTotalRecordsProcessed the total number of records we've processed
   */
  public synchronized void notifyOfProgress(
      final GenomeLoc loc, final long nTotalRecordsProcessed) {
    if (nTotalRecordsProcessed < 0)
      throw new IllegalArgumentException("nTotalRecordsProcessed must be >= 0");

    // weird comparison to ensure that loc == null (in unmapped reads) is keep before maxGenomeLoc
    // == null (on startup)
    this.maxGenomeLoc = loc == null ? loc : (maxGenomeLoc == null ? loc : loc.max(maxGenomeLoc));
    this.nTotalRecordsProcessed = Math.max(this.nTotalRecordsProcessed, nTotalRecordsProcessed);

    // a pretty name for our position
    this.positionMessage =
        maxGenomeLoc == null
            ? "unmapped reads"
            : String.format("%s:%d", maxGenomeLoc.getContig(), maxGenomeLoc.getStart());
  }
예제 #12
0
파일: GenomeLoc.java 프로젝트: nh13/gatk
 @Ensures("result != null")
 public final String toString() {
   if (GenomeLoc.isUnmapped(this)) return "unmapped";
   if (throughEndOfContigP() && atBeginningOfContigP()) return getContig();
   else if (throughEndOfContigP() || getStart() == getStop())
     return String.format("%s:%d", getContig(), getStart());
   else return String.format("%s:%d-%d", getContig(), getStart(), getStop());
 }
예제 #13
0
 /**
  * add a genomeLoc to the collection, simply inserting in order into the set
  *
  * @param e the GenomeLoc to add
  * @return true
  */
 public boolean add(GenomeLoc e) {
   // assuming that the intervals coming arrive in order saves us a fair amount of time (and it's
   // most likely true)
   if (mArray.size() > 0 && e.isPast(mArray.get(mArray.size() - 1))) {
     mArray.add(e);
     return true;
   } else {
     int loc = Collections.binarySearch(mArray, e);
     if (loc >= 0) {
       throw new ReviewedStingException(
           "Genome Loc Sorted Set already contains the GenomicLoc " + e.toString());
     } else {
       mArray.add((loc + 1) * -1, e);
       return true;
     }
   }
 }
  public final Map<String, IntervalTree<GenomeLoc>> createIntervalTreeByContig(
      final IntervalBinding<Feature> intervals) {
    final Map<String, IntervalTree<GenomeLoc>> byContig =
        new HashMap<String, IntervalTree<GenomeLoc>>();

    final List<GenomeLoc> locs = intervals.getIntervals(getToolkit());

    // set up the map from contig -> interval tree
    for (final String contig : getContigNames())
      byContig.put(contig, new IntervalTree<GenomeLoc>());

    for (final GenomeLoc loc : locs) {
      byContig.get(loc.getContig()).put(loc.getStart(), loc.getStop(), loc);
    }

    return byContig;
  }
예제 #15
0
파일: GenomeLoc.java 프로젝트: nh13/gatk
  /**
   * Returns a new GenomeLoc that represents the region between the endpoints of this and that.
   * Requires that this and that GenomeLoc are both mapped.
   */
  @Requires({"that != null", "isUnmapped(this) == isUnmapped(that)"})
  @Ensures("result != null")
  public GenomeLoc endpointSpan(GenomeLoc that) throws ReviewedStingException {
    if (GenomeLoc.isUnmapped(this) || GenomeLoc.isUnmapped(that)) {
      throw new ReviewedStingException("Cannot get endpoint span for unmerged genome locs");
    }

    if (!this.getContig().equals(that.getContig())) {
      throw new ReviewedStingException(
          "Cannot get endpoint span for genome locs on different contigs");
    }

    return new GenomeLoc(
        getContig(),
        this.contigIndex,
        Math.min(getStart(), that.getStart()),
        Math.max(getStop(), that.getStop()));
  }
예제 #16
0
  public GenomeLocSortedSet subtractRegions(GenomeLocSortedSet toRemoveSet) {
    LinkedList<GenomeLoc> good = new LinkedList<GenomeLoc>();
    Stack<GenomeLoc> toProcess = new Stack<GenomeLoc>();
    Stack<GenomeLoc> toExclude = new Stack<GenomeLoc>();

    // initialize the stacks
    toProcess.addAll(mArray);
    Collections.reverse(toProcess);
    toExclude.addAll(toRemoveSet.mArray);
    Collections.reverse(toExclude);

    int i = 0;
    while (!toProcess.empty()) { // while there's still stuff to process
      if (toExclude.empty()) {
        good.addAll(toProcess); // no more excludes, all the processing stuff is good
        break;
      }

      GenomeLoc p = toProcess.peek();
      GenomeLoc e = toExclude.peek();

      if (p.overlapsP(e)) {
        toProcess.pop();
        for (GenomeLoc newP : p.subtract(e)) toProcess.push(newP);
      } else if (p.compareContigs(e) < 0) {
        good.add(toProcess.pop()); // p is now good
      } else if (p.compareContigs(e) > 0) {
        toExclude.pop(); // e can't effect anything
      } else if (p.getStop() < e.getStart()) {
        good.add(toProcess.pop()); // p stops before e starts, p is good
      } else if (e.getStop() < p.getStart()) {
        toExclude.pop(); // p starts after e stops, e is done
      } else {
        throw new ReviewedStingException("BUG: unexpected condition: p=" + p + ", e=" + e);
      }

      if (i++ % 10000 == 0) logger.debug("removeRegions operation: i = " + i);
    }

    return createSetFromList(genomeLocParser, good);
  }
예제 #17
0
  private T callWalkerMapOnActiveRegions(
      final ActiveRegionWalker<M, T> walker,
      T sum,
      final int minStart,
      final String currentContig) {
    // Since we've traversed sufficiently past this point (or this contig!) in the workQueue we can
    // unload those regions and process them
    // TODO can implement parallel traversal here
    while (workQueue.peek() != null) {
      final GenomeLoc extendedLoc = workQueue.peek().getExtendedLoc();
      if (extendedLoc.getStop() < minStart
          || (currentContig != null
              && !workQueue.peek().getExtendedLoc().getContig().equals(currentContig))) {
        final ActiveRegion activeRegion = workQueue.remove();
        sum = processActiveRegion(activeRegion, myReads, workQueue, sum, walker);
      } else {
        break;
      }
    }

    return sum;
  }
예제 #18
0
파일: GenomeLoc.java 프로젝트: nh13/gatk
  @Requires("that != null")
  @Ensures("result == 0 || result == 1 || result == -1")
  public int compareTo(GenomeLoc that) {
    int result = 0;

    if (this == that) {
      result = 0;
    } else if (GenomeLoc.isUnmapped(this)) result = 1;
    else if (GenomeLoc.isUnmapped(that)) result = -1;
    else {
      final int cmpContig = compareContigs(that);

      if (cmpContig != 0) {
        result = cmpContig;
      } else {
        if (this.getStart() < that.getStart()) result = -1;
        if (this.getStart() > that.getStart()) result = 1;
      }
    }

    return result;
  }
예제 #19
0
파일: GenomeLoc.java 프로젝트: nh13/gatk
  /** Return the minimum distance between any pair of bases in this and that GenomeLocs: */
  @Requires("that != null")
  @Ensures("result >= 0")
  public final int minDistance(final GenomeLoc that) {
    if (!this.onSameContig(that)) return Integer.MAX_VALUE;

    int minDistance;
    if (this.isBefore(that)) minDistance = distanceFirstStopToSecondStart(this, that);
    else if (that.isBefore(this)) minDistance = distanceFirstStopToSecondStart(that, this);
    else // this and that overlap [and possibly one contains the other]:
    minDistance = 0;

    return minDistance;
  }
예제 #20
0
  /**
   * Determines what is the position of the read in relation to the interval. Note: This function
   * uses the UNCLIPPED ENDS of the reads for the comparison.
   *
   * @param read the read
   * @param interval the interval
   * @return the overlap type as described by ReadAndIntervalOverlap enum (see above)
   */
  public static ReadAndIntervalOverlap getReadAndIntervalOverlapType(
      GATKSAMRecord read, GenomeLoc interval) {

    int sStart = read.getSoftStart();
    int sStop = read.getSoftEnd();
    int uStart = read.getUnclippedStart();
    int uStop = read.getUnclippedEnd();

    if (!read.getReferenceName().equals(interval.getContig()))
      return ReadAndIntervalOverlap.NO_OVERLAP_CONTIG;
    else if (uStop < interval.getStart()) return ReadAndIntervalOverlap.NO_OVERLAP_LEFT;
    else if (uStart > interval.getStop()) return ReadAndIntervalOverlap.NO_OVERLAP_RIGHT;
    else if (sStop < interval.getStart()) return ReadAndIntervalOverlap.NO_OVERLAP_HARDCLIPPED_LEFT;
    else if (sStart > interval.getStop())
      return ReadAndIntervalOverlap.NO_OVERLAP_HARDCLIPPED_RIGHT;
    else if ((sStart >= interval.getStart()) && (sStop <= interval.getStop()))
      return ReadAndIntervalOverlap.OVERLAP_CONTAINED;
    else if ((sStart < interval.getStart()) && (sStop > interval.getStop()))
      return ReadAndIntervalOverlap.OVERLAP_LEFT_AND_RIGHT;
    else if ((sStart < interval.getStart())) return ReadAndIntervalOverlap.OVERLAP_LEFT;
    else return ReadAndIntervalOverlap.OVERLAP_RIGHT;
  }
예제 #21
0
파일: PhasingUtils.java 프로젝트: nh13/gatk
  static boolean mergeIntoMNPvalidationCheck(
      GenomeLocParser genomeLocParser, VariantContext vc1, VariantContext vc2) {
    GenomeLoc loc1 = VariantContextUtils.getLocation(genomeLocParser, vc1);
    GenomeLoc loc2 = VariantContextUtils.getLocation(genomeLocParser, vc2);

    if (!loc1.onSameContig(loc2))
      throw new ReviewedStingException("Can only merge vc1, vc2 if on the same chromosome");

    if (!loc1.isBefore(loc2))
      throw new ReviewedStingException("Can only merge if vc1 is BEFORE vc2");

    if (vc1.isFiltered() || vc2.isFiltered()) return false;

    if (!vc1.getSampleNames()
        .equals(vc2.getSampleNames())) // vc1, vc2 refer to different sample sets
    return false;

    if (!allGenotypesAreUnfilteredAndCalled(vc1) || !allGenotypesAreUnfilteredAndCalled(vc2))
      return false;

    return true;
  }
예제 #22
0
  /**
   * Return the number of bps before loc in the sorted set
   *
   * @param loc the location before which we are counting bases
   * @return
   */
  public long sizeBeforeLoc(GenomeLoc loc) {
    long s = 0;

    for (GenomeLoc e : this) {
      if (e.isBefore(loc)) s += e.size();
      else if (e.isPast(loc)) ; // don't do anything
      else // loc is inside of s
      s += loc.getStart() - e.getStart();
    }

    return s;
  }
예제 #23
0
  @Test
  public void deleteSuperRegion() {
    GenomeLoc e = genomeLocParser.createGenomeLoc(contigOneName, 10, 20);
    GenomeLoc g = genomeLocParser.createGenomeLoc(contigOneName, 70, 100);
    mSortedSet.add(g);
    mSortedSet.addRegion(e);
    assertTrue(mSortedSet.size() == 2);
    // now delete a region
    GenomeLoc d = genomeLocParser.createGenomeLoc(contigOneName, 15, 75);
    mSortedSet = mSortedSet.subtractRegions(new GenomeLocSortedSet(genomeLocParser, d));
    Iterator<GenomeLoc> iter = mSortedSet.iterator();
    GenomeLoc loc = iter.next();
    assertTrue(loc.getStart() == 10);
    assertTrue(loc.getStop() == 14);
    assertTrue(loc.getContigIndex() == 1);

    loc = iter.next();
    assertTrue(loc.getStart() == 76);
    assertTrue(loc.getStop() == 100);
    assertTrue(loc.getContigIndex() == 1);
  }
예제 #24
0
 /**
  * Fully qualified constructor: instantiates a new GATKFeatureRecordList object with specified
  * GATKFeature track name, location on the reference, and list of associated GATKFeatures. This is
  * a knee-deep COPY constructor: passed name, loc, and data element objects will be referenced
  * from the created GATKFeatureRecordList (so that changing them from outside will affect data in
  * this object), however, the data elements will be copied into a newly allocated list, so that
  * the 'data' collection argument can be modified afterwards without affecting the state of this
  * record list. WARNING: this constructor is (semi-)validating: passed name and location are
  * allowed to be nulls (although it maybe unsafe, use caution), but if they are not nulls, then
  * passed non-null GATKFeature data elements must have same track name, and their locations must
  * overlap with the passed 'location' argument. Null data elements or null 'data' collection
  * argument are allowed as well.
  *
  * @param name the name of the track
  * @param data the collection of features at this location
  * @param loc the location
  */
 public RODRecordListImpl(String name, Collection<GATKFeature> data, GenomeLoc loc) {
   this.records = new ArrayList<GATKFeature>(data == null ? 0 : data.size());
   this.name = name;
   this.location = loc;
   if (data == null || data.size() == 0) return; // empty dataset, nothing to do
   for (GATKFeature r : data) {
     records.add(r);
     if (r == null) continue;
     if (!this.name.equals(r.getName())) {
       throw new ReviewedStingException(
           "Attempt to add GATKFeature with non-matching name "
               + r.getName()
               + " to the track "
               + name);
     }
     if (location != null && !location.overlapsP(r.getLocation())) {
       throw new ReviewedStingException(
           "Attempt to add GATKFeature that lies outside of specified interval "
               + location
               + "; offending GATKFeature:\n"
               + r.toString());
     }
   }
 }
예제 #25
0
파일: GenomeLoc.java 프로젝트: nh13/gatk
  /**
   * Returns a new GenomeLoc that represents the entire span of this and that. Requires that this
   * and that GenomeLoc are contiguous and both mapped
   */
  @Requires({"that != null", "isUnmapped(this) == isUnmapped(that)"})
  @Ensures("result != null")
  public GenomeLoc merge(GenomeLoc that) throws ReviewedStingException {
    if (GenomeLoc.isUnmapped(this) || GenomeLoc.isUnmapped(that)) {
      if (!GenomeLoc.isUnmapped(this) || !GenomeLoc.isUnmapped(that))
        throw new ReviewedStingException("Tried to merge a mapped and an unmapped genome loc");
      return UNMAPPED;
    }

    if (!(this.contiguousP(that))) {
      throw new ReviewedStingException("The two genome loc's need to be contigous");
    }

    return new GenomeLoc(
        getContig(),
        this.contigIndex,
        Math.min(getStart(), that.getStart()),
        Math.max(getStop(), that.getStop()));
  }
예제 #26
0
파일: GenomeLoc.java 프로젝트: nh13/gatk
  @Requires("that != null")
  @Ensures("result != null")
  public GenomeLoc intersect(GenomeLoc that) throws ReviewedStingException {
    if (GenomeLoc.isUnmapped(this) || GenomeLoc.isUnmapped(that)) {
      if (!GenomeLoc.isUnmapped(this) || !GenomeLoc.isUnmapped(that))
        throw new ReviewedStingException("Tried to intersect a mapped and an unmapped genome loc");
      return UNMAPPED;
    }

    if (!(this.overlapsP(that))) {
      throw new ReviewedStingException(
          "GenomeLoc::intersect(): The two genome loc's need to overlap");
    }

    return new GenomeLoc(
        getContig(),
        this.contigIndex,
        Math.max(getStart(), that.getStart()),
        Math.min(getStop(), that.getStop()));
  }
예제 #27
0
  public Iterable<Shard> createShardsOverIntervals(
      final SAMDataSource readsDataSource,
      final GenomeLocSortedSet intervals,
      final int maxShardSize) {
    List<Shard> shards = new ArrayList<Shard>();

    for (GenomeLoc interval : intervals) {
      while (interval.size() > maxShardSize) {
        shards.add(
            new LocusShard(
                intervals.getGenomeLocParser(),
                readsDataSource,
                Collections.singletonList(
                    intervals
                        .getGenomeLocParser()
                        .createGenomeLoc(
                            interval.getContig(),
                            interval.getStart(),
                            interval.getStart() + maxShardSize - 1)),
                null));
        interval =
            intervals
                .getGenomeLocParser()
                .createGenomeLoc(
                    interval.getContig(), interval.getStart() + maxShardSize, interval.getStop());
      }
      shards.add(
          new LocusShard(
              intervals.getGenomeLocParser(),
              readsDataSource,
              Collections.singletonList(interval),
              null));
    }

    return shards;
  }
  private ArrayList<Allele> computeConsensusAlleles(
      ReferenceContext ref,
      Map<String, AlignmentContext> contexts,
      AlignmentContextUtils.ReadOrientation contextType) {
    Allele refAllele = null, altAllele = null;
    GenomeLoc loc = ref.getLocus();
    ArrayList<Allele> aList = new ArrayList<Allele>();

    HashMap<String, Integer> consensusIndelStrings = new HashMap<String, Integer>();

    int insCount = 0, delCount = 0;
    // quick check of total number of indels in pileup
    for (Map.Entry<String, AlignmentContext> sample : contexts.entrySet()) {
      AlignmentContext context = AlignmentContextUtils.stratify(sample.getValue(), contextType);

      final ReadBackedExtendedEventPileup indelPileup = context.getExtendedEventPileup();
      insCount += indelPileup.getNumberOfInsertions();
      delCount += indelPileup.getNumberOfDeletions();
    }

    if (insCount < minIndelCountForGenotyping && delCount < minIndelCountForGenotyping)
      return aList;

    for (Map.Entry<String, AlignmentContext> sample : contexts.entrySet()) {
      // todo -- warning, can be duplicating expensive partition here
      AlignmentContext context = AlignmentContextUtils.stratify(sample.getValue(), contextType);

      final ReadBackedExtendedEventPileup indelPileup = context.getExtendedEventPileup();

      for (ExtendedEventPileupElement p : indelPileup.toExtendedIterable()) {
        // SAMRecord read = p.getRead();
        GATKSAMRecord read = ReadUtils.hardClipAdaptorSequence(p.getRead());
        if (read == null) continue;
        if (ReadUtils.is454Read(read)) {
          continue;
        }

        /*                if (DEBUG && p.isIndel()) {
                         System.out.format("Read: %s, cigar: %s, aln start: %d, aln end: %d, p.len:%d, Type:%s, EventBases:%s\n",
                                 read.getReadName(),read.getCigar().toString(),read.getAlignmentStart(),read.getAlignmentEnd(),
                                 p.getEventLength(),p.getType().toString(), p.getEventBases());
                     }
        */

        String indelString = p.getEventBases();
        if (p.isInsertion()) {
          boolean foundKey = false;
          if (read.getAlignmentEnd() == loc.getStart()) {
            // first corner condition: a read has an insertion at the end, and we're right at the
            // insertion.
            // In this case, the read could have any of the inserted bases and we need to build a
            // consensus
            for (String s : consensusIndelStrings.keySet()) {
              int cnt = consensusIndelStrings.get(s);
              if (s.startsWith(indelString)) {
                // case 1: current insertion is prefix of indel in hash map
                consensusIndelStrings.put(s, cnt + 1);
                foundKey = true;
                break;
              } else if (indelString.startsWith(s)) {
                // case 2: indel stored in hash table is prefix of current insertion
                // In this case, new bases are new key.
                consensusIndelStrings.remove(s);
                consensusIndelStrings.put(indelString, cnt + 1);
                foundKey = true;
                break;
              }
            }
            if (!foundKey)
              // none of the above: event bases not supported by previous table, so add new key
              consensusIndelStrings.put(indelString, 1);

          } else if (read.getAlignmentStart() == loc.getStart() + 1) {
            // opposite corner condition: read will start at current locus with an insertion
            for (String s : consensusIndelStrings.keySet()) {
              int cnt = consensusIndelStrings.get(s);
              if (s.endsWith(indelString)) {
                // case 1: current insertion is suffix of indel in hash map
                consensusIndelStrings.put(s, cnt + 1);
                foundKey = true;
                break;
              } else if (indelString.endsWith(s)) {
                // case 2: indel stored in hash table is suffix of current insertion
                // In this case, new bases are new key.

                consensusIndelStrings.remove(s);
                consensusIndelStrings.put(indelString, cnt + 1);
                foundKey = true;
                break;
              }
            }
            if (!foundKey)
              // none of the above: event bases not supported by previous table, so add new key
              consensusIndelStrings.put(indelString, 1);

          } else {
            // normal case: insertion somewhere in the middle of a read: add count to hash map
            int cnt =
                consensusIndelStrings.containsKey(indelString)
                    ? consensusIndelStrings.get(indelString)
                    : 0;
            consensusIndelStrings.put(indelString, cnt + 1);
          }

        } else if (p.isDeletion()) {
          indelString = String.format("D%d", p.getEventLength());
          int cnt =
              consensusIndelStrings.containsKey(indelString)
                  ? consensusIndelStrings.get(indelString)
                  : 0;
          consensusIndelStrings.put(indelString, cnt + 1);
        }
      }

      /*            if (DEBUG) {
          int icount = indelPileup.getNumberOfInsertions();
          int dcount = indelPileup.getNumberOfDeletions();
          if (icount + dcount > 0)
          {
              List<Pair<String,Integer>> eventStrings = indelPileup.getEventStringsWithCounts(ref.getBases());
              System.out.format("#ins: %d, #del:%d\n", insCount, delCount);

              for (int i=0 ; i < eventStrings.size() ; i++ ) {
                  System.out.format("%s:%d,",eventStrings.get(i).first,eventStrings.get(i).second);
                  //                int k=0;
              }
              System.out.println();
          }
      }             */
    }

    int maxAlleleCnt = 0;
    String bestAltAllele = "";
    for (String s : consensusIndelStrings.keySet()) {
      int curCnt = consensusIndelStrings.get(s);
      if (curCnt > maxAlleleCnt) {
        maxAlleleCnt = curCnt;
        bestAltAllele = s;
      }
      //            if (DEBUG)
      //                System.out.format("Key:%s, number: %d\n",s,consensusIndelStrings.get(s)  );
    } // gdebug-

    if (maxAlleleCnt < minIndelCountForGenotyping) return aList;

    if (bestAltAllele.startsWith("D")) {
      // get deletion length
      int dLen = Integer.valueOf(bestAltAllele.substring(1));
      // get ref bases of accurate deletion
      int startIdxInReference = (int) (1 + loc.getStart() - ref.getWindow().getStart());

      // System.out.println(new String(ref.getBases()));
      byte[] refBases =
          Arrays.copyOfRange(ref.getBases(), startIdxInReference, startIdxInReference + dLen);

      if (Allele.acceptableAlleleBases(refBases)) {
        refAllele = Allele.create(refBases, true);
        altAllele = Allele.create(Allele.NULL_ALLELE_STRING, false);
      }
    } else {
      // insertion case
      if (Allele.acceptableAlleleBases(bestAltAllele)) {
        refAllele = Allele.create(Allele.NULL_ALLELE_STRING, true);
        altAllele = Allele.create(bestAltAllele, false);
      }
    }
    if (refAllele != null && altAllele != null) {
      aList.add(0, refAllele);
      aList.add(1, altAllele);
    }
    return aList;
  }
  public Allele getLikelihoods(
      RefMetaDataTracker tracker,
      ReferenceContext ref,
      Map<String, AlignmentContext> contexts,
      AlignmentContextUtils.ReadOrientation contextType,
      GenotypePriors priors,
      Map<String, MultiallelicGenotypeLikelihoods> GLs,
      Allele alternateAlleleToUse,
      boolean useBAQedPileup) {

    if (tracker == null) return null;

    GenomeLoc loc = ref.getLocus();
    Allele refAllele, altAllele;
    VariantContext vc = null;

    if (!ref.getLocus().equals(lastSiteVisited)) {
      // starting a new site: clear allele list
      alleleList.clear();
      lastSiteVisited = ref.getLocus();
      indelLikelihoodMap.set(new HashMap<PileupElement, LinkedHashMap<Allele, Double>>());
      haplotypeMap.clear();

      if (getAlleleListFromVCF) {
        for (final VariantContext vc_input : tracker.getValues(UAC.alleles, loc)) {
          if (vc_input != null
              && allowableTypes.contains(vc_input.getType())
              && ref.getLocus().getStart() == vc_input.getStart()) {
            vc = vc_input;
            break;
          }
        }
        // ignore places where we don't have a variant
        if (vc == null) return null;

        alleleList.clear();
        if (ignoreSNPAllelesWhenGenotypingIndels) {
          // if there's an allele that has same length as the reference (i.e. a SNP or MNP), ignore
          // it and don't genotype it
          for (Allele a : vc.getAlleles())
            if (a.isNonReference() && a.getBases().length == vc.getReference().getBases().length)
              continue;
            else alleleList.add(a);

        } else {
          for (Allele a : vc.getAlleles()) alleleList.add(a);
        }

      } else {
        alleleList = computeConsensusAlleles(ref, contexts, contextType);
        if (alleleList.isEmpty()) return null;
      }
    }
    // protect against having an indel too close to the edge of a contig
    if (loc.getStart() <= HAPLOTYPE_SIZE) return null;

    // check if there is enough reference window to create haplotypes (can be an issue at end of
    // contigs)
    if (ref.getWindow().getStop() < loc.getStop() + HAPLOTYPE_SIZE) return null;
    if (!(priors instanceof DiploidIndelGenotypePriors))
      throw new StingException(
          "Only diploid-based Indel priors are supported in the DINDEL GL model");

    if (alleleList.isEmpty()) return null;

    refAllele = alleleList.get(0);
    altAllele = alleleList.get(1);

    // look for alt allele that has biggest length distance to ref allele
    int maxLenDiff = 0;
    for (Allele a : alleleList) {
      if (a.isNonReference()) {
        int lenDiff = Math.abs(a.getBaseString().length() - refAllele.getBaseString().length());
        if (lenDiff > maxLenDiff) {
          maxLenDiff = lenDiff;
          altAllele = a;
        }
      }
    }

    final int eventLength = altAllele.getBaseString().length() - refAllele.getBaseString().length();
    final int hsize = (int) ref.getWindow().size() - Math.abs(eventLength) - 1;
    final int numPrefBases = ref.getLocus().getStart() - ref.getWindow().getStart() + 1;

    haplotypeMap =
        Haplotype.makeHaplotypeListFromAlleles(
            alleleList, loc.getStart(), ref, hsize, numPrefBases);

    // For each sample, get genotype likelihoods based on pileup
    // compute prior likelihoods on haplotypes, and initialize haplotype likelihood matrix with
    // them.
    // initialize the GenotypeLikelihoods
    GLs.clear();

    for (Map.Entry<String, AlignmentContext> sample : contexts.entrySet()) {
      AlignmentContext context = AlignmentContextUtils.stratify(sample.getValue(), contextType);

      ReadBackedPileup pileup = null;
      if (context.hasExtendedEventPileup()) pileup = context.getExtendedEventPileup();
      else if (context.hasBasePileup()) pileup = context.getBasePileup();

      if (pileup != null) {
        final double[] genotypeLikelihoods =
            pairModel.computeReadHaplotypeLikelihoods(
                pileup, haplotypeMap, ref, eventLength, getIndelLikelihoodMap());

        GLs.put(
            sample.getKey(),
            new MultiallelicGenotypeLikelihoods(
                sample.getKey(), alleleList, genotypeLikelihoods, getFilteredDepth(pileup)));

        if (DEBUG) {
          System.out.format("Sample:%s Alleles:%s GL:", sample.getKey(), alleleList.toString());
          for (int k = 0; k < genotypeLikelihoods.length; k++)
            System.out.format("%1.4f ", genotypeLikelihoods[k]);
          System.out.println();
        }
      }
    }

    return refAllele;
  }
예제 #30
0
  /**
   * Read in a list of ExactCall objects from reader, keeping only those with starts in startsToKeep
   * or all sites (if this is empty)
   *
   * @param reader a just-opened reader sitting at the start of the file
   * @param startsToKeep a list of start position of the calls to keep, or empty if all calls should
   *     be kept
   * @param parser a genome loc parser to create genome locs
   * @return a list of ExactCall objects in reader
   * @throws IOException
   */
  public static List<ExactCall> readExactLog(
      final BufferedReader reader, final List<Integer> startsToKeep, GenomeLocParser parser)
      throws IOException {
    if (reader == null) throw new IllegalArgumentException("reader cannot be null");
    if (startsToKeep == null) throw new IllegalArgumentException("startsToKeep cannot be null");
    if (parser == null) throw new IllegalArgumentException("GenomeLocParser cannot be null");

    List<ExactCall> calls = new LinkedList<ExactCall>();

    // skip the header line
    reader.readLine();

    // skip the first "type" line
    reader.readLine();

    while (true) {
      final VariantContextBuilder builder = new VariantContextBuilder();
      final List<Allele> alleles = new ArrayList<Allele>();
      final List<Genotype> genotypes = new ArrayList<Genotype>();
      final double[] posteriors = new double[2];
      final double[] priors = MathUtils.normalizeFromLog10(new double[] {0.5, 0.5}, true);
      final List<Integer> mle = new ArrayList<Integer>();
      final Map<Allele, Double> log10pNonRefByAllele = new HashMap<Allele, Double>();
      long runtimeNano = -1;

      GenomeLoc currentLoc = null;
      while (true) {
        final String line = reader.readLine();
        if (line == null) return calls;

        final String[] parts = line.split("\t");
        final GenomeLoc lineLoc = parser.parseGenomeLoc(parts[0]);
        final String variable = parts[1];
        final String key = parts[2];
        final String value = parts[3];

        if (currentLoc == null) currentLoc = lineLoc;

        if (variable.equals("type")) {
          if (startsToKeep.isEmpty() || startsToKeep.contains(currentLoc.getStart())) {
            builder.alleles(alleles);
            final int stop = currentLoc.getStart() + alleles.get(0).length() - 1;
            builder.chr(currentLoc.getContig()).start(currentLoc.getStart()).stop(stop);
            builder.genotypes(genotypes);
            final int[] mleInts = ArrayUtils.toPrimitive(mle.toArray(new Integer[] {}));
            final AFCalcResult result =
                new AFCalcResult(mleInts, 1, alleles, posteriors, priors, log10pNonRefByAllele);
            calls.add(new ExactCall(builder.make(), runtimeNano, result));
          }
          break;
        } else if (variable.equals("allele")) {
          final boolean isRef = key.equals("0");
          alleles.add(Allele.create(value, isRef));
        } else if (variable.equals("PL")) {
          final GenotypeBuilder gb = new GenotypeBuilder(key);
          gb.PL(GenotypeLikelihoods.fromPLField(value).getAsPLs());
          genotypes.add(gb.make());
        } else if (variable.equals("log10PosteriorOfAFEq0")) {
          posteriors[0] = Double.valueOf(value);
        } else if (variable.equals("log10PosteriorOfAFGt0")) {
          posteriors[1] = Double.valueOf(value);
        } else if (variable.equals("MLE")) {
          mle.add(Integer.valueOf(value));
        } else if (variable.equals("pNonRefByAllele")) {
          final Allele a = Allele.create(key);
          log10pNonRefByAllele.put(a, Double.valueOf(value));
        } else if (variable.equals("runtime.nano")) {
          runtimeNano = Long.valueOf(value);
        } else {
          // nothing to do
        }
      }
    }
  }