/**
   * {@inheritDoc}
   *
   * <p>The matching reads are copied into a new list up front; the returned iterator walks that
   * copy, so {@code remove()} on it only affects the copy.
   *
   * @param sequence name of the sequence to query
   * @param start start coordinate handed to the locus parser
   * @param end end coordinate handed to the locus parser
   * @param contained if true, keep only reads for which the query region {@code containsP} the
   *     read's locus; otherwise keep reads whose locus {@code overlapsP} the query region
   * @return an iterator over the selected reads, in the order they appear in {@code reads}
   */
  @Override
  public SAMRecordIterator query(
      final String sequence, final int start, final int end, final boolean contained) {
    final GenomeLoc queryRegion = genomeLocParser.createGenomeLoc(sequence, start, end);

    final List<SAMRecord> matches = new ArrayList<SAMRecord>();
    for (final SAMRecord candidate : reads) {
      final GenomeLoc candidateLoc = genomeLocParser.createGenomeLoc(candidate);
      // Exactly one of the two predicates is evaluated, chosen by the `contained` flag.
      final boolean selected =
          contained ? queryRegion.containsP(candidateLoc) : candidateLoc.overlapsP(queryRegion);
      if (selected) {
        matches.add(candidate);
      }
    }

    final Iterator<SAMRecord> delegate = matches.iterator();
    return new SAMRecordIterator() {
      public boolean hasNext() {
        return delegate.hasNext();
      }

      public SAMRecord next() {
        return delegate.next();
      }

      public void remove() {
        delegate.remove();
      }

      // Nothing to release: the iterator is backed by an in-memory list.
      public void close() {}

      // No sort-order verification is performed; the iterator is returned unchanged.
      public SAMRecordIterator assertSorted(SAMFileHeader.SortOrder sortOrder) {
        return this;
      }
    };
  }
Ejemplo n.º 2
0
 /**
  * Registers upcoming intervals from {@code intervalListIterator} in {@code intervalMap}.
  *
  * <p>Intervals are taken from the iterator, in order, for as long as the next one is not past
  * {@code refLocus}. Each one is mapped to the result of {@code createIntervalStatistic}. The
  * first interval that is past the locus (or the end of the iterator) stops the loop and is left
  * unconsumed.
  *
  * @param refLocus the current reference locus
  */
 private void addNewOverlappingIntervals(final GenomeLoc refLocus) {
   for (GenomeLoc upcoming = intervalListIterator.peek();
       upcoming != null && !upcoming.isPast(refLocus);
       upcoming = intervalListIterator.peek()) {
     intervalMap.put(upcoming, createIntervalStatistic(upcoming));
     // Consume the interval that was just registered so the next peek sees its successor.
     intervalListIterator.next();
   }
 }
 /**
  * Decides whether this event should be reported.
  *
  * @return true only if the event's contig/start/stop pass the locus parser's validity check,
  *     the stop position is non-negative, and the span {@code eventStopPos - eventStartPos} is
  *     strictly below {@code maxIntervalSize}
  */
 public boolean isReportableEvent() {
   final boolean validOnReference =
       getToolkit()
           .getGenomeLocParser()
           .isValidGenomeLoc(loc.getContig(), eventStartPos, eventStopPos, true);
   if (!validOnReference) {
     return false;
   }
   if (eventStopPos < 0) {
     return false;
   }
   return eventStopPos - eventStartPos < maxIntervalSize;
 }
  /**
   * Replaces every read in this likelihood map with a copy realigned via its most likely
   * haplotype, keeping each read's per-allele likelihoods unchanged.
   *
   * @param haplotypes the collection of haplotypes
   * @param paddedReferenceLoc the active region; only its start position is used here
   */
  public void realignReadsToMostLikelyHaplotype(
      final Collection<Haplotype> haplotypes, final GenomeLoc paddedReferenceLoc) {

    // Alleles have to be mapped back to haplotypes: build an index keyed by an allele created
    // from each haplotype's bases, and remember the first haplotype flagged as reference.
    final Map<Allele, Haplotype> haplotypeByAllele = new HashMap<>(haplotypes.size());
    Haplotype reference = null;
    for (final Haplotype candidate : haplotypes) {
      haplotypeByAllele.put(Allele.create(candidate.getBases()), candidate);
      if (reference == null && candidate.isReference()) {
        reference = candidate;
      }
    }

    // Build the replacement map separately; insertion order follows the current map's iteration.
    final Map<GATKSAMRecord, Map<Allele, Double>> realigned =
        new LinkedHashMap<>(likelihoodReadMap.size());
    for (final Map.Entry<GATKSAMRecord, Map<Allele, Double>> readEntry :
        likelihoodReadMap.entrySet()) {
      final Map<Allele, Double> likelihoods = readEntry.getValue();
      final MostLikelyAllele best = PerReadAlleleLikelihoodMap.getMostLikelyAllele(likelihoods);
      final Haplotype bestHaplotype = haplotypeByAllele.get(best.getMostLikelyAllele());
      final GATKSAMRecord realignedRead =
          AlignmentUtils.createReadAlignedToRef(
              readEntry.getKey(),
              bestHaplotype,
              reference,
              paddedReferenceLoc.getStart(),
              best.isInformative());
      realigned.put(realignedRead, likelihoods);
    }

    // Swap the contents in place so the same map instance stays in use.
    likelihoodReadMap.clear();
    likelihoodReadMap.putAll(realigned);
  }
Ejemplo n.º 5
0
 /**
  * Writes out, then forgets, every interval in {@code intervalMap} that is before the current
  * reference locus.
  *
  * <p>For each such interval its statistics are written to the VCF, and intervals reported by
  * {@code hasMissingLoci} are additionally passed to {@code outputMissingInterval}.
  *
  * @param refLocus the current reference locus
  * @param refBase the reference base; a reference allele is created from it per written interval
  */
 private void outputFinishedIntervals(final GenomeLoc refLocus, final byte refBase) {
   // Removal is deferred until the scan is over so the key set is not modified mid-iteration.
   final List<GenomeLoc> finished = new LinkedList<>();
   for (final GenomeLoc candidate : intervalMap.keySet()) {
     if (!candidate.isBefore(refLocus)) {
       continue;
     }
     final IntervalStratification finishedStats = intervalMap.get(candidate);
     outputStatsToVCF(finishedStats, Allele.create(refBase, true));
     if (hasMissingLoci(finishedStats)) {
       outputMissingInterval(finishedStats);
     }
     finished.add(candidate);
   }

   for (final GenomeLoc done : finished) {
     intervalMap.remove(done);
   }
 }
    public Event(GenomeLoc loc, int furthestStopPos, EVENT_TYPE type) {
      this.loc = loc;
      this.furthestStopPos = furthestStopPos;
      this.type = type;

      if (type == EVENT_TYPE.INDEL_EVENT || type == EVENT_TYPE.BOTH) {
        eventStartPos = loc.getStart();
        eventStopPos = loc.getStop();
      } else {
        eventStartPos = -1;
        eventStopPos = -1;
      }

      if (type == EVENT_TYPE.POINT_EVENT || type == EVENT_TYPE.BOTH) {
        pointEvents.add(loc.getStart());
      }
    }
Ejemplo n.º 7
0
 /**
  * Reports the maximal runs of positions inside an interval that are still flagged as missing.
  *
  * <p>Every position starts out flagged. For each sample that {@code hasMissingStatuses}, the
  * flag of a position is cleared when that sample's locus at the same offset has an empty set of
  * callable statuses. Each remaining run of flagged positions is then emitted through the
  * {@code (contig, start, stop)} overload, with coordinates offset from the interval's start.
  *
  * @param stats the statistics of the interval to inspect
  */
 private void outputMissingInterval(final IntervalStratification stats) {
   final GenomeLoc interval = stats.getInterval();

   final boolean[] missing = new boolean[interval.size()];
   Arrays.fill(missing, true);
   for (final AbstractStratification sample : stats.getElements()) {
     if (!hasMissingStatuses(sample)) {
       continue;
     }
     // Loci are visited in iteration order; `offset` is the matching index into `missing`.
     int offset = 0;
     for (final AbstractStratification locus : sample.getElements()) {
       if (locus.callableStatuses().isEmpty()) {
         missing[offset] = false;
       }
       offset++;
     }
   }

   int runStart = -1;
   boolean inRun = false;
   for (int i = 0; i < missing.length; i++) {
     if (missing[i] == inRun) {
       continue; // no transition at this position
     }
     if (missing[i]) {
       // A run opens here.
       runStart = interval.getStart() + i;
       inRun = true;
     } else {
       // The run closed on the previous position.
       final int runStop = interval.getStart() + i - 1;
       outputMissingInterval(interval.getContig(), runStart, runStop);
       inRun = false;
     }
   }
   // A run still open at the end extends to the interval's stop.
   if (inRun) {
     outputMissingInterval(interval.getContig(), runStart, interval.getStop());
   }
 }
Ejemplo n.º 8
0
    /**
     * Orders elements by their next location, with exhausted elements last.
     *
     * <p>A null {@code nextLoc} means no more data is available, so such an element sorts after
     * any element with a non-null location; two exhausted elements compare as equal.
     *
     * @param other the element to compare against
     * @return negative, zero or positive as this element sorts before, with, or after {@code other}
     */
    public int compareTo(Element other) {
      final boolean exhausted = (nextLoc == null);
      final boolean otherExhausted = (other.nextLoc == null);

      if (exhausted || otherExhausted) {
        if (exhausted == otherExhausted) {
          return 0;
        }
        return exhausted ? 1 : -1;
      }

      // Both locations are non-null here.
      return nextLoc.compareTo(other.nextLoc);
    }
Ejemplo n.º 9
0
  /**
   * Merges two lists of variants by position and writes every record out, tagging each with the
   * set it belongs to.
   *
   * <p>The lists are walked in parallel. When the two current records start at the same position
   * or overlap, only the record from the first list is written, under {@code INTERSECTION_SET},
   * with a status describing how the two relate, and both lists advance. Otherwise the record
   * that is before the other is written under its own source and only that list advances. Once
   * one list is exhausted the remainder of the other is written under its own source.
   *
   * <p>Either list may be empty; an empty list is treated as already exhausted.
   * NOTE(review): the merge presumably relies on both lists being sorted by position; confirm
   * against the caller.
   *
   * @param source1Alleles the records from the first source
   * @param source2Alleles the records from the second source
   */
  private void writeDifferences(
      final List<VariantContext> source1Alleles, final List<VariantContext> source2Alleles) {
    int currentIndex1 = 0, currentIndex2 = 0;
    final int size1 = source1Alleles.size(), size2 = source2Alleles.size();
    // Guarded reads: an unconditional get(0) would throw IndexOutOfBoundsException on an empty
    // list. null marks an exhausted list, exactly as on every later advance.
    VariantContext current1 = (currentIndex1 < size1 ? source1Alleles.get(currentIndex1) : null);
    VariantContext current2 = (currentIndex2 < size2 ? source2Alleles.get(currentIndex2) : null);

    while (currentIndex1 < size1 || currentIndex2 < size2) {
      if (current1 == null) {
        // First list exhausted: drain the second.
        writeOne(current2, source2, null);
        currentIndex2++;
        current2 = (currentIndex2 < size2 ? source2Alleles.get(currentIndex2) : null);
      } else if (current2 == null) {
        // Second list exhausted: drain the first.
        writeOne(current1, source1, null);
        currentIndex1++;
        current1 = (currentIndex1 < size1 ? source1Alleles.get(currentIndex1) : null);
      } else {

        final GenomeLoc loc1 = getToolkit().getGenomeLocParser().createGenomeLoc(current1);
        final GenomeLoc loc2 = getToolkit().getGenomeLocParser().createGenomeLoc(current2);

        if (loc1.getStart() == loc2.getStart() || loc1.overlapsP(loc2)) {
          final String status;
          if (loc1.getStart() == loc2.getStart()) {
            // Same start: compare the base strings of the first alternate allele of each record.
            final String allele1 = current1.getAlternateAllele(0).getBaseString();
            final String allele2 = current2.getAlternateAllele(0).getBaseString();
            if (allele1.contains(allele2) || allele2.contains(allele1)) {
              status = ONE_ALLELE_SUBSET_OF_OTHER_STATUS;
            } else {
              status = SAME_START_DIFFERENT_ALLELES_STATUS;
            }
          } else {
            status = OVERLAPPING_EVENTS_STATUS;
          }

          // Only the record from the first list is written for a matched pair.
          writeOne(current1, INTERSECTION_SET, status);
          currentIndex1++;
          currentIndex2++;
          current1 = (currentIndex1 < size1 ? source1Alleles.get(currentIndex1) : null);
          current2 = (currentIndex2 < size2 ? source2Alleles.get(currentIndex2) : null);
        } else if (loc1.isBefore(loc2)) {
          writeOne(current1, source1, null);
          currentIndex1++;
          current1 = (currentIndex1 < size1 ? source1Alleles.get(currentIndex1) : null);
        } else {
          writeOne(current2, source2, null);
          currentIndex2++;
          current2 = (currentIndex2 < size2 ? source2Alleles.get(currentIndex2) : null);
        }
      }
    }
  }
Ejemplo n.º 10
0
  /**
   * Writes one VCF record summarising an interval's statistics.
   *
   * <p>The record spans the interval, carries the reference allele plus {@code SYMBOLIC_ALLELE},
   * one genotype per sample (average depth, low/zero coverage locus counts, and filters derived
   * from the sample's callable statuses), interval-level filters, and END / average depth / GC
   * content attributes.
   *
   * @param stats The statistics of the interval
   * @param refAllele the reference allele
   */
  private void outputStatsToVCF(final IntervalStratification stats, final Allele refAllele) {
    final GenomeLoc target = stats.getInterval();

    // Per-sample genotypes, in the iteration order of `samples`.
    final List<Genotype> perSampleGenotypes = new ArrayList<>();
    for (final String sampleName : samples) {
      final SampleStratification sampleStats = stats.getSampleStatistics(sampleName);
      final GenotypeBuilder genotype = new GenotypeBuilder(sampleName);
      genotype.attribute(
          GATKVCFConstants.AVG_INTERVAL_DP_BY_SAMPLE_KEY,
          sampleStats.averageCoverage(target.size()));
      genotype.attribute(GATKVCFConstants.LOW_COVERAGE_LOCI, sampleStats.getNLowCoveredLoci());
      genotype.attribute(GATKVCFConstants.ZERO_COVERAGE_LOCI, sampleStats.getNUncoveredLoci());
      // The filters are taken from a second lookup of the same sample's statistics.
      genotype.filters(
          statusToStrings(stats.getSampleStatistics(sampleName).callableStatuses(), false));
      perSampleGenotypes.add(genotype.make());
    }

    final List<Allele> siteAlleles = new ArrayList<>();
    siteAlleles.add(refAllele);
    siteAlleles.add(SYMBOLIC_ALLELE);

    VariantContextBuilder record =
        new VariantContextBuilder(
            "DiagnoseTargets",
            target.getContig(),
            target.getStart(),
            target.getStop(),
            siteAlleles);
    record = record.log10PError(VariantContext.NO_LOG10_PERROR);
    record.filters(new LinkedHashSet<>(statusToStrings(stats.callableStatuses(), true)));

    final Map<String, Object> info = new HashMap<>();
    info.put(VCFConstants.END_KEY, target.getStop());
    info.put(GATKVCFConstants.AVG_INTERVAL_DP_KEY, stats.averageCoverage(target.size()));
    info.put(GATKVCFConstants.INTERVAL_GC_CONTENT_KEY, stats.gcContent());

    vcfWriter.add(record.attributes(info).genotypes(perSampleGenotypes).make());
  }
Ejemplo n.º 11
0
  /**
   * Walks two lists of variants in parallel and, for every pair that {@code
   * determineAndWriteOverlap} reports as handled, removes both records from their lists.
   *
   * <p>Both argument lists are modified in place: matched records are deleted, unmatched ones
   * are left for the caller. The walk ends as soon as either list is exhausted.
   * NOTE(review): the merge presumably relies on both lists being sorted by position; confirm
   * against the caller.
   *
   * @param sourceVCs1 records from the first source; matched records are removed from it
   * @param sourceVCs2 records from the second source; matched records are removed from it
   * @param status the status passed through to {@code determineAndWriteOverlap}
   */
  private void writeAndPurgeAllEqualVariants(
      final List<VariantContext> sourceVCs1,
      final List<VariantContext> sourceVCs2,
      final String status) {

    int currentIndex1 = 0, currentIndex2 = 0;
    // Sizes are tracked locally and decremented on removal rather than re-read from the lists.
    int size1 = sourceVCs1.size(), size2 = sourceVCs2.size();
    // null marks an exhausted list (this also covers an initially empty list).
    VariantContext current1 = (currentIndex1 < size1 ? sourceVCs1.get(currentIndex1) : null);
    VariantContext current2 = (currentIndex2 < size2 ? sourceVCs2.get(currentIndex2) : null);

    while (current1 != null && current2 != null) {

      final GenomeLoc loc1 = getToolkit().getGenomeLocParser().createGenomeLoc(current1);
      final GenomeLoc loc2 = getToolkit().getGenomeLocParser().createGenomeLoc(current2);

      // Candidate pair: identical loci, or the same start when either record has more than one
      // alternate allele.
      if (loc1.equals(loc2)
          || (loc1.getStart() == loc2.getStart()
              && (current1.getAlternateAlleles().size() > 1
                  || current2.getAlternateAlleles().size() > 1))) {
        // test the alleles
        if (determineAndWriteOverlap(current1, current2, status)) {
          // Removal shifts the following elements down, so the indices are deliberately not
          // advanced; only the tracked sizes shrink.
          sourceVCs1.remove(currentIndex1);
          sourceVCs2.remove(currentIndex2);
          size1--;
          size2--;
        } else {
          // Not handled: keep both records and step past them.
          currentIndex1++;
          currentIndex2++;
        }
        current1 = (currentIndex1 < size1 ? sourceVCs1.get(currentIndex1) : null);
        current2 = (currentIndex2 < size2 ? sourceVCs2.get(currentIndex2) : null);
      } else if (loc1.isBefore(loc2)) {
        // The first list is behind: advance it alone.
        currentIndex1++;
        current1 = (currentIndex1 < size1 ? sourceVCs1.get(currentIndex1) : null);
      } else {
        // Otherwise advance the second list alone.
        currentIndex2++;
        current2 = (currentIndex2 < size2 ? sourceVCs2.get(currentIndex2) : null);
      }
    }
  }
  /**
   * Finishes the traversal: adds the pending left/right events to the interval collection when
   * they are reportable, then writes all intervals to {@code out}.
   *
   * <p>If the output file's extension is {@code interval_list}, the intervals are written as an
   * {@code IntervalList} whose header carries the toolkit's master sequence dictionary.
   * Otherwise each interval's {@code toString()} is written on its own line.
   *
   * @param sum the accumulated event pair and intervals
   * @throws GATKException if writing the plain-text output fails with an {@code IOException}
   */
  public void onTraversalDone(EventPair sum) {
    if (sum.left != null && sum.left.isReportableEvent()) {
      sum.intervals.add(sum.left.getLoc());
    }
    if (sum.right != null && sum.right.isReportableEvent()) {
      sum.intervals.add(sum.right.getLoc());
    }

    final String extension = FilenameUtils.getExtension(out.getName());
    if (!extension.equals("interval_list")) {
      // Plain-text output: one interval per line.
      try (BufferedWriter writer = IOUtil.openFileForBufferedWriting(out)) {
        for (final GenomeLoc interval : sum.intervals) {
          writer.write(interval.toString());
          writer.newLine();
        }
      } catch (final IOException e) {
        throw new GATKException("Error writing out intervals to file: " + out.getAbsolutePath(), e);
      }
      return;
    }

    // interval_list output: the header only needs the master sequence dictionary.
    final SAMFileHeader dictionaryHeader = new SAMFileHeader();
    dictionaryHeader.setSequenceDictionary(getToolkit().getMasterSequenceDictionary());
    final IntervalList listOutput = new IntervalList(dictionaryHeader);
    for (final GenomeLoc interval : sum.intervals) {
      listOutput.add(new Interval(interval.getContig(), interval.getStart(), interval.getStop()));
    }
    listOutput.write(out);
  }
Ejemplo n.º 13
0
 /**
  * Renders this object as four tab-separated fields: contig, start minus one, stop, and state.
  *
  * @return the tab-separated representation
  */
 public String toString() {
   final String contig = loc.getContig();
   final int startMinusOne = loc.getStart() - 1;
   final int stop = loc.getStop();
   return String.format("%s\t%d\t%d\t%s", contig, startMinusOne, stop, state);
 }
Ejemplo n.º 14
0
 /**
  * Replaces this state's location with one that keeps the current contig and start but ends at
  * the stop position of the given location.
  *
  * @param newStop the location whose stop position becomes the new stop of this state
  */
 public void update(GenomeLoc newStop) {
   final String contig = loc.getContig();
   final int start = loc.getStart();
   final int stop = newStop.getStop();
   loc = genomeLocParser.createGenomeLoc(contig, start, stop);
 }
 /**
  * Builds the locus covered by this event.
  *
  * @return a new locus on this event's contig spanning {@code eventStartPos} to {@code
  *     eventStopPos}, created through the toolkit's locus parser
  */
 public GenomeLoc getLoc() {
   final String contig = loc.getContig();
   return getToolkit().getGenomeLocParser().createGenomeLoc(contig, eventStartPos, eventStopPos);
 }
Ejemplo n.º 16
0
 /**
  * Can we flash back to the specified location?
  *
  * <p>The reference point is the location of the first element of {@code pastQueue}, or, when
  * the queue is empty, the iterator's next location. Flashing back is possible when that
  * reference point is not past the requested location.
  *
  * @param location the location to try and flash back to
  * @return true if we can, false otherwise
  */
 public boolean canFlashBackTo(GenomeLoc location) {
   final GenomeLoc farthestBack;
   if (pastQueue.isEmpty()) {
     farthestBack = iterator.peekNextLocation();
   } else {
     farthestBack = pastQueue.getFirst().getLocation();
   }
   return !farthestBack.isPast(location);
 }
  /**
   * Inspects one locus and decides whether it is an indel event, a point event, both, or nothing.
   *
   * <p>Evidence comes from two places: the variant records bound to {@code known} (when a tracker
   * is supplied) and the read pileup at the locus. Optionally, a locus whose mismatch quality
   * fraction reaches {@code mismatchThreshold} is also treated as a point event.
   *
   * @param tracker source of the known variant records at this locus; may be null
   * @param ref the reference context; only the reference base is read
   * @param context the alignment context supplying the pileup and the locus
   * @return the event found at this locus, or null if there is no event or no stop position could
   *     be determined
   */
  public Event map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {

    boolean hasIndel = false;
    boolean hasInsertion = false;
    boolean hasPointEvent = false;

    // -1 means no read or record has supplied a stop position yet.
    int furthestStopPos = -1;

    // look at the rods for indels or SNPs
    if (tracker != null) {
      for (VariantContext vc : tracker.getValues(known)) {
        switch (vc.getType()) {
          case INDEL:
            hasIndel = true;
            if (vc.isSimpleInsertion()) hasInsertion = true;
            break;
          case SNP:
            hasPointEvent = true;
            break;
          case MIXED:
            hasPointEvent = true;
            hasIndel = true;
            if (vc.isSimpleInsertion()) hasInsertion = true;
            break;
          default:
            break;
        }
        // NOTE(review): hasIndel stays true once set, so every later record (indel or not) also
        // overwrites furthestStopPos, and this is a plain assignment rather than a max. Confirm
        // this is intended.
        if (hasIndel) furthestStopPos = vc.getEnd();
      }
    }

    // look at the normal context to get deletions and positions with high entropy
    final ReadBackedPileup pileup = context.getBasePileup();

    int mismatchQualities = 0, totalQualities = 0;
    final byte refBase = ref.getBase();
    for (PileupElement p : pileup) {

      // check the ends of the reads to see how far they extend
      furthestStopPos = Math.max(furthestStopPos, p.getRead().getAlignmentEnd());

      // is it a deletion or insertion?
      if (p.isDeletion() || p.isBeforeInsertion()) {
        hasIndel = true;
        if (p.isBeforeInsertion()) hasInsertion = true;
      }

      // look for mismatches
      // (elements counted as indels above contribute nothing to the quality sums)
      else if (lookForMismatchEntropy) {
        if (p.getBase() != refBase) mismatchQualities += p.getQual();
        totalQualities += p.getQual();
      }
    }

    // make sure we're supposed to look for high entropy
    // If no qualities were accumulated the ratio is 0.0/0.0 = NaN, so the comparison is false.
    if (lookForMismatchEntropy
        && pileup.getNumberOfElements() >= minReadsAtLocus
        && (double) mismatchQualities / (double) totalQualities >= mismatchThreshold)
      hasPointEvent = true;

    // return null if no event occurred
    if (!hasIndel && !hasPointEvent) return null;

    // return null if we didn't find any usable reads/rods associated with the event
    if (furthestStopPos == -1) return null;

    GenomeLoc eventLoc = context.getLocation();
    // When an insertion was seen, the event locus is rebuilt to span start..start+1.
    if (hasInsertion)
      eventLoc =
          getToolkit()
              .getGenomeLocParser()
              .createGenomeLoc(eventLoc.getContig(), eventLoc.getStart(), eventLoc.getStart() + 1);

    EVENT_TYPE eventType =
        (hasIndel
            ? (hasPointEvent ? EVENT_TYPE.BOTH : EVENT_TYPE.INDEL_EVENT)
            : EVENT_TYPE.POINT_EVENT);

    return new Event(eventLoc, furthestStopPos, eventType);
  }