예제 #1
0
파일: PhasingUtils.java 프로젝트: nh13/gatk
    private Allele ensureMergedAllele(
        Allele all1, Allele all2, boolean creatingReferenceForFirstTime) {
      AlleleOneAndTwo all12 = new AlleleOneAndTwo(all1, all2);
      Allele mergedAllele = mergedAlleles.get(all12);

      if (mergedAllele == null) {
        byte[] bases1 = all1.getBases();
        byte[] bases2 = all2.getBases();

        byte[] mergedBases = new byte[bases1.length + intermediateLength + bases2.length];
        System.arraycopy(bases1, 0, mergedBases, 0, bases1.length);
        if (intermediateBases != null)
          System.arraycopy(intermediateBases, 0, mergedBases, bases1.length, intermediateLength);
        System.arraycopy(bases2, 0, mergedBases, bases1.length + intermediateLength, bases2.length);

        mergedAllele = Allele.create(mergedBases, creatingReferenceForFirstTime);
        mergedAlleles.put(all12, mergedAllele);
      }

      return mergedAllele;
    }
  private Map<String, Object> annotateSNP(AlignmentContext stratifiedContext, VariantContext vc) {

    if (!stratifiedContext.hasBasePileup()) return null;

    HashMap<Byte, Integer> alleleCounts = new HashMap<Byte, Integer>();
    for (Allele allele : vc.getAlternateAlleles()) alleleCounts.put(allele.getBases()[0], 0);

    ReadBackedPileup pileup = stratifiedContext.getBasePileup();
    int totalDepth = pileup.size();

    Map<String, Object> map = new HashMap<String, Object>();
    map.put(getKeyNames().get(0), totalDepth); // put total depth in right away

    if (totalDepth == 0) return map; // done, can not compute FA at 0 coverage!!

    int mq0 = 0; // number of "ref" reads that are acually mq0
    for (PileupElement p : pileup) {
      if (p.getMappingQual() == 0) {
        mq0++;
        continue;
      }
      if (alleleCounts.containsKey(p.getBase())) // non-mq0 read and it's an alt
      alleleCounts.put(p.getBase(), alleleCounts.get(p.getBase()) + 1);
    }

    if (mq0 == totalDepth) return map; // if all reads are mq0, there is nothing left to do

    // we need to add counts in the correct order
    String[] fracs = new String[alleleCounts.size()];
    for (int i = 0; i < vc.getAlternateAlleles().size(); i++) {
      fracs[i] =
          String.format(
              "%.3f",
              ((float) alleleCounts.get(vc.getAlternateAllele(i).getBases()[0]))
                  / (totalDepth - mq0));
    }

    map.put(getKeyNames().get(1), fracs);
    return map;
  }
  public Allele getLikelihoods(
      RefMetaDataTracker tracker,
      ReferenceContext ref,
      Map<String, AlignmentContext> contexts,
      AlignmentContextUtils.ReadOrientation contextType,
      GenotypePriors priors,
      Map<String, MultiallelicGenotypeLikelihoods> GLs,
      Allele alternateAlleleToUse,
      boolean useBAQedPileup) {

    if (tracker == null) return null;

    GenomeLoc loc = ref.getLocus();
    Allele refAllele, altAllele;
    VariantContext vc = null;

    if (!ref.getLocus().equals(lastSiteVisited)) {
      // starting a new site: clear allele list
      alleleList.clear();
      lastSiteVisited = ref.getLocus();
      indelLikelihoodMap.set(new HashMap<PileupElement, LinkedHashMap<Allele, Double>>());
      haplotypeMap.clear();

      if (getAlleleListFromVCF) {
        for (final VariantContext vc_input : tracker.getValues(UAC.alleles, loc)) {
          if (vc_input != null
              && allowableTypes.contains(vc_input.getType())
              && ref.getLocus().getStart() == vc_input.getStart()) {
            vc = vc_input;
            break;
          }
        }
        // ignore places where we don't have a variant
        if (vc == null) return null;

        alleleList.clear();
        if (ignoreSNPAllelesWhenGenotypingIndels) {
          // if there's an allele that has same length as the reference (i.e. a SNP or MNP), ignore
          // it and don't genotype it
          for (Allele a : vc.getAlleles())
            if (a.isNonReference() && a.getBases().length == vc.getReference().getBases().length)
              continue;
            else alleleList.add(a);

        } else {
          for (Allele a : vc.getAlleles()) alleleList.add(a);
        }

      } else {
        alleleList = computeConsensusAlleles(ref, contexts, contextType);
        if (alleleList.isEmpty()) return null;
      }
    }
    // protect against having an indel too close to the edge of a contig
    if (loc.getStart() <= HAPLOTYPE_SIZE) return null;

    // check if there is enough reference window to create haplotypes (can be an issue at end of
    // contigs)
    if (ref.getWindow().getStop() < loc.getStop() + HAPLOTYPE_SIZE) return null;
    if (!(priors instanceof DiploidIndelGenotypePriors))
      throw new StingException(
          "Only diploid-based Indel priors are supported in the DINDEL GL model");

    if (alleleList.isEmpty()) return null;

    refAllele = alleleList.get(0);
    altAllele = alleleList.get(1);

    // look for alt allele that has biggest length distance to ref allele
    int maxLenDiff = 0;
    for (Allele a : alleleList) {
      if (a.isNonReference()) {
        int lenDiff = Math.abs(a.getBaseString().length() - refAllele.getBaseString().length());
        if (lenDiff > maxLenDiff) {
          maxLenDiff = lenDiff;
          altAllele = a;
        }
      }
    }

    final int eventLength = altAllele.getBaseString().length() - refAllele.getBaseString().length();
    final int hsize = (int) ref.getWindow().size() - Math.abs(eventLength) - 1;
    final int numPrefBases = ref.getLocus().getStart() - ref.getWindow().getStart() + 1;

    haplotypeMap =
        Haplotype.makeHaplotypeListFromAlleles(
            alleleList, loc.getStart(), ref, hsize, numPrefBases);

    // For each sample, get genotype likelihoods based on pileup
    // compute prior likelihoods on haplotypes, and initialize haplotype likelihood matrix with
    // them.
    // initialize the GenotypeLikelihoods
    GLs.clear();

    for (Map.Entry<String, AlignmentContext> sample : contexts.entrySet()) {
      AlignmentContext context = AlignmentContextUtils.stratify(sample.getValue(), contextType);

      ReadBackedPileup pileup = null;
      if (context.hasExtendedEventPileup()) pileup = context.getExtendedEventPileup();
      else if (context.hasBasePileup()) pileup = context.getBasePileup();

      if (pileup != null) {
        final double[] genotypeLikelihoods =
            pairModel.computeReadHaplotypeLikelihoods(
                pileup, haplotypeMap, ref, eventLength, getIndelLikelihoodMap());

        GLs.put(
            sample.getKey(),
            new MultiallelicGenotypeLikelihoods(
                sample.getKey(), alleleList, genotypeLikelihoods, getFilteredDepth(pileup)));

        if (DEBUG) {
          System.out.format("Sample:%s Alleles:%s GL:", sample.getKey(), alleleList.toString());
          for (int k = 0; k < genotypeLikelihoods.length; k++)
            System.out.format("%1.4f ", genotypeLikelihoods[k]);
          System.out.println();
        }
      }
    }

    return refAllele;
  }