예제 #1
0
 /**
  * Generate a random sequence of read bases.
  *
  * <p>One value is drawn from the engine's random generator for every position of the result.
  *
  * @param length the number of bases to generate
  * @param allowNs if true, 'N' may appear in addition to A, C, G and T; if false, only A, C, G and
  *     T are produced
  * @return a newly allocated array of {@code length} bases, every permitted base having the same
  *     chance of being picked at each position
  */
 public static byte[] createRandomReadBases(int length, boolean allowNs) {
   final Random rng = GenomeAnalysisEngine.getRandomGenerator();
   // 'N' sits in the last slot so it only becomes reachable when the draw range is widened to 5.
   final byte[] alphabet = {'A', 'C', 'G', 'T', 'N'};
   final int alphabetSize = allowNs ? 5 : 4;
   final byte[] result = new byte[length];
   int pos = 0;
   while (pos < length) {
     final int pick = rng.nextInt(alphabetSize);
     if (pick < 0 || pick >= alphabet.length) {
       // nextInt(bound) is specified to stay inside [0, bound), so this is a pure sanity check.
       throw new ReviewedStingException("Something went wrong, this is just impossible");
     }
     result[pos] = alphabet[pick];
     pos++;
   }
   return result;
 }
예제 #2
0
 /**
  * Offer a variant to the fixed-size random selection kept in {@code variantArray}.
  *
  * <p>While fewer than {@code numRandom} variants have been stored, the variant is appended
  * unconditionally. Afterwards it overwrites the entry at {@code positionToAdd} with probability
  * {@code 1 / (rank - numRandom + 1)}, and the insertion position advances circularly.
  *
  * @param rank running index of this variant among all variants offered so far
  * @param vc the variant being offered
  * @param refBase the reference base at the variant site (not used by this method)
  */
 private void randomlyAddVariant(int rank, VariantContext vc, byte refBase) {
   if (nVariantsAdded < numRandom) {
     // Still filling the initial slots: take the variant without any random draw.
     final int slot = nVariantsAdded++;
     variantArray[slot] = new RandomVariantStructure(vc);
     return;
   }

   final double draw = GenomeAnalysisEngine.getRandomGenerator().nextDouble();
   final double acceptanceThreshold = (1.0 / (rank - numRandom + 1));
   if (draw >= acceptanceThreshold) {
     return;
   }

   variantArray[positionToAdd].set(vc);
   nVariantsAdded++;
   positionToAdd = nextCircularPosition(positionToAdd);
 }
 /**
  * Checks, on 1000 randomly generated reads, that complementing each base of the string returned
  * by {@code ReadUtils.getBasesReverseComplement} and writing it back in reverse order reproduces
  * the read's original bases.
  */
 @Test(enabled = true)
 public void testGetBasesReverseComplement() {
   final Random rng = GenomeAnalysisEngine.getRandomGenerator();
   for (int remaining = 1000; remaining > 0; remaining--) {
     final int readLength = rng.nextInt(1000);
     final GATKSAMRecord read = GATKSAMRecord.createRandomRead(readLength);
     final byte[] expected = read.getReadBases();
     final byte[] roundTripped = new byte[readLength];
     final String reverseComplement = ReadUtils.getBasesReverseComplement(read);
     // Walk the reverse complement forwards while filling the reconstruction backwards.
     for (int src = 0, dst = readLength - 1; src < readLength; src++, dst--) {
       roundTripped[dst] = BaseUtils.getComplement((byte) reverseComplement.charAt(src));
     }
     Assert.assertEquals(roundTripped, expected);
   }
 }
  /**
   * Runs the same two-BAM command twice, once with a per-sample contamination file and once with a
   * flat contamination value, expecting the same MD5 from both runs.
   *
   * <p>The engine's random generator is re-seeded with the same value before each run.
   *
   * @param bam1 name of the first BAM, appended to {@code ArtificalBAMLocation}
   * @param bam2 name of the second BAM, appended to {@code ArtificalBAMLocation}
   * @param persampleFile value passed to {@code -contaminationFile} in the first run
   * @param downsampling value passed to {@code -contamination} in the second run
   * @param md5 expected MD5 for both runs
   */
  private void testPerSampleEqualsFlat(
      final String bam1,
      final String bam2,
      final String persampleFile,
      final Double downsampling,
      final String md5) {
    final long seed = 123451;
    final String bamPair = bam1 + " and " + bam2;
    final String firstInput = " -I " + ArtificalBAMLocation + bam1;
    final String secondInput = " -I " + ArtificalBAMLocation + bam2;
    final String command = baseCommand3 + firstInput + secondInput + " -o %s  ";

    // First run: contamination supplied per sample through a file.
    final WalkerTestSpec perSampleSpec =
        new WalkerTestSpec(command + " -contaminationFile " + persampleFile, 1, Arrays.asList(md5));
    final Random generator = GenomeAnalysisEngine.getRandomGenerator();
    final String perSampleName =
        "test contamination on Artificial Contamination, with per-sample file on "
            + bamPair
            + " with "
            + persampleFile;
    generator.setSeed(seed); // so that the two test cases have a hope of giving the same result
    executeTest(perSampleName, perSampleSpec);

    // Second run: one flat contamination value for every sample.
    final WalkerTestSpec flatSpec =
        new WalkerTestSpec(
            command + "-contamination " + downsampling.toString(), 1, Arrays.asList(md5));
    generator.setSeed(seed); // so that the two test cases have a hope of giving the same result
    final String flatName =
        "test contamination on Artificial Contamination, with flat contamination on "
            + bamPair
            + " with "
            + downsampling.toString();
    executeTest(flatName, flatSpec);
  }
예제 #5
0
  /**
   * Writes one line of Beagle likelihood input for a site and, when {@code markers} is non-null,
   * the matching marker line.
   *
   * <p>The line starts with a {@code contig:start} marker followed by the site's alleles ("-" for
   * no-call or null alleles), then the likelihoods of every sample in {@code samples}. For each
   * sample the genotype is taken from {@code preferredVC} when present, otherwise from {@code
   * otherVC} (only if {@code goodSite(otherVC)} holds).
   *
   * @param preferredVC variant context whose location, alleles and genotypes take precedence
   * @param otherVC fallback source of genotypes for samples missing from {@code preferredVC}
   * @param isValidationSite whether {@code preferredVC} is the validation record; genotypes taken
   *     from {@code otherVC} are treated as the opposite
   * @param prior probability assigned to the called genotype when a non-validation genotype has
   *     no likelihoods; a negative value makes validation genotypes use their own likelihoods
   * @throws StingException if a sample has a genotype in neither variant context
   */
  public void writeBeagleOutput(
      VariantContext preferredVC, VariantContext otherVC, boolean isValidationSite, double prior) {
    GenomeLoc currentLoc =
        VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(), preferredVC);
    StringBuffer beagleOut = new StringBuffer();

    String marker = String.format("%s:%d ", currentLoc.getContig(), currentLoc.getStart());
    beagleOut.append(marker);
    if (markers != null) {
      markers.append(marker).append("\t").append(Integer.toString(markerCounter++)).append("\t");
    }
    for (Allele allele : preferredVC.getAlleles()) {
      String bglPrintString;
      if (allele.isNoCall() || allele.isNull()) {
        bglPrintString = "-";
      } else {
        bglPrintString = allele.getBaseString(); // get rid of * in case of reference allele
      }

      beagleOut.append(String.format("%s ", bglPrintString));
      if (markers != null) {
        markers.append(bglPrintString).append("\t");
      }
    }
    if (markers != null) {
      markers.append("\n");
    }

    GenotypesContext preferredGenotypes = preferredVC.getGenotypes();
    GenotypesContext otherGenotypes = goodSite(otherVC) ? otherVC.getGenotypes() : null;
    for (String sample : samples) {
      boolean isMaleOnChrX = CHECK_IS_MALE_ON_CHR_X && getSample(sample).getGender() == Gender.MALE;

      Genotype genotype;
      boolean isValidation;
      // use sample as key into genotypes structure
      if (preferredGenotypes.containsSample(sample)) {
        genotype = preferredGenotypes.get(sample);
        isValidation = isValidationSite;
      } else if (otherGenotypes != null && otherGenotypes.containsSample(sample)) {
        genotype = otherGenotypes.get(sample);
        isValidation = !isValidationSite;
      } else {
        // there is magically no genotype for this sample.
        throw new StingException(
            "Sample "
                + sample
                + " arose with no genotype in variant or validation VCF. This should never happen.");
      }

      /*
       * Use likelihoods if: is validation, prior is negative; or: is not validation, has genotype key
       */
      double[] log10Likelihoods = null;
      if ((isValidation && prior < 0.0) || genotype.hasLikelihoods()) {
        log10Likelihoods = genotype.getLikelihoods().getAsVector();

        // see if we need to randomly mask out genotype in this position.
        if (GenomeAnalysisEngine.getRandomGenerator().nextDouble() <= insertedNoCallRate) {
          // we are masking out this genotype
          log10Likelihoods =
              isMaleOnChrX ? HAPLOID_FLAT_LOG10_LIKELIHOODS : DIPLOID_FLAT_LOG10_LIKELIHOODS;
        }

        if (isMaleOnChrX) {
          // Write into a private copy: at this point the array can be the shared
          // HAPLOID_FLAT_LOG10_LIKELIHOODS constant (assigned just above) or the array returned
          // by getAsVector(), and neither should be modified as a side effect of writing a line.
          log10Likelihoods = log10Likelihoods.clone();
          log10Likelihoods[1] = -255; // todo -- warning this is dangerous for multi-allele case
        }
      }
      // otherwise, use the prior uniformly
      else if (!isValidation && genotype.isCalled() && !genotype.hasLikelihoods()) {
        // hack to deal with input VCFs with no genotype likelihoods.  Just assume the called
        // genotype is confident.  This is useful for Hapmap and 1KG release VCFs.
        double AA = (1.0 - prior) / 2.0;
        double AB = (1.0 - prior) / 2.0;
        double BB = (1.0 - prior) / 2.0;

        if (genotype.isHomRef()) {
          AA = prior;
        } else if (genotype.isHet()) {
          AB = prior;
        } else if (genotype.isHomVar()) {
          BB = prior;
        }

        log10Likelihoods = MathUtils.toLog10(new double[] {AA, isMaleOnChrX ? 0.0 : AB, BB});
      } else {
        // Nothing usable for this sample: fall back to the flat likelihoods.
        log10Likelihoods =
            isMaleOnChrX ? HAPLOID_FLAT_LOG10_LIKELIHOODS : DIPLOID_FLAT_LOG10_LIKELIHOODS;
      }

      writeSampleLikelihoods(beagleOut, preferredVC, log10Likelihoods);
    }

    beagleWriter.println(beagleOut.toString());
  }
예제 #6
0
 /**
  * Generate random base qualities for a read.
  *
  * <p>One value is drawn from the engine's random generator for every position of the result.
  *
  * @param length the number of quality values to generate
  * @return a newly allocated array of {@code length} qualities, each in the range 0 (inclusive)
  *     to 50 (exclusive)
  */
 public static byte[] createRandomReadQuals(int length) {
   final Random rng = GenomeAnalysisEngine.getRandomGenerator();
   final byte[] result = new byte[length];
   int filled = 0;
   while (filled < length) {
     result[filled] = (byte) rng.nextInt(50);
     filled++;
   }
   return result;
 }
예제 #7
0
  /**
   * Subset each VC record at this locus if necessary and emit the modified record (provided it
   * satisfies the criteria for printing).
   *
   * <p>When Mendelian-violation selection is on, every violation found is also reported to the
   * violation output stream (if one is configured) with the mother's, father's and child's
   * genotypes and likelihoods.
   *
   * @param tracker the ROD tracker
   * @param ref reference information
   * @param context alignment info
   * @return 0 if there is no tracker, no variant at this location, or a record fails the
   *     discordance, concordance or JEXL checks; 1 otherwise (including when records were skipped
   *     by the remaining filters)
   */
  @Override
  public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
    if (tracker == null) return 0;

    Collection<VariantContext> vcs =
        tracker.getValues(variantCollection.variants, context.getLocation());

    if (vcs == null || vcs.size() == 0) {
      return 0;
    }

    for (VariantContext vc : vcs) {
      if (MENDELIAN_VIOLATIONS) {
        boolean foundMV = false;
        for (MendelianViolation mv : mvSet) {
          if (mv.isViolation(vc)) {
            foundMV = true;
            if (outMVFile != null) {
              // NOTE(review): getAlternateAllele(0) assumes the record has at least one ALT
              // allele -- confirm isViolation() cannot be true otherwise.
              outMVFileStream.format(
                  "MV@%s:%d. REF=%s, ALT=%s, AC=%d, momID=%s, dadID=%s, childID=%s, momG=%s, momGL=%s, dadG=%s, dadGL=%s, "
                      + "childG=%s childGL=%s\n",
                  vc.getChr(),
                  vc.getStart(),
                  vc.getReference().getDisplayString(),
                  vc.getAlternateAllele(0).getDisplayString(),
                  vc.getChromosomeCount(vc.getAlternateAllele(0)),
                  mv.getSampleMom(),
                  mv.getSampleDad(),
                  mv.getSampleChild(),
                  vc.getGenotype(mv.getSampleMom()).toBriefString(),
                  vc.getGenotype(mv.getSampleMom()).getLikelihoods().getAsString(),
                  vc.getGenotype(mv.getSampleDad()).toBriefString(),
                  // dadGL must come from the father's genotype (previously read the mother's).
                  vc.getGenotype(mv.getSampleDad()).getLikelihoods().getAsString(),
                  vc.getGenotype(mv.getSampleChild()).toBriefString(),
                  vc.getGenotype(mv.getSampleChild()).getLikelihoods().getAsString());
            }
          }
        }

        // NOTE(review): this abandons all remaining records at the locus and still returns 1;
        // confirm that skipping only this record (continue) is not what was intended.
        if (!foundMV) break;
      }
      if (DISCORDANCE_ONLY) {
        Collection<VariantContext> compVCs =
            tracker.getValues(discordanceTrack, context.getLocation());
        if (!isDiscordant(vc, compVCs)) return 0;
      }
      if (CONCORDANCE_ONLY) {
        Collection<VariantContext> compVCs =
            tracker.getValues(concordanceTrack, context.getLocation());
        if (!isConcordant(vc, compVCs)) return 0;
      }

      if (alleleRestriction.equals(NumberAlleleRestriction.BIALLELIC) && !vc.isBiallelic())
        continue;

      if (alleleRestriction.equals(NumberAlleleRestriction.MULTIALLELIC) && vc.isBiallelic())
        continue;

      if (!selectedTypes.contains(vc.getType())) continue;

      VariantContext sub = subsetRecord(vc, samples);
      if ((sub.isPolymorphic() || !EXCLUDE_NON_VARIANTS)
          && (!sub.isFiltered() || !EXCLUDE_FILTERED)) {
        // Every JEXL expression must match, otherwise the whole locus is dropped.
        for (VariantContextUtils.JexlVCMatchExp jexl : jexls) {
          if (!VariantContextUtils.match(sub, jexl)) {
            return 0;
          }
        }
        if (SELECT_RANDOM_NUMBER) {
          // Fixed-count random selection: the record is buffered rather than written now.
          randomlyAddVariant(++variantNumber, sub, ref.getBase());
        } else if (!SELECT_RANDOM_FRACTION
            || (GenomeAnalysisEngine.getRandomGenerator().nextDouble() < fractionRandom)) {
          vcfWriter.add(sub);
        }
      }
    }

    return 1;
  }