/** * Create random read qualities * * @param length the length of the read * @param allowNs whether or not to allow N's in the read * @return an array with randomized bases (A-N) with equal probability */ public static byte[] createRandomReadBases(int length, boolean allowNs) { Random random = GenomeAnalysisEngine.getRandomGenerator(); int numberOfBases = allowNs ? 5 : 4; byte[] bases = new byte[length]; for (int i = 0; i < length; i++) { switch (random.nextInt(numberOfBases)) { case 0: bases[i] = 'A'; break; case 1: bases[i] = 'C'; break; case 2: bases[i] = 'G'; break; case 3: bases[i] = 'T'; break; case 4: bases[i] = 'N'; break; default: throw new ReviewedStingException("Something went wrong, this is just impossible"); } } return bases; }
private void randomlyAddVariant(int rank, VariantContext vc, byte refBase) { if (nVariantsAdded < numRandom) variantArray[nVariantsAdded++] = new RandomVariantStructure(vc); else { double v = GenomeAnalysisEngine.getRandomGenerator().nextDouble(); double t = (1.0 / (rank - numRandom + 1)); if (v < t) { variantArray[positionToAdd].set(vc); nVariantsAdded++; positionToAdd = nextCircularPosition(positionToAdd); } } }
@Test(enabled = true) public void testGetBasesReverseComplement() { int iterations = 1000; Random random = GenomeAnalysisEngine.getRandomGenerator(); while (iterations-- > 0) { final int l = random.nextInt(1000); GATKSAMRecord read = GATKSAMRecord.createRandomRead(l); byte[] original = read.getReadBases(); byte[] reconverted = new byte[l]; String revComp = ReadUtils.getBasesReverseComplement(read); for (int i = 0; i < l; i++) { reconverted[l - 1 - i] = BaseUtils.getComplement((byte) revComp.charAt(i)); } Assert.assertEquals(reconverted, original); } }
private void testPerSampleEqualsFlat( final String bam1, final String bam2, final String persampleFile, final Double downsampling, final String md5) { final String command = baseCommand3 + " -I " + ArtificalBAMLocation + bam1 + " -I " + ArtificalBAMLocation + bam2 + " -o %s "; WalkerTestSpec spec = new WalkerTestSpec(command + " -contaminationFile " + persampleFile, 1, Arrays.asList(md5)); final Random rnd = GenomeAnalysisEngine.getRandomGenerator(); rnd.setSeed(123451); // so that the two test cases have a hope of giving the same result executeTest( "test contamination on Artificial Contamination, with per-sample file on " + bam1 + " and " + bam2 + " with " + persampleFile, spec); spec = new WalkerTestSpec( command + "-contamination " + downsampling.toString(), 1, Arrays.asList(md5)); rnd.setSeed(123451); // so that the two test cases have a hope of giving the same result executeTest( "test contamination on Artificial Contamination, with flat contamination on " + bam1 + " and " + bam2 + " with " + downsampling.toString(), spec); }
public void writeBeagleOutput( VariantContext preferredVC, VariantContext otherVC, boolean isValidationSite, double prior) { GenomeLoc currentLoc = VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(), preferredVC); StringBuffer beagleOut = new StringBuffer(); String marker = String.format("%s:%d ", currentLoc.getContig(), currentLoc.getStart()); beagleOut.append(marker); if (markers != null) markers.append(marker).append("\t").append(Integer.toString(markerCounter++)).append("\t"); for (Allele allele : preferredVC.getAlleles()) { String bglPrintString; if (allele.isNoCall() || allele.isNull()) bglPrintString = "-"; else bglPrintString = allele.getBaseString(); // get rid of * in case of reference allele beagleOut.append(String.format("%s ", bglPrintString)); if (markers != null) markers.append(bglPrintString).append("\t"); } if (markers != null) markers.append("\n"); GenotypesContext preferredGenotypes = preferredVC.getGenotypes(); GenotypesContext otherGenotypes = goodSite(otherVC) ? otherVC.getGenotypes() : null; for (String sample : samples) { boolean isMaleOnChrX = CHECK_IS_MALE_ON_CHR_X && getSample(sample).getGender() == Gender.MALE; Genotype genotype; boolean isValidation; // use sample as key into genotypes structure if (preferredGenotypes.containsSample(sample)) { genotype = preferredGenotypes.get(sample); isValidation = isValidationSite; } else if (otherGenotypes != null && otherGenotypes.containsSample(sample)) { genotype = otherGenotypes.get(sample); isValidation = !isValidationSite; } else { // there is magically no genotype for this sample. throw new StingException( "Sample " + sample + " arose with no genotype in variant or validation VCF. This should never happen."); } /* * Use likelihoods if: is validation, prior is negative; or: is not validation, has genotype key */ double[] log10Likelihoods = null; if ((isValidation && prior < 0.0) || genotype.hasLikelihoods()) { log10Likelihoods = genotype.getLikelihoods().getAsVector(); // see if we need to randomly mask out genotype in this position. if (GenomeAnalysisEngine.getRandomGenerator().nextDouble() <= insertedNoCallRate) { // we are masking out this genotype log10Likelihoods = isMaleOnChrX ? HAPLOID_FLAT_LOG10_LIKELIHOODS : DIPLOID_FLAT_LOG10_LIKELIHOODS; } if (isMaleOnChrX) { log10Likelihoods[1] = -255; // todo -- warning this is dangerous for multi-allele case } } /** otherwise, use the prior uniformly */ else if (!isValidation && genotype.isCalled() && !genotype.hasLikelihoods()) { // hack to deal with input VCFs with no genotype likelihoods. Just assume the called // genotype // is confident. This is useful for Hapmap and 1KG release VCFs. double AA = (1.0 - prior) / 2.0; double AB = (1.0 - prior) / 2.0; double BB = (1.0 - prior) / 2.0; if (genotype.isHomRef()) { AA = prior; } else if (genotype.isHet()) { AB = prior; } else if (genotype.isHomVar()) { BB = prior; } log10Likelihoods = MathUtils.toLog10(new double[] {AA, isMaleOnChrX ? 0.0 : AB, BB}); } else { log10Likelihoods = isMaleOnChrX ? HAPLOID_FLAT_LOG10_LIKELIHOODS : DIPLOID_FLAT_LOG10_LIKELIHOODS; } writeSampleLikelihoods(beagleOut, preferredVC, log10Likelihoods); } beagleWriter.println(beagleOut.toString()); }
/** * Create random read qualities * * @param length the length of the read * @return an array with randomized base qualities between 0 and 50 */ public static byte[] createRandomReadQuals(int length) { Random random = GenomeAnalysisEngine.getRandomGenerator(); byte[] quals = new byte[length]; for (int i = 0; i < length; i++) quals[i] = (byte) random.nextInt(50); return quals; }
/** * Subset VC record if necessary and emit the modified record (provided it satisfies criteria for * printing) * * @param tracker the ROD tracker * @param ref reference information * @param context alignment info * @return 1 if the record was printed to the output file, 0 if otherwise */ @Override public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { if (tracker == null) return 0; Collection<VariantContext> vcs = tracker.getValues(variantCollection.variants, context.getLocation()); if (vcs == null || vcs.size() == 0) { return 0; } for (VariantContext vc : vcs) { if (MENDELIAN_VIOLATIONS) { boolean foundMV = false; for (MendelianViolation mv : mvSet) { if (mv.isViolation(vc)) { foundMV = true; // System.out.println(vc.toString()); if (outMVFile != null) outMVFileStream.format( "MV@%s:%d. REF=%s, ALT=%s, AC=%d, momID=%s, dadID=%s, childID=%s, momG=%s, momGL=%s, dadG=%s, dadGL=%s, " + "childG=%s childGL=%s\n", vc.getChr(), vc.getStart(), vc.getReference().getDisplayString(), vc.getAlternateAllele(0).getDisplayString(), vc.getChromosomeCount(vc.getAlternateAllele(0)), mv.getSampleMom(), mv.getSampleDad(), mv.getSampleChild(), vc.getGenotype(mv.getSampleMom()).toBriefString(), vc.getGenotype(mv.getSampleMom()).getLikelihoods().getAsString(), vc.getGenotype(mv.getSampleDad()).toBriefString(), vc.getGenotype(mv.getSampleMom()).getLikelihoods().getAsString(), vc.getGenotype(mv.getSampleChild()).toBriefString(), vc.getGenotype(mv.getSampleChild()).getLikelihoods().getAsString()); } } if (!foundMV) break; } if (DISCORDANCE_ONLY) { Collection<VariantContext> compVCs = tracker.getValues(discordanceTrack, context.getLocation()); if (!isDiscordant(vc, compVCs)) return 0; } if (CONCORDANCE_ONLY) { Collection<VariantContext> compVCs = tracker.getValues(concordanceTrack, context.getLocation()); if (!isConcordant(vc, compVCs)) return 0; } if (alleleRestriction.equals(NumberAlleleRestriction.BIALLELIC) && !vc.isBiallelic()) continue; if (alleleRestriction.equals(NumberAlleleRestriction.MULTIALLELIC) && vc.isBiallelic()) continue; if (!selectedTypes.contains(vc.getType())) continue; VariantContext sub = subsetRecord(vc, samples); if ((sub.isPolymorphic() || !EXCLUDE_NON_VARIANTS) && (!sub.isFiltered() || !EXCLUDE_FILTERED)) { for (VariantContextUtils.JexlVCMatchExp jexl : jexls) { if (!VariantContextUtils.match(sub, jexl)) { return 0; } } if (SELECT_RANDOM_NUMBER) { randomlyAddVariant(++variantNumber, sub, ref.getBase()); } else if (!SELECT_RANDOM_FRACTION || (GenomeAnalysisEngine.getRandomGenerator().nextDouble() < fractionRandom)) { vcfWriter.add(sub); } } } return 1; }