public static void main(String[] args) {
    // Debugging override: hard-coded test BAM (kept from the original snippet).
    args = new String[] {
        "/commun/data/users/cfaucheron/aln_20120329/S0529/data_S0529/S0529_sort.nodup.bam"
    };
    ReferenceSequenceFile rsf = ReferenceSequenceFileFactory.getReferenceSequenceFile(
            new File("/commun/data/pubdb/ucsc/hg19/chromosomes/hg19.fa"));
    int count = 0;
    for (String filename : args) {
        File file = new File(filename);
        SAMFileReader samIn = new SAMFileReader(file);
        SAMRecordIterator r = samIn.iterator();
        while (r.hasNext()) {
            SAMRecord rec = r.next();
            if (rec.getReadUnmappedFlag()) continue;
            if (++count > 10000) break; // only inspect the first 10,000 mapped reads

            if (rec.getAlignmentStart() > rec.getAlignmentEnd()) throw new IllegalStateException();

            // Fetch the reference bases spanned by this alignment; use the padded
            // reference length as a lower bound so padded records are fully covered.
            byte[] bases = rsf.getSubsequenceAt(
                            rec.getReferenceName(),
                            rec.getAlignmentStart(),
                            Math.max(
                                    rec.getAlignmentEnd(),
                                    rec.getAlignmentStart()
                                            + rec.getCigar().getPaddedReferenceLength()))
                    .getBases();

            Iterator<CigarAlignment> i = CigarAlignment.iterator(rec);
            StringBuilder s1 = new StringBuilder(); // read sequence, gapped
            StringBuilder s2 = new StringBuilder(); // reference sequence, gapped
            while (i.hasNext()) {
                CigarAlignment caln = i.next();

                // The fetched reference window should cover every reference position
                // visited by the CIGAR; report any record where it does not.
                if (caln.getReferencePosition1() - rec.getAlignmentStart() >= bases.length) {
                    System.out.println("SHORT!");
                    System.out.println("op:" + caln.getCigarOperator());
                    System.out.println("read start:" + rec.getAlignmentStart());
                    System.out.println("caln.pos1:" + caln.getReferencePosition1());
                    System.out.println("read end:" + rec.getAlignmentEnd());
                    System.out.println("bases.length:" + bases.length);
                    System.out.println("getPaddedReferenceLength:"
                            + rec.getCigar().getPaddedReferenceLength());
                    System.out.println("getReferenceLength:" + rec.getCigar().getReferenceLength());
                    System.out.println("getReadLength:" + rec.getCigar().getReadLength());
                    System.out.println("cigar.read.length:"
                            + Cigar.getReadLength(rec.getCigar().getCigarElements()));
                    count = 2000;
                    break;
                }

                if (caln.isInsertRef()) {
                    // Insertion relative to the reference: gap in the reference row.
                    s2.append("-");
                    s1.append(caln.getReadBase());
                } else if (caln.isDeletionRef()) {
                    // Deletion relative to the reference: gap in the read row.
                    s2.append((char) bases[caln.getReferencePosition1() - rec.getAlignmentStart()]);
                    s1.append("-");
                } else {
                    // Match or mismatch: emit both bases.
                    s2.append((char) bases[caln.getReferencePosition1() - rec.getAlignmentStart()]);
                    s1.append(caln.getReadBase());
                }
            }

            System.out.println(
                    rec.getCigarString() + " " + rec.getReferenceName() + ":" + rec.getAlignmentStart());
            System.out.println("ref :" + new String(bases));
            System.out.println("read:" + new String(rec.getReadBases()));
            System.out.println();
            System.out.println(s1);
            System.out.println(s2);
            System.out.println();
        }
        samIn.close();
    }
}
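/*
 * For comparison, a minimal sketch of the same gapped-alignment reconstruction
 * using only the standard Cigar/CigarElement API instead of the custom
 * CigarAlignment iterator above. It assumes, like the code above, that
 * refBases[0] is the reference base at rec.getAlignmentStart() and that the
 * window is long enough. "buildGappedRows" is a hypothetical helper name,
 * not part of the original code.
 */
// import net.sf.samtools.CigarElement;
// import net.sf.samtools.CigarOperator;
// import net.sf.samtools.SAMRecord;
static String[] buildGappedRows(SAMRecord rec, byte[] refBases) {
    StringBuilder read = new StringBuilder(); // read sequence, gapped
    StringBuilder ref = new StringBuilder();  // reference sequence, gapped
    int readPos = 0; // 0-based offset in rec.getReadBases()
    int refPos = 0;  // 0-based offset in refBases
    for (CigarElement ce : rec.getCigar().getCigarElements()) {
        CigarOperator op = ce.getOperator();
        if (op == CigarOperator.S) { // soft clip: skip clipped read bases entirely
            readPos += ce.getLength();
            continue;
        }
        for (int k = 0; k < ce.getLength(); ++k) {
            boolean onRead = op.consumesReadBases();
            boolean onRef = op.consumesReferenceBases();
            if (onRead && onRef) {   // M, =, X: emit both bases
                read.append((char) rec.getReadBases()[readPos++]);
                ref.append((char) refBases[refPos++]);
            } else if (onRead) {     // I: gap in the reference row
                read.append((char) rec.getReadBases()[readPos++]);
                ref.append('-');
            } else if (onRef) {      // D, N: gap in the read row
                read.append('-');
                ref.append((char) refBases[refPos++]);
            }                        // H, P consume neither; nothing emitted
        }
    }
    return new String[] { read.toString(), ref.toString() };
}
// Usage: String[] rows = buildGappedRows(rec, bases); rows[0] and rows[1]
// correspond to s1 (read) and s2 (reference) in the main() loop above.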
/**
 * Asserts that files are readable and writable and then fires off a
 * TargetMetricsCollector instance to do the real work.
 */
protected int doWork() {
    IoUtil.assertFileIsReadable(getProbeIntervals());
    IoUtil.assertFileIsReadable(TARGET_INTERVALS);
    IoUtil.assertFileIsReadable(INPUT);
    IoUtil.assertFileIsWritable(OUTPUT);
    if (PER_TARGET_COVERAGE != null) IoUtil.assertFileIsWritable(PER_TARGET_COVERAGE);

    final SAMFileReader samReader = new SAMFileReader(INPUT);
    final File probeIntervals = getProbeIntervals();

    // Validate that the targets and baits have the same references as the reads file
    SequenceUtil.assertSequenceDictionariesEqual(
            samReader.getFileHeader().getSequenceDictionary(),
            IntervalList.fromFile(TARGET_INTERVALS).getHeader().getSequenceDictionary(),
            INPUT,
            TARGET_INTERVALS);
    SequenceUtil.assertSequenceDictionariesEqual(
            samReader.getFileHeader().getSequenceDictionary(),
            IntervalList.fromFile(probeIntervals).getHeader().getSequenceDictionary(),
            INPUT,
            probeIntervals);

    ReferenceSequenceFile ref = null;
    if (REFERENCE_SEQUENCE != null) {
        IoUtil.assertFileIsReadable(REFERENCE_SEQUENCE);
        ref = ReferenceSequenceFileFactory.getReferenceSequenceFile(REFERENCE_SEQUENCE);
        SequenceUtil.assertSequenceDictionariesEqual(
                samReader.getFileHeader().getSequenceDictionary(),
                ref.getSequenceDictionary(),
                INPUT,
                REFERENCE_SEQUENCE);
    }

    final TargetMetricsCollector collector =
            makeCollector(
                    METRIC_ACCUMULATION_LEVEL,
                    samReader.getFileHeader().getReadGroups(),
                    ref,
                    PER_TARGET_COVERAGE,
                    TARGET_INTERVALS,
                    probeIntervals,
                    getProbeSetName());

    // Add each record to the requested collectors
    final Iterator<SAMRecord> records = samReader.iterator();
    final ProgressLogger progress = new ProgressLogger(log);
    while (records.hasNext()) {
        final SAMRecord sam = records.next();
        collector.acceptRecord(sam, null);
        progress.record(sam);
    }

    // Write the output file
    final MetricsFile<HsMetrics, Integer> metrics = getMetricsFile();
    collector.finish();
    collector.addAllLevelsToFile(metrics);
    metrics.write(OUTPUT);

    return 0;
}
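/*
 * A small usage sketch: reading back the HsMetrics file written by doWork()
 * above and printing one headline value per metrics row. This assumes the
 * pre-htsjdk Picard API visible in this snippet (net.sf.picard.metrics.MetricsFile,
 * net.sf.picard.analysis.directed.HsMetrics); "printMeanTargetCoverage" is a
 * hypothetical helper, not part of the original code.
 */
// import java.io.File;
// import java.io.FileReader;
// import net.sf.picard.analysis.directed.HsMetrics;
// import net.sf.picard.metrics.MetricsFile;
public static void printMeanTargetCoverage(final File metricsOut) throws Exception {
    final MetricsFile<HsMetrics, Integer> mf = new MetricsFile<HsMetrics, Integer>();
    mf.read(new FileReader(metricsOut));
    for (final HsMetrics m : mf.getMetrics()) {
        // One row per accumulation level requested via METRIC_ACCUMULATION_LEVEL
        System.out.println(m.BAIT_SET + "\tMEAN_TARGET_COVERAGE=" + m.MEAN_TARGET_COVERAGE);
    }
}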
static VariantContext reallyMergeIntoMNP(
        VariantContext vc1, VariantContext vc2, ReferenceSequenceFile referenceFile) {
    int startInter = vc1.getEnd() + 1;
    int endInter = vc2.getStart() - 1;
    byte[] intermediateBases = null;
    if (startInter <= endInter) {
        intermediateBases =
                referenceFile.getSubsequenceAt(vc1.getChr(), startInter, endInter).getBases();
        StringUtil.toUpperCase(intermediateBases);
    }

    MergedAllelesData mergeData =
            new MergedAllelesData(
                    intermediateBases, vc1, vc2); // ensures that the reference allele is added

    GenotypesContext mergedGenotypes = GenotypesContext.create();
    for (final Genotype gt1 : vc1.getGenotypes()) {
        Genotype gt2 = vc2.getGenotype(gt1.getSampleName());

        List<Allele> site1Alleles = gt1.getAlleles();
        List<Allele> site2Alleles = gt2.getAlleles();

        List<Allele> mergedAllelesForSample = new LinkedList<Allele>();

        /* NOTE: Since merged alleles are added to mergedAllelesForSample in the SAME order
           as in the input VC records, we preserve phase information (if any) relative to
           whatever precedes vc1: */
        Iterator<Allele> all2It = site2Alleles.iterator();
        for (Allele all1 : site1Alleles) {
            Allele all2 = all2It.next(); // this is OK, since allSamplesAreMergeable()

            Allele mergedAllele = mergeData.ensureMergedAllele(all1, all2);
            mergedAllelesForSample.add(mergedAllele);
        }

        double mergedGQ = Math.max(gt1.getLog10PError(), gt2.getLog10PError());
        Set<String> mergedGtFilters =
                new HashSet<String>(); // since gt1 and gt2 were unfiltered, the Genotype
                                       // remains unfiltered

        Map<String, Object> mergedGtAttribs = new HashMap<String, Object>();
        PhaseAndQuality phaseQual = calcPhaseForMergedGenotypes(gt1, gt2);
        if (phaseQual.PQ != null) mergedGtAttribs.put(ReadBackedPhasingWalker.PQ_KEY, phaseQual.PQ);

        Genotype mergedGt =
                new Genotype(
                        gt1.getSampleName(),
                        mergedAllelesForSample,
                        mergedGQ,
                        mergedGtFilters,
                        mergedGtAttribs,
                        phaseQual.isPhased);
        mergedGenotypes.add(mergedGt);
    }

    String mergedName = mergeVariantContextNames(vc1.getSource(), vc2.getSource());
    double mergedLog10PError = Math.min(vc1.getLog10PError(), vc2.getLog10PError());
    Set<String> mergedFilters =
            new HashSet<String>(); // since vc1 and vc2 were unfiltered, the merged record
                                   // remains unfiltered
    Map<String, Object> mergedAttribs = mergeVariantContextAttributes(vc1, vc2);

    // ids
    List<String> mergedIDs = new ArrayList<String>();
    if (vc1.hasID()) mergedIDs.add(vc1.getID());
    if (vc2.hasID()) mergedIDs.add(vc2.getID());
    String mergedID =
            mergedIDs.isEmpty()
                    ? VCFConstants.EMPTY_ID_FIELD
                    : Utils.join(VCFConstants.ID_FIELD_SEPARATOR, mergedIDs);

    VariantContextBuilder mergedBuilder =
            new VariantContextBuilder(
                            mergedName,
                            vc1.getChr(),
                            vc1.getStart(),
                            vc2.getEnd(),
                            mergeData.getAllMergedAlleles())
                    .id(mergedID)
                    .genotypes(mergedGenotypes)
                    .log10PError(mergedLog10PError)
                    .filters(mergedFilters)
                    .attributes(mergedAttribs);
    VariantContextUtils.calculateChromosomeCounts(mergedBuilder, true);
    return mergedBuilder.make();
}
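/*
 * Illustration of the allele-concatenation step that ensureMergedAllele()
 * presumably performs above: a merged MNP allele is the site-1 allele bases,
 * followed by the intermediate reference bases (when the two sites are not
 * adjacent), followed by the site-2 allele bases. "concatAlleleBases" is a
 * hypothetical helper for exposition, not part of the original code.
 */
static String concatAlleleBases(Allele all1, byte[] intermediateBases, Allele all2) {
    final StringBuilder sb = new StringBuilder();
    sb.append(all1.getBaseString());
    if (intermediateBases != null) sb.append(new String(intermediateBases));
    sb.append(all2.getBaseString());
    return sb.toString();
}
// Example: vc1 has REF=A, ALT=G at position 100; vc2 has REF=T, ALT=C at 102;
// the intermediate reference base at 101 is C. The merged REF allele is then
// "ACT" and the merged ALT allele is "GCC".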