@Override public int doWork(String[] args) { File refFile = null; com.github.lindenb.jvarkit.util.cli.GetOpt getopt = new com.github.lindenb.jvarkit.util.cli.GetOpt(); int c; while ((c = getopt.getopt(args, "hvL:r:")) != -1) { switch (c) { case 'h': printUsage(); return 0; case 'v': System.out.println(getVersion()); return 0; case 'L': getLogger().setLevel(java.util.logging.Level.parse(getopt.getOptArg())); break; case 'r': refFile = new File(getopt.getOptArg()); break; case ':': System.err.println("Missing argument for option -" + getopt.getOptOpt()); return -1; default: System.err.println("Unknown option -" + getopt.getOptOpt()); return -1; } } if (refFile == null) { error("Undefined REF file"); return -1; } File bamFile = null; if (getopt.getOptInd() + 1 != args.length) { info("reading from stdin."); } else { bamFile = new File(args[getopt.getOptInd()]); } IndexedFastaSequenceFile indexedFastaSequenceFile = null; SAMFileReader samFileReader = null; try { GenomicSequence genomicSequence = null; indexedFastaSequenceFile = new IndexedFastaSequenceFile(refFile); SAMFileReader.setDefaultValidationStringency(ValidationStringency.SILENT); samFileReader = null; if (bamFile == null) { samFileReader = new SAMFileReader(System.in); } else { samFileReader = new SAMFileReader(bamFile); } XMLOutputFactory xmlfactory = XMLOutputFactory.newInstance(); XMLStreamWriter w = xmlfactory.createXMLStreamWriter(System.out, "UTF-8"); w.writeStartDocument("UTF-8", "1.0"); w.writeStartElement("sam"); w.writeComment(getProgramCommandLine()); w.writeAttribute("ref", (bamFile == null ? "stdin" : bamFile.getPath())); w.writeAttribute("bam", args[1]); SAMRecordIterator iter = samFileReader.iterator(); while (iter.hasNext()) { SAMRecord rec = iter.next(); final byte readbases[] = rec.getReadBases(); w.writeStartElement("read"); w.writeStartElement("name"); w.writeCharacters(rec.getReadName()); w.writeEndElement(); w.writeStartElement("sequence"); w.writeCharacters(new String(readbases)); w.writeEndElement(); w.writeStartElement("flags"); w.writeAttribute("paired", String.valueOf(rec.getReadPairedFlag())); w.writeAttribute( "failsVendorQual", String.valueOf(rec.getReadFailsVendorQualityCheckFlag())); w.writeAttribute("mapped", String.valueOf(!rec.getReadUnmappedFlag())); w.writeAttribute("strand", (rec.getReadNegativeStrandFlag() ? "-" : "+")); if (rec.getReadPairedFlag()) { w.writeAttribute("mate-mapped", String.valueOf(!rec.getMateUnmappedFlag())); w.writeAttribute("mate-strand", (rec.getMateNegativeStrandFlag() ? "-" : "+")); w.writeAttribute("proper-pair", String.valueOf(rec.getProperPairFlag())); } w.writeCharacters(String.valueOf(rec.getFlags())); w.writeEndElement(); if (!rec.getReadUnmappedFlag()) { w.writeStartElement("qual"); w.writeCharacters(String.valueOf(rec.getMappingQuality())); w.writeEndElement(); w.writeStartElement("chrom"); w.writeAttribute("index", String.valueOf(rec.getReferenceIndex())); w.writeCharacters(rec.getReferenceName()); w.writeEndElement(); w.writeStartElement("pos"); w.writeCharacters(String.valueOf(rec.getAlignmentStart())); w.writeEndElement(); w.writeStartElement("cigar"); w.writeCharacters(rec.getCigarString()); w.writeEndElement(); } if (!rec.getMateUnmappedFlag()) { w.writeStartElement("mate-chrom"); w.writeAttribute("index", String.valueOf(rec.getMateReferenceIndex())); w.writeCharacters(rec.getMateReferenceName()); w.writeEndElement(); w.writeStartElement("mate-pos"); w.writeCharacters(String.valueOf(rec.getMateAlignmentStart())); w.writeEndElement(); } if (!rec.getReadUnmappedFlag()) { if (genomicSequence == null || genomicSequence.getChrom().equals(rec.getReferenceName())) { genomicSequence = new GenomicSequence(indexedFastaSequenceFile, rec.getReferenceName()); } w.writeStartElement("align"); int readIndex = 0; int refIndex = rec.getAlignmentStart(); for (final CigarElement e : rec.getCigar().getCigarElements()) { switch (e.getOperator()) { case H: break; // ignore hard clips case P: break; // ignore pads case I: // cont. case S: { final int length = e.getLength(); for (int i = 0; i < length; ++i) { w.writeEmptyElement(e.getOperator().name()); w.writeAttribute("read-index", String.valueOf(readIndex + 1)); if (readIndex >= 0 && readIndex < readbases.length) { w.writeAttribute("read-base", String.valueOf((char) (readbases[readIndex]))); } readIndex++; } break; } case N: // cont. -- reference skip case D: { final int length = e.getLength(); for (int i = 0; i < length; ++i) { w.writeEmptyElement(e.getOperator().name()); w.writeAttribute("ref-index", String.valueOf(refIndex)); if (refIndex >= 1 && refIndex <= genomicSequence.length()) { w.writeAttribute( "ref-base", String.valueOf(genomicSequence.charAt(refIndex - 1))); } refIndex++; } break; } case M: case EQ: case X: { final int length = e.getLength(); for (int i = 0; i < length; ++i) { w.writeEmptyElement(e.getOperator().name()); char baseRead = '\0'; if (readIndex >= 0 && readIndex < readbases.length) { baseRead = (char) (rec.getReadBases()[readIndex]); w.writeAttribute("read-index", String.valueOf(readIndex + 1)); w.writeAttribute("read-base", String.valueOf(baseRead)); } w.writeAttribute("ref-index", String.valueOf(refIndex)); if (refIndex >= 1 && refIndex <= genomicSequence.length()) { char baseRef = genomicSequence.charAt(refIndex - 1); w.writeAttribute("ref-base", String.valueOf(baseRef)); if (Character.toUpperCase(baseRef) != Character.toUpperCase(baseRead)) { w.writeAttribute("mismatch", "true"); } } refIndex++; readIndex++; } break; } default: throw new IllegalStateException( "Case statement didn't deal with cigar op: " + e.getOperator()); } } } w.writeEndElement(); w.writeEndElement(); iter.close(); w.writeEndElement(); } w.writeEndElement(); w.writeEndDocument(); w.flush(); w.close(); } catch (Exception err) { error(err); return -1; } finally { CloserUtil.close(samFileReader); CloserUtil.close(indexedFastaSequenceFile); } return 0; }
public static void main(String[] args) { args = new String[] { "/commun/data/users/cfaucheron/aln_20120329/S0529/data_S0529/S0529_sort.nodup.bam" }; ReferenceSequenceFile rsf = ReferenceSequenceFileFactory.getReferenceSequenceFile( new File("/commun/data/pubdb/ucsc/hg19/chromosomes/hg19.fa")); int count = 0; for (String filename : args) { File file = new File(filename); SAMFileReader samIn = new SAMFileReader(file); SAMRecordIterator r = samIn.iterator(); while (r.hasNext()) { SAMRecord rec = r.next(); if (rec.getReadUnmappedFlag()) continue; if (++count > 10000) break; if (rec.getAlignmentStart() > rec.getAlignmentEnd()) throw new IllegalStateException(); byte bases[] = rsf.getSubsequenceAt( rec.getReferenceName(), rec.getAlignmentStart(), Math.max( rec.getAlignmentEnd(), rec.getAlignmentStart() + rec.getCigar().getPaddedReferenceLength())) .getBases(); Iterator<CigarAlignment> i = CigarAlignment.iterator(rec); /*System.err.println(rec.getCigarString()); System.err.println(bases.length); System.err.println("start:"+rec.getAlignmentStart());*/ StringBuilder s1 = new StringBuilder(); StringBuilder s2 = new StringBuilder(); while (i.hasNext()) { CigarAlignment caln = i.next(); /* System.err.println(rec.getCigarString()); System.err.println("bases.length:"+bases.length); System.err.println("refpos:"+caln.getReferencePosition1()); System.err.println("readpos:"+rec.getAlignmentStart()); */ if (caln.getReferencePosition1() - rec.getAlignmentStart() >= bases.length) { System.out.println("SHORT!"); System.out.println("op:" + caln.getCigarOperator()); System.out.println("read start:" + rec.getAlignmentStart()); System.out.println("clan.pos1:" + caln.getReferencePosition1()); System.out.println("read end:" + rec.getAlignmentEnd()); System.out.println("bases.length:" + bases.length); System.out.println( "getPaddedReferenceLength:" + rec.getCigar().getPaddedReferenceLength()); System.out.println("getReferenceLength:" + rec.getCigar().getReferenceLength()); System.out.println("getReadLength:" + rec.getCigar().getReadLength()); System.out.println( "cigar.read.length:" + Cigar.getReadLength(rec.getCigar().getCigarElements())); count = 2000; break; } if (caln.isInsertRef()) { s2.append("-"); s1.append(caln.getReadBase()); } else if (caln.isDeletionRef()) { s2.append((char) bases[caln.getReferencePosition1() - rec.getAlignmentStart()]); s1.append("-"); } else { s2.append((char) bases[caln.getReferencePosition1() - rec.getAlignmentStart()]); s1.append(caln.getReadBase()); } // System.out.println(s1); // System.out.println(s2); // System.out.println(); } System.out.println( rec.getCigarString() + " " + rec.getReferenceName() + ":" + rec.getAlignmentStart()); System.out.println("ref :" + new String(bases)); System.out.println("read:" + new String(rec.getReadBases())); System.out.println(); System.out.println(s1); System.out.println(s2); System.out.println(); } samIn.close(); } }