public void set(MouseEvent e, SAMRecord sr) { if (sr == null) return; StringBuffer text = new StringBuffer(); text.append("<html>"); if (sr != null) { text.append( MessageManager.getString("shortreadtrack.name") + " " + sr.getReadName() + "<br/>"); text.append( MessageManager.getString("shortreadtrack.len") + " " + sr.getReadLength() + "<br/>"); text.append( MessageManager.getString("shortreadtrack.cigar") + " " + sr.getCigarString() + "<br/>"); text.append( MessageManager.getString("shortreadtrack.sequence") + " " + rerun(sr.getReadString()) + "<br/>"); text.append( MessageManager.getString("shortreadtrack.paired") + " " + sr.getReadPairedFlag() + "<br/>"); if (sr.getReadPairedFlag()) { if (!sr.getMateUnmappedFlag()) text.append( MessageManager.getString("shortreadtrack.mate") + " " + sr.getMateReferenceName() + ":" + sr.getMateAlignmentStart() + "<br/>"); else text.append(MessageManager.getString("shortreadtrack.mate_missing") + "<br/>"); text.append( MessageManager.getString("shortreadtrack.second") + " " + sr.getFirstOfPairFlag()); } // text.append("<br/>"); } text.append("</html>"); if (!text.toString().equals(floater.getText())) { floater.setText(text.toString()); this.pack(); } setLocation(e.getXOnScreen() + 5, e.getYOnScreen() + 5); if (!isVisible()) { setVisible(true); } }
public void acceptRecord(final SAMRecordAndReference args) { final SAMRecord rec = args.getSamRecord(); final ReferenceSequence ref = args.getReferenceSequence(); if (rec.getReadPairedFlag()) { if (rec.getFirstOfPairFlag()) { firstOfPairCollector.addRecord(rec, ref); } else { secondOfPairCollector.addRecord(rec, ref); } pairCollector.addRecord(rec, ref); } else { unpairedCollector.addRecord(rec, ref); } }
private static void removeMateInfo(SAMRecord rec) { if (rec.getReadPairedFlag()) { // Remove all information of its mate // flag rec.setProperPairFlag(false); // not paired any more rec.setMateUnmappedFlag(false); rec.setMateNegativeStrandFlag(false); // entries rec.setMateReferenceIndex(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX); rec.setMateAlignmentStart(0); rec.setInferredInsertSize(0); // TODO: remove tags and values that are mate pair inclined. } }
private void collectReadData(final SAMRecord record, final ReferenceSequence ref) { metrics.TOTAL_READS++; readLengthHistogram.increment(record.getReadBases().length); if (!record.getReadFailsVendorQualityCheckFlag()) { metrics.PF_READS++; if (isNoiseRead(record)) metrics.PF_NOISE_READS++; if (record.getReadUnmappedFlag()) { // If the read is unmapped see if it's adapter sequence final byte[] readBases = record.getReadBases(); if (!(record instanceof BAMRecord)) StringUtil.toUpperCase(readBases); if (isAdapterSequence(readBases)) { this.adapterReads++; } } else if (doRefMetrics) { metrics.PF_READS_ALIGNED++; if (!record.getReadNegativeStrandFlag()) numPositiveStrand++; if (record.getReadPairedFlag() && !record.getMateUnmappedFlag()) { metrics.READS_ALIGNED_IN_PAIRS++; // Check that both ends have mapq > minimum final Integer mateMq = record.getIntegerAttribute("MQ"); if (mateMq == null || mateMq >= MAPPING_QUALITY_THRESOLD && record.getMappingQuality() >= MAPPING_QUALITY_THRESOLD) { ++this.chimerasDenominator; // With both reads mapped we can see if this pair is chimeric if (Math.abs(record.getInferredInsertSize()) > maxInsertSize || !record.getReferenceIndex().equals(record.getMateReferenceIndex())) { ++this.chimeras; } } } } } }
@Override public int doWork(String[] args) { File refFile = null; com.github.lindenb.jvarkit.util.cli.GetOpt getopt = new com.github.lindenb.jvarkit.util.cli.GetOpt(); int c; while ((c = getopt.getopt(args, "hvL:r:")) != -1) { switch (c) { case 'h': printUsage(); return 0; case 'v': System.out.println(getVersion()); return 0; case 'L': getLogger().setLevel(java.util.logging.Level.parse(getopt.getOptArg())); break; case 'r': refFile = new File(getopt.getOptArg()); break; case ':': System.err.println("Missing argument for option -" + getopt.getOptOpt()); return -1; default: System.err.println("Unknown option -" + getopt.getOptOpt()); return -1; } } if (refFile == null) { error("Undefined REF file"); return -1; } File bamFile = null; if (getopt.getOptInd() + 1 != args.length) { info("reading from stdin."); } else { bamFile = new File(args[getopt.getOptInd()]); } IndexedFastaSequenceFile indexedFastaSequenceFile = null; SAMFileReader samFileReader = null; try { GenomicSequence genomicSequence = null; indexedFastaSequenceFile = new IndexedFastaSequenceFile(refFile); SAMFileReader.setDefaultValidationStringency(ValidationStringency.SILENT); samFileReader = null; if (bamFile == null) { samFileReader = new SAMFileReader(System.in); } else { samFileReader = new SAMFileReader(bamFile); } XMLOutputFactory xmlfactory = XMLOutputFactory.newInstance(); XMLStreamWriter w = xmlfactory.createXMLStreamWriter(System.out, "UTF-8"); w.writeStartDocument("UTF-8", "1.0"); w.writeStartElement("sam"); w.writeComment(getProgramCommandLine()); w.writeAttribute("ref", (bamFile == null ? "stdin" : bamFile.getPath())); w.writeAttribute("bam", args[1]); SAMRecordIterator iter = samFileReader.iterator(); while (iter.hasNext()) { SAMRecord rec = iter.next(); final byte readbases[] = rec.getReadBases(); w.writeStartElement("read"); w.writeStartElement("name"); w.writeCharacters(rec.getReadName()); w.writeEndElement(); w.writeStartElement("sequence"); w.writeCharacters(new String(readbases)); w.writeEndElement(); w.writeStartElement("flags"); w.writeAttribute("paired", String.valueOf(rec.getReadPairedFlag())); w.writeAttribute( "failsVendorQual", String.valueOf(rec.getReadFailsVendorQualityCheckFlag())); w.writeAttribute("mapped", String.valueOf(!rec.getReadUnmappedFlag())); w.writeAttribute("strand", (rec.getReadNegativeStrandFlag() ? "-" : "+")); if (rec.getReadPairedFlag()) { w.writeAttribute("mate-mapped", String.valueOf(!rec.getMateUnmappedFlag())); w.writeAttribute("mate-strand", (rec.getMateNegativeStrandFlag() ? "-" : "+")); w.writeAttribute("proper-pair", String.valueOf(rec.getProperPairFlag())); } w.writeCharacters(String.valueOf(rec.getFlags())); w.writeEndElement(); if (!rec.getReadUnmappedFlag()) { w.writeStartElement("qual"); w.writeCharacters(String.valueOf(rec.getMappingQuality())); w.writeEndElement(); w.writeStartElement("chrom"); w.writeAttribute("index", String.valueOf(rec.getReferenceIndex())); w.writeCharacters(rec.getReferenceName()); w.writeEndElement(); w.writeStartElement("pos"); w.writeCharacters(String.valueOf(rec.getAlignmentStart())); w.writeEndElement(); w.writeStartElement("cigar"); w.writeCharacters(rec.getCigarString()); w.writeEndElement(); } if (!rec.getMateUnmappedFlag()) { w.writeStartElement("mate-chrom"); w.writeAttribute("index", String.valueOf(rec.getMateReferenceIndex())); w.writeCharacters(rec.getMateReferenceName()); w.writeEndElement(); w.writeStartElement("mate-pos"); w.writeCharacters(String.valueOf(rec.getMateAlignmentStart())); w.writeEndElement(); } if (!rec.getReadUnmappedFlag()) { if (genomicSequence == null || genomicSequence.getChrom().equals(rec.getReferenceName())) { genomicSequence = new GenomicSequence(indexedFastaSequenceFile, rec.getReferenceName()); } w.writeStartElement("align"); int readIndex = 0; int refIndex = rec.getAlignmentStart(); for (final CigarElement e : rec.getCigar().getCigarElements()) { switch (e.getOperator()) { case H: break; // ignore hard clips case P: break; // ignore pads case I: // cont. case S: { final int length = e.getLength(); for (int i = 0; i < length; ++i) { w.writeEmptyElement(e.getOperator().name()); w.writeAttribute("read-index", String.valueOf(readIndex + 1)); if (readIndex >= 0 && readIndex < readbases.length) { w.writeAttribute("read-base", String.valueOf((char) (readbases[readIndex]))); } readIndex++; } break; } case N: // cont. -- reference skip case D: { final int length = e.getLength(); for (int i = 0; i < length; ++i) { w.writeEmptyElement(e.getOperator().name()); w.writeAttribute("ref-index", String.valueOf(refIndex)); if (refIndex >= 1 && refIndex <= genomicSequence.length()) { w.writeAttribute( "ref-base", String.valueOf(genomicSequence.charAt(refIndex - 1))); } refIndex++; } break; } case M: case EQ: case X: { final int length = e.getLength(); for (int i = 0; i < length; ++i) { w.writeEmptyElement(e.getOperator().name()); char baseRead = '\0'; if (readIndex >= 0 && readIndex < readbases.length) { baseRead = (char) (rec.getReadBases()[readIndex]); w.writeAttribute("read-index", String.valueOf(readIndex + 1)); w.writeAttribute("read-base", String.valueOf(baseRead)); } w.writeAttribute("ref-index", String.valueOf(refIndex)); if (refIndex >= 1 && refIndex <= genomicSequence.length()) { char baseRef = genomicSequence.charAt(refIndex - 1); w.writeAttribute("ref-base", String.valueOf(baseRef)); if (Character.toUpperCase(baseRef) != Character.toUpperCase(baseRead)) { w.writeAttribute("mismatch", "true"); } } refIndex++; readIndex++; } break; } default: throw new IllegalStateException( "Case statement didn't deal with cigar op: " + e.getOperator()); } } } w.writeEndElement(); w.writeEndElement(); iter.close(); w.writeEndElement(); } w.writeEndElement(); w.writeEndDocument(); w.flush(); w.close(); } catch (Exception err) { error(err); return -1; } finally { CloserUtil.close(samFileReader); CloserUtil.close(indexedFastaSequenceFile); } return 0; }
@Override public int doWork(String[] args) { boolean repair_missing_read = false; SortingCollectionFactory<MappedFastq> sortingFactory = new SortingCollectionFactory<MappedFastq>(); File forwardFile = null; File reverseFile = null; com.github.lindenb.jvarkit.util.cli.GetOpt opt = new com.github.lindenb.jvarkit.util.cli.GetOpt(); int c; sortingFactory.setComponentType(MappedFastq.class); sortingFactory.setCodec(new MappedFastqCodec()); sortingFactory.setComparator(new MappedFastqComparator()); while ((c = opt.getopt(args, super.getGetOptDefault() + "F:R:N:r")) != -1) { switch (c) { case 'F': forwardFile = new File(opt.getOptArg()); break; case 'R': reverseFile = new File(opt.getOptArg()); break; case 't': addTmpDirectory(new File(opt.getOptArg())); break; case 'N': sortingFactory.setMaxRecordsInRAM(Math.max(Integer.parseInt(opt.getOptArg()), 100)); break; case 'r': repair_missing_read = true; break; case ':': System.err.println("Missing argument for option -" + opt.getOptOpt()); return -1; default: { switch (handleOtherOptions(c, opt, args)) { case EXIT_FAILURE: return -1; case EXIT_SUCCESS: return 0; default: break; } } } } SAMFileReader sfr = null; SortingCollection<MappedFastq> fastqCollection = null; try { sortingFactory.setTmpDirs(this.getTmpDirectories()); fastqCollection = sortingFactory.make(); fastqCollection.setDestructiveIteration(true); boolean found_single = false; boolean found_paired = false; long non_primary_alignmaned_flag = 0L; if (opt.getOptInd() == args.length) { info("Reading from stdin"); sfr = new SAMFileReader(System.in); } else if (opt.getOptInd() + 1 == args.length) { String filename = args[opt.getOptInd()]; sfr = new SAMFileReader(new File(filename)); } else { error(getMessageBundle("illegal.number.of.arguments")); return -1; } sfr.setValidationStringency(ValidationStringency.LENIENT); SAMRecordIterator iter = sfr.iterator(); SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(sfr.getFileHeader().getSequenceDictionary()); while (iter.hasNext()) { SAMRecord rec = iter.next(); progress.watch(rec); if (rec.isSecondaryOrSupplementary()) { if (non_primary_alignmaned_flag == 0) { warning("SKIPPING NON-PRIMARY " + (non_primary_alignmaned_flag + 1) + " ALIGNMENTS"); } non_primary_alignmaned_flag++; continue; } MappedFastq m = new MappedFastq(); m.name = rec.getReadName(); if (m.name == null) m.name = ""; m.hash = m.name.hashCode(); m.seq = rec.getReadString(); if (m.seq.equals(SAMRecord.NULL_SEQUENCE_STRING)) m.seq = ""; m.qual = rec.getBaseQualityString(); if (m.qual.equals(SAMRecord.NULL_QUALS_STRING)) m.qual = ""; if (!rec.getReadUnmappedFlag() && rec.getReadNegativeStrandFlag()) { m.seq = AcidNucleics.reverseComplement(m.seq); m.qual = new StringBuilder(m.qual).reverse().toString(); } if (m.seq.length() != m.qual.length()) { error("length(seq)!=length(qual) in " + m.name); continue; } if (m.seq.isEmpty() && m.qual.isEmpty()) { m.seq = "N"; m.qual = "#"; } if (rec.getReadPairedFlag()) { found_paired = true; if (found_single) { sfr.close(); throw new PicardException("input is a mix of paired/singled reads"); } m.side = (byte) (rec.getSecondOfPairFlag() ? 2 : 1); } else { found_single = true; if (found_paired) { sfr.close(); throw new PicardException("input is a mix of paired/singled reads"); } m.side = (byte) 0; } fastqCollection.add(m); } iter.close(); CloserUtil.close(iter); CloserUtil.close(sfr); progress.finish(); fastqCollection.doneAdding(); info("Done reading."); if (found_paired) { FastqWriter fqw1 = null; FastqWriter fqw2 = null; if (forwardFile != null) { info("Writing to " + forwardFile); fqw1 = new BasicFastqWriter(forwardFile); } else { info("Writing to stdout"); fqw1 = new BasicFastqWriter(new PrintStream(System.out)); } if (reverseFile != null) { info("Writing to " + reverseFile); fqw2 = new BasicFastqWriter(reverseFile); } else { info("Writing to interlaced stdout"); fqw2 = fqw1; } List<MappedFastq> row = new ArrayList<MappedFastq>(); CloseableIterator<MappedFastq> r = fastqCollection.iterator(); for (; ; ) { MappedFastq curr = null; if (r.hasNext()) curr = r.next(); if (curr == null || (!row.isEmpty() && !row.get(0).name.equals(curr.name))) { if (!row.isEmpty()) { if (row.size() > 2) { warning("WTF :" + row); } boolean found_F = false; boolean found_R = false; for (MappedFastq m : row) { switch ((int) m.side) { case 1: if (found_F) throw new PicardException("two forward reads found for " + row.get(0).name); found_F = true; echo(fqw1, m); break; case 2: if (found_R) throw new PicardException("two reverse reads found for " + row.get(0).name); found_R = true; echo(fqw2, m); break; default: throw new IllegalStateException("uh???"); } } if (!found_F) { if (repair_missing_read) { warning("forward not found for " + row.get(0)); MappedFastq pad = new MappedFastq(); pad.side = (byte) 1; pad.name = row.get(0).name; pad.seq = "N"; pad.qual = "#"; echo(fqw1, pad); } else { throw new PicardException("forward not found for " + row); } } if (!found_R) { if (repair_missing_read) { warning("reverse not found for " + row.get(0)); MappedFastq pad = new MappedFastq(); pad.side = (byte) 2; pad.name = row.get(0).name; pad.seq = "N"; pad.qual = "#"; echo(fqw2, pad); } else { throw new PicardException("reverse not found for " + row); } } } if (curr == null) break; row.clear(); } row.add(curr); } r.close(); fqw1.close(); fqw2.close(); } else if (found_single) { FastqWriter fqw1 = null; if (forwardFile != null) { info("Writing to " + forwardFile); fqw1 = new BasicFastqWriter(forwardFile); } else { info("Writing to stdout"); fqw1 = new BasicFastqWriter(new PrintStream(System.out)); } CloseableIterator<MappedFastq> r = fastqCollection.iterator(); while (r.hasNext()) { echo(fqw1, r.next()); } r.close(); fqw1.close(); } return 0; } catch (Exception err) { error(err); return -1; } finally { if (fastqCollection != null) fastqCollection.cleanup(); } }