@Override public SAMRecordPair getNextReadPair() { // insert first read into dictionary by queryname // insert second read into dictionary // check if the dictionary length for that entry has both pairs // if it is does return the read pair // otherwise continue reading // this way just return pairs as they are completed // should be MUCH faster // make sure to delete the entry after returning so that we dont have a memory leak if (iterator.hasNext()) { while (iterator.hasNext()) { SAMRecord record = iterator.next(); countRead(record); // skip if the read is unmapped, not properly paired or mate is unmapped if (record.getReadUnmappedFlag() == true || record.getProperPairFlag() == false || record.getMateUnmappedFlag() == true) { continue; } String query = record.getReadName(); // check if read mate has been read already if (readBuffer.containsKey(query)) { // if it has then return the pair SAMRecordPair pair = readBuffer.get(query); pair.addPair(record); if (pair.bothPairsAligned() && pair.isValidPair()) { // prevent memory leak by deleting keys that are no longer needed readBuffer.remove(query); return pair; } else { throw new RuntimeException(query + " is not properly mated"); } } else { // otherwise create an entry and store it by its query name SAMRecordPair pair = new SAMRecordPair(); pair.addPair(record); readBuffer.put(query, pair); } } } else { if (readBuffer.size() > 0) { for (String key : readBuffer.keySet()) { logger.info("No mate for for " + key); } throw new RuntimeException( "No mates found for some reads please make sure all reads are properly paired"); } } return null; }
/**
 * Returns the next read sequence from the first configured source.
 *
 * <p>The sources are probed in a fixed order: {@code sri}, then {@code fri}, then
 * {@code fsi}. For the first two, the read string of the next element is returned; for
 * {@code fsi} the next element itself is returned.
 *
 * @return the next read sequence, or {@code null} when none of the sources is set
 */
@Override
public String next() {
    if (sri != null) {
        return sri.next().getReadString();
    }
    if (fri != null) {
        return fri.next().getReadString();
    }
    return fsi == null ? null : fsi.next();
}
/**
 * Unfinished scaffold for scanning alignments over a fixed-width window.
 *
 * <p>Currently it builds a column-to-genomic-coordinate table for an 80-column window
 * starting at position 0, creates a {@code GenomicSequence} for an empty contig name from
 * {@code indexedFastaSequenceFile}, and creates empty {@code Element} and {@code Root}
 * objects. None of these values are used yet.
 *
 * <p>NOTE(review): no SAM record source is assigned to the iterator, so the record loop never
 * runs. It is guarded against the {@code null} iterator instead of dereferencing it, which
 * previously made every call fail with a {@code NullPointerException}.
 */
private void scan() {
    String chrom = "";
    int chromStart = 0;
    int width = 80;

    // Column i of the window maps to genomic position chromStart + i.
    int[] column2genomic = new int[width];
    for (int i = 0; i < column2genomic.length; ++i) {
        column2genomic[i] = chromStart + i;
    }

    // Constructors are still invoked in the original order; the results are not used yet.
    GenomicSequence genomicSequence = new GenomicSequence(indexedFastaSequenceFile, chrom);
    Element contig = new Element();
    Root root = new Root();

    // TODO: obtain the iterator from an actual SAM reader; until then there is nothing to scan.
    SAMRecordIterator iter = null;
    while (iter != null && iter.hasNext()) {
        SAMRecord rec = iter.next();
    }
}
@Override public int doWork(String[] args) { boolean compressed = false; int maxRecordsInRAM = 100000; long count = -1L; File fileout = null; com.github.lindenb.jvarkit.util.cli.GetOpt opt = new com.github.lindenb.jvarkit.util.cli.GetOpt(); int c; while ((c = opt.getopt(args, getGetOptDefault() + "o:n:N:T:b")) != -1) { switch (c) { case 'b': compressed = true; break; case 'N': maxRecordsInRAM = Integer.parseInt(opt.getOptArg()); break; case 'n': count = Long.parseLong(opt.getOptArg()); break; case 'o': fileout = new File(opt.getOptArg()); break; case 'T': this.addTmpDirectory(new File(opt.getOptArg())); break; default: { switch (handleOtherOptions(c, opt, null)) { case EXIT_FAILURE: return -1; case EXIT_SUCCESS: return 0; default: break; } } } } if (count < -1L) // -1 == infinite { error("Bad count:" + count); return -1; } SamReader samReader = null; SAMRecordIterator iter = null; SAMFileWriter samWriter = null; Random random = new Random(); CloseableIterator<RandSamRecord> iter2 = null; try { SamFileReaderFactory.setDefaultValidationStringency(ValidationStringency.SILENT); if (opt.getOptInd() == args.length) { info("Reading from stdin"); samReader = SamFileReaderFactory.mewInstance().openStdin(); } else if (opt.getOptInd() + 1 == args.length) { File filename = new File(args[opt.getOptInd()]); info("Reading from " + filename); samReader = SamFileReaderFactory.mewInstance().open(filename); } else { error("Illegal number of arguments."); return -1; } SAMFileHeader header = samReader.getFileHeader(); header = header.clone(); header.setSortOrder(SortOrder.unsorted); header.addComment("Processed with " + getProgramName() + " : " + getProgramCommandLine()); SAMFileWriterFactory sfw = new SAMFileWriterFactory(); sfw.setCreateIndex(false); sfw.setCreateMd5File(false); if (fileout == null) { if (compressed) { samWriter = sfw.makeBAMWriter(header, true, System.out); } else { samWriter = sfw.makeSAMWriter(header, true, System.out); } } else { samWriter = 
sfw.makeSAMOrBAMWriter(header, true, fileout); this.addTmpDirectory(fileout); } iter = samReader.iterator(); SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(samReader.getFileHeader().getSequenceDictionary()); SortingCollection<RandSamRecord> sorter = SortingCollection.newInstance( RandSamRecord.class, new RandSamRecordCodec(header), new RandSamRecordComparator(), maxRecordsInRAM, getTmpDirectories()); sorter.setDestructiveIteration(true); while (iter.hasNext()) { RandSamRecord r = new RandSamRecord(); r.rand_index = random.nextInt(); r.samRecord = progress.watch(iter.next()); sorter.add(r); } iter.close(); iter = null; sorter.doneAdding(); iter2 = sorter.iterator(); if (count == -1) { while (iter2.hasNext()) { samWriter.addAlignment(iter2.next().samRecord); } } else { while (iter2.hasNext() && count > 0) { samWriter.addAlignment(iter2.next().samRecord); count--; } } iter2.close(); iter2 = null; sorter.cleanup(); progress.finish(); } catch (Exception e) { error(e); return -1; } finally { CloserUtil.close(iter); CloserUtil.close(iter2); CloserUtil.close(samReader); CloserUtil.close(samWriter); } return 0; }