@Override public SAMRecordPair getNextReadPair() { // insert first read into dictionary by queryname // insert second read into dictionary // check if the dictionary length for that entry has both pairs // if it is does return the read pair // otherwise continue reading // this way just return pairs as they are completed // should be MUCH faster // make sure to delete the entry after returning so that we dont have a memory leak if (iterator.hasNext()) { while (iterator.hasNext()) { SAMRecord record = iterator.next(); countRead(record); // skip if the read is unmapped, not properly paired or mate is unmapped if (record.getReadUnmappedFlag() == true || record.getProperPairFlag() == false || record.getMateUnmappedFlag() == true) { continue; } String query = record.getReadName(); // check if read mate has been read already if (readBuffer.containsKey(query)) { // if it has then return the pair SAMRecordPair pair = readBuffer.get(query); pair.addPair(record); if (pair.bothPairsAligned() && pair.isValidPair()) { // prevent memory leak by deleting keys that are no longer needed readBuffer.remove(query); return pair; } else { throw new RuntimeException(query + " is not properly mated"); } } else { // otherwise create an entry and store it by its query name SAMRecordPair pair = new SAMRecordPair(); pair.addPair(record); readBuffer.put(query, pair); } } } else { if (readBuffer.size() > 0) { for (String key : readBuffer.keySet()) { logger.info("No mate for for " + key); } throw new RuntimeException( "No mates found for some reads please make sure all reads are properly paired"); } } return null; }
@Override public void count(SAMRecordPair samRecordPair) { String chr = samRecordPair.getMate1().getReferenceName(); // all features from the same chromosome Feature[] features = chromosomeFeatures.get(chr); /* * For stranded data will have to get features for strand separately */ if (samRecordPair.bothPairsAligned()) { totalCount += 1; // Should we keep the read or not? if (!keep(samRecordPair)) { filtered++; readLogger.logRead("filtered_out", samRecordPair.getMate1(), null); readLogger.logRead("filtered_out", samRecordPair.getMate2(), null); return; } Map<Feature, Integer> mappedFeaturesAcrossChunksForMate1 = getMappedRegionsForMate( samRecordPair.getMate1(), features, converter, findOverlappingFeatures); boolean isUnmappedMate1 = mappedFeaturesAcrossChunksForMate1.size() == 0; int mate1MappedBases = converter.getNumberOfMappedBases(samRecordPair.getMate1()); Map<Feature, Integer> mappedFeaturesAcrossChunksForMate2 = getMappedRegionsForMate( samRecordPair.getMate2(), features, converter, findOverlappingFeatures); boolean isUnmappedMate2 = mappedFeaturesAcrossChunksForMate2.size() == 0; int mate2MappedBases = converter.getNumberOfMappedBases(samRecordPair.getMate2()); Map<Feature, Integer> union = unionFeaturesForMates( mappedFeaturesAcrossChunksForMate1, mappedFeaturesAcrossChunksForMate2); int totalMappedBases = mate1MappedBases + mate2MappedBases; /** * if read pairs map to multiple genes, ignore them if either mate is unmapped then do not * count it if a read pair overlaps an intron do not count it else count the mates */ // logger.info("Ambiguous count: " + ambiguousCount); // logger.info("Read name: " + samRecordPair.getMate1().getReadName()); // logger.info("Coordinates: " + samRecordPair.getMate1().getReferenceName() + ":" + // samRecordPair.getMate1().getAlignmentStart() + "-" + // samRecordPair.getMate1().getAlignmentEnd()); if (mapToMultipleGenes(union.keySet())) { ambiguousCount += 1; readLogger.logRead( "ambiguous", samRecordPair.getMate1(), mappedFeaturesAcrossChunksForMate1); readLogger.logRead( "ambiguous", samRecordPair.getMate2(), mappedFeaturesAcrossChunksForMate2); } else if (isUnmappedMate1 || isUnmappedMate2) { // either read overlaps an intron or intergenic region unmappedCount += 1; readLogger.logRead( "unmapped", samRecordPair.getMate1(), mappedFeaturesAcrossChunksForMate1); readLogger.logRead( "unmapped", samRecordPair.getMate2(), mappedFeaturesAcrossChunksForMate2); // } else if(isUnmappedMate1) { // //Mate 2 is mapped otherwise the above case // double unmappedFrac = addToFeatures(totalMappedBases, // mappedFeaturesAcrossChunksForMate2, featureCounts); // mappedCount += (1 - unmappedFrac); // unmappedCount += unmappedFrac; // if(Math.abs(unmappedFrac) >= 0.0001) { // partiallyUnmappedReads++; // readLogger.logRead("partially_unmapped", // samRecordPair.getMate1(),mappedFeaturesAcrossChunksForMate1); // readLogger.logRead("partially_unmapped", // samRecordPair.getMate2(),mappedFeaturesAcrossChunksForMate2); // } // } else if(isUnmappedMate2) { // //Mate 1 is mapped otherwise the above case // double unmappedFrac = addToFeatures(totalMappedBases, // mappedFeaturesAcrossChunksForMate1, featureCounts); // mappedCount += (1 - unmappedFrac); // unmappedCount += unmappedFrac; // if(Math.abs(unmappedFrac) >= 0.0001) { // partiallyUnmappedReads++; // readLogger.logRead("partially_unmapped", // samRecordPair.getMate1(),mappedFeaturesAcrossChunksForMate1); // readLogger.logRead("partially_unmapped", // samRecordPair.getMate2(),mappedFeaturesAcrossChunksForMate2); // } } else if (!completelyExonic(union, totalMappedBases)) { unmappedCount++; readLogger.logRead( "non_exon_overlap", samRecordPair.getMate1(), mappedFeaturesAcrossChunksForMate1); readLogger.logRead( "non_exon_overlap", samRecordPair.getMate2(), mappedFeaturesAcrossChunksForMate2); } else { double unmappedFrac = addToFeatures(totalMappedBases, union, featureCounts); // mappedCount += (1 - unmappedFrac); // unmappedCount += unmappedFrac; if (Math.abs(unmappedFrac) >= 0.0001) { // partiallyUnmappedReads++; // unmappedCount++; // readLogger.logRead("non_exon_overlap", // samRecordPair.getMate1(),mappedFeaturesAcrossChunksForMate1); // readLogger.logRead("non_exon_overlap", // samRecordPair.getMate2(),mappedFeaturesAcrossChunksForMate2); throw new IllegalStateException("Inconsitency in read counts"); } else { mappedCount += 1; } } // logger.info("Ambiguous count: " + ambiguousCount); } else { throw new IllegalStateException( "Read is not properly paired. Because its pair was not found. " + samRecordPair.getMate1().getReadName()); // Only use first mate // totalCount++; // Map<Feature,Integer> mappedFeaturesAcrossChunksForMate1 = // getMappedRegionsForMate(samRecordPair.getMate1() , features); // boolean isAmbiguousMate1 = // mapToMultipleGenes(mappedFeaturesAcrossChunksForMate1.keySet()); // boolean isUnmappedMate1 = mappedFeaturesAcrossChunksForMate1.size() == 0; // // if(isAmbiguousMate1) { // ambiguousCount++; // } else if(isUnmappedMate1) { // unmappedCount++; // } else { // double fracToAdd = 1.0 / mappedFeaturesAcrossChunksForMate1.size(); // //addToFeatures(fracToAdd,mappedFeaturesAcrossChunksForMate1); // mappedCount++; // } } if (!this.validState()) { logInfo(); throw new IllegalStateException( "Counts do not add up correctly. Last read: " + samRecordPair.getMate1().getReadName() + " " + samRecordPair.getMate1().getReferenceName() + ":" + samRecordPair.getMate1().getAlignmentStart()); } }