/**
 * Removes clustered results that overlap a result placed earlier in the ordered list.
 *
 * <p>The list is first sorted by the natural ordering of {@code ClusteredResult} and then
 * reversed. Entries are visited from the last position towards the front; an entry is removed
 * when any node of its {@code updatedResult} overlaps any node of an entry located before it.
 * The entry at index 0 is never removed.
 *
 * @param clusteredResultList the list to order and filter; it is modified in place
 */
private void removeOverlap(List<ClusteredResult> clusteredResultList) {
  Collections.sort(clusteredResultList);
  Collections.reverse(clusteredResultList);

  for (int i = clusteredResultList.size() - 1; i >= 1; i--) {
    ClusteredResult candidate = clusteredResultList.get(i);
    boolean overlapsEarlier = false;

    // One overlap is enough to discard the candidate, so leave all nested loops at once.
    // Continuing the scan after the removal would reuse a stale index and could delete an
    // unrelated entry or run past the end of the list.
    search:
    for (int j = i - 1; j >= 0; j--) {
      ClusteredResult earlier = clusteredResultList.get(j);
      for (OptMapResultNode result1 : candidate.updatedResult) {
        for (OptMapResultNode result2 : earlier.updatedResult) {
          if (result1.overlap(result2)) {
            overlapsEarlier = true;
            break search;
          }
        }
      }
    }

    if (overlapsEarlier) {
      clusteredResultList.remove(i);
    }
  }
}
/** * Process the confidence, a measure of uniqueness of score * * @param crList */ private void processConfidence(List<ClusteredResult> crList) { int itemCount = confItemCount; if (itemCount == -1 || itemCount > crList.size()) itemCount = crList.size(); double minscore = Double.MAX_VALUE; double bestscore = Double.MIN_VALUE; for (int i = 0; i < itemCount; i++) { ClusteredResult cr = crList.get(i); if (cr.score < minscore) minscore = cr.score; if (cr.score > bestscore) bestscore = cr.score; } int base = 1; double total = 0; if (minscore == bestscore) // evenly distributed, rare case for (int i = 0; i < itemCount; i++) { ClusteredResult cr = crList.get(i); for (OptMapResultNode result : cr.updatedResult) result.confidence = (1 / (double) crList.size()); } else { for (int i = 0; i < itemCount; i++) { ClusteredResult cr = crList.get(i); total += (cr.score - minscore) + base; } for (int i = 0; i < itemCount; i++) { ClusteredResult cr = crList.get(i); for (OptMapResultNode result : cr.updatedResult) result.confidence = ((cr.score - minscore + base) / (double) total); } } // Confidence not calculated within itemCount range is set as 0 for (int i = itemCount; i < crList.size(); i++) { ClusteredResult cr = crList.get(i); for (OptMapResultNode result : cr.updatedResult) result.confidence = 0; } }
/** * Check if two partial maps can be connected. Trimming is done on overlapping partial maps * * <p>It is essential to use trimming. Yet, there are two drawbacks using trimming 1. Trimming is * expensive. 2. Trimming may lead to inappropriate results (This is done by validation using * trimear) * * @param optrefmap * @param maxTrim * @param result1 * @param result2 * @param vmProcessor * @param ear * @return */ private TrimResult trimOverlap( LinkedHashMap<String, DataNode> optrefmap, OptMapResultNode[] trim1Result, OptMapResultNode[] trim2Result, PathBuilderFilter pbFilter, VirtualMapProcessor vmProcessor) { int penalty = vmProcessor.calcBasicPenalty(trim1Result[0], trim2Result[0]); double bestscore = Double.NEGATIVE_INFINITY; int bestTrim1 = -1; int bestTrim2 = -1; boolean[][] trimPair = new boolean[maxTrim + 1][maxTrim + 1]; // temp increasing maxTrim greatly: // 1. if the two score addition is already lower than any one of the score, discard // 2. maxTrim for (int trimmed = 0; trimmed <= maxTrim; trimmed++) { for (int trim1 = 0; trim1 <= trimmed; trim1++) { int trim2 = trimmed - trim1; if (trimPair[trim1][trim2]) // this trim pair should no longer be done continue; if ((trim1Result[0].cigar.getMatch() - trim1 >= minMatch) && (trim2Result[0].cigar.getMatch() - trim2 >= minMatch)) { OptMapResultNode tresult1 = trim1Result[trim1]; OptMapResultNode tresult2 = trim2Result[trim2]; // trim1Result[trim1 - 1] must exist if (tresult1 == null) { tresult1 = new OptMapResultNode(trim1Result[trim1 - 1]); tresult1.trimResult(-1 * trim1Result[trim1 - 1].mappedstrand, optrefmap); tresult1.updateScore(optrefmap, match, fpp, fnp); trim1Result[trim1] = tresult1; } if (tresult2 == null) { tresult2 = new OptMapResultNode(trim2Result[trim2 - 1]); tresult2.trimResult(1 * trim2Result[trim2 - 1].mappedstrand, optrefmap); tresult2.updateScore(optrefmap, match, fpp, fnp); trim2Result[trim2] = tresult2; } boolean withinTrimear = true; withinTrimear = Math.abs(tresult1.getMapScale() - 1) 
< trimear && Math.abs(tresult2.getMapScale() - 1) < trimear; boolean removeLaterTrim = false; if (pbFilter.checkPass(tresult1, tresult2)) { if (withinTrimear && !tresult1.overlap(tresult2)) { if (tresult1.mappedscore + tresult2.mappedscore - vmProcessor.calcPenalty(tresult1, tresult2) > bestscore) { bestscore = tresult1.mappedscore + tresult2.mappedscore - vmProcessor.calcPenalty(tresult1, tresult2); bestTrim1 = trim1; bestTrim2 = trim2; } removeLaterTrim = true; } else if (withinTrimear && tresult1.mappedscore + tresult2.mappedscore - penalty < bestscore) // We could not calcPenalty directly when // tresult1.overlap(tresult2) removeLaterTrim = true; } else removeLaterTrim = true; if (removeLaterTrim) for (int i = trim1; i <= maxTrim; i++) for (int j = trim2; j <= maxTrim; j++) trimPair[i][j] = true; } } } if (bestTrim1 == -1) return TrimResult.FailedTrimResult(); else return TrimResult.SuccessfulTrimResult(trim1Result[bestTrim1], trim2Result[bestTrim2]); }
/** * Cluster the results by using 1. Path builder filter which limits indel, inversion and * translocation 2. vmProcessor to calculate the score * * <p>The logic includes two steps 1. Grouping 2. Build paths between partial maps (which may * undergo trimming) 3. Link paths to paths (not partial maps to partial maps) 4. Get the path * with best score and convert it back to partial maps * * @param mapList * @param pbFilter * @param vmProcessor * @return */ private List<ClusteredResult> cluster( List<OptMapResultNode> mapList, PathBuilderFilter pbFilter, VirtualMapProcessor vmProcessor) { // exit if no result is input if (mapList == null || mapList.size() == 0) return new ArrayList<ClusteredResult>(); // Pre-update the results for (OptMapResultNode result : mapList) vmProcessor.calcScore(result); List<ClusteredResult> clusteredResultList = new ArrayList<ClusteredResult>(); // Grouping close results to be 1. clustered effectively 2. output one clustered result per // group List<List<OptMapResultNode>> groupedMapList = group(mapList, pbFilter.sameStrand, pbFilter.closeReference, pbFilter.closeFragment); // In each result group for (List<OptMapResultNode> groupedMap : groupedMapList) { // Build a path whenever the results can be joined // Please see ClusterPathNode for details List<ClusterPathNode> clusterPathList = buildPath(groupedMap, pbFilter, vmProcessor); // Link the paths linkPath(clusterPathList, vmProcessor); // Get the best path and extract the final result maps List<OptMapResultNode> clusteredGroupMap = convertPathToMapList(getBestPath(clusterPathList), vmProcessor); // if (false) // { // for (ClusterPathNode cp : clusterPathList) { // if (cp.currentScore < 0) // continue; // if (cp.secondMap != null) // continue; // List<ClusterPathNode> bestPath = new ArrayList<ClusterPathNode>(); // ClusterPathNode recentPath = cp; // while (recentPath != null) // { // bestPath.add(recentPath); // recentPath = recentPath.previousPath; // } // Collections.reverse(bestPath); 
// if (convertPathToMapList(bestPath).isEmpty()) // continue; // System.out.println("PATH - " + cp.currentScore); // for (OptMapResultNode result : convertPathToMapList(bestPath)) // System.out.println(result); // } // } ClusteredResult cr = new ClusteredResult(); // We want to retain the original alignments List<OptMapResultNode> expandedGroupedMap = new ArrayList<OptMapResultNode>(); // Saving the original results for (OptMapResultNode map : groupedMap) expandedGroupedMap.addAll(map.getRealMap()); // We want to import the modified alignments List<OptMapResultNode> expandedClusterGroupMap = new ArrayList<OptMapResultNode>(); // Saving the updated results for (OptMapResultNode map : clusteredGroupMap) expandedClusterGroupMap.addAll(map.getRealMap()); // checkDirectLink(expandedClusterGroupMap, vmProcessor); cr.importUpdatedResult(expandedClusterGroupMap); if (cr.updatedResult.size() > 0) { cr.process(vmProcessor); clusteredResultList.add(cr); } } return clusteredResultList; }
/** * Group the partial maps * * @param mapList * @param strandSpecific * @param closeReference * @param closeFragment * @return */ public List<List<OptMapResultNode>> group( List<OptMapResultNode> mapList, boolean strandSpecific, long closeReference, long closeFragment) { if (strandSpecific) { List<OptMapResultNode> forwardList = new ArrayList<OptMapResultNode>(); List<OptMapResultNode> reverseList = new ArrayList<OptMapResultNode>(); List<List<OptMapResultNode>> unknownListList = new ArrayList<List<OptMapResultNode>>(); // for virtual fragment for (OptMapResultNode map : mapList) { if (map.mappedstrand == 1) forwardList.add(map); else if (map.mappedstrand == -1) reverseList.add(map); else { List<OptMapResultNode> unknownList = new ArrayList<OptMapResultNode>(); unknownList.add(map); unknownListList.add(unknownList); } } List<List<OptMapResultNode>> clusteredList = new ArrayList<List<OptMapResultNode>>(); clusteredList.addAll(group(forwardList, false, closeReference, closeFragment)); clusteredList.addAll(group(reverseList, false, closeReference, closeFragment)); clusteredList.addAll(unknownListList); return clusteredList; } else { // first group according to reference List<List<OptMapResultNode>> clusteredList = new ArrayList<List<OptMapResultNode>>(); Collections.sort(mapList, OptMapResultNode.mappedstartcomparator); List<List<OptMapResultNode>> groupRefList = new ArrayList<List<OptMapResultNode>>(); if (closeReference < 0) clusteredList.add(mapList); else { GenomicPosNode region = null; List<OptMapResultNode> recentList = null; for (OptMapResultNode map : mapList) { if (region == null || map.mappedRegion.start == -1 || !map.isClose(region, closeReference)) { recentList = new ArrayList<OptMapResultNode>(); groupRefList.add(recentList); if (map.mappedRegion.start == -1) region = null; else region = new GenomicPosNode( map.mappedRegion.ref, map.mappedRegion.start, map.mappedRegion.stop); } recentList.add(map); if (map.mappedRegion.stop > region.stop) region = new 
GenomicPosNode(region.ref, region.start, map.mappedRegion.stop); } clusteredList = groupRefList; } if (closeFragment < 0) return clusteredList; else { List<List<OptMapResultNode>> groupFragList = new ArrayList<List<OptMapResultNode>>(); for (List<OptMapResultNode> tmpMapList : clusteredList) { Collections.sort(tmpMapList, OptMapResultNode.subfragstartstopcomparator); SimpleLongLocation loc = null; List<OptMapResultNode> recentList = null; for (OptMapResultNode map : tmpMapList) { SimpleLongLocation maploc = new SimpleLongLocation( map.length(0, map.subfragstart - 1), map.length(0, map.subfragstop + 1)); if (loc == null || (!loc.overlap(maploc, closeFragment))) { recentList = new ArrayList<OptMapResultNode>(); groupFragList.add(recentList); loc = maploc; } recentList.add(map); if (maploc.max > loc.max) loc.max = maploc.max; if (maploc.min < loc.min) loc.min = maploc.min; } } clusteredList = groupFragList; return clusteredList; } } }