public List<DoubleList<Split, Integer>> findConflictingSplitCounts( int threshold, boolean thresholdIsLength) { findGreedySplits(); List<DoubleList<Split, Integer>> results = new Vector<DoubleList<Split, Integer>>(greedySplits.size()); // could be more efficient by storing where the greedy splits appear in the sorted list. for (int i : greedySplitIndex) { String splitStr = sortedSplits.elementAt(i); Split split = splits.get(splitStr); if (!thresholdIsLength && counts.get(splitStr) < threshold) break; // ignore splits with frequency below threshold DoubleList<Split, Integer> splitList = new DoubleList<Split, Integer>(); splitList.add(split, counts.get(splitStr)); // Only check splits after this one in sorted list: ones before this one are guaranteed to be // compatible, else this split would not be in the greedy list. for (int j = i + 1; j < sortedSplits.size(); j++) { String otherSplitString = sortedSplits.elementAt(j); if (!thresholdIsLength && counts.get(otherSplitString) < threshold) break; // ignore conflicting splits with frequency below threshold Split otherSplit = splits.get(otherSplitString); if (!split.compatible(otherSplit)) { splitList.add(otherSplit, counts.get(otherSplitString)); } // if !compatible if (thresholdIsLength && splitList.size() == threshold) break; // have enough secondary splits now } // for otherSplit (j) results.add(splitList); } // for i over sortedSplits return results; }
/*
 * Pairs every greedy consensus tree split with its internode certainty (IC) and returns the
 * result as a twin list.
 */
public DoubleList<Split, Double> getICs() {
  DoubleList<Split, Double> certainties = new DoubleList<Split, Double>();
  // A length limit of 2 requests each greedy split plus at most one conflicting split.
  List<DoubleList<Split, Integer>> conflictsPerSplit = findConflictingSplitCounts(2, true);
  for (int k = 0; k < conflictsPerSplit.size(); k++) {
    DoubleList<Split, Integer> entry = conflictsPerSplit.get(k);
    Split greedySplit = entry.getA(0); // element 0 is the greedy split itself
    double score = internodeCertainty(entry);
    certainties.add(greedySplit, score);
  }
  return certainties;
}
/**
 * Adds up the Internode Certainty All (ICA) scores of every split in the greedy consensus tree.
 *
 * @param threshold passed through to {@code getICAs} to decide which incompatible splits take
 *     part in each ICA calculation
 * @return the total of all ICA scores
 */
public double treeCertaintyAll(int threshold) {
  DoubleList<Split, Double> scores = getICAs(threshold);
  double total = 0;
  int idx = 0;
  while (idx < scores.size()) {
    total += scores.getB(idx);
    idx++;
  }
  return total;
}
/**
 * Adds up the Internode Certainty (IC) scores of every split in the greedy consensus tree.
 *
 * @return the total of all IC scores
 */
public double treeCertainty() {
  DoubleList<Split, Double> scores = getICs();
  double total = 0;
  int idx = 0;
  while (idx < scores.size()) {
    total += scores.getB(idx);
    idx++;
  }
  return total;
}
/*
 * Pairs every greedy consensus tree split with its ICA (internode certainty all) score and
 * returns the result as a twin list. The threshold controls which incompatible splits are
 * included in the ICA calculation.
 */
public DoubleList<Split, Double> getICAs(int threshold) {
  DoubleList<Split, Double> scores = new DoubleList<Split, Double>();
  // Passing false makes the threshold act as a minimum count rather than a list length.
  List<DoubleList<Split, Integer>> conflictsPerSplit =
      findConflictingSplitCounts(threshold, false);
  for (int k = 0; k < conflictsPerSplit.size(); k++) {
    DoubleList<Split, Integer> entry = conflictsPerSplit.get(k);
    Split greedySplit = entry.getA(0); // element 0 is the greedy split itself
    double score = internodeCertainty(entry);
    scores.add(greedySplit, score);
  }
  return scores;
}
/* * Salichos Stamatakis and Rokas, MBE v31 p1261 (2014) * If only two splits are listed, returns IC, the Internode Certainty. * If more splits are listed, returns ICA, (IC All). */ private double internodeCertainty(DoubleList<Split, Integer> splits) { int sum = 0; int n = splits.size(); // number of splits under consideration for (int i = 0; i < n; i++) sum += splits.getB(i); double ic = 1; if (n > 1) { ic = Math.log(n); for (int i = 0; i < n; i++) { double p = ((double) splits.getB(i)) / sum; ic += p * Math.log(p); } ic /= Math.log(n); // convert from natural log to log base n } return ic; }
/**
 * Writes one line per greedy consensus tree split, giving its internode certainty followed by
 * the split's string form.
 *
 * @param out destination for the report
 */
public void printInternodeCertainties(PrintWriter out) {
  DoubleList<Split, Double> certainties = getICs();
  int row = 0;
  while (row < certainties.size()) {
    Double value = certainties.getB(row);
    String label = certainties.getA(row).toString();
    out.printf("IC = %f for split %s\n", value, label);
    row++;
  }
}