public List<DoubleList<Split, Integer>> findConflictingSplitCounts( int threshold, boolean thresholdIsLength) { findGreedySplits(); List<DoubleList<Split, Integer>> results = new Vector<DoubleList<Split, Integer>>(greedySplits.size()); // could be more efficient by storing where the greedy splits appear in the sorted list. for (int i : greedySplitIndex) { String splitStr = sortedSplits.elementAt(i); Split split = splits.get(splitStr); if (!thresholdIsLength && counts.get(splitStr) < threshold) break; // ignore splits with frequency below threshold DoubleList<Split, Integer> splitList = new DoubleList<Split, Integer>(); splitList.add(split, counts.get(splitStr)); // Only check splits after this one in sorted list: ones before this one are guaranteed to be // compatible, else this split would not be in the greedy list. for (int j = i + 1; j < sortedSplits.size(); j++) { String otherSplitString = sortedSplits.elementAt(j); if (!thresholdIsLength && counts.get(otherSplitString) < threshold) break; // ignore conflicting splits with frequency below threshold Split otherSplit = splits.get(otherSplitString); if (!split.compatible(otherSplit)) { splitList.add(otherSplit, counts.get(otherSplitString)); } // if !compatible if (thresholdIsLength && splitList.size() == threshold) break; // have enough secondary splits now } // for otherSplit (j) results.add(splitList); } // for i over sortedSplits return results; }
/**
 * Populates {@code greedySplits} and {@code greedySplitIndex} by walking {@code sortedSplits}
 * in order and keeping each split that is compatible with all splits kept so far.
 *
 * <p>Does not recalculate if greedySplits are already cached. Collection stops once
 * {@code idg.getIdCount() - 3} splits have been accepted or the sorted list is exhausted.
 */
private void findGreedySplits() {
  if (greedySplits != null) {
    return;
  }

  sortSplits();

  // Clamp at zero: with fewer than three ids the target would be negative, and
  // Vector's capacity constructor throws IllegalArgumentException for a negative capacity.
  int nGreedySplits = Math.max(0, idg.getIdCount() - 3);
  greedySplits = new Vector<Split>(nGreedySplits);
  greedySplitIndex = new Vector<Integer>(nGreedySplits);

  for (int i = 0; i < sortedSplits.size() && greedySplits.size() < nGreedySplits; i++) {
    String splitStr = sortedSplits.elementAt(i);
    Split split = splits.get(splitStr);
    if (split.compatible(greedySplits)) {
      greedySplits.add(split);
      // Remember the split's position in sortedSplits alongside the split itself.
      greedySplitIndex.add(i);
    }
  }
}