public HashSet<NGramSet> findCommonNGrams( String string1, String string2, int min, int max, boolean maximizePrimaryWindowSize) { ordered_scores = new TreeMap<Double, Integer>(); errors = new ArrayList<Error>(); HashSet<NGramSet> NGramsWithMatches = new HashSet<NGramSet>(); // ensure that min <= max if (min > max) { int temp = max; max = min; min = temp; logError("Min greater than max; assuming the opposite parameterization"); } char[] chars1 = string1.toCharArray(); char[] chars2 = string2.toCharArray(); List<String> words1 = scanForWords(chars1); List<String> words2 = scanForWords(chars2); // when testing, restrict the length of documents to be small if (isTesting) { int maxSub = 1000; words1 = words1.subList(0, maxSizeOutOfRangeForSource(maxSub, words1) ? words1.size() : maxSub); words2 = words2.subList(0, maxSizeOutOfRangeForSource(maxSub, words2) ? words2.size() : maxSub); } NGramSetImpl.setMatchCase(matchCase); NGramSetImpl.setUseStopWords(USESTOPWORDS); NGramSetImpl.setStrictness(STRICT); NGramSetImpl.setMinSize(min); int leftMax = (words1.size() <= max || maximizePrimaryWindowSize) ? words1.size() : max; int rightMax = (words2.size() <= max) ? words2.size() - 1 : max; HashMap<String, List<NGramSet>> map = new HashMap<String, List<NGramSet>>(); if (rightMax < max) { logError( "Window size greater than number of length of secondary text; decreasing secondary window size to: " + rightMax); } if (leftMax < max && maximizePrimaryWindowSize) { logError("Maximizing primary window"); } else if (leftMax < max) { logError("Max out of range for primary source. Scaling down to: " + leftMax); } ArrayList<NGramSet> nGrams1 = null; nGrams1 = getAllNGramsOfSize(words1, leftMax, null); // ArrayList<NGramSet> nGrams2 = getAllNGramsOfSize(words2, rightMax, map); findAllCommon(NGramsWithMatches, nGrams1, map); return NGramsWithMatches; }