public String processVoiceResults(ArrayList<String> matchedStrings) { Log.d(TAG, "Initial results = " + matchedStrings); // Default result is the most likely match of those returned String result = matchedStrings.get(0); boolean resultFound = false; for (int i = 0; i < Search.getCurrentSearch().getSearchObjects().size() && !resultFound; i++) { LocalEntity currentSearchItem = Search.getCurrentSearch().getSearchObjects().get(i); for (int j = 0; j < matchedStrings.size() && !resultFound; j++) { for (int k = 0; k < currentSearchItem.getSearchTerms().size() && !resultFound; k++) { // Log.d(TAG, "matched string = " + matchedStrings.get(j)); // Log.d(TAG, "search string = " + // currentSearchItem.getSearchTerms().get(k)); // If magic algorithm finds a match if (StringUtils.getLevenshteinDistance( matchedStrings.get(j), currentSearchItem.getSearchTerms().get(k)) < (currentSearchItem.getSearchTerms().get(k).length() / 3) || currentSearchItem.getSearchTerms().get(k).equals(matchedStrings.get(j))) { resultFound = true; if (Search.getCurrentSearch().getName().equals("yesno")) { Log.d(TAG, "got here"); result = currentSearchItem.getSearchTerms().get(k); } else { result = Search.getCurrentSearch().getSearchObjects().get(i).getName(); } Log.d(TAG, "result = " + result); // If result found then update next search based on match processExpectedResult(result); return null; } } } } return processUnexpectedResult(result); }
/**
 * Returns the nearest valid dictionary words to a given word, by calculating the Levenshtein
 * distance between the given word and all valid ones.
 *
 * <p>Matches are collected in three tiers, each in dictionary order: up to 15 words at distance
 * 1, up to 10 words at distance 2 and up to 5 words at distance 3. How many tiers are returned
 * depends on the length of the given word:
 *
 * <ul>
 *   <li>more than 9 characters: distance 1, 2 and 3;
 *   <li>6 to 9 characters: distance 1 and 2;
 *   <li>5 characters or fewer: distance 1 only.
 * </ul>
 *
 * <p>The tiers are concatenated closest-first and passed through {@code removeNullVals} before
 * being returned, since the fixed-size tier arrays may contain unused slots.
 *
 * @param wordToMatch the word to find similar valid words for.
 * @return an array of valid dictionary words close to the given one, in order of increasing
 *     Levenshtein distance.
 */
public static String[] closeMatches(String wordToMatch) {
  String[] dictionary = VALUES.toArray(new String[VALUES.size()]);

  String[] distanceOne = new String[15];
  String[] distanceTwo = new String[10];
  String[] distanceThree = new String[5];
  int countOne = 0;
  int countTwo = 0;
  int countThree = 0;

  for (String candidate : dictionary) {
    // The distance is the expensive part; compute it once per dictionary word.
    int distance = StringUtils.getLevenshteinDistance(wordToMatch, candidate);
    if (distance == 1 && countOne < distanceOne.length) {
      distanceOne[countOne++] = candidate;
    } else if (distance == 2 && countTwo < distanceTwo.length) {
      distanceTwo[countTwo++] = candidate;
    } else if (distance == 3 && countThree < distanceThree.length) {
      distanceThree[countThree++] = candidate;
    }
  }

  // Test the longest threshold first: a word longer than 9 is also longer than 5, so checking
  // "> 5" first would make the three-tier result unreachable.
  int length = wordToMatch.length();
  if (length > 9) {
    String[] oneAndTwo = ArrayUtils.addAll(distanceOne, distanceTwo);
    return removeNullVals(ArrayUtils.addAll(oneAndTwo, distanceThree));
  }
  if (length > 5) {
    return removeNullVals(ArrayUtils.addAll(distanceOne, distanceTwo));
  }
  return removeNullVals(distanceOne);
}
/** * Checks the given player name. * * @param expected expected name * @param actual actual name */ private void assertPlayerName(String expected, String actual) { // OCR'ing player names is hard, so we're going to give Tesseract a break and allow for some // mismatched // characters. int distance = StringUtils.getLevenshteinDistance(expected, actual); String message = String.format( "Expected \"%s\", got \"%s\", Levenshtein distance: %s", expected, actual, "" + distance); log.info(message); Assert.assertTrue(message, distance < 5); }
/**
 * Finds the driver constant whose {@code toString()} form is nearest, by Levenshtein distance,
 * to the supplied text.
 *
 * @param value the text to compare each driver against
 * @return the driver with the smallest distance (the earliest one in {@code values()} order on a
 *     tie), or {@code null} if {@code values()} yields nothing
 */
public static SupportedWebDriver getClosestDriverValueTo(final String value) {
  final SupportedWebDriver[] candidates = values();

  SupportedWebDriver best = null;
  int bestDistance = Integer.MAX_VALUE;
  for (int index = 0; index < candidates.length; index++) {
    final SupportedWebDriver candidate = candidates[index];
    final int candidateDistance =
        StringUtils.getLevenshteinDistance(candidate.toString(), value);
    // Only a strictly smaller distance replaces the current best.
    if (candidateDistance >= bestDistance) {
      continue;
    }
    best = candidate;
    bestDistance = candidateDistance;
  }
  return best;
}
/**
 * Fuzzy "contains all" check: tests whether (almost) every word of {@code what} has an
 * approximate counterpart in {@code where}.
 *
 * <p>A word from {@code what} matches a word from {@code where} when their Levenshtein distance
 * is at most {@code max(1, whereWord.length() / 3)} and their lengths differ by at most one
 * character. Each word of {@code what} is counted at most once, no matter how many words of
 * {@code where} it resembles; counting every matching pair would let a single word with several
 * look-alikes mask other words that have no match at all.
 *
 * @param what the words that must be found
 * @param where the words to search in
 * @return {@code true} if every word of {@code what} matched when it has three or fewer words,
 *     or if at most one word failed to match when it has more than three
 */
private static boolean containsAll(List<String> what, List<String> where) {
  int matchedWords = 0;
  for (String wanted : what) {
    for (String candidate : where) {
      boolean closeEnough =
          StringUtils.getLevenshteinDistance(wanted, candidate)
                  <= Math.max(1, candidate.length() / 3)
              && Math.abs(wanted.length() - candidate.length()) <= 1;
      if (closeEnough) {
        matchedWords++;
        // One counterpart is enough; stop so this word is not counted again.
        break;
      }
    }
  }

  // Short queries must match completely; longer ones may miss a single word.
  int allowedMisses = (what.size() <= 3) ? 0 : 1;
  return what.size() - allowedMisses <= matchedWords;
}
/**
 * Removes near-duplicate and very short entries from the given list, in place.
 *
 * <p>Each entry is compared with every entry after it. A later entry is removed when its
 * similarity to the earlier one, computed as {@code 1 - distance / max(length1, length2)} from
 * the Levenshtein distance of their contents, exceeds 0.4, or when its own content is shorter
 * than 5 characters. Only later entries are ever tested, so the entry at index 0 is always kept.
 *
 * @param topicList the list to filter; it is modified directly
 * @return the same list instance that was passed in
 * @throws IOException part of the existing signature
 */
public static List<Weibo> filterWeibo(List<Weibo> topicList) throws IOException {
  for (int keptIndex = 0; keptIndex < topicList.size(); keptIndex++) {
    String keptText = topicList.get(keptIndex).getContent();

    int probeIndex = keptIndex + 1;
    while (probeIndex < topicList.size()) {
      String probeText = topicList.get(probeIndex).getContent();
      int distance = StringUtils.getLevenshteinDistance(keptText, probeText);
      double similarity =
          1 - (distance / (double) Math.max(keptText.length(), probeText.length()));

      boolean discard = (similarity > 0.4) || (probeText.length() < 5);
      if (discard) {
        // Removal shifts the next entry into probeIndex, so the index is not advanced.
        topicList.remove(probeIndex);
      } else {
        probeIndex++;
      }
    }
  }
  return topicList;
}
/**
 * Picks, from a set of alternatives, the string with the smallest Levenshtein distance to the
 * input string.
 *
 * @see <a href="http://en.wikipedia.org/wiki/Levenshtein_distance">Levenshtein Distance</a>
 * @param input the string to match against
 * @param alternatives the candidate strings to choose from
 * @return the alternative closest to {@code input} (the first one encountered on a tie), or
 *     {@code null} if {@code input} is {@code null} or {@code alternatives} is {@code null} or
 *     empty
 */
public static String getClosestMatchingString(String input, Collection<String> alternatives) {
  if (input == null) {
    return null;
  }
  if (alternatives == null || alternatives.isEmpty()) {
    return null;
  }

  String best = null;
  int bestDistance = Integer.MAX_VALUE;
  for (String candidate : alternatives) {
    int candidateDistance = StringUtils.getLevenshteinDistance(input, candidate);
    // Keep the current best unless nothing has been chosen yet or this one is strictly closer.
    if (best != null && candidateDistance >= bestDistance) {
      continue;
    }
    best = candidate;
    bestDistance = candidateDistance;
  }
  return best;
}
/**
 * Finds the online player whose display name is closest to the given text.
 *
 * <p>Both the search text and each display name are lower-cased and trimmed before their
 * Levenshtein distance is computed. There is no distance cut-off: as long as at least one player
 * is online, some player is returned.
 *
 * @param needle the name to look for
 * @return the online player with the smallest distance (the first one encountered on a tie), or
 *     {@code null} when no players are online
 */
private static Player getPlayerByDisplayNameAlt(String needle) {
  final String wanted = needle.toLowerCase().trim();

  Player closest = null;
  int closestDistance = Integer.MAX_VALUE;
  for (Player candidate : Bukkit.getOnlinePlayers()) {
    final String shownName = candidate.getDisplayName().toLowerCase().trim();
    final int distance = StringUtils.getLevenshteinDistance(wanted, shownName);
    // The first player is always accepted; later ones must be strictly closer.
    if (closest == null || distance < closestDistance) {
      closest = candidate;
      closestDistance = distance;
    }
  }
  return closest;
}
/** * Gets map object associated with given map name. * * @param name name of map * @return map object associated with the name */ public static AutoRefMap getMap(String name) { // assume worldName exists if (name == null) return null; name = AutoRefMatch.normalizeMapName(name); // if there is no map library, quit File mapLibrary = AutoRefMap.getMapLibrary(); if (!mapLibrary.exists()) return null; AutoRefMap bmap = null; int ldist = MAX_NAME_DISTANCE; for (AutoRefMap map : getAvailableMaps()) { String mapName = AutoRefMatch.normalizeMapName(map.name); int namedist = StringUtils.getLevenshteinDistance(name, mapName); if (namedist <= ldist) { bmap = map; ldist = namedist; } } // get best match return bmap; }
public static Movie scrapeMovie( File movieFile, SiteParsingProfile siteToParseFrom, String urlToScrapeFromDMM, boolean useURLtoScrapeFrom) throws IOException { // System.out.println("movieFile = " + movieFile); String searchString = siteToParseFrom.createSearchString(movieFile); SearchResult[] searchResults = null; int searchResultNumberToUse = 0; // no URL was passed in so we gotta figure it ourselves if (!useURLtoScrapeFrom) { searchResults = siteToParseFrom.getSearchResults(searchString); int levDistanceOfCurrentMatch = 999999; // just some super high number String idFromMovieFile = SiteParsingProfile.findIDTagFromFile(movieFile); // loop through search results and see if URL happens to contain ID number in the URL. This // will improve accuracy! for (int i = 0; i < searchResults.length; i++) { String urltoMatch = searchResults[i].getUrlPath().toLowerCase(); String idFromMovieFileToMatch = idFromMovieFile.toLowerCase().replaceAll("-", ""); // System.out.println("Comparing " + searchResults[i].toLowerCase() + " to " + // idFromMovieFile.toLowerCase().replaceAll("-", "")); if (urltoMatch.contains(idFromMovieFileToMatch)) { // let's do some fuzzy logic searching to try to get the "best" match in case we got some // that are pretty close // and update the variables accordingly so we know what our best match so far is int candidateLevDistanceOfCurrentMatch = StringUtils.getLevenshteinDistance(urltoMatch.toLowerCase(), idFromMovieFileToMatch); if (candidateLevDistanceOfCurrentMatch < levDistanceOfCurrentMatch) { levDistanceOfCurrentMatch = candidateLevDistanceOfCurrentMatch; searchResultNumberToUse = i; } } } } // just use the URL to parse from the parameter else if (useURLtoScrapeFrom) { searchResults = new SearchResult[1]; if (siteToParseFrom instanceof DmmParsingProfile) searchResults[0] = new SearchResult(urlToScrapeFromDMM); else if (siteToParseFrom instanceof Data18MovieParsingProfile || siteToParseFrom instanceof Data18WebContentParsingProfile) 
searchResults[0] = new SearchResult(urlToScrapeFromDMM); else if (siteToParseFrom instanceof JavLibraryParsingProfile) searchResults[0] = new SearchResult( ((JavLibraryParsingProfile) siteToParseFrom).getOverrideURLJavLibrary()); else if (siteToParseFrom instanceof IAFDParsingProfile) searchResults[0] = new SearchResult(urlToScrapeFromDMM); } if (searchResults != null && searchResults.length > 0 && searchResults[searchResultNumberToUse].getUrlPath().length() > 0) { System.out.println( "Scraping this webpage for movie: " + searchResults[searchResultNumberToUse].getUrlPath()); // for now just set the movie to the first thing found unless we found a link which had // something close to the ID Document searchMatch = Jsoup.connect(searchResults[searchResultNumberToUse].getUrlPath()) .timeout(0) .userAgent("Mozilla/5.0 (Windows NT 6.1; WOW64; rv:5.0) Gecko/20100101 Firefox/5.0") .get(); siteToParseFrom.setDocument(searchMatch); siteToParseFrom.setOverrideURLDMM(urlToScrapeFromDMM); return new Movie(siteToParseFrom); } else return null; // TODO return some kind of default movie }
/**
 * Computes a normalized similarity between two words from their Levenshtein distance.
 *
 * <p>The result is {@code 1 - distance / max(word1.length(), word2.length())}: 1.0 for identical
 * words, decreasing towards 0.0 as more edits are needed. Two empty words are treated as
 * identical and yield 1.0.
 *
 * @param word1 the first word
 * @param word2 the second word
 * @return the similarity, between 0.0 and 1.0
 */
@Override
public double compare(String word1, String word2) {
  double distance = StringUtils.getLevenshteinDistance(word1, word2);
  int longestLength = Math.max(word1.length(), word2.length());
  if (longestLength == 0) {
    // Both words are empty: dividing 0.0 by 0 would give NaN rather than a similarity.
    return 1.0;
  }
  return 1 - distance / longestLength;
}