/**
 * Compares the recogniser's candidate strings with the search terms of the current search and
 * dispatches the outcome.
 *
 * <p>Search objects are visited in order; for each one, every candidate is compared with every
 * search term. A pair counts as a match when the Levenshtein distance is strictly smaller than a
 * third of the term's length (integer division), or when the two strings are equal. The first
 * match wins: for a search named "yesno" the matched term itself is passed to
 * {@code processExpectedResult}, otherwise the name of the matching search object is.
 *
 * @param matchedStrings candidate transcriptions; element 0 is used as the fallback result, so
 *     the list must contain at least one entry
 * @return {@code null} when a match was found and dispatched, otherwise the value returned by
 *     {@code processUnexpectedResult} for the first candidate
 */
public String processVoiceResults(ArrayList<String> matchedStrings) {
  Log.d(TAG, "Initial results = " + matchedStrings);
  // The first candidate is treated as the most likely one and is used when nothing matches.
  String fallback = matchedStrings.get(0);

  for (LocalEntity searchItem : Search.getCurrentSearch().getSearchObjects()) {
    for (String heard : matchedStrings) {
      for (String term : searchItem.getSearchTerms()) {
        // Fuzzy match first; the exact comparison still matters for terms shorter than three
        // characters, where the distance threshold is zero.
        boolean closeEnough =
            StringUtils.getLevenshteinDistance(heard, term) < (term.length() / 3);
        if (!closeEnough && !term.equals(heard)) {
          continue;
        }

        String matched;
        if (Search.getCurrentSearch().getName().equals("yesno")) {
          Log.d(TAG, "got here");
          matched = term;
        } else {
          matched = searchItem.getName();
        }
        Log.d(TAG, "result = " + matched);
        // A match drives the next search, then scanning stops immediately.
        processExpectedResult(matched);
        return null;
      }
    }
  }
  return processUnexpectedResult(fallback);
}
Example #2
0
  /**
   * Returns valid dictionary words that are close to a given word, grouped into tiers by
   * Levenshtein distance.
   *
   * <p>The dictionary is scanned once, in its own order. Up to 15 words at distance 1, up to 10 at
   * distance 2 and up to 5 at distance 3 are collected. How many tiers are returned depends on the
   * length of the given word:
   *
   * <ul>
   *   <li>more than 9 characters: distance 1, 2 and 3 tiers;
   *   <li>more than 5 characters: distance 1 and 2 tiers;
   *   <li>otherwise: the distance 1 tier only.
   * </ul>
   *
   * <p>The tiers are concatenated in order of increasing distance; within a tier the words keep
   * dictionary order.
   *
   * @param wordToMatch the word to find similar valid words for; must not be {@code null}
   * @return the close dictionary words, nearest tier first, after {@code removeNullVals} has been
   *     applied to the fixed-size tier arrays (presumably dropping the unused slots)
   */
  public static String[] closeMatches(String wordToMatch) {
    String[] dictionary = VALUES.toArray(new String[VALUES.size()]);

    int countOne = 0;
    int countTwo = 0;
    int countThree = 0;
    String[] nearestMatches = new String[15];
    String[] nearestMatchesTwo = new String[10];
    String[] nearestMatchesThree = new String[5];
    for (String candidate : dictionary) {
      // Compute the distance once per word instead of once per tier check.
      int distance = StringUtils.getLevenshteinDistance(wordToMatch, candidate);
      if (distance == 1 && countOne < nearestMatches.length) {
        nearestMatches[countOne++] = candidate;
      } else if (distance == 2 && countTwo < nearestMatchesTwo.length) {
        nearestMatchesTwo[countTwo++] = candidate;
      } else if (distance == 3 && countThree < nearestMatchesThree.length) {
        nearestMatchesThree[countThree++] = candidate;
      }
    }

    // Test the larger threshold first: every word longer than 9 characters is also longer than
    // 5, so the opposite order would make the three-tier branch unreachable.
    int length = wordToMatch.length();
    if (length > 9) {
      String[] moreMatches = ArrayUtils.addAll(nearestMatches, nearestMatchesTwo);
      String[] evenMoreMatches = ArrayUtils.addAll(moreMatches, nearestMatchesThree);
      return removeNullVals(evenMoreMatches);
    }
    if (length > 5) {
      String[] moreMatches = ArrayUtils.addAll(nearestMatches, nearestMatchesTwo);
      return removeNullVals(moreMatches);
    }

    return removeNullVals(nearestMatches);
  }
 /**
  * Asserts that a recognised player name is close enough to the expected one.
  *
  * <p>The comparison is deliberately tolerant: the assertion passes when the Levenshtein distance
  * between the two names is below 5. The same message is logged and used as the assertion
  * message.
  *
  * @param expected expected name
  * @param actual actual name
  */
 private void assertPlayerName(String expected, String actual) {
   // Player names come from OCR (Tesseract), which is unreliable on them, so a handful of
   // mismatched characters is tolerated instead of requiring equality.
   int editDistance = StringUtils.getLevenshteinDistance(expected, actual);
   String report =
       String.format(
           "Expected \"%s\", got \"%s\", Levenshtein distance: %s",
           expected, actual, String.valueOf(editDistance));
   log.info(report);
   boolean withinTolerance = editDistance < 5;
   Assert.assertTrue(report, withinTolerance);
 }
 /**
  * Finds the supported driver whose string form has the smallest Levenshtein distance to the
  * given value.
  *
  * @param value the text to compare each driver's {@code toString()} against
  * @return the closest driver; when several are equally close the first one returned by
  *     {@code values()} wins, and {@code null} is returned if {@code values()} is empty
  */
 public static SupportedWebDriver getClosestDriverValueTo(final String value) {
   SupportedWebDriver best = null;
   int bestDistance = Integer.MAX_VALUE;
   for (SupportedWebDriver candidate : values()) {
     int candidateDistance = StringUtils.getLevenshteinDistance(candidate.toString(), value);
     if (candidateDistance >= bestDistance) {
       // Not strictly better than what we already have; earlier candidates win ties.
       continue;
     }
     best = candidate;
     bestDistance = candidateDistance;
   }
   return best;
 }
Example #5
0
  /**
   * Fuzzy containment check: decides whether (nearly) all strings of {@code what} have a close
   * counterpart in {@code where}.
   *
   * <p>Every pair (one string from each list) is examined. A pair is a hit when the Levenshtein
   * distance is at most {@code max(1, candidate.length() / 3)} and the two lengths differ by at
   * most one. Hits are counted per pair, so one string of {@code what} can contribute several.
   *
   * @param what the strings to look for
   * @param where the strings to search in
   * @return {@code true} when the number of hits is at least {@code what.size()} for lists of up
   *     to three strings, or at least {@code what.size() - 1} for longer lists
   */
  private static boolean containsAll(List<String> what, List<String> where) {
    int hits = 0;

    for (String wanted : what) {
      for (String candidate : where) {
        int distance = StringUtils.getLevenshteinDistance(wanted, candidate);
        if (distance > Math.max(1, candidate.length() / 3)) {
          continue;
        }
        if (Math.abs(wanted.length() - candidate.length()) > 1) {
          continue;
        }
        hits++;
      }
    }

    // Short lists must be matched completely; longer ones may miss a single entry.
    int required;
    if (what.size() <= 3) {
      required = what.size();
    } else {
      required = what.size() - 1;
    }
    return required <= hits;
  }
Example #6
0
 /**
  * Removes near-duplicate and very short posts from the given list, in place.
  *
  * <p>Each post is compared with every post after it. A later post is removed when its content is
  * shorter than 5 characters, or when its similarity to the earlier post exceeds 0.4, where
  * similarity is {@code 1 - levenshteinDistance / max(length1, length2)}.
  *
  * @param topicList the posts to filter; this list itself is modified
  * @return the same list instance, after filtering
  * @throws IOException declared by the signature; nothing in this method body throws it directly
  */
 public static List<Weibo> filterWeibo(List<Weibo> topicList) throws IOException {
   for (int kept = 0; kept < topicList.size(); kept++) {
     String reference = topicList.get(kept).getContent();
     int probe = kept + 1;
     while (probe < topicList.size()) {
       String other = topicList.get(probe).getContent();
       int distance = StringUtils.getLevenshteinDistance(reference, other);
       double similarity =
           1 - (distance / (double) Math.max(reference.length(), other.length()));
       boolean discard = (similarity > 0.4) || (other.length() < 5);
       if (discard) {
         // The next element slides into this index, so the position is not advanced.
         topicList.remove(probe);
       } else {
         probe++;
       }
     }
   }
   return topicList;
 }
Example #7
0
 /**
  * Picks the alternative with the smallest Levenshtein distance to the input.
  *
  * @see <a href="http://en.wikipedia.org/wiki/Levenshtein_distance">Levenshtein Distance</a>
  * @param input the string to match; may be {@code null}
  * @param alternatives the candidate strings; may be {@code null} or empty
  * @return the closest alternative (the first one encountered when several are equally close),
  *     or {@code null} when the input is {@code null} or there are no alternatives
  */
 public static String getClosestMatchingString(String input, Collection<String> alternatives) {
   if (input == null) {
     return null;
   }
   if (alternatives == null || alternatives.isEmpty()) {
     return null;
   }

   String best = null;
   int bestDistance = Integer.MAX_VALUE;
   for (String candidate : alternatives) {
     int distance = StringUtils.getLevenshteinDistance(input, candidate);
     boolean nothingChosenYet = best == null;
     if (!nothingChosenYet && distance >= bestDistance) {
       continue;
     }
     best = candidate;
     bestDistance = distance;
   }
   return best;
 }
  /**
   * Finds the online player whose display name is closest to the given text.
   *
   * <p>Both the text and each display name are lower-cased and trimmed before their Levenshtein
   * distance is computed. No maximum distance is applied, so some player is returned whenever
   * anyone is online.
   *
   * @param needle the text to look for; must not be {@code null}
   * @return the player with the smallest distance (the first such player on ties), or
   *     {@code null} when no players are online
   */
  private static Player getPlayerByDisplayNameAlt(String needle) {
    String wanted = needle.toLowerCase().trim();

    Player closest = null;
    int closestDistance = Integer.MAX_VALUE;

    for (Player online : Bukkit.getOnlinePlayers()) {
      String shownName = online.getDisplayName().toLowerCase().trim();
      int distance = StringUtils.getLevenshteinDistance(wanted, shownName);
      if (closest == null || distance < closestDistance) {
        closest = online;
        closestDistance = distance;
      }
    }

    return closest;
  }
Example #9
0
  /**
   * Looks up the available map whose normalized name is closest to the given name.
   *
   * <p>Names are compared by Levenshtein distance after normalization. Only maps within
   * {@code MAX_NAME_DISTANCE} are eligible; among equally close maps the one appearing last in
   * {@code getAvailableMaps()} is chosen.
   *
   * @param name name of map
   * @return the best-matching map, or {@code null} when the name is {@code null}, the map library
   *     does not exist, or no map is close enough
   */
  public static AutoRefMap getMap(String name) {
    if (name == null) {
      return null;
    }
    String wanted = AutoRefMatch.normalizeMapName(name);

    // Without a map library on disk there is nothing to search.
    if (!AutoRefMap.getMapLibrary().exists()) {
      return null;
    }

    AutoRefMap closest = null;
    int closestDistance = MAX_NAME_DISTANCE;
    for (AutoRefMap candidate : getAvailableMaps()) {
      int distance =
          StringUtils.getLevenshteinDistance(wanted, AutoRefMatch.normalizeMapName(candidate.name));
      if (distance > closestDistance) {
        continue;
      }
      // "<=" semantics: an equally close later map replaces the earlier one.
      closest = candidate;
      closestDistance = distance;
    }

    return closest;
  }
Example #10
0
  /**
   * Downloads the page that describes a movie file and wraps the parsing profile, loaded with
   * that page, in a {@link Movie}.
   *
   * <p>When {@code useURLtoScrapeFrom} is {@code false}, the profile's search is run for the file.
   * Among the results whose URL contains the file's ID tag (lower-cased, dashes removed), the one
   * with the smallest Levenshtein distance between URL and ID is chosen; if none contains the ID,
   * or no ID tag is available, the first result is used. When {@code useURLtoScrapeFrom} is
   * {@code true}, the URL comes from the caller, or from the profile's JavLibrary override URL
   * for {@code JavLibraryParsingProfile}.
   *
   * @param movieFile the movie file to find metadata for
   * @param siteToParseFrom the profile that knows how to search and parse the target site
   * @param urlToScrapeFromDMM the URL to use in URL mode; it is also always passed to
   *     {@code setOverrideURLDMM} on the profile
   * @param useURLtoScrapeFrom {@code true} to skip searching and scrape the supplied URL
   * @return the scraped movie, or {@code null} when there is no usable URL (no search results, an
   *     unrecognised profile type in URL mode, or a missing/empty URL path)
   * @throws IOException if searching or fetching the page fails
   */
  public static Movie scrapeMovie(
      File movieFile,
      SiteParsingProfile siteToParseFrom,
      String urlToScrapeFromDMM,
      boolean useURLtoScrapeFrom)
      throws IOException {
    String searchString = siteToParseFrom.createSearchString(movieFile);
    SearchResult[] searchResults;
    int searchResultNumberToUse = 0;

    if (useURLtoScrapeFrom) {
      // Use the URL handed in by the caller (or the profile's own override URL).
      searchResults = new SearchResult[1];
      if (siteToParseFrom instanceof DmmParsingProfile
          || siteToParseFrom instanceof Data18MovieParsingProfile
          || siteToParseFrom instanceof Data18WebContentParsingProfile) {
        searchResults[0] = new SearchResult(urlToScrapeFromDMM);
      } else if (siteToParseFrom instanceof JavLibraryParsingProfile) {
        searchResults[0] =
            new SearchResult(
                ((JavLibraryParsingProfile) siteToParseFrom).getOverrideURLJavLibrary());
      } else if (siteToParseFrom instanceof IAFDParsingProfile) {
        searchResults[0] = new SearchResult(urlToScrapeFromDMM);
      }
      // Any other profile type leaves searchResults[0] null; that is handled below.
    } else {
      // No URL was passed in, so search the site and pick the most plausible result.
      searchResults = siteToParseFrom.getSearchResults(searchString);
      String idFromMovieFile = SiteParsingProfile.findIDTagFromFile(movieFile);

      // Without results or an ID tag there is nothing to rank; the first result (if any) is used.
      if (searchResults != null && idFromMovieFile != null) {
        // The normalised ID does not change between iterations, so build it once.
        String idFromMovieFileToMatch = idFromMovieFile.toLowerCase().replace("-", "");
        int levDistanceOfCurrentMatch = Integer.MAX_VALUE;

        // A URL containing the ID is a strong hint; among those, prefer the URL with the least
        // extra text around the ID.
        for (int i = 0; i < searchResults.length; i++) {
          String urltoMatch = searchResults[i].getUrlPath().toLowerCase();
          if (!urltoMatch.contains(idFromMovieFileToMatch)) {
            continue;
          }
          int candidateLevDistanceOfCurrentMatch =
              StringUtils.getLevenshteinDistance(urltoMatch, idFromMovieFileToMatch);
          if (candidateLevDistanceOfCurrentMatch < levDistanceOfCurrentMatch) {
            levDistanceOfCurrentMatch = candidateLevDistanceOfCurrentMatch;
            searchResultNumberToUse = i;
          }
        }
      }
    }

    SearchResult chosenResult =
        (searchResults != null && searchResults.length > 0)
            ? searchResults[searchResultNumberToUse]
            : null;
    String urlPath = (chosenResult == null) ? null : chosenResult.getUrlPath();
    if (urlPath == null || urlPath.length() == 0) {
      return null; // TODO return some kind of default movie
    }

    System.out.println("Scraping this webpage for movie: " + urlPath);
    // NOTE(review): jsoup documents a timeout of 0 as "no timeout", so this call can block
    // indefinitely on an unresponsive server - confirm that is intended.
    Document searchMatch =
        Jsoup.connect(urlPath)
            .timeout(0)
            .userAgent("Mozilla/5.0 (Windows NT 6.1; WOW64; rv:5.0) Gecko/20100101 Firefox/5.0")
            .get();
    siteToParseFrom.setDocument(searchMatch);
    siteToParseFrom.setOverrideURLDMM(urlToScrapeFromDMM);
    return new Movie(siteToParseFrom);
  }
 /**
  * Computes a normalised similarity between two words from their Levenshtein distance.
  *
  * <p>The result is {@code 1 - distance / max(word1.length(), word2.length())}: 1.0 for identical
  * words, 0.0 when every position of the longer word has to be edited.
  *
  * @param word1 the first word; must not be {@code null}
  * @param word2 the second word; must not be {@code null}
  * @return the similarity in the range [0, 1]; two empty words are identical and yield 1.0
  */
 @Override
 public double compare(String word1, String word2) {
   double distance = StringUtils.getLevenshteinDistance(word1, word2);
   int longest = Math.max(word1.length(), word2.length());
   if (longest == 0) {
     // Both words are empty: dividing 0.0 by 0 would produce NaN instead of "identical".
     return 1.0;
   }
   return 1 - distance / longest;
 }