コード例 #1
0
 // by Jaume Ortola
 /**
  * Decides whether two characters count as the same character for this dictionary.
  *
  * <p>The characters match when any of the following holds:
  *
  * <ul>
  *   <li>they are identical;
  *   <li>the dictionary metadata lists <code>y</code> among the equivalents of <code>x</code>
  *       (the lookup is done for <code>x</code> only, not the other way round);
  *   <li>the dictionary ignores diacritics and the first characters of their NFD decompositions
  *       are identical, or - when the dictionary also converts case, the decomposed
  *       <code>x</code> is a letter and exactly one of the two decomposed characters is lower
  *       case - identical after lower-casing.
  * </ul>
  *
  * <p>Note that case conversion is only consulted when diacritics are being ignored.
  *
  * @param x the first character
  * @param y the second character
  * @return true if the characters are considered equal
  */
 private boolean areEqual(final char x, final char y) {
   if (x == y) {
     return true;
   }

   final boolean listedAsEquivalent =
       dictionaryMetadata.getEquivalentChars() != null
           && dictionaryMetadata.getEquivalentChars().containsKey(x)
           && dictionaryMetadata.getEquivalentChars().get(x).contains(y);
   if (listedAsEquivalent) {
     return true;
   }

   if (!dictionaryMetadata.isIgnoringDiacritics()) {
     return false;
   }

   // Only the first character of each canonical decomposition is compared.
   final char baseX = Normalizer.normalize(Character.toString(x), Form.NFD).charAt(0);
   final char baseY = Normalizer.normalize(Character.toString(y), Form.NFD).charAt(0);
   if (baseX == baseY) { // avoid case conversion, if possible
     return true;
   }

   // The base characters differ, so only a difference in case can still make them match.
   if (!dictionaryMetadata.isConvertingCase() || !Character.isLetter(baseX)) {
     return false;
   }

   // Lower-casing single characters is cheaper than String.toLowerCase, and it is only worth
   // doing when exactly one of the two characters is lower case.
   if (Character.isLowerCase(baseX) == Character.isLowerCase(baseY)) {
     return false;
   }
   return Character.toLowerCase(baseX) == Character.toLowerCase(baseY);
 }
コード例 #2
0
 /**
  * Tells whether a word should be reported as misspelled, taking the dictionary's speller flags
  * into account.
  *
  * <p>The input conversion pairs of the dictionary, if any, are applied to the word first. The
  * converted word is then treated as correctly spelled (the method returns false) as soon as one
  * of these conditions is met, checked in this order:
  *
  * <ol>
  *   <li>the word is empty;
  *   <li><code>fsa.dict.speller.ignore-punctuation</code> is set and the word is a single
  *       non-alphabetic character;
  *   <li><code>fsa.dict.speller.ignore-numbers</code> is set and the word contains a decimal
  *       digit;
  *   <li><code>fsa.dict.speller.ignore-camel-case</code> is set and the word is CamelCase;
  *   <li><code>fsa.dict.speller.ignore-all-uppercase</code> is set and the word is all uppercase
  *       (a single-character word must also be alphabetic);
  *   <li>the word is found in the dictionary;
  *   <li><code>fsa.dict.speller.convert-case</code> is set, the word is not mixed-case, and
  *       either its lowercase form is in the dictionary, or the word is all uppercase and its
  *       initial-uppercase form is in the dictionary.
  * </ol>
  *
  * <p>In every other case the word is reported as misspelled. In particular, a word that is not
  * in the dictionary is misspelled whenever case conversion is switched off.
  *
  * @param word - the word to be checked
  * @return true if the word is misspelled
  */
 public boolean isMisspelled(final String word) {
   String candidate = word;
   if (!dictionaryMetadata.getInputConversionPairs().isEmpty()) {
     candidate =
         DictionaryLookup.applyReplacements(word, dictionaryMetadata.getInputConversionPairs());
   }

   if (candidate.isEmpty()) {
     return false;
   }

   // dictionaries usually do not contain punctuation; only one-character words are tested here
   final boolean alphabetic = candidate.length() != 1 || isAlphabetic(candidate.charAt(0));

   if (dictionaryMetadata.isIgnoringPunctuation() && !alphabetic) {
     return false;
   }
   if (dictionaryMetadata.isIgnoringNumbers() && !containsNoDigit(candidate)) {
     return false;
   }
   if (dictionaryMetadata.isIgnoringCamelCase() && isCamelCase(candidate)) {
     return false;
   }
   if (dictionaryMetadata.isIgnoringAllUppercase() && alphabetic && isAllUppercase(candidate)) {
     return false;
   }
   if (isInDictionary(candidate)) {
     return false;
   }

   // The word itself is unknown; only case conversion can still rescue it.
   if (!dictionaryMetadata.isConvertingCase()) {
     return true;
   }
   if (isMixedCase(candidate)) {
     return true;
   }
   if (isInDictionary(candidate.toLowerCase(dictionaryMetadata.getLocale()))) {
     return false;
   }
   final boolean knownCapitalized =
       isAllUppercase(candidate) && isInDictionary(initialUppercase(candidate));
   return !knownCapitalized;
 }
コード例 #3
0
 /**
  * Reports whether case conversion is enabled for the dictionary used by this speller.
  *
  * @return the value of the case-conversion setting read from the dictionary metadata
  * @since 1.9
  */
 public boolean convertsCase() {
   final boolean caseConversionEnabled = dictionaryMetadata.isConvertingCase();
   return caseConversionEnabled;
 }
コード例 #4
0
  /**
   * Finds suggestions for a misspelled word using K. Oflazer's algorithm. See Jan Daciuk's s_fsa
   * package, spell.cc for further explanation.
   *
   * <p>The dictionary's input conversion pairs, if any, are applied to the word first. No search
   * is done when the converted word is empty, is <code>MAX_WORD_LENGTH</code> characters or
   * longer, or is already in the dictionary. Otherwise:
   *
   * <ol>
   *   <li>If replacement pairs are available and the word is longer than <code>MIN_WORD_LENGTH
   *       </code>, every variant produced by the replacement pairs is looked up as is and, when
   *       the dictionary converts case, also in lowercase, uppercase and initial-uppercase form.
   *       Each form found becomes a candidate with distance 0.
   *   <li>Only when that produced no candidate at all, the variants that were not found (or the
   *       word itself, if step 1 was skipped) are searched in the automaton, limited by <code>
   *       UPPER_SEARCH_LIMIT</code>.
   * </ol>
   *
   * <p>The candidate list and the per-search working fields of this instance are overwritten by
   * every call.
   *
   * @param w The original misspelled word.
   * @return A list of suggested replacements in sorted candidate order, with the output
   *     conversion pairs applied and duplicates removed; empty if nothing was found.
   */
  public List<String> findReplacements(final String w) {
    final String word =
        dictionaryMetadata.getInputConversionPairs().isEmpty()
            ? w
            : DictionaryLookup.applyReplacements(w, dictionaryMetadata.getInputConversionPairs());
    candidates.clear();

    final boolean worthSearching =
        word.length() > 0 && word.length() < MAX_WORD_LENGTH && !isInDictionary(word);
    if (worthSearching) {
      final List<String> pending = new ArrayList<String>();
      if (replacementsTheRest == null || word.length() <= MIN_WORD_LENGTH) {
        pending.add(word);
      } else {
        for (final String variant : getAllReplacements(word, 0, 0)) {
          boolean matched = false;
          if (isInDictionary(variant)) {
            candidates.add(new CandidateData(variant, 0));
            matched = true;
          } else if (dictionaryMetadata.isConvertingCase()) {
            final String lower = variant.toLowerCase(dictionaryMetadata.getLocale());
            final String upper = variant.toUpperCase(dictionaryMetadata.getLocale());
            // One-character words have no separate initial-uppercase form to try.
            final String capitalized =
                lower.length() > 1
                    ? Character.toUpperCase(lower.charAt(0)) + lower.substring(1)
                    : null;
            // add the word as it is in the dictionary, not mixed-case versions of it
            for (final String form : new String[] {lower, upper, capitalized}) {
              if (form != null && isInDictionary(form)) {
                candidates.add(new CandidateData(form, 0));
                matched = true;
              }
            }
          }
          if (!matched) {
            pending.add(variant);
          }
        }
      }

      // If at least one candidate was found with the replacement pairs (which are usual errors),
      // probably there is no need for more candidates
      if (candidates.isEmpty()) {
        int position = 0; // 1-based index of the word currently being searched
        for (final String attempt : pending) {
          position++;
          if (position >= UPPER_SEARCH_LIMIT) { // for performance reasons, do not search too deeply
            break;
          }
          wordProcessed = attempt.toCharArray();
          wordLen = wordProcessed.length;
          // three-letter replacements make little sense anyway; the first word is always tried
          if (wordLen < MIN_WORD_LENGTH && position > 1) {
            break;
          }
          candidate = new char[MAX_WORD_LENGTH];
          candLen = candidate.length;
          // Words no longer than the edit distance get a distance one below their length.
          if (wordLen <= editDistance) {
            effectEditDistance = wordLen - 1;
          } else {
            effectEditDistance = editDistance;
          }
          charBuffer = BufferUtils.clearAndEnsureCapacity(charBuffer, MAX_WORD_LENGTH);
          byteBuffer = BufferUtils.clearAndEnsureCapacity(byteBuffer, MAX_WORD_LENGTH);
          findRepl(0, fsa.getRootNode(), new byte[0], 0, 0);
        }
      }
    }

    Collections.sort(candidates);

    // A linked set drops duplicates while keeping the sorted order of the candidates.
    final Set<String> uniqueSuggestions = new LinkedHashSet<String>();
    for (final CandidateData cd : candidates) {
      final String converted =
          DictionaryLookup.applyReplacements(
                  cd.getWord(), dictionaryMetadata.getOutputConversionPairs())
              .toString();
      uniqueSuggestions.add(converted);
    }
    return new ArrayList<String>(uniqueSuggestions);
  }