/**
   * Looks up the frequency value stored for a word form. The value is read from the first entry
   * the iterator yields for this word form.
   *
   * @param word the word form to look up
   * @return frequency value in range: 0..FREQ_RANGE-1 (0: less frequent); 0 is also returned when
   *     the dictionary metadata reports no frequency data, when the word cannot be converted to
   *     bytes, or when no entry follows the separator
   */
  public int getFrequency(final CharSequence word) {
    if (!dictionaryMetadata.isFrequencyIncluded()) {
      return 0;
    }

    final byte separator = dictionaryMetadata.getSeparator();
    try {
      byteBuffer = charSequenceToBytes(word);
    } catch (UnmappableInputException e) {
      // The word could not be converted to bytes, so it cannot be looked up.
      return 0;
    }

    final MatchResult match =
        matcher.match(matchResult, byteBuffer.array(), 0, byteBuffer.remaining(), rootNode);
    if (match.kind != SEQUENCE_IS_A_PREFIX) {
      return 0;
    }

    // The word must be followed by a separator arc that leads to further data.
    final int separatorArc = fsa.getArc(match.node, separator);
    if (separatorArc == 0 || fsa.isArcFinal(separatorArc)) {
      return 0;
    }

    finalStatesIterator.restartFrom(fsa.getEndNode(separatorArc));
    if (!finalStatesIterator.hasNext()) {
      return 0;
    }

    // The last byte of the sequence after the separator holds the frequency code.
    final ByteBuffer entry = finalStatesIterator.next();
    return entry.array()[entry.remaining() - 1] - FIRST_RANGE_CODE;
  }
 // by Jaume Ortola
 /**
  * Decides whether two characters count as equal for matching purposes.
  *
  * <p>Characters are equal when they are identical, or when the dictionary metadata lists
  * <code>y</code> among the equivalent characters of <code>x</code>. If the dictionary ignores
  * diacritics, the first characters of the NFD-normalized forms are compared instead, and, if the
  * dictionary also converts case, that comparison is repeated on the lowercased characters when
  * the two differ in case.
  *
  * @param x the first character
  * @param y the second character
  * @return true if the characters are considered equal under the rules above
  */
 private boolean areEqual(final char x, final char y) {
   if (x == y) {
     return true;
   }
   if (dictionaryMetadata.getEquivalentChars() != null
       && dictionaryMetadata.getEquivalentChars().containsKey(x)
       && dictionaryMetadata.getEquivalentChars().get(x).contains(y)) {
     return true;
   }
   if (!dictionaryMetadata.isIgnoringDiacritics()) {
     return false;
   }

   // Compare the leading characters of the decomposed forms.
   final char baseX = Normalizer.normalize(Character.toString(x), Form.NFD).charAt(0);
   final char baseY = Normalizer.normalize(Character.toString(y), Form.NFD).charAt(0);
   if (baseX == baseY) { // avoid case conversion, if possible
     return true;
   }

   // Case conversion only when needed: single characters are compared, so
   // Character.toLowerCase is enough and no String lowercasing is required.
   if (dictionaryMetadata.isConvertingCase()
       && Character.isLetter(baseX)
       && Character.isLowerCase(baseX) != Character.isLowerCase(baseY)) {
     return Character.toLowerCase(baseX) == Character.toLowerCase(baseY);
   }

   // The base characters are known to differ here.
   return false;
 }
  /**
   * Creates a speller working on the automaton and metadata of the given dictionary.
   *
   * @param dictionary the dictionary providing the automaton and its metadata
   * @param editDistance the edit distance stored in this speller and used to size the H matrix
   * @throws IllegalArgumentException if the automaton's root node is 0 or the dictionary metadata
   *     is null
   */
  public Speller(final Dictionary dictionary, final int editDistance) {
    this.editDistance = editDistance;
    this.hMatrix = new HMatrix(editDistance, MAX_WORD_LENGTH);

    this.dictionaryMetadata = dictionary.metadata;
    this.fsa = dictionary.fsa;
    this.rootNode = this.fsa.getRootNode();
    this.matcher = new FSATraversal(this.fsa);
    this.finalStatesIterator = new ByteSequenceIterator(this.fsa, this.rootNode);

    if (this.rootNode == 0) {
      throw new IllegalArgumentException("Dictionary must have at least the root node.");
    }

    if (this.dictionaryMetadata == null) {
      throw new IllegalArgumentException("Dictionary metadata must not be null.");
    }

    this.encoder = this.dictionaryMetadata.getEncoder();
    this.decoder = this.dictionaryMetadata.getDecoder();

    // Called only for validation: a multibyte separator will result in an exception here.
    this.dictionaryMetadata.getSeparatorAsChar();

    createReplacementsMaps();
  }
 /**
  * Checks whether the node reached through the given arc has an outgoing, non-terminal separator
  * arc. Always false while <code>containsSeparators</code> is not set.
  *
  * @param arc the arc whose end node is inspected
  * @return true if a non-terminal separator arc leaves the end node of <code>arc</code>
  */
 private boolean isBeforeSeparator(final int arc) {
   if (!containsSeparators) {
     return false;
   }
   final int separatorArc = fsa.getArc(fsa.getEndNode(arc), dictionaryMetadata.getSeparator());
   if (separatorArc == 0) {
     return false;
   }
   return !fsa.isArcTerminal(separatorArc);
 }
 /**
  * Propose suggestions for misspelled run-on words. This algorithm is inspired by spell.cc in
  * s_fsa package by Jan Daciuk.
  *
  * <p>The word is first passed through the dictionary's input conversion pairs (when there are
  * any). If the converted word is not in the dictionary and the dictionary supports run-on words,
  * every split position is tried from left to right; a split is accepted when both halves are in
  * the dictionary. Accepted pairs are passed through the output conversion pairs (when there are
  * any) before being returned.
  *
  * @param original The original misspelled word.
  * @return The list of suggested pairs, as space-concatenated strings; empty if nothing was found.
  */
 public List<String> replaceRunOnWords(final String original) {
   final List<String> candidates = new ArrayList<String>();

   // Convert once and use the converted form for every lookup, so that the halves are checked in
   // the same form as the whole word.
   String wordToCheck = original;
   if (!dictionaryMetadata.getInputConversionPairs().isEmpty()) {
     wordToCheck =
         DictionaryLookup.applyReplacements(
             original, dictionaryMetadata.getInputConversionPairs());
   }

   if (!isInDictionary(wordToCheck) && dictionaryMetadata.isSupportingRunOnWords()) {
     for (int i = 1; i < wordToCheck.length(); i++) {
       // chop from left to right
       final CharSequence head = wordToCheck.subSequence(0, i);
       final CharSequence tail = wordToCheck.subSequence(i, wordToCheck.length());
       if (isInDictionary(head) && isInDictionary(tail)) {
         final String pair = head + " " + tail;
         if (dictionaryMetadata.getOutputConversionPairs().isEmpty()) {
           // Nothing to convert: the pair is returned as found.
           candidates.add(pair);
         } else {
           candidates.add(
               DictionaryLookup.applyReplacements(
                       pair, dictionaryMetadata.getOutputConversionPairs())
                   .toString());
         }
       }
     }
   }
   return candidates;
 }
 /**
  * Decides whether a word is misspelled by running a sequence of checks driven by the dictionary
  * properties. The first check that settles the question ends the evaluation.
  *
  * <p>With <code>fsa.dict.speller.ignore-punctuation</code> set, all non-alphabetic characters
  * are considered to be correctly spelled.
  *
  * <p>With <code>fsa.dict.speller.ignore-numbers</code> set, all words containing decimal digits
  * are considered to be correctly spelled.
  *
  * <p>With <code>fsa.dict.speller.ignore-camel-case</code> set, all CamelCase words are
  * considered to be correctly spelled.
  *
  * <p>With <code>fsa.dict.speller.ignore-all-uppercase</code> set, all alphabetic words composed
  * of only uppercase characters are considered to be correctly spelled.
  *
  * <p>Otherwise the word is looked up in the dictionary. If it is not found and the dictionary
  * does not convert case (<code>fsa.dict.speller.convert-case</code>), the word is misspelled.
  * With case conversion, a word that is not mixed-case is also accepted if its lowercase version
  * is in the dictionary, or, for all-uppercase words, if the version with only the initial letter
  * in uppercase is in the dictionary.
  *
  * @param word - the word to be checked
  * @return true if the word is misspelled
  */
 public boolean isMisspelled(final String word) {
   final String token =
       dictionaryMetadata.getInputConversionPairs().isEmpty()
           ? word
           : DictionaryLookup.applyReplacements(
               word, dictionaryMetadata.getInputConversionPairs());

   // dictionaries usually do not contain punctuation; only a one-character token can fail this
   final boolean alphabetic = token.length() != 1 || isAlphabetic(token.charAt(0));

   if (token.length() == 0) {
     return false;
   }
   if (dictionaryMetadata.isIgnoringPunctuation() && !alphabetic) {
     return false;
   }
   if (dictionaryMetadata.isIgnoringNumbers() && !containsNoDigit(token)) {
     return false;
   }
   if (dictionaryMetadata.isIgnoringCamelCase() && isCamelCase(token)) {
     return false;
   }
   if (dictionaryMetadata.isIgnoringAllUppercase() && alphabetic && isAllUppercase(token)) {
     return false;
   }
   if (isInDictionary(token)) {
     return false;
   }

   // Not found as written: without case conversion, or for mixed-case words, that is final.
   if (!dictionaryMetadata.isConvertingCase() || isMixedCase(token)) {
     return true;
   }
   if (isInDictionary(token.toLowerCase(dictionaryMetadata.getLocale()))) {
     return false;
   }
   return !(isAllUppercase(token) && isInDictionary(initialUppercase(token)));
 }
  /**
   * Tests whether the word is present in the dictionary.
   *
   * <p>An exact match in the automaton counts as found and resets <code>containsSeparators</code>
   * to false. Otherwise, and only while <code>containsSeparators</code> is set, a non-empty word
   * that is a prefix of an entry counts as found when a separator arc leaves the matched node.
   *
   * @param word the word to be tested
   * @return True if it is found; false also when the word cannot be converted to bytes.
   */
  public boolean isInDictionary(final CharSequence word) {
    try {
      byteBuffer = charSequenceToBytes(word);
    } catch (UnmappableInputException e) {
      return false;
    }

    // Try to find a partial match in the dictionary.
    final MatchResult match =
        matcher.match(matchResult, byteBuffer.array(), 0, byteBuffer.remaining(), rootNode);

    if (match.kind == EXACT_MATCH) {
      containsSeparators = false;
      return true;
    }

    if (!containsSeparators || match.kind != SEQUENCE_IS_A_PREFIX) {
      return false;
    }
    if (byteBuffer.remaining() <= 0) {
      return false;
    }
    return fsa.getArc(match.node, dictionaryMetadata.getSeparator()) != 0;
  }
 /**
  * Distributes the replacement pairs from the dictionary metadata over three lookup maps,
  * depending on the length of the replacement target:
  *
  * <ul>
  *   <li>one-character targets go to <code>replacementsAnyToOne</code>, keyed by the target
  *       character, with the pair's key stored as a char array;
  *   <li>two-character targets go to <code>replacementsAnyToTwo</code>, keyed by the target
  *       string, with the pair's key stored as a char array;
  *   <li>all other targets go to <code>replacementsTheRest</code>, keyed by the pair's key, with
  *       the target stored as a string.
  * </ul>
  */
 private void createReplacementsMaps() {
   for (Map.Entry<String, List<String>> pair :
       dictionaryMetadata.getReplacementPairs().entrySet()) {
     final String source = pair.getKey();
     for (String target : pair.getValue()) {
       switch (target.length()) {
         case 1:
           {
             // replacements any to one: the new key is the target of the replacement pair
             final char targetChar = target.charAt(0);
             List<char[]> sources = replacementsAnyToOne.get(targetChar);
             if (sources == null) {
               sources = new ArrayList<char[]>();
               replacementsAnyToOne.put(targetChar, sources);
             }
             sources.add(source.toCharArray());
             break;
           }
         case 2:
           {
             // replacements any to two: the new key is the target of the replacement pair
             List<char[]> sources = replacementsAnyToTwo.get(target);
             if (sources == null) {
               sources = new ArrayList<char[]>();
               replacementsAnyToTwo.put(target, sources);
             }
             sources.add(source.toCharArray());
             break;
           }
         default:
           {
             // everything else stays keyed by the source of the replacement pair
             List<String> targets = replacementsTheRest.get(source);
             if (targets == null) {
               targets = new ArrayList<String>();
               replacementsTheRest.put(source, targets);
             }
             targets.add(target);
             break;
           }
       }
     }
   }
 }
 /**
  * Reports whether the dictionary performs case conversions, as stated by its metadata.
  *
  * @return true if the dictionary metadata has case conversion enabled
  * @since 1.9
  */
 public boolean convertsCase() {
   final boolean caseConversionEnabled = dictionaryMetadata.isConvertingCase();
   return caseConversionEnabled;
 }
 /**
  * Checks that the arc is not terminal and, while <code>containsSeparators</code> is set, that
  * the candidate character at the given index is not the separator character.
  *
  * @param arc the arc to test
  * @param candIndex index into the <code>candidate</code> array of the character to test
  * @return true if the arc is not terminal and the candidate character is not a separator
  */
 private boolean isArcNotTerminal(final int arc, final int candIndex) {
   if (fsa.isArcTerminal(arc)) {
     return false;
   }
   if (!containsSeparators) {
     return true;
   }
   return candidate[candIndex] != dictionaryMetadata.getSeparatorAsChar();
 }
  /**
   * Find suggestions by using K. Oflazer's algorithm. See Jan Daciuk's s_fsa package, spell.cc for
   * further explanation.
   *
   * <p>The word is first passed through the input conversion pairs (when there are any). The
   * search runs only for a non-empty word shorter than <code>MAX_WORD_LENGTH</code> that is not
   * itself in the dictionary. Variants produced by <code>getAllReplacements</code> are looked up
   * directly (and, with case conversion enabled, also in lowercase, uppercase and
   * initial-uppercase form); only when none of them is found does the <code>findRepl</code> search
   * run on the remaining variants. The result is sorted, passed through the output conversion
   * pairs and de-duplicated.
   *
   * <p>This method overwrites the instance's search state (<code>candidates</code>, <code>
   * candidate</code>, <code>wordProcessed</code>, the char and byte buffers).
   *
   * @param w The original misspelled word.
   * @return A list of suggested replacements.
   */
  public List<String> findReplacements(final String w) {
    String word = w;
    if (!dictionaryMetadata.getInputConversionPairs().isEmpty()) {
      word = DictionaryLookup.applyReplacements(w, dictionaryMetadata.getInputConversionPairs());
    }
    // Start from an empty candidate list; it is a field reused between calls.
    candidates.clear();
    if (word.length() > 0 && word.length() < MAX_WORD_LENGTH && !isInDictionary(word)) {
      // Variants that were not found directly and still need the edit-distance search.
      List<String> wordsToCheck = new ArrayList<String>();
      if (replacementsTheRest != null && word.length() > MIN_WORD_LENGTH) {
        for (final String wordChecked : getAllReplacements(word, 0, 0)) {
          boolean found = false;
          if (isInDictionary(wordChecked)) {
            // Direct hits are recorded with a value of 0.
            candidates.add(new CandidateData(wordChecked, 0));
            found = true;
          } else if (dictionaryMetadata.isConvertingCase()) {
            String lowerWord = wordChecked.toLowerCase(dictionaryMetadata.getLocale());
            String upperWord = wordChecked.toUpperCase(dictionaryMetadata.getLocale());
            if (isInDictionary(lowerWord)) {
              // add the word as it is in the dictionary, not mixed-case versions of it
              candidates.add(new CandidateData(lowerWord, 0));
              found = true;
            }
            if (isInDictionary(upperWord)) {
              candidates.add(new CandidateData(upperWord, 0));
              found = true;
            }
            if (lowerWord.length() > 1) {
              // Also try the lowercase word with only its first character uppercased.
              String firstupperWord =
                  Character.toUpperCase(lowerWord.charAt(0)) + lowerWord.substring(1);
              if (isInDictionary(firstupperWord)) {
                candidates.add(new CandidateData(firstupperWord, 0));
                found = true;
              }
            }
          }
          if (!found) {
            wordsToCheck.add(wordChecked);
          }
        }
      } else {
        wordsToCheck.add(word);
      }

      // If at least one candidate was found with the replacement pairs (which are usual errors),
      // probably there is no need for more candidates
      if (candidates.isEmpty()) {
        // i is incremented before it is tested, so it is 2 for the first word in the list.
        int i = 1;
        for (final String wordChecked : wordsToCheck) {
          i++;
          if (i > UPPER_SEARCH_LIMIT) { // for performance reasons, do not search too deeply
            break;
          }
          wordProcessed = wordChecked.toCharArray();
          wordLen = wordProcessed.length;
          // A word shorter than MIN_WORD_LENGTH is searched only if it is the first in the list.
          if (wordLen < MIN_WORD_LENGTH
              && i > 2) { // three-letter replacements make little sense anyway
            break;
          }
          // Reset the per-word search state before running the search.
          candidate = new char[MAX_WORD_LENGTH];
          candLen = candidate.length;
          // For words no longer than the edit distance, allow at most wordLen - 1 edits.
          effectEditDistance = wordLen <= editDistance ? wordLen - 1 : editDistance;
          charBuffer = BufferUtils.clearAndEnsureCapacity(charBuffer, MAX_WORD_LENGTH);
          byteBuffer = BufferUtils.clearAndEnsureCapacity(byteBuffer, MAX_WORD_LENGTH);
          final byte[] prevBytes = new byte[0];
          // NOTE(review): findRepl presumably appends its hits to `candidates` -- confirm.
          findRepl(0, fsa.getRootNode(), prevBytes, 0, 0);
        }
      }
    }

    Collections.sort(candidates);

    // Use a linked set to avoid duplicates and preserve the ordering of candidates.
    final Set<String> candStringSet = new LinkedHashSet<String>();
    for (final CandidateData cd : candidates) {
      candStringSet.add(
          DictionaryLookup.applyReplacements(
                  cd.getWord(), dictionaryMetadata.getOutputConversionPairs())
              .toString());
    }
    final List<String> candStringList = new ArrayList<String>(candStringSet.size());
    candStringList.addAll(candStringSet);
    return candStringList;
  }
 /**
  * Builds a copy of the word in which the first character is kept as it is and all remaining
  * characters are lowercased using the dictionary's locale.
  *
  * @param wordToCheck the word to convert; must contain at least one character
  * @return the word with everything after the first character in lowercase
  */
 private CharSequence initialUppercase(final String wordToCheck) {
   final String head = wordToCheck.substring(0, 1);
   final String loweredTail =
       wordToCheck.substring(1).toLowerCase(dictionaryMetadata.getLocale());
   return head + loweredTail;
 }