Beispiel #1
0
 /**
  * Applies a bit mask of option flags to the checker's configuration.
  *
  * <p>Every single-bit value from {@code 1} up to and including {@code 0x20000} is looked up in
  * {@code configmap}. For each bit that has a configuration key mapped to it, that key is set to
  * {@code true} when the bit is present in {@code options} and to {@code false} otherwise. Bits
  * without a mapping are skipped.
  *
  * @param options bit mask whose individual bits switch the mapped configuration keys on or off
  */
 public void setOptions(int options) {
   Configuration config = checker.getConfiguration();
   for (int bit = 1; bit <= 0x20000; bit <<= 1) {
     // Integer.valueOf replaces the deprecated Integer constructor; the resulting key is
     // equal (equals/hashCode) to the one the constructor produced, so lookups are unchanged.
     String key = (String) configmap.get(Integer.valueOf(bit));
     if (key != null) {
       config.setBoolean(key, (options & bit) != 0);
     }
   }
 }
Beispiel #2
0
 /**
  * Decides whether the suggestions offered for {@code word} should be capitalized.
  *
  * <ul>
  *   <li>If {@code SPELL_IGNORESENTENCECAPITALIZATION} is <em>not</em> set, suggestions are
  *       capitalized only for the first word of a sentence.
  *   <li>If it is set, suggestions are capitalized when the word itself starts with an upper case
  *       character.
  * </ul>
  *
  * @param word the word suggestions are being produced for
  * @param wordTokenizer the tokenizer that produced {@code word}; asked whether a new sentence
  *     starts here
  * @return true if the suggestions should be capitalized; false for an empty word when the
  *     decision depends on its first character
  */
 private boolean capitalizeSuggestions(String word, WordTokenizer wordTokenizer) {
   boolean configCapitalize = !config.getBoolean(Configuration.SPELL_IGNORESENTENCECAPITALIZATION);
   if (configCapitalize) {
     return wordTokenizer.isNewSentence();
   }
   // Only inspect the first character when it actually decides the result, and never on an
   // empty word (charAt(0) would throw StringIndexOutOfBoundsException).
   return word.length() > 0 && Character.isUpperCase(word.charAt(0));
 }
Beispiel #3
0
 /**
  * Tells whether a word that currently starts with a lower case character should have been
  * capitalized because it begins a sentence.
  *
  * <p>Always false when {@code SPELL_IGNORESENTENCECAPITALIZATION} is set.
  *
  * @param word the word to inspect
  * @param wordTokenizer the tokenizer that produced {@code word}; asked whether a new sentence
  *     starts here
  * @return true if sentence capitalization is enforced, the word starts a sentence and its first
  *     character is lower case; false for an empty word
  */
 private boolean isSupposedToBeCapitalized(String word, WordTokenizer wordTokenizer) {
   if (config.getBoolean(Configuration.SPELL_IGNORESENTENCECAPITALIZATION)) {
     return false;
   }
   // The length check keeps charAt(0) from throwing on an empty word.
   return wordTokenizer.isNewSentence()
       && word.length() > 0
       && Character.isLowerCase(word.charAt(0));
 }
Beispiel #4
0
/**
 * This is the main class for spell checking (using the new event based spell checking).
 *
 * <p>By default, the class creates a user dictionary to accumulate added words. Since this default
 * user dictionary has no file assigned to persist added words, they are retained only for the
 * lifetime of the spell checker instance. If you set a user dictionary like {@link
 * com.swabunga.spell.engine.SpellDictionaryHashMap SpellDictionaryHashMap} to persist the added
 * words, the user dictionary will have the possibility to grow and be available across different
 * invocations of the spell checker.
 *
 * @author Jason Height ([email protected]) 19 June 2002
 */
public class SpellChecker {
  /** Flag indicating that the Spell Check completed without any errors present */
  public static final int SPELLCHECK_OK = -1;
  /** Flag indicating that the Spell Check completed due to user cancellation */
  public static final int SPELLCHECK_CANCEL = -2;

  /** Listeners that are notified of every spell check event. */
  private final Vector<SpellCheckListener> eventListeners = new Vector<SpellCheckListener>();

  /** Dictionaries used for lookups and suggestions, in the order they were added. */
  private final Vector<SpellDictionary> dictionaries = new Vector<SpellDictionary>();

  /** Dictionary that receives the words added by the user. */
  private SpellDictionary userdictionary;

  private final Configuration config = Configuration.getConfiguration();

  /** This variable holds all of the words that are to be always ignored */
  private Vector<String> ignoredWords = new Vector<String>();

  /** Maps a misspelt word to the replacement chosen through <code>Replace All</code>. */
  private Hashtable<String, String> autoReplaceWords = new Hashtable<String, String>();

  // added caching - bd
  // For cached operation a separate user dictionary is required
  /** Suggestions gathered from the added dictionaries, keyed by word; null when disabled. */
  @SuppressWarnings("unchecked")
  private Map cache;

  /** Threshold the cached suggestions were computed with. */
  private int threshold = 0;

  /** Maximum number of entries kept in {@link #cache}. */
  private int cacheSize = 0;

  /**
   * Constructs the SpellChecker with an in-memory user dictionary and no other dictionaries.
   *
   * @throws RuntimeException if the default user dictionary cannot be created; the original
   *     {@link IOException} is attached as the cause
   */
  public SpellChecker() {
    try {
      userdictionary = new SpellDictionaryHashMap();
    } catch (IOException e) {
      // Keep the cause so that an "impossible" failure can still be diagnosed.
      throw new RuntimeException(
          "this exception should never happen because we are using null phonetic file", e);
    }
  }

  /**
   * Constructs the SpellChecker. The default threshold is used
   *
   * @param dictionary The dictionary used for looking up words.
   * @throws IllegalArgumentException if {@code dictionary} is null
   */
  public SpellChecker(SpellDictionary dictionary) {
    this();
    addDictionary(dictionary);
  }

  /**
   * Constructs the SpellChecker with a threshold
   *
   * @param dictionary the dictionary used for looking up words.
   * @param threshold the cost value above which any suggestions are thrown away
   * @throws IllegalArgumentException if {@code dictionary} is null
   */
  public SpellChecker(SpellDictionary dictionary, int threshold) {
    this(dictionary);
    config.setInteger(Configuration.SPELL_THRESHOLD, threshold);
  }

  /**
   * Appends a dictionary to the end of the list of dictionaries used for looking up words. Adding
   * dictionaries gives the flexibility to assign the base language dictionary, then a more
   * technical one, and so on.
   *
   * @param dictionary the dictionary to add at the end of the dictionary list.
   * @throws IllegalArgumentException if {@code dictionary} is null
   */
  public void addDictionary(SpellDictionary dictionary) {
    if (dictionary == null) {
      throw new IllegalArgumentException("dictionary must be non-null");
    }
    this.dictionaries.addElement(dictionary);
  }

  /**
   * Registers the user dictionary to which words are added.
   *
   * <p>The argument is not validated here; the other methods of this class dereference the user
   * dictionary without a null check.
   *
   * @param dictionary the dictionary to use when the user specifies a new word to add.
   */
  public void setUserDictionary(SpellDictionary dictionary) {
    userdictionary = dictionary;
  }

  /**
   * Supplies the instance of the configuration holding the spell checking engine parameters.
   *
   * @return Current Configuration
   */
  public Configuration getConfiguration() {
    return config;
  }

  /**
   * Adds a SpellCheckListener to the listeners list.
   *
   * @param listener The feature to be added to the SpellCheckListener attribute
   */
  public void addSpellCheckListener(SpellCheckListener listener) {
    eventListeners.addElement(listener);
  }

  /**
   * Removes a SpellCheckListener from the listeners list.
   *
   * @param listener The listener to be removed from the listeners list.
   */
  public void removeSpellCheckListener(SpellCheckListener listener) {
    eventListeners.removeElement(listener);
  }

  /**
   * Fires off a spell check event to the listeners, starting with the most recently added one.
   *
   * @param event The event that needs to be processed by the spell checking system.
   */
  protected void fireSpellCheckEvent(SpellCheckEvent event) {
    // Index-based walk from the end of the list towards the start.
    for (int i = eventListeners.size() - 1; i >= 0; i--) {
      eventListeners.elementAt(i).spellingError(event);
    }
  }

  /**
   * This method clears the words that are currently being remembered as <code>Ignore All</code>
   * words and <code>Replace All</code> words.
   */
  public void reset() {
    ignoredWords = new Vector<String>();
    autoReplaceWords = new Hashtable<String, String>();
  }

  /**
   * Checks the text string.
   *
   * <p>Returns the corrected string.
   *
   * @param text The text that needs to be spell checked
   * @return The text after spell checking
   * @deprecated use checkSpelling(WordTokenizer)
   */
  @Deprecated
  public String checkString(String text) {
    StringWordTokenizer tokens = new StringWordTokenizer(text);
    checkSpelling(tokens);
    return tokens.getContext();
  }

  /**
   * Verifies if the word that is being spell checked contains at least a digit. Returns true if
   * this word contains a digit.
   *
   * @param word The word to analyze for digit.
   * @return true if the word contains at least a digit.
   */
  private static boolean isDigitWord(String word) {
    for (int i = word.length() - 1; i >= 0; i--) {
      if (Character.isDigit(word.charAt(i))) {
        return true;
      }
    }
    return false;
  }

  /**
   * Verifies if the word that is being spell checked contains an Internet address. The method
   * looks for a typical protocol or the habitual string at the start of the word (ignoring case):
   *
   * <ul>
   *   <li>http://
   *   <li>ftp://
   *   <li>https://
   *   <li>ftps://
   *   <li>www.
   * </ul>
   *
   * One limitation is that this method cannot currently recognize email addresses. Since the 'word'
   * that is passed in, may in fact contain the rest of the document to be checked, it is not (yet!)
   * a good idea to scan for the @ character.
   *
   * @param word The word to analyze for an Internet address.
   * @return true if this word looks like an Internet address.
   */
  public static final boolean isINETWord(String word) {
    // Only the leading characters are compared; the input (possibly the rest of the document)
    // is no longer lower-cased as a whole.
    return startsWithIgnoreCase(word, "http://")
        || startsWithIgnoreCase(word, "www.")
        || startsWithIgnoreCase(word, "ftp://")
        || startsWithIgnoreCase(word, "https://")
        || startsWithIgnoreCase(word, "ftps://");
  }

  /** Tells whether {@code text} begins with {@code prefix}, comparing characters without case. */
  private static boolean startsWithIgnoreCase(String text, String prefix) {
    return text.regionMatches(true, 0, prefix, 0, prefix.length());
  }

  /**
   * Verifies if the word that is being spell checked contains no lower case character.
   *
   * @param word The word to analyze for upper case characters
   * @return true if this word contains no lower case character
   */
  private static boolean isUpperCaseWord(String word) {
    for (int i = word.length() - 1; i >= 0; i--) {
      if (Character.isLowerCase(word.charAt(i))) {
        return false;
      }
    }
    return true;
  }

  /**
   * Verifies if the word that is being spell checked contains lower and upper cased characters.
   * Note that a phrase beginning with an upper cased character is not considered a mixed case word.
   *
   * @param word The word to analyze for mixed case characters
   * @param startsSentence True if this word is at the start of a sentence
   * @return true if this word contains mixed case characters; false for an empty word
   */
  private static boolean isMixedCaseWord(String word, boolean startsSentence) {
    int strLen = word.length();
    if (strLen == 0) {
      // Nothing to inspect; charAt(0) below would throw.
      return false;
    }
    boolean isUpper = Character.isUpperCase(word.charAt(0));
    // Ignore the first character if this word starts the sentence and the first
    // character was upper cased, since this is normal behaviour
    if (startsSentence && isUpper && strLen > 1) {
      isUpper = Character.isUpperCase(word.charAt(1));
    }
    // Look at every character after the first for one whose case differs from the reference.
    for (int i = strLen - 1; i > 0; i--) {
      char c = word.charAt(i);
      if (isUpper ? Character.isLowerCase(c) : Character.isUpperCase(c)) {
        return true;
      }
    }
    return false;
  }

  /**
   * This method will fire the spell check event and then handle the event action that has been
   * selected by the user.
   *
   * @param tokenizer the tokenizer on which word replacements are applied
   * @param event The event to handle
   * @return Returns true if the event action is to cancel the current spell checking, false if the
   *     spell checking should continue
   * @throws IllegalArgumentException if the event reports an action this method does not know
   */
  protected boolean fireAndHandleEvent(WordTokenizer tokenizer, SpellCheckEvent event) {
    fireSpellCheckEvent(event);
    String word = event.getInvalidWord();
    // Work out what to do in response to the event.
    switch (event.getAction()) {
      case SpellCheckEvent.INITIAL:
      case SpellCheckEvent.IGNORE:
        break;
      case SpellCheckEvent.IGNOREALL:
        ignoreAll(word);
        break;
      case SpellCheckEvent.REPLACE:
        tokenizer.replaceWord(event.getReplaceWord());
        break;
      case SpellCheckEvent.REPLACEALL:
        String replaceAllWord = event.getReplaceWord();
        // The first replacement registered for a word wins.
        if (!autoReplaceWords.containsKey(word)) {
          autoReplaceWords.put(word, replaceAllWord);
        }
        tokenizer.replaceWord(replaceAllWord);
        break;
      case SpellCheckEvent.ADDTODICT:
        String addWord = event.getReplaceWord();
        if (!addWord.equals(word)) {
          tokenizer.replaceWord(addWord);
        }
        userdictionary.addWord(addWord);
        break;
      case SpellCheckEvent.CANCEL:
        return true;
      default:
        throw new IllegalArgumentException("Unhandled case.");
    }
    return false;
  }

  /**
   * Adds a word to the list of ignored words
   *
   * @param word The text of the word to ignore
   */
  public void ignoreAll(String word) {
    if (!ignoredWords.contains(word)) {
      ignoredWords.addElement(word);
    }
  }

  /**
   * Adds a word to the user dictionary unless the user dictionary already accepts it.
   *
   * @param word The text of the word to add
   */
  public void addToDictionary(String word) {
    if (!userdictionary.isCorrect(word)) {
      userdictionary.addWord(word);
    }
  }

  /**
   * Indicates if a word is in the list of ignored words
   *
   * @param word The text of the word to check
   * @return true if the word is in the list of ignored words
   */
  public boolean isIgnored(String word) {
    return ignoredWords.contains(word);
  }

  /**
   * Verifies if the word to analyze is contained in dictionaries. The order of dictionary lookup
   * is:
   *
   * <ul>
   *   <li>The default user dictionary or the one set through {@link SpellChecker#setUserDictionary}
   *   <li>The dictionary specified at construction time, if any.
   *   <li>Any dictionary in the order they were added through {@link SpellChecker#addDictionary}
   * </ul>
   *
   * @param word The word to verify that its spelling is known.
   * @return true if the word is in a dictionary.
   */
  public boolean isCorrect(String word) {
    if (userdictionary.isCorrect(word)) {
      return true;
    }
    for (Enumeration<SpellDictionary> e = dictionaries.elements(); e.hasMoreElements(); ) {
      if (e.nextElement().isCorrect(word)) {
        return true;
      }
    }
    return false;
  }

  /**
   * Produces a list of suggested words after looking for suggestions in various dictionaries. The
   * suggestions are gathered in this order:
   *
   * <ul>
   *   <li>The dictionary specified at construction time, if any.
   *   <li>Any dictionary in the order they were added through {@link SpellChecker#addDictionary}
   *   <li>The default user dictionary or the one set through {@link SpellChecker#setUserDictionary}
   * </ul>
   *
   * <p>When the cache is active, only the suggestions of the added dictionaries are cached; the
   * user dictionary is queried on every call. Calling this method with a threshold different from
   * the previous one empties the cache.
   *
   * @param word The word for which we want to gather suggestions
   * @param threshold the cost value above which any suggestions are thrown away
   * @return the list of words suggested
   */
  @SuppressWarnings("unchecked")
  public List getSuggestions(String word, int threshold) {
    if (this.threshold != threshold && cache != null) {
      this.threshold = threshold;
      cache.clear();
    }

    ArrayList dictionarySuggestions = null;
    if (cache != null) {
      dictionarySuggestions = (ArrayList) cache.get(word);
    }

    if (dictionarySuggestions == null) {
      dictionarySuggestions = new ArrayList(50);
      for (Enumeration<SpellDictionary> e = dictionaries.elements(); e.hasMoreElements(); ) {
        SpellDictionary dictionary = e.nextElement();
        // The user dictionary is handled separately below so that it is never cached.
        if (dictionary != userdictionary) {
          VectorUtility.addAll(
              dictionarySuggestions, dictionary.getSuggestions(word, threshold), false);
        }
      }
      if (cache != null && cache.size() < cacheSize) {
        cache.put(word, dictionarySuggestions);
      }
    }

    // Work on a copy when caching is active: appending the user dictionary suggestions to the
    // list held by the cache would leak them into the cached entry.
    ArrayList suggestions =
        (cache != null) ? new ArrayList(dictionarySuggestions) : dictionarySuggestions;
    VectorUtility.addAll(suggestions, userdictionary.getSuggestions(word, threshold), false);
    suggestions.trimToSize();

    return suggestions;
  }

  /** Activates a cache with the maximum number of entries set to 300 */
  public void setCache() {
    setCache(300);
  }

  /**
   * Activates a cache with the specified size. Any previously cached entries are discarded.
   *
   * @param size max. number of cache entries (0 or a negative value disables the cache)
   */
  @SuppressWarnings("unchecked")
  public void setCache(int size) {
    if (size <= 0) {
      // A negative size could make the HashMap constructor throw; treat it as "disabled".
      cacheSize = 0;
      cache = null;
    } else {
      cacheSize = size;
      // Initial capacity of roughly 4/3 of the requested number of entries.
      cache = new HashMap((size + 2) / 3 * 4);
    }
  }

  /**
   * This method is called to check the spelling of the words that are returned by the
   * WordTokenizer.
   *
   * <p>For each invalid word the action listeners will be informed with a new SpellCheckEvent.
   * A correctly spelt word that should have been capitalized is reported the same way, with the
   * capitalized form as its only suggestion.
   *
   * @param tokenizer The media containing the text to analyze.
   * @return Either SPELLCHECK_OK, SPELLCHECK_CANCEL or the number of errors found. The number of
   *     errors are those that are found BEFORE any corrections are made.
   */
  @SuppressWarnings("unchecked")
  public final int checkSpelling(WordTokenizer tokenizer) {
    int errors = 0;
    boolean terminated = false;
    while (tokenizer.hasMoreWords() && !terminated) {
      String word = tokenizer.nextWord();

      if (isCorrect(word)) {
        // This is a correctly spelt word. However check its capitalisation.
        if (isSupposedToBeCapitalized(word, tokenizer)) {
          errors++;
          String capitalized = Character.toUpperCase(word.charAt(0)) + word.substring(1);
          Vector<Word> suggestion = new Vector<Word>();
          suggestion.addElement(new Word(capitalized, 0));
          SpellCheckEvent event = new BasicSpellCheckEvent(word, suggestion, tokenizer);
          terminated = fireAndHandleEvent(tokenizer, event);
        }
        continue;
      }

      // Misspelt words matching one of the enabled "ignore" categories produce no event.
      boolean ignoredByConfig =
          (config.getBoolean(Configuration.SPELL_IGNOREMIXEDCASE)
                  && isMixedCaseWord(word, tokenizer.isNewSentence()))
              || (config.getBoolean(Configuration.SPELL_IGNOREUPPERCASE) && isUpperCaseWord(word))
              || (config.getBoolean(Configuration.SPELL_IGNOREDIGITWORDS) && isDigitWord(word))
              || (config.getBoolean(Configuration.SPELL_IGNOREINTERNETADDRESSES)
                  && isINETWord(word));
      if (ignoredByConfig || isIgnored(word)) {
        continue;
      }

      errors++;
      if (autoReplaceWords.containsKey(word)) {
        // This word is being automatically replaced ("Replace All" chosen earlier).
        tokenizer.replaceWord(autoReplaceWords.get(word));
        continue;
      }

      List suggestions = getSuggestions(word, config.getInteger(Configuration.SPELL_THRESHOLD));
      if (capitalizeSuggestions(word, tokenizer)) {
        suggestions = makeSuggestionsCapitalized(suggestions);
      }
      SpellCheckEvent event = new BasicSpellCheckEvent(word, suggestions, tokenizer);
      terminated = fireAndHandleEvent(tokenizer, event);
    }

    if (terminated) {
      return SPELLCHECK_CANCEL;
    }
    return (errors == 0) ? SPELLCHECK_OK : errors;
  }

  /**
   * Upper-cases the first character of every suggestion in the given list, in place.
   *
   * <p>NOTE(review): the {@code Word} objects themselves are modified. If the same instances are
   * also referenced from the suggestion cache, the capitalization would persist for later
   * lookups of the same word. Confirm whether the dictionaries hand out fresh {@code Word}
   * objects on every call.
   *
   * @param suggestions list of {@code Word} suggestions
   * @return the same list instance that was passed in
   */
  @SuppressWarnings("unchecked")
  private List makeSuggestionsCapitalized(List suggestions) {
    for (Iterator iterator = suggestions.iterator(); iterator.hasNext(); ) {
      Word word = (Word) iterator.next();
      String suggestion = word.getWord();
      if (suggestion.length() == 0) {
        // Nothing to capitalize; charAt(0) would throw.
        continue;
      }
      word.setWord(Character.toUpperCase(suggestion.charAt(0)) + suggestion.substring(1));
    }
    return suggestions;
  }

  /**
   * Tells whether a word starting with a lower case character should have been capitalized
   * because it begins a sentence. Always false when {@code SPELL_IGNORESENTENCECAPITALIZATION} is
   * set, and false for an empty word.
   */
  private boolean isSupposedToBeCapitalized(String word, WordTokenizer wordTokenizer) {
    boolean configCapitalize = !config.getBoolean(Configuration.SPELL_IGNORESENTENCECAPITALIZATION);
    return configCapitalize
        && wordTokenizer.isNewSentence()
        && word.length() > 0
        && Character.isLowerCase(word.charAt(0));
  }

  /**
   * Decides whether the suggestions offered for {@code word} should be capitalized.
   *
   * <p>If SPELL_IGNORESENTENCECAPITALIZATION is set, suggestions are capitalized when the word
   * itself starts with an upper case character. Otherwise suggestions are capitalized only for
   * the first word in a sentence.
   */
  private boolean capitalizeSuggestions(String word, WordTokenizer wordTokenizer) {
    boolean configCapitalize = !config.getBoolean(Configuration.SPELL_IGNORESENTENCECAPITALIZATION);
    if (configCapitalize) {
      return wordTokenizer.isNewSentence();
    }
    // Only inspect the first character when it decides the result, and never on an empty word.
    return word.length() > 0 && Character.isUpperCase(word.charAt(0));
  }
}
Beispiel #5
0
 /**
  * Walks through every word delivered by the tokenizer and reports spelling problems.
  *
  * <p>A {@code SpellCheckEvent} is fired to the listeners for each misspelt word that is neither
  * skipped by one of the enabled "ignore" settings, nor on the ignore list, nor automatically
  * replaced. A correctly spelt word that should have been capitalized is reported as well, with
  * the capitalized form as its only suggestion.
  *
  * @param tokenizer The media containing the text to analyze.
  * @return Either SPELLCHECK_OK, SPELLCHECK_CANCEL or the number of errors found. The number of
  *     errors are those that are found BEFORE any corrections are made.
  */
 @SuppressWarnings("unchecked")
 public final int checkSpelling(WordTokenizer tokenizer) {
   int errorCount = 0;
   boolean cancelled = false;

   while (tokenizer.hasMoreWords() && !cancelled) {
     String current = tokenizer.nextWord();

     if (isCorrect(current)) {
       // Correct spelling; the only remaining check is sentence capitalisation.
       if (isSupposedToBeCapitalized(current, tokenizer)) {
         errorCount++;
         String capitalized = Character.toUpperCase(current.charAt(0)) + current.substring(1);
         Vector single = new Vector();
         single.addElement(new Word(capitalized, 0));
         cancelled =
             fireAndHandleEvent(tokenizer, new BasicSpellCheckEvent(current, single, tokenizer));
       }
       continue;
     }

     // Misspelt words that fall into an enabled "ignore" category raise no event.
     boolean skippedByConfig =
         (config.getBoolean(Configuration.SPELL_IGNOREMIXEDCASE)
                 && isMixedCaseWord(current, tokenizer.isNewSentence()))
             || (config.getBoolean(Configuration.SPELL_IGNOREUPPERCASE)
                 && isUpperCaseWord(current))
             || (config.getBoolean(Configuration.SPELL_IGNOREDIGITWORDS) && isDigitWord(current))
             || (config.getBoolean(Configuration.SPELL_IGNOREINTERNETADDRESSES)
                 && isINETWord(current));
     if (skippedByConfig || isIgnored(current)) {
       continue;
     }

     errorCount++;

     if (autoReplaceWords.containsKey(current)) {
       // A replacement was registered earlier for this word: apply it silently.
       tokenizer.replaceWord((String) autoReplaceWords.get(current));
       continue;
     }

     List proposals = getSuggestions(current, config.getInteger(Configuration.SPELL_THRESHOLD));
     if (capitalizeSuggestions(current, tokenizer)) {
       proposals = makeSuggestionsCapitalized(proposals);
     }
     cancelled =
         fireAndHandleEvent(tokenizer, new BasicSpellCheckEvent(current, proposals, tokenizer));
   }

   if (cancelled) {
     return SPELLCHECK_CANCEL;
   }
   return errorCount == 0 ? SPELLCHECK_OK : errorCount;
 }
Beispiel #6
0
 /**
  * Creates a spell checker that looks words up in the given dictionary and stores the given
  * threshold under {@code Configuration.SPELL_THRESHOLD}.
  *
  * @param dictionary the dictionary used for looking up words.
  * @param threshold the cost value above which any suggestions are thrown away
  */
 public SpellChecker(SpellDictionary dictionary, int threshold) {
   this(dictionary);
   // Record the caller's threshold in this checker's configuration object.
   this.config.setInteger(Configuration.SPELL_THRESHOLD, threshold);
 }