public void setOptions(int options) { // System.out.println("in " + getClass().getName() + ".setOptions(" + // Integer.toBinaryString(options) + ")"); Configuration config = checker.getConfiguration(); for (int i = 1; i <= 0x20000; i = i << 1) { String key = (String) configmap.get(new Integer(i)); if (key != null) config.setBoolean(key, (options & i) > 0); } }
private boolean capitalizeSuggestions(String word, WordTokenizer wordTokenizer) { // if SPELL_IGNORESENTENCECAPITALIZATION and the initial word is capitalized, suggestions should // also be capitalized // if !SPELL_IGNORESENTENCECAPITALIZATION, capitalize suggestions only for the first word in a // sentence boolean configCapitalize = !config.getBoolean(Configuration.SPELL_IGNORESENTENCECAPITALIZATION); boolean uppercase = Character.isUpperCase(word.charAt(0)); return (configCapitalize && wordTokenizer.isNewSentence()) || (!configCapitalize && uppercase); }
/**
 * Tells whether {@code word} should have been capitalized but is not.
 *
 * <p>This is the case only when {@code SPELL_IGNORESENTENCECAPITALIZATION} is not set, the
 * tokenizer reports the start of a new sentence, and the first character of {@code word} is
 * lower case.
 *
 * @param word the word to inspect
 * @param wordTokenizer the tokenizer that supplied {@code word}
 * @return true if the word starts a sentence with a lower case character and sentence
 *     capitalization is being checked
 */
private boolean isSupposedToBeCapitalized(String word, WordTokenizer wordTokenizer) {
  if (config.getBoolean(Configuration.SPELL_IGNORESENTENCECAPITALIZATION)) {
    return false;
  }
  if (!wordTokenizer.isNewSentence()) {
    return false;
  }
  return Character.isLowerCase(word.charAt(0));
}
/** * This is the main class for spell checking (using the new event based spell checking). * * <p>By default, the class makes a user dictionary to accumulate added words. Since this user * directory has no file assign to persist added words, they will be retained for the duration of * the spell checker instance. If you set a user dictionary like {@link * com.swabunga.spell.engine.SpellDictionaryHashMap SpellDictionaryHashMap} to persist the added * word, the user dictionary will have the possibility to grow and be available across differents * invocations of the spell checker. * * @author Jason Height ([email protected]) 19 June 2002 */ public class SpellChecker { /** Flag indicating that the Spell Check completed without any errors present */ public static final int SPELLCHECK_OK = -1; /** Flag indicating that the Spell Check completed due to user cancellation */ public static final int SPELLCHECK_CANCEL = -2; @SuppressWarnings("unchecked") private final Vector eventListeners = new Vector(); @SuppressWarnings("unchecked") private final Vector dictionaries = new Vector(); private SpellDictionary userdictionary; private final Configuration config = Configuration.getConfiguration(); /** This variable holds all of the words that are to be always ignored */ @SuppressWarnings("unchecked") private Vector ignoredWords = new Vector(); @SuppressWarnings("unchecked") private Hashtable autoReplaceWords = new Hashtable(); // added caching - bd // For cached operation a separate user dictionary is required @SuppressWarnings("unchecked") private Map cache; private int threshold = 0; private int cacheSize = 0; /** Constructs the SpellChecker. */ public SpellChecker() { try { userdictionary = new SpellDictionaryHashMap(); } catch (IOException e) { throw new RuntimeException( "this exception should never happen because we are using null phonetic file"); } } /** * Constructs the SpellChecker. 
The default threshold is used * * @param dictionary The dictionary used for looking up words. */ public SpellChecker(SpellDictionary dictionary) { this(); addDictionary(dictionary); } /** * Constructs the SpellChecker with a threshold * * @param dictionary the dictionary used for looking up words. * @param threshold the cost value above which any suggestions are thrown away */ public SpellChecker(SpellDictionary dictionary, int threshold) { this(dictionary); config.setInteger(Configuration.SPELL_THRESHOLD, threshold); } /** * Accumulates a dictionary at the end of the dictionaries list used for looking up words. Adding * a dictionary give the flexibility to assign the base language dictionary, then a more * technical, then... * * @param dictionary the dictionary to add at the end of the dictionary list. */ @SuppressWarnings("unchecked") public void addDictionary(SpellDictionary dictionary) { if (dictionary == null) { throw new IllegalArgumentException("dictionary must be non-null"); } this.dictionaries.addElement(dictionary); } /** * Registers the user dictionary to which words are added. * * @param dictionary the dictionary to use when the user specify a new word to add. */ public void setUserDictionary(SpellDictionary dictionary) { userdictionary = dictionary; } /** * Supply the instance of the configuration holding the spell checking engine parameters. * * @return Current Configuration */ public Configuration getConfiguration() { return config; } /** * Adds a SpellCheckListener to the listeners list. * * @param listener The feature to be added to the SpellCheckListener attribute */ @SuppressWarnings("unchecked") public void addSpellCheckListener(SpellCheckListener listener) { eventListeners.addElement(listener); } /** * Removes a SpellCheckListener from the listeners list. * * @param listener The listener to be removed from the listeners list. 
*/ public void removeSpellCheckListener(SpellCheckListener listener) { eventListeners.removeElement(listener); } /** * Fires off a spell check event to the listeners. * * @param event The event that need to be processed by the spell checking system. */ protected void fireSpellCheckEvent(SpellCheckEvent event) { for (int i = eventListeners.size() - 1; i >= 0; i--) { ((SpellCheckListener) eventListeners.elementAt(i)).spellingError(event); } } /** * This method clears the words that are currently being remembered as <code>Ignore All</code> * words and <code>Replace All</code> words. */ @SuppressWarnings("unchecked") public void reset() { ignoredWords = new Vector(); autoReplaceWords = new Hashtable(); } /** * Checks the text string. * * <p>Returns the corrected string. * * @param text The text that need to be spelled checked * @return The text after spell checking * @deprecated use checkSpelling(WordTokenizer) */ @Deprecated public String checkString(String text) { StringWordTokenizer tokens = new StringWordTokenizer(text); checkSpelling(tokens); return tokens.getContext(); } /** * Verifies if the word that is being spell checked contains at least a digit. Returns true if * this word contains a digit. * * @param word The word to analyze for digit. * @return true if the word contains at least a digit. */ private static final boolean isDigitWord(String word) { for (int i = word.length() - 1; i >= 0; i--) { if (Character.isDigit(word.charAt(i))) { return true; } } return false; } /** * Verifies if the word that is being spell checked contains an Internet address. The method look * for typical protocol or the habitual string in the word: * * <ul> * <li>http:// * <li>ftp:// * <li>https:// * <li>ftps:// * <li>www. * </ul> * * One limitation is that this method cannot currently recognize email addresses. Since the 'word' * that is passed in, may in fact contain the rest of the document to be checked, it is not (yet!) * a good idea to scan for the @ character. 
* * @param word The word to analyze for an Internet address. * @return true if this word looks like an Internet address. */ public static final boolean isINETWord(String word) { String lowerCaseWord = word.toLowerCase(); return lowerCaseWord.startsWith("http://") || lowerCaseWord.startsWith("www.") || lowerCaseWord.startsWith("ftp://") || lowerCaseWord.startsWith("https://") || lowerCaseWord.startsWith("ftps://"); } /** * Verifies if the word that is being spell checked contains all uppercases characters. * * @param word The word to analyze for uppercases characters * @return true if this word contains all upper case characters */ private static final boolean isUpperCaseWord(String word) { for (int i = word.length() - 1; i >= 0; i--) { if (Character.isLowerCase(word.charAt(i))) { return false; } } return true; } /** * Verifies if the word that is being spell checked contains lower and upper cased characters. * Note that a phrase beginning with an upper cased character is not considered a mixed case word. 
* * @param word The word to analyze for mixed cases characters * @param startsSentence True if this word is at the start of a sentence * @return true if this word contains mixed case characters */ private static final boolean isMixedCaseWord(String word, boolean startsSentence) { int strLen = word.length(); boolean isUpper = Character.isUpperCase(word.charAt(0)); // Ignore the first character if this word starts the sentence and the first // character was upper cased, since this is normal behaviour if ((startsSentence) && isUpper && (strLen > 1)) isUpper = Character.isUpperCase(word.charAt(1)); if (isUpper) { for (int i = word.length() - 1; i > 0; i--) { if (Character.isLowerCase(word.charAt(i))) { return true; } } } else { for (int i = word.length() - 1; i > 0; i--) { if (Character.isUpperCase(word.charAt(i))) { return true; } } } return false; } /** * This method will fire the spell check event and then handle the event action that has been * selected by the user. * * @param tokenizer Description of the Parameter * @param event The event to handle * @return Returns true if the event action is to cancel the current spell checking, false if the * spell checking should continue */ @SuppressWarnings("unchecked") protected boolean fireAndHandleEvent(WordTokenizer tokenizer, SpellCheckEvent event) { fireSpellCheckEvent(event); String word = event.getInvalidWord(); // Work out what to do in response to the event. 
switch (event.getAction()) { case SpellCheckEvent.INITIAL: break; case SpellCheckEvent.IGNORE: break; case SpellCheckEvent.IGNOREALL: ignoreAll(word); break; case SpellCheckEvent.REPLACE: tokenizer.replaceWord(event.getReplaceWord()); break; case SpellCheckEvent.REPLACEALL: String replaceAllWord = event.getReplaceWord(); if (!autoReplaceWords.containsKey(word)) { autoReplaceWords.put(word, replaceAllWord); } tokenizer.replaceWord(replaceAllWord); break; case SpellCheckEvent.ADDTODICT: String addWord = event.getReplaceWord(); if (!addWord.equals(word)) tokenizer.replaceWord(addWord); userdictionary.addWord(addWord); break; case SpellCheckEvent.CANCEL: return true; default: throw new IllegalArgumentException("Unhandled case."); } return false; } /** * Adds a word to the list of ignored words * * @param word The text of the word to ignore */ @SuppressWarnings("unchecked") public void ignoreAll(String word) { if (!ignoredWords.contains(word)) { ignoredWords.addElement(word); } } /** * Adds a word to the user dictionary * * @param word The text of the word to add */ public void addToDictionary(String word) { if (!userdictionary.isCorrect(word)) userdictionary.addWord(word); } /** * Indicates if a word is in the list of ignored words * * @param word The text of the word check */ public boolean isIgnored(String word) { return ignoredWords.contains(word); } /** * Verifies if the word to analyze is contained in dictionaries. The order of dictionary lookup * is: * * <ul> * <li>The default user dictionary or the one set through {@link SpellChecker#setUserDictionary} * <li>The dictionary specified at construction time, if any. * <li>Any dictionary in the order they were added through {@link SpellChecker#addDictionary} * </ul> * * @param word The word to verify that it's spelling is known. * @return true if the word is in a dictionary. 
*/ @SuppressWarnings("unchecked") public boolean isCorrect(String word) { if (userdictionary.isCorrect(word)) return true; for (Enumeration e = dictionaries.elements(); e.hasMoreElements(); ) { SpellDictionary dictionary = (SpellDictionary) e.nextElement(); if (dictionary.isCorrect(word)) return true; } return false; } /** * Produces a list of suggested word after looking for suggestions in various dictionaries. The * order of dictionary lookup is: * * <ul> * <li>The default user dictionary or the one set through {@link SpellChecker#setUserDictionary} * <li>The dictionary specified at construction time, if any. * <li>Any dictionary in the order they were added through {@link SpellChecker#addDictionary} * </ul> * * @param word The word for which we want to gather suggestions * @param threshold the cost value above which any suggestions are thrown away * @return the list of words suggested */ @SuppressWarnings("unchecked") public List getSuggestions(String word, int threshold) { if (this.threshold != threshold && cache != null) { this.threshold = threshold; cache.clear(); } ArrayList suggestions = null; if (cache != null) suggestions = (ArrayList) cache.get(word); if (suggestions == null) { suggestions = new ArrayList(50); for (Enumeration e = dictionaries.elements(); e.hasMoreElements(); ) { SpellDictionary dictionary = (SpellDictionary) e.nextElement(); if (dictionary != userdictionary) VectorUtility.addAll(suggestions, dictionary.getSuggestions(word, threshold), false); } if (cache != null && cache.size() < cacheSize) cache.put(word, suggestions); } VectorUtility.addAll(suggestions, userdictionary.getSuggestions(word, threshold), false); suggestions.trimToSize(); return suggestions; } /** Activates a cache with the maximum number of entries set to 300 */ public void setCache() { setCache(300); } /** * Activates a cache with specified size * * @param size - max. 
number of cache entries (0 to disable chache) */ @SuppressWarnings("unchecked") public void setCache(int size) { cacheSize = size; if (size == 0) cache = null; else cache = new HashMap((size + 2) / 3 * 4); } /** * This method is called to check the spelling of the words that are returned by the * WordTokenizer. * * <p>For each invalid word the action listeners will be informed with a new SpellCheckEvent. * * <p> * * @param tokenizer The media containing the text to analyze. * @return Either SPELLCHECK_OK, SPELLCHECK_CANCEL or the number of errors found. The number of * errors are those that are found BEFORE any corrections are made. */ @SuppressWarnings("unchecked") public final int checkSpelling(WordTokenizer tokenizer) { int errors = 0; boolean terminated = false; // Keep track of the previous word // String previousWord = null; while (tokenizer.hasMoreWords() && !terminated) { String word = tokenizer.nextWord(); // Check the spelling of the word if (!isCorrect(word)) { if ((config.getBoolean(Configuration.SPELL_IGNOREMIXEDCASE) && isMixedCaseWord(word, tokenizer.isNewSentence())) || (config.getBoolean(Configuration.SPELL_IGNOREUPPERCASE) && isUpperCaseWord(word)) || (config.getBoolean(Configuration.SPELL_IGNOREDIGITWORDS) && isDigitWord(word)) || (config.getBoolean(Configuration.SPELL_IGNOREINTERNETADDRESSES) && isINETWord(word))) { // Null event. Since we are ignoring this word due // to one of the above cases. } else { // We cant ignore this misspelt word // For this invalid word are we ignoring the misspelling? if (!isIgnored(word)) { errors++; // Is this word being automagically replaced if (autoReplaceWords.containsKey(word)) { tokenizer.replaceWord((String) autoReplaceWords.get(word)); } else { // JMH Need to somehow capitalise the suggestions if // ignoreSentenceCapitalisation is not set to true // Fire the event. 
List suggestions = getSuggestions(word, config.getInteger(Configuration.SPELL_THRESHOLD)); if (capitalizeSuggestions(word, tokenizer)) suggestions = makeSuggestionsCapitalized(suggestions); SpellCheckEvent event = new BasicSpellCheckEvent(word, suggestions, tokenizer); terminated = fireAndHandleEvent(tokenizer, event); } } } } else { // This is a correctly spelt word. However perform some extra checks /* * JMH TBD //Check for multiple words * if (!ignoreMultipleWords &&) { * } */ // Check for capitalisation if (isSupposedToBeCapitalized(word, tokenizer)) { errors++; StringBuffer buf = new StringBuffer(word); buf.setCharAt(0, Character.toUpperCase(word.charAt(0))); Vector suggestion = new Vector(); suggestion.addElement(new Word(buf.toString(), 0)); SpellCheckEvent event = new BasicSpellCheckEvent(word, suggestion, tokenizer); terminated = fireAndHandleEvent(tokenizer, event); } } } if (terminated) return SPELLCHECK_CANCEL; else if (errors == 0) return SPELLCHECK_OK; else return errors; } @SuppressWarnings("unchecked") private List makeSuggestionsCapitalized(List suggestions) { Iterator iterator = suggestions.iterator(); while (iterator.hasNext()) { Word word = (Word) iterator.next(); String suggestion = word.getWord(); StringBuffer stringBuffer = new StringBuffer(suggestion); stringBuffer.setCharAt(0, Character.toUpperCase(suggestion.charAt(0))); word.setWord(stringBuffer.toString()); } return suggestions; } private boolean isSupposedToBeCapitalized(String word, WordTokenizer wordTokenizer) { boolean configCapitalize = !config.getBoolean(Configuration.SPELL_IGNORESENTENCECAPITALIZATION); return configCapitalize && wordTokenizer.isNewSentence() && Character.isLowerCase(word.charAt(0)); } private boolean capitalizeSuggestions(String word, WordTokenizer wordTokenizer) { // if SPELL_IGNORESENTENCECAPITALIZATION and the initial word is capitalized, suggestions should // also be capitalized // if !SPELL_IGNORESENTENCECAPITALIZATION, capitalize suggestions only for the 
first word in a // sentence boolean configCapitalize = !config.getBoolean(Configuration.SPELL_IGNORESENTENCECAPITALIZATION); boolean uppercase = Character.isUpperCase(word.charAt(0)); return (configCapitalize && wordTokenizer.isNewSentence()) || (!configCapitalize && uppercase); } }
/** * This method is called to check the spelling of the words that are returned by the * WordTokenizer. * * <p>For each invalid word the action listeners will be informed with a new SpellCheckEvent. * * <p> * * @param tokenizer The media containing the text to analyze. * @return Either SPELLCHECK_OK, SPELLCHECK_CANCEL or the number of errors found. The number of * errors are those that are found BEFORE any corrections are made. */ @SuppressWarnings("unchecked") public final int checkSpelling(WordTokenizer tokenizer) { int errors = 0; boolean terminated = false; // Keep track of the previous word // String previousWord = null; while (tokenizer.hasMoreWords() && !terminated) { String word = tokenizer.nextWord(); // Check the spelling of the word if (!isCorrect(word)) { if ((config.getBoolean(Configuration.SPELL_IGNOREMIXEDCASE) && isMixedCaseWord(word, tokenizer.isNewSentence())) || (config.getBoolean(Configuration.SPELL_IGNOREUPPERCASE) && isUpperCaseWord(word)) || (config.getBoolean(Configuration.SPELL_IGNOREDIGITWORDS) && isDigitWord(word)) || (config.getBoolean(Configuration.SPELL_IGNOREINTERNETADDRESSES) && isINETWord(word))) { // Null event. Since we are ignoring this word due // to one of the above cases. } else { // We cant ignore this misspelt word // For this invalid word are we ignoring the misspelling? if (!isIgnored(word)) { errors++; // Is this word being automagically replaced if (autoReplaceWords.containsKey(word)) { tokenizer.replaceWord((String) autoReplaceWords.get(word)); } else { // JMH Need to somehow capitalise the suggestions if // ignoreSentenceCapitalisation is not set to true // Fire the event. 
List suggestions = getSuggestions(word, config.getInteger(Configuration.SPELL_THRESHOLD)); if (capitalizeSuggestions(word, tokenizer)) suggestions = makeSuggestionsCapitalized(suggestions); SpellCheckEvent event = new BasicSpellCheckEvent(word, suggestions, tokenizer); terminated = fireAndHandleEvent(tokenizer, event); } } } } else { // This is a correctly spelt word. However perform some extra checks /* * JMH TBD //Check for multiple words * if (!ignoreMultipleWords &&) { * } */ // Check for capitalisation if (isSupposedToBeCapitalized(word, tokenizer)) { errors++; StringBuffer buf = new StringBuffer(word); buf.setCharAt(0, Character.toUpperCase(word.charAt(0))); Vector suggestion = new Vector(); suggestion.addElement(new Word(buf.toString(), 0)); SpellCheckEvent event = new BasicSpellCheckEvent(word, suggestion, tokenizer); terminated = fireAndHandleEvent(tokenizer, event); } } } if (terminated) return SPELLCHECK_CANCEL; else if (errors == 0) return SPELLCHECK_OK; else return errors; }
/**
 * Creates a SpellChecker for the given dictionary and stores the supplied threshold in the
 * configuration under {@code Configuration.SPELL_THRESHOLD}.
 *
 * @param dictionary the dictionary used for looking up words.
 * @param threshold the cost value above which any suggestions are thrown away
 */
public SpellChecker(SpellDictionary dictionary, int threshold) {
  this(dictionary);
  // Record the threshold once the single-dictionary constructor has finished.
  this.config.setInteger(Configuration.SPELL_THRESHOLD, threshold);
}