/**
 * Creates the rule match reported when a subject and its verb do not agree.
 *
 * <p>The match spans both tokens in their textual order. Suggestions are built in two
 * groups: first the subject combined with each alternative verb form, then each
 * alternative pronoun combined with the verb. Within every suggestion the two words
 * keep the order in which they appear in the text.
 *
 * @param subject the subject token
 * @param verb the verb token
 * @param expectedVerbPOS passed on to {@code getVerbSuggestions} to select verb forms
 * @return the rule match with its suggested replacements set
 */
private RuleMatch ruleMatchWrongVerbSubject(
    final AnalyzedTokenReadings subject,
    final AnalyzedTokenReadings verb,
    final String expectedVerbPOS) {
  final String subjectText = subject.getToken();
  final String verbText = verb.getToken();
  final String msg =
      "Möglicherweise fehlende grammatische Übereinstimmung zwischen Subjekt ("
          + subjectText
          + ") und Prädikat ("
          + verbText
          + ") bezüglich Person oder Numerus (Einzahl, Mehrzahl - Beispiel: "
          + "'ich sind' statt 'ich bin').";
  final List<String> suggestions = new ArrayList<>();
  final RuleMatch ruleMatch;
  if (subject.getStartPos() < verb.getStartPos()) {
    // Subject precedes the verb: "<subject> <verb>".
    ruleMatch =
        new RuleMatch(this, subject.getStartPos(), verb.getStartPos() + verbText.length(), msg);
    for (String correctedVerb : getVerbSuggestions(verb, expectedVerbPOS, false)) {
      suggestions.add(subjectText + " " + correctedVerb);
    }
    // The pronoun replaces the leading word, so it inherits the subject's capitalization.
    final boolean subjectCapitalized = Character.isUpperCase(subjectText.charAt(0));
    for (String correctedPronoun : getPronounSuggestions(verb, subjectCapitalized)) {
      suggestions.add(correctedPronoun + " " + verbText);
    }
  } else {
    // Verb precedes the subject: "<verb> <subject>".
    ruleMatch =
        new RuleMatch(this, verb.getStartPos(), subject.getStartPos() + subjectText.length(), msg);
    // The verb is the leading word here, so its capitalization is carried over.
    final boolean verbCapitalized = Character.isUpperCase(verbText.charAt(0));
    for (String correctedVerb : getVerbSuggestions(verb, expectedVerbPOS, verbCapitalized)) {
      suggestions.add(correctedVerb + " " + subjectText);
    }
    for (String correctedPronoun : getPronounSuggestions(verb, false)) {
      suggestions.add(verbText + " " + correctedPronoun);
    }
  }
  ruleMatch.setSuggestedReplacements(suggestions);
  return ruleMatch;
}
@Override public RuleMatch[] match(AnalyzedSentence sentence) throws IOException { final List<RuleMatch> ruleMatches = new ArrayList<>(); if (needsInit) { init(); } if (hunspellDict == null) { // some languages might not have a dictionary, be silent about it return toRuleMatchArray(ruleMatches); } final String[] tokens = tokenizeText(getSentenceTextWithoutUrlsAndImmunizedTokens(sentence)); // starting with the first token to skip the zero-length START_SENT int len = sentence.getTokens()[1].getStartPos(); for (int i = 0; i < tokens.length; i++) { String word = tokens[i]; if (ignoreWord(Arrays.asList(tokens), i) || ignoreWord(word)) { len += word.length() + 1; continue; } if (isMisspelled(word)) { final RuleMatch ruleMatch = new RuleMatch( this, len, len + word.length(), messages.getString("spelling"), messages.getString("desc_spelling_short")); final List<String> suggestions = getSuggestions(word); suggestions.addAll(0, getAdditionalTopSuggestions(suggestions, word)); suggestions.addAll(getAdditionalSuggestions(suggestions, word)); if (!suggestions.isEmpty()) { filterSuggestions(suggestions); ruleMatch.setSuggestedReplacements(suggestions); } ruleMatches.add(ruleMatch); } len += word.length() + 1; } return toRuleMatchArray(ruleMatches); }
@Override public RuleMatch[] match(final AnalyzedSentence sentence) { final List<RuleMatch> ruleMatches = new ArrayList<>(); final AnalyzedTokenReadings[] tokens = sentence.getTokensWithoutWhitespace(); RuleMatch prevRuleMatch = null; final Queue<AnalyzedTokenReadings> prevTokens = new ArrayBlockingQueue<>(MAX_TERMS); for (int i = 0; i < tokens.length + MAX_TERMS - 1; i++) { final AnalyzedTokenReadings token; // we need to extend the token list so we find matches at the end of the original list: if (i >= tokens.length) { token = new AnalyzedTokenReadings( new AnalyzedToken("", "", null), prevTokens.peek().getStartPos()); } else { token = tokens[i]; } if (i == 0) { addToQueue(token, prevTokens); continue; } final StringBuilder sb = new StringBuilder(); int j = 0; AnalyzedTokenReadings firstMatchToken = null; final List<String> stringsToCheck = new ArrayList<>(); final List<String> origStringsToCheck = new ArrayList<>(); // original upper/lowercase spelling final Map<String, AnalyzedTokenReadings> stringToToken = new HashMap<>(); for (AnalyzedTokenReadings atr : prevTokens) { if (j == 0) { firstMatchToken = atr; } sb.append(' '); sb.append(atr.getToken()); if (j >= 1) { final String stringToCheck = normalize(sb.toString()); stringsToCheck.add(stringToCheck); origStringsToCheck.add(sb.toString().trim()); if (!stringToToken.containsKey(stringToCheck)) { stringToToken.put(stringToCheck, atr); } } j++; } // iterate backwards over all potentially incorrect strings to make // sure we match longer strings first: for (int k = stringsToCheck.size() - 1; k >= 0; k--) { final String stringToCheck = stringsToCheck.get(k); final String origStringToCheck = origStringsToCheck.get(k); if (incorrectCompounds.contains(stringToCheck)) { final AnalyzedTokenReadings atr = stringToToken.get(stringToCheck); String msg = null; final List<String> replacement = new ArrayList<>(); if (!noDashSuggestion.contains(stringToCheck)) { replacement.add(origStringToCheck.replace(' ', '-')); msg = 
withHyphenMessage; } if (isNotAllUppercase(origStringToCheck) && !onlyDashSuggestion.contains(stringToCheck)) { replacement.add(mergeCompound(origStringToCheck)); msg = withoutHyphenMessage; } final String[] parts = stringToCheck.split(" "); if (parts.length > 0 && parts[0].length() == 1) { replacement.clear(); replacement.add(origStringToCheck.replace(' ', '-')); msg = withHyphenMessage; } else if (replacement.isEmpty() || replacement.size() == 2) { // isEmpty shouldn't happen msg = withOrWithoutHyphenMessage; } final RuleMatch ruleMatch = new RuleMatch( this, firstMatchToken.getStartPos(), atr.getStartPos() + atr.getToken().length(), msg, shortDesc); // avoid duplicate matches: if (prevRuleMatch != null && prevRuleMatch.getFromPos() == ruleMatch.getFromPos()) { prevRuleMatch = ruleMatch; break; } prevRuleMatch = ruleMatch; ruleMatch.setSuggestedReplacements(replacement); ruleMatches.add(ruleMatch); break; } } addToQueue(token, prevTokens); } return toRuleMatchArray(ruleMatches); }
/**
 * Sets exactly one suggested replacement by wrapping it in a new list and handing that
 * list to {@code setSuggestedReplacements}.
 *
 * @param replacement the replacement text; may be empty but must not be {@code null}
 * @throws NullPointerException if {@code replacement} is {@code null}
 * @see #getSuggestedReplacements()
 */
public void setSuggestedReplacement(final String replacement) {
  final List<String> singleReplacement = new ArrayList<>();
  singleReplacement.add(
      Objects.requireNonNull(replacement, "replacement may be empty but not null"));
  setSuggestedReplacements(singleReplacement);
}
@Override public RuleMatch[] match(AnalyzedSentence sentence) { List<RuleMatch> ruleMatches = new ArrayList<>(); AnalyzedTokenReadings[] tokens = sentence.getTokensWithoutWhitespace(); RuleMatch prevRuleMatch = null; Queue<AnalyzedTokenReadings> prevTokens = new ArrayBlockingQueue<>(MAX_TERMS); for (int i = 0; i < tokens.length + MAX_TERMS - 1; i++) { AnalyzedTokenReadings token; // we need to extend the token list so we find matches at the end of the original list: if (i >= tokens.length) { token = new AnalyzedTokenReadings( new AnalyzedToken("", "", null), prevTokens.peek().getStartPos()); } else { token = tokens[i]; } if (i == 0) { addToQueue(token, prevTokens); continue; } if (token.isImmunized()) { continue; } AnalyzedTokenReadings firstMatchToken = prevTokens.peek(); List<String> stringsToCheck = new ArrayList<>(); List<String> origStringsToCheck = new ArrayList<>(); // original upper/lowercase spelling Map<String, AnalyzedTokenReadings> stringToToken = getStringToTokenMap(prevTokens, stringsToCheck, origStringsToCheck); // iterate backwards over all potentially incorrect strings to make // sure we match longer strings first: for (int k = stringsToCheck.size() - 1; k >= 0; k--) { String stringToCheck = stringsToCheck.get(k); String origStringToCheck = origStringsToCheck.get(k); if (getCompoundRuleData().getIncorrectCompounds().contains(stringToCheck)) { AnalyzedTokenReadings atr = stringToToken.get(stringToCheck); String msg = null; List<String> replacement = new ArrayList<>(); if (!getCompoundRuleData().getNoDashSuggestion().contains(stringToCheck)) { replacement.add(origStringToCheck.replace(' ', '-')); msg = withHyphenMessage; } if (isNotAllUppercase(origStringToCheck) && !getCompoundRuleData().getOnlyDashSuggestion().contains(stringToCheck)) { replacement.add(mergeCompound(origStringToCheck)); msg = withoutHyphenMessage; } String[] parts = stringToCheck.split(" "); if (parts.length > 0 && parts[0].length() == 1) { replacement.clear(); 
replacement.add(origStringToCheck.replace(' ', '-')); msg = withHyphenMessage; } else if (replacement.isEmpty() || replacement.size() == 2) { // isEmpty shouldn't happen msg = withOrWithoutHyphenMessage; } RuleMatch ruleMatch = new RuleMatch(this, firstMatchToken.getStartPos(), atr.getEndPos(), msg, shortDesc); ruleMatch.setSuggestedReplacements(replacement); // avoid duplicate matches: if (prevRuleMatch != null && prevRuleMatch.getFromPos() == ruleMatch.getFromPos()) { prevRuleMatch = ruleMatch; break; } prevRuleMatch = ruleMatch; ruleMatches.add(ruleMatch); break; } } addToQueue(token, prevTokens); } return toRuleMatchArray(ruleMatches); }