/**
 * Scans the sentence for a form of "spiegeln" that is later followed by "wieder" and reports
 * that "wieder" token, suggesting "wider" instead.
 *
 * <p>No match is produced when "wider" has already been seen after the verb, or when one of
 * the next two tokens is exactly "wider" (this look-ahead only runs if at least three tokens
 * remain from the current position). After a match the internal state is reset, so a sentence
 * can yield more than one match.
 *
 * @param sentence the analyzed sentence; all tokens (including whitespace tokens) are read
 * @return the matches found, converted via {@code toRuleMatchArray}
 */
@Override
public RuleMatch[] match(AnalyzedSentence sentence) {
  final List<RuleMatch> ruleMatches = new ArrayList<>();
  final AnalyzedTokenReadings[] tokens = sentence.getTokens();
  boolean sawVerb = false;
  boolean sawWieder = false;
  boolean sawWider = false;

  for (int idx = 0; idx < tokens.length; idx++) {
    final String word = tokens[idx].getToken();
    // Tokens consisting only of whitespace neither change the state nor trigger a match.
    if (word.trim().isEmpty()) {
      continue;
    }

    final boolean isVerbForm =
        word.equalsIgnoreCase("spiegelt")
            || word.equalsIgnoreCase("spiegeln")
            || word.equalsIgnoreCase("spiegelte")
            || word.equalsIgnoreCase("spiegelten")
            || word.equalsIgnoreCase("spiegelst");
    if (isVerbForm) {
      sawVerb = true;
    } else if (sawVerb) {
      // "wieder"/"wider" only count once a verb form has been seen.
      if (word.equalsIgnoreCase("wieder")) {
        sawWieder = true;
      } else if (word.equalsIgnoreCase("wider")) {
        sawWider = true;
      }
    }

    if (!sawVerb || !sawWieder || sawWider) {
      continue;
    }

    // Look-ahead is case-sensitive and is skipped entirely unless index idx + 2 exists.
    final boolean widerFollows =
        tokens.length > idx + 2
            && (tokens[idx + 1].getToken().equals("wider")
                || tokens[idx + 2].getToken().equals("wider"));
    if (widerFollows) {
      continue;
    }

    final String shortMsg = "'wider' in 'widerspiegeln' wird mit 'i' geschrieben";
    final String msg =
        "'wider' in 'widerspiegeln' wird mit 'i' statt mit 'ie' "
            + "geschrieben, z.B. 'Das spiegelt die Situation gut wider.'";
    final int from = tokens[idx].getStartPos();
    final RuleMatch hit = new RuleMatch(this, from, from + word.length(), msg, shortMsg);
    hit.setSuggestedReplacement("wider");
    ruleMatches.add(hit);

    // Start over so that a later verb/"wieder" pair in the same sentence is detected as well.
    sawVerb = false;
    sawWieder = false;
    sawWider = false;
  }
  return toRuleMatchArray(ruleMatches);
}
@Override public RuleMatch[] match(final AnalyzedSentence sentence) { final List<RuleMatch> ruleMatches = new ArrayList<>(); final AnalyzedTokenReadings[] tokens = sentence.getTokensWithoutWhitespace(); String prevToken = null; for (int i = 0; i < tokens.length; i++) { final String token = tokens[i].getToken(); if (tokens[i].isWhitespace()) { // ignore continue; } if (prevToken != null && !prevToken.equals("-") && !prevToken.contains("--") && !prevToken.contains( "–-") // first char is some special kind of dash, found in Wikipedia && prevToken.endsWith("-")) { final char firstChar = token.charAt(0); if (Character.isUpperCase(firstChar)) { final String msg = "Möglicherweise fehlt ein 'und' oder ein Komma, oder es wurde nach dem Wort " + "ein überflüssiges Leerzeichen eingefügt. Eventuell haben Sie auch versehentlich einen Bindestrich statt eines Punktes eingefügt."; final RuleMatch ruleMatch = new RuleMatch( this, tokens[i - 1].getStartPos(), tokens[i - 1].getStartPos() + prevToken.length() + 1, msg); ruleMatch.setSuggestedReplacement(tokens[i - 1].getToken()); ruleMatches.add(ruleMatch); } } prevToken = token; } return toRuleMatchArray(ruleMatches); }
@Override public final RuleMatch[] match(final AnalyzedSentence text) { final List<RuleMatch> ruleMatches = new ArrayList<RuleMatch>(); final AnalyzedTokenReadings[] tokens = text.getTokensWithoutWhitespace(); if (tokens.length < 2) { return toRuleMatchArray(ruleMatches); } int matchTokenPos = 1; // 0 = SENT_START final String firstToken = tokens[matchTokenPos].getToken(); String secondToken = null; String thirdToken = null; // ignore quote characters: if (tokens.length >= 3 && ("'".equals(firstToken) || "\"".equals(firstToken) || "„".equals(firstToken))) { matchTokenPos = 2; secondToken = tokens[matchTokenPos].getToken(); } final String firstDutchToken = dutchSpecialCase(firstToken, secondToken, tokens); if (firstDutchToken != null) { thirdToken = firstDutchToken; matchTokenPos = 3; } String checkToken = firstToken; if (thirdToken != null) { checkToken = thirdToken; } else if (secondToken != null) { checkToken = secondToken; } String lastToken = tokens[tokens.length - 1].getToken(); if (lastToken.matches("[ \"'„»«“]") && tokens.length >= 2) { // ignore trailing whitespace or quote lastToken = tokens[tokens.length - 2].getToken(); } boolean preventError = false; // TODO: why do only *these* languages have that special case? final String langCode = language.getShortName(); final boolean languageHasSpecialCases = langCode.equals("ru") || langCode.equals("pl") || langCode.equals("uk") || langCode.equals("be") || langCode.equals(Locale.ENGLISH.getLanguage()) || langCode.equals(Locale.ITALIAN.getLanguage()) || langCode.equals(Locale.GERMAN.getLanguage()); if (languageHasSpecialCases) { // fix for lists; note - this will not always work for the last point in OOo, // as OOo might serve paragraphs in any order. 
if (";".equals(lastParagraphString) || ";".equals(lastToken) || ",".equals(lastParagraphString) || ",".equals(lastToken)) { preventError = true; } // fix for words in table (not sentences); note - this will not always work for the last point // in OOo, // as OOo might serve paragraphs in any order. if (!lastToken.matches("[.?!…]")) { preventError = true; } } lastParagraphString = lastToken; if (checkToken.length() > 0) { final char firstChar = checkToken.charAt(0); if (!preventError && Character.isLowerCase(firstChar)) { final RuleMatch ruleMatch = new RuleMatch( this, tokens[matchTokenPos].getStartPos(), tokens[matchTokenPos].getStartPos() + tokens[matchTokenPos].getToken().length(), messages.getString("incorrect_case")); ruleMatch.setSuggestedReplacement(StringTools.uppercaseFirstChar(checkToken)); ruleMatches.add(ruleMatch); } } return toRuleMatchArray(ruleMatches); }
/**
 * Reports a sentence whose first relevant token starts with a lowercase letter and suggests
 * the same token with an uppercase first character.
 *
 * <p>The token that is checked is, in order of preference: the value returned by
 * {@code dutchSpecialCase} (if non-null), the token after a leading quote character, or the
 * first token after the sentence-start marker. The error is suppressed when any of these
 * holds: the remembered ending of the previous call is "," or ";"; neither
 * {@code SENTENCE_END1} matches that remembered ending nor {@code SENTENCE_END2} matches this
 * sentence's last token; the token at the match position matches {@code NUMERALS_EN} and is
 * followed by "." or ")"; or {@code isUrl} accepts the checked token.
 *
 * <p>Side effect: the field {@code lastParagraphString} is overwritten with this sentence's
 * last token (ignoring one trailing token matched by {@code WHITESPACE_OR_QUOTE}) whenever the
 * sentence has at least two tokens.
 *
 * @param sentence the analyzed sentence; only its non-whitespace tokens are inspected
 * @return zero or one match, converted via {@code toRuleMatchArray}
 */
@Override
public final RuleMatch[] match(final AnalyzedSentence sentence) {
  final List<RuleMatch> ruleMatches = new ArrayList<>();
  final AnalyzedTokenReadings[] tokens = sentence.getTokensWithoutWhitespace();
  if (tokens.length < 2) {
    return toRuleMatchArray(ruleMatches);
  }

  // Index 0 is SENT_START, so the first real token sits at index 1.
  int matchTokenPos = 1;
  final String firstToken = tokens[1].getToken();

  // Skip a leading quote character if there is a token after it.
  String secondToken = null;
  final boolean startsWithQuote =
      "'".equals(firstToken) || "\"".equals(firstToken) || "„".equals(firstToken);
  if (tokens.length >= 3 && startsWithQuote) {
    matchTokenPos = 2;
    secondToken = tokens[2].getToken();
  }

  // NOTE(review): dutchSpecialCase is defined elsewhere; a non-null result replaces the token
  // to check and moves the match position to index 3.
  final String firstDutchToken = dutchSpecialCase(firstToken, secondToken, tokens);
  if (firstDutchToken != null) {
    matchTokenPos = 3;
  }

  final String checkToken;
  if (firstDutchToken != null) {
    checkToken = firstDutchToken;
  } else if (secondToken != null) {
    checkToken = secondToken;
  } else {
    checkToken = firstToken;
  }

  // Ignore a trailing whitespace or quote token; tokens.length >= 2 is guaranteed by the
  // early return above, so the second-to-last token always exists.
  String lastToken = tokens[tokens.length - 1].getToken();
  if (WHITESPACE_OR_QUOTE.matcher(lastToken).matches()) {
    lastToken = tokens[tokens.length - 2].getToken();
  }

  // Both checks below read the ending remembered from the previous call, so they must run
  // before the field is overwritten.
  final boolean afterListSeparator =
      lastParagraphString.equals(",") || lastParagraphString.equals(";");
  final boolean noSentenceEnd =
      !SENTENCE_END1.matcher(lastParagraphString).matches()
          && !SENTENCE_END2.matcher(lastToken).matches();
  lastParagraphString = lastToken;

  // allows enumeration with lowercase letters: a), iv., etc.
  boolean enumerationMarker = false;
  if (matchTokenPos + 1 < tokens.length
      && NUMERALS_EN.matcher(tokens[matchTokenPos].getToken()).matches()) {
    final String following = tokens[matchTokenPos + 1].getToken();
    enumerationMarker = following.equals(".") || following.equals(")");
  }

  final boolean looksLikeUrl = isUrl(checkToken);

  final boolean preventError =
      afterListSeparator || noSentenceEnd || enumerationMarker || looksLikeUrl;

  final boolean startsLowercase =
      checkToken.length() > 0 && Character.isLowerCase(checkToken.charAt(0));
  if (startsLowercase && !preventError) {
    final AnalyzedTokenReadings target = tokens[matchTokenPos];
    final int from = target.getStartPos();
    final RuleMatch hit =
        new RuleMatch(
            this, from, from + target.getToken().length(), messages.getString("incorrect_case"));
    hit.setSuggestedReplacement(StringTools.uppercaseFirstChar(checkToken));
    ruleMatches.add(hit);
  }
  return toRuleMatchArray(ruleMatches);
}