/** * @param args a map with values for {@code year}, {@code month}, {@code day} (day of month), * {@code weekDay} */ @Override public RuleMatch acceptRuleMatch( RuleMatch match, Map<String, String> args, AnalyzedTokenReadings[] patternTokens) { int dayOfWeekFromString = getDayOfWeek(getRequired("weekDay", args)); Calendar dateFromDate = getDate(args); int dayOfWeekFromDate; try { dayOfWeekFromDate = dateFromDate.get(Calendar.DAY_OF_WEEK); } catch (IllegalArgumentException ignore) { // happens with 'dates' like '32.8.2014' - those should be caught by a different rule return null; } if (dayOfWeekFromString != dayOfWeekFromDate) { String realDayName = getDayOfWeek(dateFromDate); String message = match.getMessage().replace("\\realDay", realDayName); RuleMatch newMatch = new RuleMatch( match.getRule(), match.getFromPos(), match.getToPos(), message, match.getShortMessage()); return newMatch; } else { return null; } }
@Override public RuleMatch[] match(final AnalyzedSentence sentence) { final List<RuleMatch> ruleMatches = new ArrayList<>(); final AnalyzedTokenReadings[] tokens = sentence.getTokensWithoutWhitespace(); RuleMatch prevRuleMatch = null; final Queue<AnalyzedTokenReadings> prevTokens = new ArrayBlockingQueue<>(MAX_TERMS); for (int i = 0; i < tokens.length + MAX_TERMS - 1; i++) { final AnalyzedTokenReadings token; // we need to extend the token list so we find matches at the end of the original list: if (i >= tokens.length) { token = new AnalyzedTokenReadings( new AnalyzedToken("", "", null), prevTokens.peek().getStartPos()); } else { token = tokens[i]; } if (i == 0) { addToQueue(token, prevTokens); continue; } final StringBuilder sb = new StringBuilder(); int j = 0; AnalyzedTokenReadings firstMatchToken = null; final List<String> stringsToCheck = new ArrayList<>(); final List<String> origStringsToCheck = new ArrayList<>(); // original upper/lowercase spelling final Map<String, AnalyzedTokenReadings> stringToToken = new HashMap<>(); for (AnalyzedTokenReadings atr : prevTokens) { if (j == 0) { firstMatchToken = atr; } sb.append(' '); sb.append(atr.getToken()); if (j >= 1) { final String stringToCheck = normalize(sb.toString()); stringsToCheck.add(stringToCheck); origStringsToCheck.add(sb.toString().trim()); if (!stringToToken.containsKey(stringToCheck)) { stringToToken.put(stringToCheck, atr); } } j++; } // iterate backwards over all potentially incorrect strings to make // sure we match longer strings first: for (int k = stringsToCheck.size() - 1; k >= 0; k--) { final String stringToCheck = stringsToCheck.get(k); final String origStringToCheck = origStringsToCheck.get(k); if (incorrectCompounds.contains(stringToCheck)) { final AnalyzedTokenReadings atr = stringToToken.get(stringToCheck); String msg = null; final List<String> replacement = new ArrayList<>(); if (!noDashSuggestion.contains(stringToCheck)) { replacement.add(origStringToCheck.replace(' ', '-')); msg = 
withHyphenMessage; } if (isNotAllUppercase(origStringToCheck) && !onlyDashSuggestion.contains(stringToCheck)) { replacement.add(mergeCompound(origStringToCheck)); msg = withoutHyphenMessage; } final String[] parts = stringToCheck.split(" "); if (parts.length > 0 && parts[0].length() == 1) { replacement.clear(); replacement.add(origStringToCheck.replace(' ', '-')); msg = withHyphenMessage; } else if (replacement.isEmpty() || replacement.size() == 2) { // isEmpty shouldn't happen msg = withOrWithoutHyphenMessage; } final RuleMatch ruleMatch = new RuleMatch( this, firstMatchToken.getStartPos(), atr.getStartPos() + atr.getToken().length(), msg, shortDesc); // avoid duplicate matches: if (prevRuleMatch != null && prevRuleMatch.getFromPos() == ruleMatch.getFromPos()) { prevRuleMatch = ruleMatch; break; } prevRuleMatch = ruleMatch; ruleMatch.setSuggestedReplacements(replacement); ruleMatches.add(ruleMatch); break; } } addToQueue(token, prevTokens); } return toRuleMatchArray(ruleMatches); }
/**
 * Orders matches by their start position.
 *
 * @param other the match to compare with; must not be {@code null}
 * @return the result of {@link Integer#compare(int, int)} applied to this match's start
 *     position and the start position of {@code other}
 * @throws NullPointerException if {@code other} is {@code null}
 */
@Override
public int compareTo(final RuleMatch other) {
  final RuleMatch that = Objects.requireNonNull(other);
  final int ownStart = getFromPos();
  final int otherStart = that.getFromPos();
  return Integer.compare(ownStart, otherStart);
}
@Override public RuleMatch[] match(AnalyzedSentence sentence) { List<RuleMatch> ruleMatches = new ArrayList<>(); AnalyzedTokenReadings[] tokens = sentence.getTokensWithoutWhitespace(); RuleMatch prevRuleMatch = null; Queue<AnalyzedTokenReadings> prevTokens = new ArrayBlockingQueue<>(MAX_TERMS); for (int i = 0; i < tokens.length + MAX_TERMS - 1; i++) { AnalyzedTokenReadings token; // we need to extend the token list so we find matches at the end of the original list: if (i >= tokens.length) { token = new AnalyzedTokenReadings( new AnalyzedToken("", "", null), prevTokens.peek().getStartPos()); } else { token = tokens[i]; } if (i == 0) { addToQueue(token, prevTokens); continue; } if (token.isImmunized()) { continue; } AnalyzedTokenReadings firstMatchToken = prevTokens.peek(); List<String> stringsToCheck = new ArrayList<>(); List<String> origStringsToCheck = new ArrayList<>(); // original upper/lowercase spelling Map<String, AnalyzedTokenReadings> stringToToken = getStringToTokenMap(prevTokens, stringsToCheck, origStringsToCheck); // iterate backwards over all potentially incorrect strings to make // sure we match longer strings first: for (int k = stringsToCheck.size() - 1; k >= 0; k--) { String stringToCheck = stringsToCheck.get(k); String origStringToCheck = origStringsToCheck.get(k); if (getCompoundRuleData().getIncorrectCompounds().contains(stringToCheck)) { AnalyzedTokenReadings atr = stringToToken.get(stringToCheck); String msg = null; List<String> replacement = new ArrayList<>(); if (!getCompoundRuleData().getNoDashSuggestion().contains(stringToCheck)) { replacement.add(origStringToCheck.replace(' ', '-')); msg = withHyphenMessage; } if (isNotAllUppercase(origStringToCheck) && !getCompoundRuleData().getOnlyDashSuggestion().contains(stringToCheck)) { replacement.add(mergeCompound(origStringToCheck)); msg = withoutHyphenMessage; } String[] parts = stringToCheck.split(" "); if (parts.length > 0 && parts[0].length() == 1) { replacement.clear(); 
replacement.add(origStringToCheck.replace(' ', '-')); msg = withHyphenMessage; } else if (replacement.isEmpty() || replacement.size() == 2) { // isEmpty shouldn't happen msg = withOrWithoutHyphenMessage; } RuleMatch ruleMatch = new RuleMatch(this, firstMatchToken.getStartPos(), atr.getEndPos(), msg, shortDesc); ruleMatch.setSuggestedReplacements(replacement); // avoid duplicate matches: if (prevRuleMatch != null && prevRuleMatch.getFromPos() == ruleMatch.getFromPos()) { prevRuleMatch = ruleMatch; break; } prevRuleMatch = ruleMatch; ruleMatches.add(ruleMatch); break; } } addToQueue(token, prevTokens); } return toRuleMatchArray(ruleMatches); }