/**
 * Starts matching on every rule element in {@code elements} and applies the resulting matches.
 *
 * <p>A composed match is created through {@code createComposedMatch}, then each element is
 * started with itself as entry point and a {@code null} rule apply. All matches returned by the
 * elements are collected. Afterwards every collected match that is not yet flagged as applied is
 * added to {@code ruleApply}, has the rule elements of its rule root applied if the match itself
 * matched and every element produced at least one matched result, and is finally flagged as
 * applied.
 *
 * @param ruleMatch the rule match handed on to every element
 * @param ruleApply receives each collected match that was not applied before
 * @param containerMatch the container used when creating the composed match
 * @param entryPoint not read by this implementation
 * @param stream the stream handed on to the elements and to the rule application
 * @param crowd the inference crowd handed on to the elements and to the rule application
 * @return all matches produced by the elements, in element order
 */
public List<RuleMatch> startMatch(RuleMatch ruleMatch, RuleApply ruleApply,
        ComposedRuleElementMatch containerMatch, RuleElement entryPoint, RutaStream stream,
        InferenceCrowd crowd) {
  ComposedRuleElementMatch composedMatch = createComposedMatch(ruleMatch, containerMatch, stream);
  List<RuleMatch> collected = new ArrayList<RuleMatch>();

  // Becomes false as soon as one element yields no matched result at all.
  boolean everyElementMatched = true;
  for (RuleElement element : elements) {
    List<RuleMatch> elementMatches =
            element.startMatch(ruleMatch, null, composedMatch, element, stream, crowd);

    boolean elementMatched = false;
    for (RuleMatch candidate : elementMatches) {
      if (candidate.matched()) {
        elementMatched = true;
        break;
      }
    }
    if (!elementMatched) {
      everyElementMatched = false;
    }

    collected.addAll(elementMatches);
  }

  for (RuleMatch pending : collected) {
    if (pending.isApplied()) {
      continue;
    }
    ruleApply.add(pending);
    if (pending.matched() && everyElementMatched) {
      pending.getRule().getRoot().applyRuleElements(pending, stream, crowd);
    }
    pending.setApplied(true);
  }
  return collected;
}
@Override public final RuleMatch[] match(final AnalyzedSentence text) { final List<RuleMatch> ruleMatches = new ArrayList<RuleMatch>(); final AnalyzedTokenReadings[] tokens = text.getTokens(); String prevToken = ""; String prevPrevToken = ""; boolean prevWhite = false; int pos = 0; int prevLen = 0; for (int i = 0; i < tokens.length; i++) { final String token = tokens[i].getToken(); final boolean isWhite = tokens[i].isWhitespace() || tokens[i].isFieldCode(); pos += token.length(); String msg = null; int fixLen = 0; String suggestionText = null; if (isWhite && isLeftBracket(prevToken)) { msg = messages.getString("no_space_after"); suggestionText = prevToken; fixLen = 1; } else if (!isWhite && prevToken.equals(",") && isNotQuoteOrHyphen(token) && containsNoNumber(prevPrevToken) && containsNoNumber(token) && !",".equals(prevPrevToken)) { msg = messages.getString("missing_space_after_comma"); suggestionText = ", "; } else if (prevWhite) { if (isRightBracket(token)) { msg = messages.getString("no_space_before"); suggestionText = token; fixLen = 1; } else if (token.equals(",")) { msg = messages.getString("space_after_comma"); suggestionText = ","; fixLen = 1; // exception for duplicated comma (we already have another rule for that) if (i + 1 < tokens.length && ",".equals(tokens[i + 1].getToken())) { msg = null; } } else if (token.equals(".")) { msg = messages.getString("no_space_before_dot"); suggestionText = "."; fixLen = 1; // exception case for figures such as ".5" and ellipsis if (i + 1 < tokens.length && isNumberOrDot(tokens[i + 1].getToken())) { msg = null; } } } if (msg != null) { final int fromPos = tokens[i - 1].getStartPos(); final int toPos = tokens[i - 1].getStartPos() + fixLen + prevLen; // TODO: add some good short comment here final RuleMatch ruleMatch = new RuleMatch(this, fromPos, toPos, msg); ruleMatch.setSuggestedReplacement(suggestionText); ruleMatches.add(ruleMatch); } prevPrevToken = prevToken; prevToken = token; prevWhite = isWhite && 
!tokens[i].isFieldCode(); // OOo code before comma/dot prevLen = tokens[i].getToken().length(); } return toRuleMatchArray(ruleMatches); }