/**
 * Looks for tokens that have a replacement registered in {@code map} and reports a mistake
 * whenever swapping the token for that replacement makes the re-analyzed sentence more probable.
 *
 * <p>For every token whose lower-cased lexeme is a key of {@code map}, the sentence text is
 * rebuilt with the mapped candidate (re-cased through {@code RuleUtils.useCasedString}) in place
 * of the token, the rebuilt text is analyzed, and a mistake is created when the token probability
 * of the alternative sentence exceeds the original one by more than 0.1.
 *
 * <p>A candidate is skipped when the analysis of the alternative text yields no sentence, or when
 * the alternative sentence has no token at the index of the original token.
 *
 * @param sentence the analyzed sentence to check
 * @return the mistakes found; an empty list when the sentence has fewer than two tokens or no
 *     replacement qualifies
 */
public List<Mistake> check(Sentence sentence) {
  List<Mistake> mistakes = new ArrayList<Mistake>();
  if (sentence.getTokens().size() < 2) {
    return mistakes;
  }

  String sentenceText = sentence.getText();
  for (int i = 0; i < sentence.getTokens().size(); i++) {
    Token originalToken = sentence.getTokens().get(i);
    String wanted = originalToken.getLexeme();
    String wantedLowerCase = wanted.toLowerCase();
    if (!map.containsKey(wantedLowerCase)) {
      continue;
    }

    String candidate = RuleUtils.useCasedString(wanted, map.get(wantedLowerCase));
    // Rebuild the sentence text with the candidate in place of the original token.
    String alternativeText =
        sentenceText.substring(0, originalToken.getStart())
            + candidate
            + sentenceText.substring(originalToken.getEnd());
    if (LOGGER.isDebugEnabled()) {
      LOGGER.debug("\n****** Sentença alternativa **********:\n" + alternativeText);
    }

    Document alternative = new DocumentImpl(alternativeText);
    this.analyzer.analyze(alternative);
    // The re-analysis may not produce any sentence; there is nothing to compare in that case.
    if (alternative.getSentences() == null || alternative.getSentences().isEmpty()) {
      continue;
    }
    Sentence alternativeSentence = alternative.getSentences().get(0);

    boolean moreProbable =
        alternativeSentence.getTokensProb() - sentence.getTokensProb() > 0.1;
    if (!moreProbable) {
      continue;
    }
    if (LOGGER.isDebugEnabled()) {
      LOGGER.debug("Prob original: " + sentence.getTokensProb());
      LOGGER.debug("Prob alternat: " + alternativeSentence.getTokensProb());
      LOGGER.debug(
          "\n****** Possível correção **********:\n" + sentenceText + " -> " + alternativeText);
    }

    // The substitution can change how the text is tokenized; without a token at the same index
    // there is no POS tag to report, so skip instead of throwing IndexOutOfBoundsException.
    if (i >= alternativeSentence.getTokens().size()) {
      continue;
    }
    Token alternativeToken = alternativeSentence.getTokens().get(i);

    String[] suggestions = {candidate};
    String[] longMsgArgs = {wanted, translate(alternativeToken.getPOSTag()), candidate};
    String[] shortMsgArgs = {wanted, candidate};
    mistakes.add(
        createMistake(
            ID,
            longMsgArgs,
            shortMsgArgs,
            suggestions,
            originalToken.getStart(),
            originalToken.getEnd(),
            sentenceText));
  }
  return mistakes;
}
/** * Determines if a chunk is matched by a rule element. * * @param chunk the chunk to be matched by the element * @param element the element to be matched against the chunk * @return <code>true</code> if there's a match, <code>false</code> otherwise */ private boolean match( SyntacticChunk chunk, Element element, int baseTokenIndex, Sentence sentence) { boolean match; boolean negated; // Sees if the mask must or not match. // Negated is optional, so it can be null, true or false. // If null, consider as false. if (element.isNegated() == null) { match = false; negated = false; } else { match = element.isNegated().booleanValue(); negated = element.isNegated().booleanValue(); } for (Mask mask : element.getMask()) { // If the token must match the mask. if (!negated) { // If not negated, match starts as false and just one match is needed to make it true. if (mask.getLexemeMask() != null && mask.getLexemeMask().equalsIgnoreCase(chunk.toString())) { match = true; } else if (mask.getTagMask() != null && chunk.getMorphologicalTag() != null) { match = match | (chunk.getMorphologicalTag().matchExact(mask.getTagMask(), false) && chunk.getSyntacticTag().match(mask.getTagMask())); } else if (mask.getPrimitiveMask() != null /*&& chunk.getTokens().size() > 0*/ && matchLemma(chunk.getChildChunks().get(0).getMainToken(), mask.getPrimitiveMask())) { match = true; } else if (mask.getTagReference() != null && chunk.getMorphologicalTag() != null) { TagMask t = RuleUtils.createTagMaskFromReferenceSyntatic( mask.getTagReference(), sentence, baseTokenIndex); match = match | (chunk.getMorphologicalTag().match(t, false) && (t.getSyntacticFunction() == null || chunk.getSyntacticTag().match(t))); } } else { // The token must NOT match the mask. // If negated, match starts as true and just one match is needed to make it false. 
if (mask.getLexemeMask() != null && mask.getLexemeMask().equalsIgnoreCase(chunk.toString())) { match = false; } else if (mask.getTagMask() != null) { match = match & !(chunk.getMorphologicalTag().matchExact(mask.getTagMask(), false) && (mask.getTagMask().getSyntacticFunction() == null || chunk.getSyntacticTag().match(mask.getTagMask()))); } else if (mask.getPrimitiveMask() != null /*&& chunk.getTokens().size() > 0*/ && matchLemma(chunk.getChildChunks().get(0).getMainToken(), mask.getPrimitiveMask())) { match = false; } else if (mask.getTagReference() != null) { TagMask t = RuleUtils.createTagMaskFromReferenceSyntatic( mask.getTagReference(), sentence, baseTokenIndex); match = match & !(chunk.getMorphologicalTag().match(t, false) && (t.getSyntacticFunction() == null || chunk.getSyntacticTag().match(t))); } } } return match; }
/** * Determines if a token is matched by a rule element. * * @param token the token to be matched by the element * @param element the element to be matched against the token * @return <code>true</code> if there's a match, <code>false</code> otherwise */ private boolean match(Token token, Element element, int baseTokenIndex, Sentence sentence) { boolean match; boolean negated; // Sees if the mask must or not match. // Negated is optional, so it can be null, true or false. // If null, consider as false. if (element.isNegated() == null) { match = false; negated = false; } else { match = element.isNegated().booleanValue(); negated = element.isNegated().booleanValue(); } for (Mask mask : element.getMask()) { // If the token must match the mask. if (!negated) { // If not negated, match starts as false and just one match is needed to make it true. if (mask.getLexemeMask() != null && mask.getLexemeMask().equalsIgnoreCase(token.getLexeme())) { match = true; } else if (mask.getPrimitiveMask() != null && matchLemma(token, mask.getPrimitiveMask())) { match = true; } else if (mask.getTagMask() != null && token.getMorphologicalTag() != null) { match = match | token.getMorphologicalTag().matchExact(mask.getTagMask(), false); } else if (mask.getTagReference() != null && token.getMorphologicalTag() != null) { match = match | token .getMorphologicalTag() .match( RuleUtils.createTagMaskFromReference( mask.getTagReference(), sentence, baseTokenIndex), false); } else if (mask.getOutOfBounds() != null && (baseTokenIndex == 0 || baseTokenIndex == sentence.getTokens().size() - 1)) { match = false; } } else { // The token must NOT match the mask. // If negated, match starts as true and just one match is needed to make it false. 
if (mask.getLexemeMask() != null && mask.getLexemeMask().equalsIgnoreCase(token.getLexeme())) { match = false; } else if (mask.getPrimitiveMask() != null && matchLemma(token, mask.getPrimitiveMask())) { match = false; } else if (mask.getTagMask() != null && token != null && token.getMorphologicalTag() != null) { match = match & !token.getMorphologicalTag().matchExact(mask.getTagMask(), false); } else if (mask.getTagReference() != null && token != null && token.getMorphologicalTag() != null) { match = match & !token .getMorphologicalTag() .match( RuleUtils.createTagMaskFromReference( mask.getTagReference(), sentence, baseTokenIndex), false); } else if (mask.getOutOfBounds() != null && (baseTokenIndex == 0 || baseTokenIndex == sentence.getTokens().size() - 1)) { match = false; } } } return match; }