Пример #1
0
  /**
   * Checks a sentence for tokens that a different, mapped word would fit better.
   *
   * <p>For every token whose lower-cased lexeme is a key of {@code map}, the mapped word is
   * substituted into the sentence text, the resulting text is analyzed as a new document, and a
   * mistake is reported when the token probability of the alternative sentence exceeds the
   * original's by more than 0.1.
   *
   * <p>A candidate is skipped when the analyzer yields no sentence for the alternative text, or
   * when the alternative sentence has no token at the index of the replaced token.
   *
   * @param sentence the analyzed sentence to check
   * @return the mistakes found; empty when the sentence has fewer than two tokens or when no
   *     substitution is reported
   */
  public List<Mistake> check(Sentence sentence) {

    List<Mistake> mistakes = new ArrayList<Mistake>();

    // Neither the token count nor the text changes while checking, so read them once.
    final int tokenCount = sentence.getTokens().size();
    if (tokenCount < 2) {
      return mistakes;
    }
    final String sentenceText = sentence.getText();

    for (int i = 0; i < tokenCount; i++) {
      Token originalToken = sentence.getTokens().get(i);
      String wanted = originalToken.getLexeme();
      // NOTE(review): toLowerCase() uses the default locale; confirm the keys of map were
      // lower-cased the same way.
      String wantedLowerCase = wanted.toLowerCase();
      if (!map.containsKey(wantedLowerCase)) {
        continue;
      }

      String candidate = RuleUtils.useCasedString(wanted, map.get(wantedLowerCase));
      // Token offsets are used as indexes into the sentence text.
      String alternativeText =
          sentenceText.substring(0, originalToken.getStart())
              + candidate
              + sentenceText.substring(originalToken.getEnd());

      if (LOGGER.isDebugEnabled()) {
        LOGGER.debug("\n****** Sentença alternativa **********:\n" + alternativeText);
      }

      Document alternative = new DocumentImpl(alternativeText);
      this.analyzer.analyze(alternative);

      // Without an analyzed sentence there is nothing to compare against.
      if (alternative.getSentences().isEmpty()) {
        continue;
      }

      Sentence alternativeSentence = alternative.getSentences().get(0);
      if (alternativeSentence.getTokensProb() - sentence.getTokensProb() > 0.1) {
        if (LOGGER.isDebugEnabled()) {
          LOGGER.debug("Prob original: " + sentence.getTokensProb());
          LOGGER.debug("Prob alternat: " + alternativeSentence.getTokensProb());
          LOGGER.debug(
              "\n****** Possível correção **********:\n"
                  + sentenceText
                  + " -> "
                  + alternativeText);
        }

        // The substitution may have changed the tokenization; without a token at the same
        // index the tag needed for the long message cannot be looked up.
        if (i >= alternativeSentence.getTokens().size()) {
          if (LOGGER.isDebugEnabled()) {
            LOGGER.debug("Alternative sentence has no token at index " + i + "; skipped.");
          }
          continue;
        }

        Token alternativeToken = alternativeSentence.getTokens().get(i);
        String[] suggestions = {candidate};
        String[] longMsgArgs = {wanted, translate(alternativeToken.getPOSTag()), candidate};
        String[] shortMsgArgs = {wanted, candidate};
        mistakes.add(
            createMistake(
                ID,
                longMsgArgs,
                shortMsgArgs,
                suggestions,
                originalToken.getStart(),
                originalToken.getEnd(),
                sentenceText));
      }
    }

    return mistakes;
  }
Пример #2
0
 /**
  * Determines if a chunk is matched by a rule element.
  *
  * <p>When the element is not negated the result starts as <code>false</code> and a matching
  * mask can turn it <code>true</code>; when the element is negated the result starts as
  * <code>true</code> and a matching mask can turn it <code>false</code>. The tests applied to one
  * mask form an else-if chain, so at most one of them updates the result for that mask.
  *
  * <p>A chunk without a morphological tag is not tested against tag masks or tag references, in
  * either mode.
  *
  * @param chunk the chunk to be matched by the element
  * @param element the element to be matched against the chunk
  * @param baseTokenIndex index forwarded to <code>RuleUtils.createTagMaskFromReferenceSyntatic
  *     </code> when a mask holds a tag reference
  * @param sentence the sentence forwarded together with <code>baseTokenIndex</code> when a mask
  *     holds a tag reference
  * @return <code>true</code> if there's a match, <code>false</code> otherwise
  */
 private boolean match(
     SyntacticChunk chunk, Element element, int baseTokenIndex, Sentence sentence) {
   boolean match;
   boolean negated;
   // Sees if the mask must or not match.
   // Negated is optional, so it can be null, true or false.
   // If null, consider as false.
   if (element.isNegated() == null) {
     match = false;
     negated = false;
   } else {
     match = element.isNegated().booleanValue();
     negated = element.isNegated().booleanValue();
   }
   for (Mask mask : element.getMask()) {
     // If the chunk must match the mask.
     if (!negated) {
       // If not negated, match starts as false and just one match is needed to make it true.
       if (mask.getLexemeMask() != null
           && mask.getLexemeMask().equalsIgnoreCase(chunk.toString())) {
         match = true;
       } else if (mask.getTagMask() != null && chunk.getMorphologicalTag() != null) {
         // NOTE(review): the other tag tests in this method skip the syntactic comparison when
         // the mask has no syntactic function; this one does not. Confirm that is intended.
         match |=
             (chunk.getMorphologicalTag().matchExact(mask.getTagMask(), false)
                 && chunk.getSyntacticTag().match(mask.getTagMask()));
       } else if (mask.getPrimitiveMask() != null
           // NOTE(review): assumes the chunk has at least one child chunk.
           && matchLemma(chunk.getChildChunks().get(0).getMainToken(), mask.getPrimitiveMask())) {
         match = true;
       } else if (mask.getTagReference() != null && chunk.getMorphologicalTag() != null) {
         TagMask t =
             RuleUtils.createTagMaskFromReferenceSyntatic(
                 mask.getTagReference(), sentence, baseTokenIndex);
         match |=
             (chunk.getMorphologicalTag().match(t, false)
                 && (t.getSyntacticFunction() == null || chunk.getSyntacticTag().match(t)));
       }
     } else { // The chunk must NOT match the mask.
       // If negated, match starts as true and just one match is needed to make it false.
       if (mask.getLexemeMask() != null
           && mask.getLexemeMask().equalsIgnoreCase(chunk.toString())) {
         match = false;
       } else if (mask.getTagMask() != null && chunk.getMorphologicalTag() != null) {
         // The morphological tag is checked for null here exactly as in the non-negated
         // branch; a chunk without one cannot be matched by a tag mask.
         match &=
             !(chunk.getMorphologicalTag().matchExact(mask.getTagMask(), false)
                 && (mask.getTagMask().getSyntacticFunction() == null
                     || chunk.getSyntacticTag().match(mask.getTagMask())));
       } else if (mask.getPrimitiveMask() != null
           // NOTE(review): assumes the chunk has at least one child chunk.
           && matchLemma(chunk.getChildChunks().get(0).getMainToken(), mask.getPrimitiveMask())) {
         match = false;
       } else if (mask.getTagReference() != null && chunk.getMorphologicalTag() != null) {
         // Same null guard as above, so a missing morphological tag no longer throws.
         TagMask t =
             RuleUtils.createTagMaskFromReferenceSyntatic(
                 mask.getTagReference(), sentence, baseTokenIndex);
         match &=
             !(chunk.getMorphologicalTag().match(t, false)
                 && (t.getSyntacticFunction() == null || chunk.getSyntacticTag().match(t)));
       }
     }
   }
   return match;
 }
Пример #3
0
 /**
  * Decides whether a rule element accepts a token.
  *
  * <p>For an element that is not negated the answer starts as <code>false</code> and a mask that
  * matches the token's lexeme, lemma, morphological tag or referenced tag can turn it <code>true
  * </code>. For a negated element the answer starts as <code>true</code> and a matching mask can
  * turn it <code>false</code>. In both modes an out-of-bounds mask sets the answer to <code>false
  * </code> when <code>baseTokenIndex</code> is the first or the last token index of the sentence.
  * Only one of these tests updates the answer for any single mask.
  *
  * @param token the token to be matched by the element
  * @param element the element to be matched against the token
  * @param baseTokenIndex index compared with the sentence bounds for out-of-bounds masks and
  *     forwarded when a tag reference is resolved
  * @param sentence the sentence that provides the token count for the bounds test and is
  *     forwarded when a tag reference is resolved
  * @return <code>true</code> if there's a match, <code>false</code> otherwise
  */
 private boolean match(Token token, Element element, int baseTokenIndex, Sentence sentence) {
   // The negated attribute is optional; an absent (null) value counts as false.
   final Boolean negatedAttribute = element.isNegated();
   final boolean mustNotMatch = negatedAttribute != null && negatedAttribute.booleanValue();

   if (!mustNotMatch) {
     // The token must match: start unmatched, any matching mask is enough.
     boolean accepted = false;
     for (Mask mask : element.getMask()) {
       if (mask.getLexemeMask() != null
           && mask.getLexemeMask().equalsIgnoreCase(token.getLexeme())) {
         accepted = true;
         continue;
       }
       if (mask.getPrimitiveMask() != null && matchLemma(token, mask.getPrimitiveMask())) {
         accepted = true;
         continue;
       }
       if (mask.getTagMask() != null && token.getMorphologicalTag() != null) {
         accepted |= token.getMorphologicalTag().matchExact(mask.getTagMask(), false);
         continue;
       }
       if (mask.getTagReference() != null && token.getMorphologicalTag() != null) {
         accepted |=
             token
                 .getMorphologicalTag()
                 .match(
                     RuleUtils.createTagMaskFromReference(
                         mask.getTagReference(), sentence, baseTokenIndex),
                     false);
         continue;
       }
       if (mask.getOutOfBounds() != null) {
         if (baseTokenIndex == 0 || baseTokenIndex == sentence.getTokens().size() - 1) {
           accepted = false;
         }
       }
     }
     return accepted;
   }

   // The token must NOT match: start matched, any matching mask is enough to reject.
   boolean accepted = true;
   for (Mask mask : element.getMask()) {
     if (mask.getLexemeMask() != null
         && mask.getLexemeMask().equalsIgnoreCase(token.getLexeme())) {
       accepted = false;
       continue;
     }
     if (mask.getPrimitiveMask() != null && matchLemma(token, mask.getPrimitiveMask())) {
       accepted = false;
       continue;
     }
     // Only this mode checks the token itself for null before reading its tag.
     if (mask.getTagMask() != null && token != null && token.getMorphologicalTag() != null) {
       accepted &= !token.getMorphologicalTag().matchExact(mask.getTagMask(), false);
       continue;
     }
     if (mask.getTagReference() != null
         && token != null
         && token.getMorphologicalTag() != null) {
       accepted &=
           !token
               .getMorphologicalTag()
               .match(
                   RuleUtils.createTagMaskFromReference(
                       mask.getTagReference(), sentence, baseTokenIndex),
                   false);
       continue;
     }
     if (mask.getOutOfBounds() != null) {
       if (baseTokenIndex == 0 || baseTokenIndex == sentence.getTokens().size() - 1) {
         accepted = false;
       }
     }
   }
   return accepted;
 }