Example #1
0
  /**
   * Assigns lemmas to every token of every sentence in the document.
   *
   * <p>For each token the dictionary is queried with the token's lexeme and POS tag. When that
   * lookup yields {@code null} or an empty array, the query is repeated with the lower-cased
   * lexeme. Whatever the last lookup returned (possibly {@code null} or empty) is stored on the
   * token.
   *
   * @param document the document whose tokens receive lemmas; modified in place
   */
  public void analyze(Document document) {
    for (Sentence sentence : document.getSentences()) {
      for (Token token : sentence.getTokens()) {
        String posTag = token.getPOSTag();
        String lexeme = token.getLexeme();

        String[] found = dict.getLemmas(lexeme, posTag);

        boolean nothingFound = found == null || found.length == 0;
        if (nothingFound) {
          // Second chance: the dictionary may only know the lower-cased form.
          found = dict.getLemmas(lexeme.toLowerCase(), posTag);
        }

        token.setLemmas(found);
      }
    }
  }
Example #2
0
  /**
   * Replaces each contracted token of every sentence with the tokens of its expanded form.
   *
   * <p>The contraction finder is run over the sentence's token strings; calls to it are serialized
   * on the finder instance. For each reported span, the lexeme of the token at the span start is
   * expanded with {@code ContractionUtility.expand}. Each expanded part becomes a new token that
   * reuses the original token's start and end offsets and is tagged with a
   * {@code Analyzers.CONTRACTION_FINDER} context: {@code "B"} for the first part, {@code "E"} for
   * the last part (when there is more than one), and {@code "I"} for parts in between.
   *
   * <p>If no expansion is available for a flagged token, the token is left in the sentence
   * unchanged; it is no longer removed.
   *
   * @param document the document whose sentences are updated in place
   */
  public void analyze(Document document) {
    List<Sentence> sentences = document.getSentences();

    for (Sentence sentence : sentences) {
      Span[] contractionsSpan;

      synchronized (this.contractionFinder) {
        contractionsSpan = contractionFinder.find(TextUtils.tokensToString(sentence.getTokens()));
      }

      List<Token> newTokens = sentence.getTokens();

      // Walk the spans back to front so that inserting tokens does not shift the
      // indexes of spans that are still to be processed.
      for (int i = contractionsSpan.length - 1; i >= 0; i--) {

        int start = contractionsSpan[i].getStart();

        String lexeme = sentence.getTokens().get(start).getLexeme();
        String[] contractions = ContractionUtility.expand(lexeme);

        if (contractions == null || contractions.length == 0) {
          // Nothing to substitute: keep the original token instead of dropping it.
          LOGGER.debug("Missing contraction: " + lexeme);
          continue;
        }

        Token original = newTokens.remove(start);

        // Insert the parts in reverse at the same index so they end up in natural order.
        for (int j = contractions.length - 1; j >= 0; j--) {
          Token token = new TokenImpl(original.getStart(), original.getEnd(), contractions[j]);
          newTokens.add(start, token);

          String caze;
          if (j == 0) {
            caze = "B";
          } else if (j == contractions.length - 1) {
            caze = "E";
          } else {
            caze = "I";
          }

          token.addContext(Analyzers.CONTRACTION_FINDER, caze);
        }
      }
      sentence.setTokens(newTokens);
    }
  }
  /**
   * Looks for tokens that would make the sentence more probable if swapped for a mapped
   * alternative.
   *
   * <p>Sentences with fewer than two tokens yield an empty list. For every token whose lower-cased
   * lexeme is a key of {@code map}, the mapped value is passed through
   * {@code RuleUtils.useCasedString} together with the original lexeme, and the result is spliced
   * into the sentence text at the token's start/end offsets. The resulting text is analyzed as a
   * new document, and a mistake is reported when the first sentence of that document has a token
   * probability more than 0.1 above the original sentence's.
   *
   * @param sentence the sentence to inspect
   * @return the mistakes found, possibly empty, never {@code null}
   */
  public List<Mistake> check(Sentence sentence) {

    List<Mistake> found = new ArrayList<Mistake>();

    if (sentence.getTokens().size() < 2) {
      return found;
    }

    for (int idx = 0; idx < sentence.getTokens().size(); idx++) {
      Token current = sentence.getTokens().get(idx);
      String surface = current.getLexeme();
      String key = surface.toLowerCase();
      if (!map.containsKey(key)) {
        continue;
      }

      String replacement = RuleUtils.useCasedString(surface, map.get(key));
      String originalText = sentence.getText();
      String head = originalText.substring(0, current.getStart());
      String tail = originalText.substring(current.getEnd());
      String rewrittenText = head + replacement + tail;

      if (LOGGER.isDebugEnabled()) {
        LOGGER.debug("\n****** Sentença alternativa **********:\n" + rewrittenText);
      }

      Document rewrittenDocument = new DocumentImpl(rewrittenText);
      this.analyzer.analyze(rewrittenDocument);

      Sentence rewrittenSentence = rewrittenDocument.getSentences().get(0);

      // Written as a negated ">" so the comparison matches the original for every value.
      if (!(rewrittenSentence.getTokensProb() - sentence.getTokensProb() > 0.1)) {
        continue;
      }

      if (LOGGER.isDebugEnabled()) {
        LOGGER.debug("Prob original: " + sentence.getTokensProb());
        LOGGER.debug("Prob alternat: " + rewrittenSentence.getTokensProb());
        LOGGER.debug(
            "\n****** Possível correção **********:\n" + originalText + " -> " + rewrittenText);
      }

      // The token at the same index in the re-analyzed sentence supplies the POS tag
      // that goes into the long message.
      Token rewrittenToken = rewrittenSentence.getTokens().get(idx);
      String[] suggestions = new String[] {replacement};
      String[] longMsgArgs =
          new String[] {surface, translate(rewrittenToken.getPOSTag()), replacement};
      String[] shortMsgArgs = new String[] {surface, replacement};

      Mistake mistake =
          createMistake(
              ID,
              longMsgArgs,
              shortMsgArgs,
              suggestions,
              current.getStart(),
              current.getEnd(),
              sentence.getText());
      found.add(mistake);
    }

    return found;
  }