예제 #1
0
 /**
  * Computes the priority to use for a rule.
  *
  * @param rule the rule whose priority is wanted
  * @return the rule's own priority when it defines one; otherwise the value of the no-argument
  *     {@code getPriority()} minus the rule id, narrowed to {@code int}
  */
 private int getPriority(Rule rule) {
   if (rule.getPriority() == null) {
     // No explicit priority on the rule: fall back to a value derived from the rule id.
     return (int) (getPriority() - rule.getId());
   }
   return rule.getPriority().intValue();
 }
예제 #2
0
 /**
  * Recursively walks the chunks of a sentence, starting at a base chunk, and records a mistake
  * for every subject-verb rule whose accept state is reached.
  *
  * <p>Every state in {@code currentStates} is tested against the chunk at
  * {@code currentChunkIndex}. A matching accept state produces a mistake that is appended to
  * {@code mistakes}; any other matching state continues the search on the following chunk with
  * that state's next states, as long as a following chunk exists.
  *
  * @param mistakes the list that collects the mistakes found; it is modified in place
  * @param currentStates the states to test against the current chunk
  * @param syntacticChunks the list of chunks being walked
  * @param baseChunkIndex the index of the chunk where this search started
  * @param currentChunkIndex the index of the chunk currently being examined
  * @param sentence the complete sentence, used to express the mistake location in characters
  * @return the same list passed in as {@code mistakes}, with any newly found mistakes appended
  */
 private List<Mistake> getMistakes(
     List<Mistake> mistakes,
     List<State> currentStates,
     List<SyntacticChunk> syntacticChunks,
     int baseChunkIndex,
     int currentChunkIndex,
     Sentence sentence) {
   for (State candidate : currentStates) {
     SyntacticChunk currentChunk = syntacticChunks.get(currentChunkIndex);
     if (!this.match(currentChunk, candidate.getElement(), baseChunkIndex, sentence)) {
       // This state does not accept the current chunk; try the next candidate.
       continue;
     }

     if (!(candidate instanceof AcceptState)) {
       // Intermediate state: follow its transitions on the next chunk, if there is one.
       if (currentChunkIndex + 1 < syntacticChunks.size()) {
         this.getMistakes(
             mistakes,
             candidate.getNextStates(),
             syntacticChunks,
             baseChunkIndex,
             currentChunkIndex + 1,
             sentence);
       }
       continue;
     }

     // Accept state reached: the rule attached to it has matched.
     Rule matchedRule = ((AcceptState) candidate).getRule();

     // Token indexes delimiting the mistake. The lower boundary shifts the base chunk index,
     // while the upper boundary is added to the first token of the current chunk.
     int firstTokenIndex =
         sentence
             .getSyntacticChunks()
             .get(baseChunkIndex + matchedRule.getBoundaries().getLower())
             .getFirstToken();
     int lastTokenIndex =
         sentence.getSyntacticChunks().get(currentChunkIndex).getFirstToken()
             + matchedRule.getBoundaries().getUpper();

     // Character positions: start of the lower token, end of the current chunk's last token.
     int startChar = sentence.getTokens().get(firstTokenIndex).getSpan().getStart();
     SyntacticChunk lastChunk = sentence.getSyntacticChunks().get(currentChunkIndex);
     int endChar =
         lastChunk.getTokens().get(lastChunk.getTokens().size() - 1).getSpan().getEnd();

     String[] suggestions =
         SuggestionBuilder.getSuggestions(
             sentence,
             true,
             baseChunkIndex,
             firstTokenIndex,
             lastTokenIndex,
             matchedRule.getSuggestion(),
             dictionary,
             Method.SUBJECT_VERB);

     // Character positions are shifted by the sentence offset when the mistake is reported.
     mistakes.add(
         new MistakeImpl(
             ID_PREFIX + matchedRule.getId(),
             getPriority(matchedRule),
             matchedRule.getMessage(),
             matchedRule.getShortMessage(),
             suggestions,
             startChar + sentence.getOffset(),
             endChar + sentence.getOffset(),
             matchedRule.getExample(),
             sentence.getSentence()));
   }
   return mistakes;
 }
예제 #3
0
  /**
   * Recursively walks the tokens of a token group (a sentence or a chunk), starting at a base
   * token, and records a mistake for every general or phrase-local rule whose accept state is
   * reached.
   *
   * <p>When {@code tokenGroup} is a {@code Chunk}, token indexes are relative to the chunk, so the
   * chunk's first-token index is added to translate them into sentence-level indexes, and the
   * suggestion method is {@code Method.PHRASE_LOCAL} instead of {@code Method.GENERAL}.
   *
   * <p>Suggestions are built with the {@code dictionary} available in the enclosing scope. If
   * building them throws a {@code NullPointerException}, the error is logged and the mistake is
   * still reported, with no suggestions.
   *
   * @param mistakes the list that collects the mistakes found; it is modified in place
   * @param currentStates the states to test against the current token
   * @param tokenGroup the group of tokens being walked
   * @param baseTokenIndex the index, within {@code tokenGroup}, of the token where this search
   *     started
   * @param currentTokenIndex the index, within {@code tokenGroup}, of the token currently being
   *     examined
   * @param sentence the complete sentence, used to express the mistake location in characters
   * @return the same list passed in as {@code mistakes}, with any newly found mistakes appended
   */
  private List<Mistake> getMistakes(
      List<Mistake> mistakes,
      List<State> currentStates,
      TokenGroup tokenGroup,
      int baseTokenIndex,
      int currentTokenIndex,
      Sentence sentence) {
    // A chunk carries its own position inside the sentence; a sentence needs no shift.
    final boolean groupIsChunk = tokenGroup instanceof Chunk;
    final int offset = groupIsChunk ? ((Chunk) tokenGroup).getFirstToken() : 0;
    final Method method = groupIsChunk ? Method.PHRASE_LOCAL : Method.GENERAL;

    for (State candidate : currentStates) {
      if (!this.match(
          tokenGroup.getTokens().get(currentTokenIndex),
          candidate.getElement(),
          baseTokenIndex + offset,
          sentence)) {
        // This state does not accept the current token; try the next candidate.
        continue;
      }

      if (!(candidate instanceof AcceptState)) {
        // Intermediate state: follow its transitions on the next token, if there is one.
        if (currentTokenIndex + 1 < tokenGroup.getTokens().size()) {
          this.getMistakes(
              mistakes,
              candidate.getNextStates(),
              tokenGroup,
              baseTokenIndex,
              currentTokenIndex + 1,
              sentence);
        }
        continue;
      }

      // Accept state reached: the rule attached to it has matched.
      Rule matchedRule = ((AcceptState) candidate).getRule();

      // Sentence-level token indexes delimiting the mistake.
      int firstTokenIndex = baseTokenIndex + matchedRule.getBoundaries().getLower() + offset;
      int lastTokenIndex = currentTokenIndex + matchedRule.getBoundaries().getUpper() + offset;

      // The same span expressed in characters within the sentence.
      int startChar = sentence.getTokens().get(firstTokenIndex).getSpan().getStart();
      int endChar = sentence.getTokens().get(lastTokenIndex).getSpan().getEnd();

      // Best effort: a failure while building suggestions must not hide the mistake itself.
      String[] suggestions = new String[0];
      try {
        suggestions =
            SuggestionBuilder.getSuggestions(
                sentence,
                false,
                baseTokenIndex,
                firstTokenIndex,
                lastTokenIndex,
                matchedRule.getSuggestion(),
                dictionary,
                method);
      } catch (NullPointerException e) {
        LOGGER.error(
            "Failed to apply rule " + matchedRule.getId() + " in: " + sentence.getSentence(), e);
      }

      // Character positions are shifted by the sentence offset when the mistake is reported.
      mistakes.add(
          new MistakeImpl(
              ID_PREFIX + matchedRule.getId(),
              getPriority(matchedRule),
              matchedRule.getMessage(),
              matchedRule.getShortMessage(),
              suggestions,
              startChar + sentence.getOffset(),
              endChar + sentence.getOffset(),
              matchedRule.getExample(),
              sentence.getSentence()));
    }
    return mistakes;
  }