/** Returns true iff the token at the given position should be ignored by the spell checker. */
 protected boolean ignoreToken(AnalyzedTokenReadings[] tokens, int idx) throws IOException {
   List<String> words = new ArrayList<>();
   for (AnalyzedTokenReadings token : tokens) {
     words.add(token.getToken());
   }
   return ignoreWord(words, idx);
 }
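 // Illustrative call (a sketch, not part of the checker): given the token array of an analyzed
 // sentence, ignoreToken(tokens, 2) copies every token string into a list and lets
 // ignoreWord(words, 2) decide whether the word at index 2 should be skipped.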
  /**
   * @return false if a finite verb is found among {@code token1} and {@code token2}, none of the
   *     finite verbs matches {@code person} and {@code number}, and neither token is "und" or
   *     ","; true otherwise. If a finite verb is found, it is saved in {@code finiteVerb}.
   */
  private boolean verbDoesMatchPersonAndNumber(
      final AnalyzedTokenReadings token1,
      final AnalyzedTokenReadings token2,
      final String person,
      final String number) {
    if (token1.getToken().equals(",")
        || token1.getToken().equals("und")
        || token2.getToken().equals(",")
        || token2.getToken().equals("und")) {
      return true;
    }

    boolean foundFiniteVerb = false;

    if (isFiniteVerb(token1)) {
      foundFiniteVerb = true;
      finiteVerb = token1;
      if (token1.hasPartialPosTag(":" + person + ":" + number)) {
        return true;
      }
    }

    if (isFiniteVerb(token2)) {
      foundFiniteVerb = true;
      finiteVerb = token2;
      if (token2.hasPartialPosTag(":" + person + ":" + number)) {
        return true;
      }
    }

    return !foundFiniteVerb;
  }
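   /*
    * Illustrative sketch (assuming typical tagger output, e.g. VER:AUX:2:SIN:PRÄ for "bist";
    * not part of the rule): for the sentence "Du bist da", with tokens[2] = "bist" and
    * tokens[3] = "da":
    *
    *   verbDoesMatchPersonAndNumber(tokens[2], tokens[3], "2", "SIN")  // true, finiteVerb = "bist"
    *   verbDoesMatchPersonAndNumber(tokens[2], tokens[3], "3", "PLU")  // false: a finite verb was
    *                                                                   // found, but it matches
    *                                                                   // neither person nor number
    */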
 private RuleMatch ruleMatchWrongVerb(final AnalyzedTokenReadings token) {
   final String msg =
       "Möglicherweise fehlende grammatische Übereinstimmung zwischen Subjekt und Prädikat ("
           + token.getToken()
           + ") bezüglich Person oder Numerus (Einzahl, Mehrzahl - Beispiel: "
           + "'Max bist' statt 'Max ist').";
   return new RuleMatch(
       this, token.getStartPos(), token.getStartPos() + token.getToken().length(), msg);
 }
 @Nullable
 private String baseForThirdPersonSingularVerb(String word) throws IOException {
   List<AnalyzedTokenReadings> readings = tagger.tag(Collections.singletonList(word));
   for (AnalyzedTokenReadings reading : readings) {
     if (reading.hasPartialPosTag("VER:3:SIN:")) {
       return reading.getReadings().get(0).getLemma();
     }
   }
   return null;
 }
 private static String getPOS(final AnalyzedTokenReadings atr) {
   if (atr.isWhitespace()) {
     return "";
   }
   final StringBuilder sb = new StringBuilder();
   final int readNum = atr.getReadingsLength();
   for (int i = 0; i < readNum; i++) {
     sb.append(atr.getAnalyzedToken(i).getPOSTag());
     if (i != readNum - 1) {
       sb.append('+');
     }
   }
   return sb.toString();
 }
  @Override
  public RuleMatch[] match(final AnalyzedSentence text) {
     final List<RuleMatch> ruleMatches = new ArrayList<>();
    final AnalyzedTokenReadings[] tokens = text.getTokensWithoutWhitespace();

    if (tokens.length > 3) {
      final AnalyzedTokenReadings analyzedToken = tokens[1];
      final String token = analyzedToken.getToken();
      // avoid "..." etc. to be matched:
      boolean isWord = true;
      if (token.length() == 1) {
        final char c = token.charAt(0);
        if (!Character.isLetter(c)) {
          isWord = false;
        }
      }

      if (isWord
          && lastToken.equals(token)
          && !isException(token)
          && !isException(tokens[2].getToken())
          && !isException(tokens[3].getToken())) {
        final String shortMsg;
        if (isAdverb(analyzedToken)) {
          shortMsg = messages.getString("desc_repetition_beginning_adv");
        } else if (beforeLastToken.equals(token)) {
          shortMsg = messages.getString("desc_repetition_beginning_word");
        } else {
          shortMsg = "";
        }

        if (!shortMsg.equals("")) {
          final String msg =
              shortMsg + " " + messages.getString("desc_repetition_beginning_thesaurus");
          final int startPos = analyzedToken.getStartPos();
          final int endPos = startPos + token.length();
          final RuleMatch ruleMatch = new RuleMatch(this, startPos, endPos, msg, shortMsg);
          ruleMatches.add(ruleMatch);
        }
      }
      beforeLastToken = lastToken;
      lastToken = token;
    }

    // TODO should we ignore repetitions involving multiple paragraphs?
    // if (tokens[tokens.length - 1].isParaEnd()) beforeLastToken = "";

    return toRuleMatchArray(ruleMatches);
  }
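   // Illustrative behavior sketch (not part of the rule): if two consecutive sentences both start
   // with the adverb "Dann", the second call to match() sees lastToken.equals(token) and
   // isAdverb(analyzedToken), so the repeated sentence start is flagged. A non-adverb first word
   // is only flagged once beforeLastToken matches as well, i.e. after three sentences in a row
   // beginning with the same word.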
   /**
    * @return true if {@code tokenReadings} is a verb that matches {@code person} and {@code
    *     number} and matches no other person/number combination
    */
  private boolean hasUnambiguouslyPersonAndNumber(
      final AnalyzedTokenReadings tokenReadings, final String person, final String number) {
     if (tokenReadings.getToken().length() == 0
         || (Character.isUpperCase(tokenReadings.getToken().charAt(0))
             && tokenReadings.getStartPos() != 0)
         || !tokenReadings.hasPartialPosTag("VER")) {
       return false;
     }

     for (AnalyzedToken analyzedToken : tokenReadings) {
       final String postag = analyzedToken.getPOSTag();
       if (postag.contains("_END")) { // ignore SENT_END and PARA_END
         continue;
       }
       if (!postag.contains(":" + person + ":" + number)) {
         return false;
       }
     } // for each reading

    return true;
  }
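   /*
    * Illustrative sketch (assuming typical tagger output; not part of the rule): "bin" carries
    * only a VER:AUX:1:SIN reading, so hasUnambiguouslyPersonAndNumber(bin, "1", "SIN") is true.
    * "haben" also has 3rd-person-plural and infinitive readings, so
    * hasUnambiguouslyPersonAndNumber(haben, "1", "PLU") is false.
    */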
 @Override
 protected boolean isAdverb(final AnalyzedTokenReadings token) {
    return ADVERBS.contains(token.getToken());
 }
   /**
    * @param toUppercase true if the suggestions should be capitalized
    * @return a sorted list of forms of {@code verb} that match {@code expectedVerbPOS}
    *     (person:number)
    */
  private List<String> getVerbSuggestions(
      final AnalyzedTokenReadings verb, final String expectedVerbPOS, final boolean toUppercase) {
    // find the first verb reading
    AnalyzedToken verbToken = new AnalyzedToken("", "", "");
    for (AnalyzedToken token : verb.getReadings()) {
      if (token.getPOSTag().startsWith("VER:")) {
        verbToken = token;
        break;
      }
    }

    try {
      String[] synthesized =
          german.getSynthesizer().synthesize(verbToken, "VER.*:" + expectedVerbPOS + ".*", true);
       // remove duplicates
       Set<String> suggestionSet = new HashSet<>(Arrays.asList(synthesized));
       List<String> suggestions = new ArrayList<>(suggestionSet);
      if (toUppercase) {
        for (int i = 0; i < suggestions.size(); ++i) {
          suggestions.set(i, StringTools.uppercaseFirstChar(suggestions.get(i)));
        }
      }
      Collections.sort(suggestions);
      return suggestions;
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
  }
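   /*
    * Illustrative sketch (assuming the synthesizer can expand the lemma of "bist", i.e. "sein";
    * not part of the rule): getVerbSuggestions(bist, "3:SIN", false) synthesizes all forms whose
    * POS tag matches "VER.*:3:SIN.*" (e.g. "ist", "war", "sei") and returns them de-duplicated
    * and sorted, without capitalizing them because toUppercase is false.
    */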
 /** @return true if {@code token} is a finite verb and is not a participle, pronoun, or numeral */
 private boolean isFiniteVerb(final AnalyzedTokenReadings token) {
   if (token.getToken().length() == 0
       || (Character.isUpperCase(token.getToken().charAt(0)) && token.getStartPos() != 0)
       || !token.hasPartialPosTag("VER")
       || token.hasPartialPosTag("PA2")
       || token.hasPartialPosTag("PRO:")
       || token.hasPartialPosTag("ZAL")) {
     return false;
   }
   return (token.hasPartialPosTag(":1:")
       || token.hasPartialPosTag(":2:")
       || token.hasPartialPosTag(":3:"));
 }
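  /*
   * Illustrative sketch (not part of the rule): a lowercase token with a VER:3:SIN reading such
   * as "kommt" counts as a finite verb; tokens that also carry a participle (PA2), pronoun (PRO:)
   * or numeral (ZAL) reading are rejected, as is any capitalized token that does not start the
   * sentence.
   */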
 private RuleMatch ruleMatchWrongVerbSubject(
     final AnalyzedTokenReadings subject, final AnalyzedTokenReadings verb) {
   final String msg =
       "Möglicherweise fehlende grammatische Übereinstimmung zwischen Subjekt ("
           + subject.getToken()
           + ") und Prädikat ("
           + verb.getToken()
           + ") bezüglich Person oder Numerus (Einzahl, Mehrzahl - Beispiel: "
           + "'ich sind' statt 'ich bin').";
   if (subject.getStartPos() < verb.getStartPos()) {
     return new RuleMatch(
         this, subject.getStartPos(), verb.getStartPos() + verb.getToken().length(), msg);
   } else {
     return new RuleMatch(
         this, verb.getStartPos(), subject.getStartPos() + subject.getToken().length(), msg);
   }
 }
 @Override
 protected boolean isTagged(AnalyzedTokenReadings tokenReadings) {
   for (AnalyzedToken token : tokenReadings.getReadings()) {
     String posTag = token.getPOSTag();
     if (isGoodPosTag(posTag)) {
       return true;
     }
   }
   return false;
 }
  /**
   * @param toUppercase true if the suggestions should be capitalized
   * @return a list of pronouns that match the person and number of {@code verb}
   */
 private List<String> getPronounSuggestions(
     final AnalyzedTokenReadings verb, final boolean toUppercase) {
   List<String> result = new ArrayList<>();
   if (verb.hasPartialPosTag(":1:SIN")) {
     result.add("ich");
   }
   if (verb.hasPartialPosTag(":2:SIN")) {
     result.add("du");
   }
   if (verb.hasPartialPosTag(":3:SIN")) {
     result.add("er");
     result.add("sie");
     result.add("es");
   }
   if (verb.hasPartialPosTag(":1:PLU")) {
     result.add("wir");
   }
   if (verb.hasPartialPosTag(":2:PLU")) {
     result.add("ihr");
   }
   if (verb.hasPartialPosTag(":3:PLU") && !result.contains("sie")) { // do not add "sie" twice
     result.add("sie");
   }
   if (toUppercase) {
     for (int i = 0; i < result.size(); ++i) {
       result.set(i, StringTools.uppercaseFirstChar(result.get(i)));
     }
   }
   return result;
 }
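  /*
   * Illustrative sketch (assuming "sind" is tagged as both 1st and 3rd person plural; not part of
   * the rule): getPronounSuggestions(sind, false) returns ["wir", "sie"], and with
   * toUppercase == true the same call yields ["Wir", "Sie"].
   */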
 private Map<String, AnalyzedTokenReadings> getStringToTokenMap(
     Queue<AnalyzedTokenReadings> prevTokens,
     List<String> stringsToCheck,
     List<String> origStringsToCheck) {
   StringBuilder sb = new StringBuilder();
   Map<String, AnalyzedTokenReadings> stringToToken = new HashMap<>();
   int j = 0;
   for (AnalyzedTokenReadings atr : prevTokens) {
     sb.append(' ');
     sb.append(atr.getToken());
     if (j >= 1) {
       String stringToCheck = normalize(sb.toString());
       stringsToCheck.add(stringToCheck);
       origStringsToCheck.add(sb.toString().trim());
       if (!stringToToken.containsKey(stringToCheck)) {
         stringToToken.put(stringToCheck, atr);
       }
     }
     j++;
   }
   return stringToToken;
 }
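  /*
   * Illustrative sketch (not part of the rule): with prevTokens holding the tokens of
   * "E Mail Adresse", the builder grows " E", " E Mail", " E Mail Adresse". From the second token
   * on, each normalized prefix is added to stringsToCheck, its trimmed original spelling to
   * origStringsToCheck, and the prefix is mapped to the token that ends it (first occurrence
   * wins), so the caller can later recover the end position of a match.
   */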
  @Override
  public RuleMatch[] match(AnalyzedSentence sentence) {
    List<RuleMatch> ruleMatches = new ArrayList<>();
    AnalyzedTokenReadings[] tokens = sentence.getTokensWithoutWhitespace();

    RuleMatch prevRuleMatch = null;
    Queue<AnalyzedTokenReadings> prevTokens = new ArrayBlockingQueue<>(MAX_TERMS);
    for (int i = 0; i < tokens.length + MAX_TERMS - 1; i++) {
      AnalyzedTokenReadings token;
      // we need to extend the token list so we find matches at the end of the original list:
      if (i >= tokens.length) {
        token =
            new AnalyzedTokenReadings(
                new AnalyzedToken("", "", null), prevTokens.peek().getStartPos());
      } else {
        token = tokens[i];
      }
      if (i == 0) {
        addToQueue(token, prevTokens);
        continue;
      }
      if (token.isImmunized()) {
        continue;
      }

      AnalyzedTokenReadings firstMatchToken = prevTokens.peek();
      List<String> stringsToCheck = new ArrayList<>();
      List<String> origStringsToCheck = new ArrayList<>(); // original upper/lowercase spelling
      Map<String, AnalyzedTokenReadings> stringToToken =
          getStringToTokenMap(prevTokens, stringsToCheck, origStringsToCheck);
      // iterate backwards over all potentially incorrect strings to make
      // sure we match longer strings first:
      for (int k = stringsToCheck.size() - 1; k >= 0; k--) {
        String stringToCheck = stringsToCheck.get(k);
        String origStringToCheck = origStringsToCheck.get(k);
        if (getCompoundRuleData().getIncorrectCompounds().contains(stringToCheck)) {
          AnalyzedTokenReadings atr = stringToToken.get(stringToCheck);
          String msg = null;
          List<String> replacement = new ArrayList<>();
          if (!getCompoundRuleData().getNoDashSuggestion().contains(stringToCheck)) {
            replacement.add(origStringToCheck.replace(' ', '-'));
            msg = withHyphenMessage;
          }
          if (isNotAllUppercase(origStringToCheck)
              && !getCompoundRuleData().getOnlyDashSuggestion().contains(stringToCheck)) {
            replacement.add(mergeCompound(origStringToCheck));
            msg = withoutHyphenMessage;
          }
          String[] parts = stringToCheck.split(" ");
          if (parts.length > 0 && parts[0].length() == 1) {
            replacement.clear();
            replacement.add(origStringToCheck.replace(' ', '-'));
            msg = withHyphenMessage;
          } else if (replacement.isEmpty() || replacement.size() == 2) { // isEmpty shouldn't happen
            msg = withOrWithoutHyphenMessage;
          }
          RuleMatch ruleMatch =
              new RuleMatch(this, firstMatchToken.getStartPos(), atr.getEndPos(), msg, shortDesc);
          ruleMatch.setSuggestedReplacements(replacement);
          // avoid duplicate matches:
          if (prevRuleMatch != null && prevRuleMatch.getFromPos() == ruleMatch.getFromPos()) {
            prevRuleMatch = ruleMatch;
            break;
          }
          prevRuleMatch = ruleMatch;
          ruleMatches.add(ruleMatch);
          break;
        }
      }
      addToQueue(token, prevTokens);
    }
    return toRuleMatchArray(ruleMatches);
  }
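   // Illustrative behavior sketch (assuming "E Mail" is listed in the incorrect compounds; not
   // part of the rule): for "eine E Mail schreiben", the window over prevTokens eventually yields
   // the candidate "E Mail". Because its first part has length 1, only the hyphenated suggestion
   // "E-Mail" is offered. The inner loop walks stringsToCheck from the longest candidate down and
   // breaks on the first hit, so longer compounds win over shorter ones they contain.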
  @Override
  public RuleMatch[] match(final AnalyzedSentence sentence) {
    final List<RuleMatch> ruleMatches = new ArrayList<>();
    final AnalyzedTokenReadings[] tokens = sentence.getTokensWithoutWhitespace();

    RuleMatch prevRuleMatch = null;
    final Queue<AnalyzedTokenReadings> prevTokens = new ArrayBlockingQueue<>(MAX_TERMS);
    for (int i = 0; i < tokens.length + MAX_TERMS - 1; i++) {
      final AnalyzedTokenReadings token;
      // we need to extend the token list so we find matches at the end of the original list:
      if (i >= tokens.length) {
        token =
            new AnalyzedTokenReadings(
                new AnalyzedToken("", "", null), prevTokens.peek().getStartPos());
      } else {
        token = tokens[i];
      }
      if (i == 0) {
        addToQueue(token, prevTokens);
        continue;
      }

      final StringBuilder sb = new StringBuilder();
      int j = 0;
      AnalyzedTokenReadings firstMatchToken = null;
      final List<String> stringsToCheck = new ArrayList<>();
      final List<String> origStringsToCheck =
          new ArrayList<>(); // original upper/lowercase spelling
      final Map<String, AnalyzedTokenReadings> stringToToken = new HashMap<>();
      for (AnalyzedTokenReadings atr : prevTokens) {
        if (j == 0) {
          firstMatchToken = atr;
        }
        sb.append(' ');
        sb.append(atr.getToken());
        if (j >= 1) {
          final String stringToCheck = normalize(sb.toString());
          stringsToCheck.add(stringToCheck);
          origStringsToCheck.add(sb.toString().trim());
          if (!stringToToken.containsKey(stringToCheck)) {
            stringToToken.put(stringToCheck, atr);
          }
        }
        j++;
      }
      // iterate backwards over all potentially incorrect strings to make
      // sure we match longer strings first:
      for (int k = stringsToCheck.size() - 1; k >= 0; k--) {
        final String stringToCheck = stringsToCheck.get(k);
        final String origStringToCheck = origStringsToCheck.get(k);
        if (incorrectCompounds.contains(stringToCheck)) {
          final AnalyzedTokenReadings atr = stringToToken.get(stringToCheck);
          String msg = null;
          final List<String> replacement = new ArrayList<>();
          if (!noDashSuggestion.contains(stringToCheck)) {
            replacement.add(origStringToCheck.replace(' ', '-'));
            msg = withHyphenMessage;
          }
          if (isNotAllUppercase(origStringToCheck) && !onlyDashSuggestion.contains(stringToCheck)) {
            replacement.add(mergeCompound(origStringToCheck));
            msg = withoutHyphenMessage;
          }
          final String[] parts = stringToCheck.split(" ");
          if (parts.length > 0 && parts[0].length() == 1) {
            replacement.clear();
            replacement.add(origStringToCheck.replace(' ', '-'));
            msg = withHyphenMessage;
          } else if (replacement.isEmpty() || replacement.size() == 2) { // isEmpty shouldn't happen
            msg = withOrWithoutHyphenMessage;
          }
          final RuleMatch ruleMatch =
              new RuleMatch(
                  this,
                  firstMatchToken.getStartPos(),
                  atr.getStartPos() + atr.getToken().length(),
                  msg,
                  shortDesc);
          // avoid duplicate matches:
          if (prevRuleMatch != null && prevRuleMatch.getFromPos() == ruleMatch.getFromPos()) {
            prevRuleMatch = ruleMatch;
            break;
          }
          prevRuleMatch = ruleMatch;
          ruleMatch.setSuggestedReplacements(replacement);
          ruleMatches.add(ruleMatch);
          break;
        }
      }
      addToQueue(token, prevTokens);
    }
    return toRuleMatchArray(ruleMatches);
  }
  private RuleMatch ruleMatchWrongVerbSubject(
      final AnalyzedTokenReadings subject,
      final AnalyzedTokenReadings verb,
      final String expectedVerbPOS) {
    final String msg =
        "Möglicherweise fehlende grammatische Übereinstimmung zwischen Subjekt ("
            + subject.getToken()
            + ") und Prädikat ("
            + verb.getToken()
            + ") bezüglich Person oder Numerus (Einzahl, Mehrzahl - Beispiel: "
            + "'ich sind' statt 'ich bin').";

    List<String> suggestions = new ArrayList<>();
    List<String> verbSuggestions = new ArrayList<>();
    List<String> pronounSuggestions = new ArrayList<>();

    RuleMatch ruleMatch;
    if (subject.getStartPos() < verb.getStartPos()) {
      ruleMatch =
          new RuleMatch(
              this, subject.getStartPos(), verb.getStartPos() + verb.getToken().length(), msg);
      verbSuggestions.addAll(getVerbSuggestions(verb, expectedVerbPOS, false));
      for (String verbSuggestion : verbSuggestions) {
        suggestions.add(subject.getToken() + " " + verbSuggestion);
      }
      pronounSuggestions.addAll(
          getPronounSuggestions(verb, Character.isUpperCase(subject.getToken().charAt(0))));
      for (String pronounSuggestion : pronounSuggestions) {
        suggestions.add(pronounSuggestion + " " + verb.getToken());
      }
      ruleMatch.setSuggestedReplacements(suggestions);
    } else {
      ruleMatch =
          new RuleMatch(
              this, verb.getStartPos(), subject.getStartPos() + subject.getToken().length(), msg);
      verbSuggestions.addAll(
          getVerbSuggestions(
              verb, expectedVerbPOS, Character.isUpperCase(verb.getToken().charAt(0))));
      for (String verbSuggestion : verbSuggestions) {
        suggestions.add(verbSuggestion + " " + subject.getToken());
      }
      pronounSuggestions.addAll(getPronounSuggestions(verb, false));
      for (String pronounSuggestion : pronounSuggestions) {
        suggestions.add(verb.getToken() + " " + pronounSuggestion);
      }
      ruleMatch.setSuggestedReplacements(suggestions);
    }

    return ruleMatch;
  }
 private boolean isQuotationMark(final AnalyzedTokenReadings token) {
   return QUOTATION_MARKS.contains(token.getToken());
 }