Exemplo n.º 1
   * @return false if neither the verb @param token1 (if any) nor @param token2 match @param person
   *     and @param number, and none of them is "und" or "," if a finite verb is found, it is saved
   *     in finiteVerb
  private boolean verbDoesMatchPersonAndNumber(
      final AnalyzedTokenReadings token1,
      final AnalyzedTokenReadings token2,
      final String person,
      final String number) {
    if (token1.getToken().equals(",")
        || token1.getToken().equals("und")
        || token2.getToken().equals(",")
        || token2.getToken().equals("und")) {
      return true;

    boolean foundFiniteVerb = false;

    if (isFiniteVerb(token1)) {
      foundFiniteVerb = true;
      finiteVerb = token1;
      if (token1.hasPartialPosTag(":" + person + ":" + number)) {
        return true;

    if (isFiniteVerb(token2)) {
      foundFiniteVerb = true;
      finiteVerb = token2;
      if (token2.hasPartialPosTag(":" + person + ":" + number)) {
        return true;

    return !foundFiniteVerb;
Exemplo n.º 2
 private RuleMatch ruleMatchWrongVerb(final AnalyzedTokenReadings token) {
   final String msg =
       "Möglicherweise fehlende grammatische Übereinstimmung zwischen Subjekt und Prädikat ("
           + token.getToken()
           + ") bezüglich Person oder Numerus (Einzahl, Mehrzahl - Beispiel: "
           + "'Max bist' statt 'Max ist').";
   return new RuleMatch(
       this, token.getStartPos(), token.getStartPos() + token.getToken().length(), msg);
Exemplo n.º 3
 /** @return true if @param token is a finite verb, and it is no participle, pronoun or number */
 private boolean isFiniteVerb(final AnalyzedTokenReadings token) {
   if (token.getToken().length() == 0
       || (Character.isUpperCase(token.getToken().charAt(0)) && token.getStartPos() != 0)
       || !token.hasPartialPosTag("VER")
       || token.hasPartialPosTag("PA2")
       || token.hasPartialPosTag("PRO:")
       || token.hasPartialPosTag("ZAL")) {
     return false;
   return (token.hasPartialPosTag(":1:")
       || token.hasPartialPosTag(":2:")
       || token.hasPartialPosTag(":3:"));
Exemplo n.º 4
  private RuleMatch ruleMatchWrongVerbSubject(
      final AnalyzedTokenReadings subject,
      final AnalyzedTokenReadings verb,
      final String expectedVerbPOS) {
    final String msg =
        "Möglicherweise fehlende grammatische Übereinstimmung zwischen Subjekt ("
            + subject.getToken()
            + ") und Prädikat ("
            + verb.getToken()
            + ") bezüglich Person oder Numerus (Einzahl, Mehrzahl - Beispiel: "
            + "'ich sind' statt 'ich bin').";

    List<String> suggestions = new ArrayList<>();
    List<String> verbSuggestions = new ArrayList<>();
    List<String> pronounSuggestions = new ArrayList<>();

    RuleMatch ruleMatch;
    if (subject.getStartPos() < verb.getStartPos()) {
      ruleMatch =
          new RuleMatch(
              this, subject.getStartPos(), verb.getStartPos() + verb.getToken().length(), msg);
      verbSuggestions.addAll(getVerbSuggestions(verb, expectedVerbPOS, false));
      for (String verbSuggestion : verbSuggestions) {
        suggestions.add(subject.getToken() + " " + verbSuggestion);
          getPronounSuggestions(verb, Character.isUpperCase(subject.getToken().charAt(0))));
      for (String pronounSuggestion : pronounSuggestions) {
        suggestions.add(pronounSuggestion + " " + verb.getToken());
    } else {
      ruleMatch =
          new RuleMatch(
              this, verb.getStartPos(), subject.getStartPos() + subject.getToken().length(), msg);
              verb, expectedVerbPOS, Character.isUpperCase(verb.getToken().charAt(0))));
      for (String verbSuggestion : verbSuggestions) {
        suggestions.add(verbSuggestion + " " + subject.getToken());
      pronounSuggestions.addAll(getPronounSuggestions(verb, false));
      for (String pronounSuggestion : pronounSuggestions) {
        suggestions.add(verb.getToken() + " " + pronounSuggestion);

    return ruleMatch;
 protected boolean isAdverb(final AnalyzedTokenReadings token) {
   if (ADVERBS.contains(token.getToken())) {
     return true;
   return false;
 /** Returns true iff the token at the given position should be ignored by the spell checker. */
 protected boolean ignoreToken(AnalyzedTokenReadings[] tokens, int idx) throws IOException {
   List<String> words = new ArrayList<>();
   for (AnalyzedTokenReadings token : tokens) {
   return ignoreWord(words, idx);
Exemplo n.º 7
   * @return true if the verb @param token (if it is a verb) matches @param person and @param
   *     number, and matches no other person/number
  private boolean hasUnambiguouslyPersonAndNumber(
      final AnalyzedTokenReadings tokenReadings, final String person, final String number) {
    if (tokenReadings.getToken().length() == 0
        || (Character.isUpperCase(tokenReadings.getToken().charAt(0))
            && !(tokenReadings.getStartPos() == 0))
        || !tokenReadings.hasPartialPosTag("VER")) return false;

    for (AnalyzedToken analyzedToken : tokenReadings) {
      final String postag = analyzedToken.getPOSTag();
      if (postag.contains("_END")) // ignore SENT_END and PARA_END
      if (!postag.contains(":" + person + ":" + number)) return false;
    } // for each reading

    return true;
Exemplo n.º 8
 private RuleMatch ruleMatchWrongVerbSubject(
     final AnalyzedTokenReadings subject, final AnalyzedTokenReadings verb) {
   final String msg =
       "Möglicherweise fehlende grammatische Übereinstimmung zwischen Subjekt ("
           + subject.getToken()
           + ") und Prädikat ("
           + verb.getToken()
           + ") bezüglich Person oder Numerus (Einzahl, Mehrzahl - Beispiel: "
           + "'ich sind' statt 'ich bin').";
   if (subject.getStartPos() < verb.getStartPos()) {
     return new RuleMatch(
         this, subject.getStartPos(), verb.getStartPos() + verb.getToken().length(), msg);
   } else {
     return new RuleMatch(
         this, verb.getStartPos(), subject.getStartPos() + subject.getToken().length(), msg);
  public RuleMatch[] match(final AnalyzedSentence text) {
    final List<RuleMatch> ruleMatches = new ArrayList<RuleMatch>();
    final AnalyzedTokenReadings[] tokens = text.getTokensWithoutWhitespace();

    if (tokens.length > 3) {
      final AnalyzedTokenReadings analyzedToken = tokens[1];
      final String token = analyzedToken.getToken();
      // avoid "..." etc. to be matched:
      boolean isWord = true;
      if (token.length() == 1) {
        final char c = token.charAt(0);
        if (!Character.isLetter(c)) {
          isWord = false;

      if (isWord
          && lastToken.equals(token)
          && !isException(token)
          && !isException(tokens[2].getToken())
          && !isException(tokens[3].getToken())) {
        final String shortMsg;
        if (isAdverb(analyzedToken)) {
          shortMsg = messages.getString("desc_repetition_beginning_adv");
        } else if (beforeLastToken.equals(token)) {
          shortMsg = messages.getString("desc_repetition_beginning_word");
        } else {
          shortMsg = "";

        if (!shortMsg.equals("")) {
          final String msg =
              shortMsg + " " + messages.getString("desc_repetition_beginning_thesaurus");
          final int startPos = analyzedToken.getStartPos();
          final int endPos = startPos + token.length();
          final RuleMatch ruleMatch = new RuleMatch(this, startPos, endPos, msg, shortMsg);
      beforeLastToken = lastToken;
      lastToken = token;

    // TODO should we ignore repetitions involving multiple paragraphs?
    // if (tokens[tokens.length - 1].isParaEnd()) beforeLastToken = "";

    return toRuleMatchArray(ruleMatches);
Exemplo n.º 10
 /**
  * Builds a map from normalized candidate strings to the token at which each candidate was
  * first seen while walking {@code prevTokens}.
  *
  * <p>NOTE(review): this excerpt is incomplete. As shown, {@code j} is never advanced, only
  * spaces are appended to the buffer, {@code stringsToCheck} and {@code origStringsToCheck} are
  * never written, and the closing braces are missing. Restore the missing statements from the
  * original file before relying on this method.
  *
  * @param prevTokens the tokens to walk, in queue order
  * @param stringsToCheck presumably an output list of normalized candidates - TODO confirm
  * @param origStringsToCheck presumably an output list of candidates in original spelling -
  *     TODO confirm
  * @return the map from normalized candidate string to token
  */
 private Map<String, AnalyzedTokenReadings> getStringToTokenMap(
     Queue<AnalyzedTokenReadings> prevTokens,
     List<String> stringsToCheck,
     List<String> origStringsToCheck) {
   StringBuilder sb = new StringBuilder();
   Map<String, AnalyzedTokenReadings> stringToToken = new HashMap<>();
   int j = 0;
   for (AnalyzedTokenReadings atr : prevTokens) {
     sb.append(' ');
     // Candidates are only formed from the second token onwards.
     if (j >= 1) {
       String stringToCheck = normalize(sb.toString());
       // Keep the first token seen for a given normalized string.
       if (!stringToToken.containsKey(stringToCheck)) {
         stringToToken.put(stringToCheck, atr);
   return stringToToken;
  /**
   * Looks for token sequences listed in {@code incorrectCompounds} and reports them with a
   * hyphenated and/or joined replacement.
   *
   * <p>The method walks the sentence's non-whitespace tokens while keeping previous tokens in a
   * bounded queue of capacity {@code MAX_TERMS}. For each position it builds normalized
   * candidate strings from the queued tokens and checks the candidates from last to first, so
   * that longer strings are matched first.
   *
   * <p>NOTE(review): this excerpt is incomplete. Closing braces are missing throughout, the
   * candidate lists are never filled, {@code j} is never advanced, the {@code RuleMatch}
   * constructor call is truncated, and no match is ever added to {@code ruleMatches}. Restore
   * the missing statements from the original file before relying on this method.
   *
   * @param sentence the analyzed sentence
   * @return the matches found, converted with {@code toRuleMatchArray}
   */
  public RuleMatch[] match(final AnalyzedSentence sentence) {
    final List<RuleMatch> ruleMatches = new ArrayList<>();
    final AnalyzedTokenReadings[] tokens = sentence.getTokensWithoutWhitespace();

    RuleMatch prevRuleMatch = null;
    final Queue<AnalyzedTokenReadings> prevTokens = new ArrayBlockingQueue<>(MAX_TERMS);
    // Runs MAX_TERMS - 1 steps past the last real token; those steps use empty padding tokens.
    for (int i = 0; i < tokens.length + MAX_TERMS - 1; i++) {
      final AnalyzedTokenReadings token;
      // we need to extend the token list so we find matches at the end of the original list:
      if (i >= tokens.length) {
        token =
            new AnalyzedTokenReadings(
                new AnalyzedToken("", "", null), prevTokens.peek().getStartPos());
      } else {
        token = tokens[i];
      if (i == 0) {
        addToQueue(token, prevTokens);

      final StringBuilder sb = new StringBuilder();
      int j = 0;
      AnalyzedTokenReadings firstMatchToken = null;
      final List<String> stringsToCheck = new ArrayList<>();
      final List<String> origStringsToCheck =
          new ArrayList<>(); // original upper/lowercase spelling
      final Map<String, AnalyzedTokenReadings> stringToToken = new HashMap<>();
      for (AnalyzedTokenReadings atr : prevTokens) {
        // The first queued token is remembered; presumably it marks where a match starts.
        if (j == 0) {
          firstMatchToken = atr;
        sb.append(' ');
        if (j >= 1) {
          final String stringToCheck = normalize(sb.toString());
          if (!stringToToken.containsKey(stringToCheck)) {
            stringToToken.put(stringToCheck, atr);
      // iterate backwards over all potentially incorrect strings to make
      // sure we match longer strings first:
      for (int k = stringsToCheck.size() - 1; k >= 0; k--) {
        final String stringToCheck = stringsToCheck.get(k);
        final String origStringToCheck = origStringsToCheck.get(k);
        if (incorrectCompounds.contains(stringToCheck)) {
          final AnalyzedTokenReadings atr = stringToToken.get(stringToCheck);
          String msg = null;
          final List<String> replacement = new ArrayList<>();
          // Offer the hyphenated spelling unless this entry is listed in noDashSuggestion.
          if (!noDashSuggestion.contains(stringToCheck)) {
            replacement.add(origStringToCheck.replace(' ', '-'));
            msg = withHyphenMessage;
          if (isNotAllUppercase(origStringToCheck) && !onlyDashSuggestion.contains(stringToCheck)) {
            msg = withoutHyphenMessage;
          final String[] parts = stringToCheck.split(" ");
          // A one-character first part gets the hyphenated spelling and message.
          if (parts.length > 0 && parts[0].length() == 1) {
            replacement.add(origStringToCheck.replace(' ', '-'));
            msg = withHyphenMessage;
          } else if (replacement.isEmpty() || replacement.size() == 2) { // isEmpty shouldn't happen
            msg = withOrWithoutHyphenMessage;
          // NOTE(review): the constructor call below is truncated in this excerpt.
          final RuleMatch ruleMatch =
              new RuleMatch(
                  atr.getStartPos() + atr.getToken().length(),
          // avoid duplicate matches:
          if (prevRuleMatch != null && prevRuleMatch.getFromPos() == ruleMatch.getFromPos()) {
            prevRuleMatch = ruleMatch;
          prevRuleMatch = ruleMatch;
      addToQueue(token, prevTokens);
    return toRuleMatchArray(ruleMatches);
Exemplo n.º 12
 /**
  * Tells whether a token's text is listed in {@code QUOTATION_MARKS}.
  *
  * @param token the token to look up
  * @return {@code true} if {@code QUOTATION_MARKS} contains the token's text
  */
 private boolean isQuotationMark(final AnalyzedTokenReadings token) {
   return QUOTATION_MARKS.contains(token.getToken());