Exemplo n.º 1
0
 @Override
 public RuleMatch[] match(AnalyzedSentence sentence) {
   final List<RuleMatch> ruleMatches = new ArrayList<>();
   final AnalyzedTokenReadings[] tokens = sentence.getTokens();
   boolean foundSpiegelt = false;
   boolean foundWieder = false;
   boolean foundWider = false;
   for (int i = 0; i < tokens.length; i++) {
     final String token = tokens[i].getToken();
     if (!token.trim().equals("")) {
       if (token.equalsIgnoreCase("spiegelt")
           || token.equalsIgnoreCase("spiegeln")
           || token.equalsIgnoreCase("spiegelte")
           || token.equalsIgnoreCase("spiegelten")
           || token.equalsIgnoreCase("spiegelst")) {
         foundSpiegelt = true;
       } else if (token.equalsIgnoreCase("wieder") && foundSpiegelt) {
         foundWieder = true;
       } else if (token.equalsIgnoreCase("wider") && foundSpiegelt) {
         foundWider = true;
       }
       if (foundSpiegelt
           && foundWieder
           && !foundWider
           && !(tokens.length > i + 2
               && (tokens[i + 1].getToken().equals("wider")
                   || tokens[i + 2].getToken().equals("wider")))) {
         final String shortMsg = "'wider' in 'widerspiegeln' wird mit 'i' geschrieben";
         final String msg =
             "'wider' in 'widerspiegeln' wird mit 'i' statt mit 'ie' "
                 + "geschrieben, z.B. 'Das spiegelt die Situation gut wider.'";
         final int pos = tokens[i].getStartPos();
         final RuleMatch ruleMatch = new RuleMatch(this, pos, pos + token.length(), msg, shortMsg);
         ruleMatch.setSuggestedReplacement("wider");
         ruleMatches.add(ruleMatch);
         foundSpiegelt = false;
         foundWieder = false;
         foundWider = false;
       }
     }
   }
   return toRuleMatchArray(ruleMatches);
 }
Exemplo n.º 2
0
 @Override
 public RuleMatch[] match(final AnalyzedSentence sentence) {
   final List<RuleMatch> ruleMatches = new ArrayList<>();
   final AnalyzedTokenReadings[] tokens = sentence.getTokensWithoutWhitespace();
   String prevToken = null;
   for (int i = 0; i < tokens.length; i++) {
     final String token = tokens[i].getToken();
     if (tokens[i].isWhitespace()) {
       // ignore
       continue;
     }
     if (prevToken != null
         && !prevToken.equals("-")
         && !prevToken.contains("--")
         && !prevToken.contains(
             "–-") // first char is some special kind of dash, found in Wikipedia
         && prevToken.endsWith("-")) {
       final char firstChar = token.charAt(0);
       if (Character.isUpperCase(firstChar)) {
         final String msg =
             "Möglicherweise fehlt ein 'und' oder ein Komma, oder es wurde nach dem Wort "
                 + "ein überflüssiges Leerzeichen eingefügt. Eventuell haben Sie auch versehentlich einen Bindestrich statt eines Punktes eingefügt.";
         final RuleMatch ruleMatch =
             new RuleMatch(
                 this,
                 tokens[i - 1].getStartPos(),
                 tokens[i - 1].getStartPos() + prevToken.length() + 1,
                 msg);
         ruleMatch.setSuggestedReplacement(tokens[i - 1].getToken());
         ruleMatches.add(ruleMatch);
       }
     }
     prevToken = token;
   }
   return toRuleMatchArray(ruleMatches);
 }
  @Override
  public final RuleMatch[] match(final AnalyzedSentence text) {
    final List<RuleMatch> ruleMatches = new ArrayList<RuleMatch>();
    final AnalyzedTokenReadings[] tokens = text.getTokensWithoutWhitespace();
    if (tokens.length < 2) {
      return toRuleMatchArray(ruleMatches);
    }
    int matchTokenPos = 1; // 0 = SENT_START
    final String firstToken = tokens[matchTokenPos].getToken();
    String secondToken = null;
    String thirdToken = null;
    // ignore quote characters:
    if (tokens.length >= 3
        && ("'".equals(firstToken) || "\"".equals(firstToken) || "„".equals(firstToken))) {
      matchTokenPos = 2;
      secondToken = tokens[matchTokenPos].getToken();
    }
    final String firstDutchToken = dutchSpecialCase(firstToken, secondToken, tokens);
    if (firstDutchToken != null) {
      thirdToken = firstDutchToken;
      matchTokenPos = 3;
    }

    String checkToken = firstToken;
    if (thirdToken != null) {
      checkToken = thirdToken;
    } else if (secondToken != null) {
      checkToken = secondToken;
    }

    String lastToken = tokens[tokens.length - 1].getToken();
    if (lastToken.matches("[ \"'„»«“]") && tokens.length >= 2) {
      // ignore trailing whitespace or quote
      lastToken = tokens[tokens.length - 2].getToken();
    }

    boolean preventError = false;
    // TODO: why do only *these* languages have that special case?
    final String langCode = language.getShortName();
    final boolean languageHasSpecialCases =
        langCode.equals("ru")
            || langCode.equals("pl")
            || langCode.equals("uk")
            || langCode.equals("be")
            || langCode.equals(Locale.ENGLISH.getLanguage())
            || langCode.equals(Locale.ITALIAN.getLanguage())
            || langCode.equals(Locale.GERMAN.getLanguage());
    if (languageHasSpecialCases) {
      // fix for lists; note - this will not always work for the last point in OOo,
      // as OOo might serve paragraphs in any order.
      if (";".equals(lastParagraphString)
          || ";".equals(lastToken)
          || ",".equals(lastParagraphString)
          || ",".equals(lastToken)) {
        preventError = true;
      }
      // fix for words in table (not sentences); note - this will not always work for the last point
      // in OOo,
      // as OOo might serve paragraphs in any order.
      if (!lastToken.matches("[.?!…]")) {
        preventError = true;
      }
    }

    lastParagraphString = lastToken;

    if (checkToken.length() > 0) {
      final char firstChar = checkToken.charAt(0);
      if (!preventError && Character.isLowerCase(firstChar)) {
        final RuleMatch ruleMatch =
            new RuleMatch(
                this,
                tokens[matchTokenPos].getStartPos(),
                tokens[matchTokenPos].getStartPos() + tokens[matchTokenPos].getToken().length(),
                messages.getString("incorrect_case"));
        ruleMatch.setSuggestedReplacement(StringTools.uppercaseFirstChar(checkToken));
        ruleMatches.add(ruleMatch);
      }
    }
    return toRuleMatchArray(ruleMatches);
  }
  @Override
  public final RuleMatch[] match(final AnalyzedSentence sentence) {
    final List<RuleMatch> ruleMatches = new ArrayList<>();
    final AnalyzedTokenReadings[] tokens = sentence.getTokensWithoutWhitespace();
    if (tokens.length < 2) {
      return toRuleMatchArray(ruleMatches);
    }
    int matchTokenPos = 1; // 0 = SENT_START
    final String firstToken = tokens[matchTokenPos].getToken();
    String secondToken = null;
    String thirdToken = null;
    // ignore quote characters:
    if (tokens.length >= 3
        && ("'".equals(firstToken) || "\"".equals(firstToken) || "„".equals(firstToken))) {
      matchTokenPos = 2;
      secondToken = tokens[matchTokenPos].getToken();
    }
    final String firstDutchToken = dutchSpecialCase(firstToken, secondToken, tokens);
    if (firstDutchToken != null) {
      thirdToken = firstDutchToken;
      matchTokenPos = 3;
    }

    String checkToken = firstToken;
    if (thirdToken != null) {
      checkToken = thirdToken;
    } else if (secondToken != null) {
      checkToken = secondToken;
    }

    String lastToken = tokens[tokens.length - 1].getToken();
    if (tokens.length >= 2 && WHITESPACE_OR_QUOTE.matcher(lastToken).matches()) {
      // ignore trailing whitespace or quote
      lastToken = tokens[tokens.length - 2].getToken();
    }

    boolean preventError = false;
    if (lastParagraphString.equals(",") || lastParagraphString.equals(";")) {
      preventError = true;
    }
    if (!SENTENCE_END1.matcher(lastParagraphString).matches()
        && !SENTENCE_END2.matcher(lastToken).matches()) {
      preventError = true;
    }

    lastParagraphString = lastToken;

    // allows enumeration with lowercase letters: a), iv., etc.
    if (matchTokenPos + 1 < tokens.length
        && NUMERALS_EN.matcher(tokens[matchTokenPos].getToken()).matches()
        && (tokens[matchTokenPos + 1].getToken().equals(".")
            || tokens[matchTokenPos + 1].getToken().equals(")"))) {
      preventError = true;
    }

    if (isUrl(checkToken)) {
      preventError = true;
    }

    if (checkToken.length() > 0) {
      final char firstChar = checkToken.charAt(0);
      if (!preventError && Character.isLowerCase(firstChar)) {
        final RuleMatch ruleMatch =
            new RuleMatch(
                this,
                tokens[matchTokenPos].getStartPos(),
                tokens[matchTokenPos].getStartPos() + tokens[matchTokenPos].getToken().length(),
                messages.getString("incorrect_case"));
        ruleMatch.setSuggestedReplacement(StringTools.uppercaseFirstChar(checkToken));
        ruleMatches.add(ruleMatch);
      }
    }
    return toRuleMatchArray(ruleMatches);
  }