  * @param args a map with values for {@code year}, {@code month}, {@code day} (day of month),
  *     {@code weekDay}
 public RuleMatch acceptRuleMatch(
     RuleMatch match, Map<String, String> args, AnalyzedTokenReadings[] patternTokens) {
   // Weekday number derived from the required "weekDay" argument.
   int dayOfWeekFromString = getDayOfWeek(getRequired("weekDay", args));
   // Calendar obtained from the same argument map; its weekday is the reference value.
   Calendar dateFromDate = getDate(args);
   int dayOfWeekFromDate;
   try {
     dayOfWeekFromDate = dateFromDate.get(Calendar.DAY_OF_WEEK);
   } catch (IllegalArgumentException ignore) {
     // happens with 'dates' like '32.8.2014' - those should be caught by a different rule
     return null;
   // NOTE(review): the closing brace of the try/catch is not present in this excerpt.
   if (dayOfWeekFromString != dayOfWeekFromDate) {
     // The two weekdays disagree: the placeholder "realDay" (preceded by a backslash) in the
     // original message is replaced with the weekday name computed from the date.
     String realDayName = getDayOfWeek(dateFromDate);
     String message = match.getMessage().replace("\\realDay", realDayName);
     RuleMatch newMatch =
         new RuleMatch(
     // NOTE(review): the constructor arguments are missing from this excerpt.
     return newMatch;
   } else {
     // Weekday and date agree, so null is returned instead of a match.
     return null;
 /**
  * Searches Twitter once per command-line argument, prints every tweet accepted by
  * {@code isEligible}, then runs the language checker over the strings in {@code twts} and
  * prints each reported match together with its suggested replacements.
  *
  * <p>NOTE(review): in this excerpt nothing is added to {@code twts} and {@code counter} is never
  * incremented; the statements doing so (and all closing braces) appear to be missing.
  *
  * @param args search terms; each one becomes its own {@code Query}
  * @throws TwitterException declared by this method; presumably raised by the Twitter calls
  * @throws IOException declared by this method; presumably raised by {@code langTool.check}
  */
 public static void main(String[] args) throws TwitterException, IOException {
   Twitter twitter = TwitterFactory.getSingleton();
   JLanguageTool langTool = new JLanguageTool(new AmericanEnglish());
   List<String> twts = new ArrayList<String>();
   for (String arg : args) {
     Query query = new Query(arg);
     QueryResult result;
     int counter = 0;
     do {
       result = twitter.search(query);
       List<Status> tweets = result.getTweets();
       for (Status tweet : tweets) {
         if (isEligible(tweet)) {
           System.out.println("@" + tweet.getUser().getScreenName() + " - " + tweet.getText());
     // Keep paging while the result offers a follow-up query and the counter is below 5.
     } while ((query = result.nextQuery()) != null && counter < 5);
   for (String str : twts) {
     List<RuleMatch> matches = langTool.check(str);
     for (RuleMatch match : matches) {
           // NOTE(review): the call that receives this concatenated string (presumably a
           // System.out.println) is missing from this excerpt.
           "Potential error at line "
               + match.getLine()
               + ", column "
               + match.getColumn()
               + ": "
               + match.getMessage());
       System.out.println("Suggested correction: " + match.getSuggestedReplacements());
 /**
  * Asserts that the rule reports exactly one match for the given sentence and, when expected
  * suggestions are supplied, that the match's suggested replacements equal that list.
  *
  * @param s the sentence to analyze and check
  * @param expectedSuggestions the expected replacements; when empty, suggestions are not checked
  * @throws IOException declared by this method; presumably raised while analyzing the sentence
  */
 private void assertBad(String s, String... expectedSuggestions) throws IOException {
   RuleMatch[] matches = rule.match(langTool.getAnalyzedSentence(s));
   assertEquals("Did not find one match in sentence '" + s + "'", 1, matches.length);
   if (expectedSuggestions.length > 0) {
     RuleMatch match = matches[0];
     List<String> suggestions = match.getSuggestedReplacements();
     assertThat(suggestions, is(Arrays.asList(expectedSuggestions)));
  /**
   * Builds a match spanning the subject and the verb, whichever comes first, with a message that
   * names both tokens, and assembles replacement candidates that either swap in another verb form
   * or another pronoun.
   *
   * <p>NOTE(review): nothing in this excerpt attaches {@code suggestions} to the returned match;
   * that statement, like the closing braces, appears to be missing.
   *
   * @param subject the subject token; its text and start position are used
   * @param verb the verb token; its text and start position are used
   * @param expectedVerbPOS passed through to {@code getVerbSuggestions}
   * @return the match covering both tokens
   */
  private RuleMatch ruleMatchWrongVerbSubject(
      final AnalyzedTokenReadings subject,
      final AnalyzedTokenReadings verb,
      final String expectedVerbPOS) {
    final String msg =
        "Möglicherweise fehlende grammatische Übereinstimmung zwischen Subjekt ("
            + subject.getToken()
            + ") und Prädikat ("
            + verb.getToken()
            + ") bezüglich Person oder Numerus (Einzahl, Mehrzahl - Beispiel: "
            + "'ich sind' statt 'ich bin').";

    List<String> suggestions = new ArrayList<>();
    List<String> verbSuggestions = new ArrayList<>();
    List<String> pronounSuggestions = new ArrayList<>();

    RuleMatch ruleMatch;
    if (subject.getStartPos() < verb.getStartPos()) {
      // Subject precedes verb: the match runs from the subject's start to the verb's end, and
      // suggestions are written in "subject verb" order.
      ruleMatch =
          new RuleMatch(
              this, subject.getStartPos(), verb.getStartPos() + verb.getToken().length(), msg);
      verbSuggestions.addAll(getVerbSuggestions(verb, expectedVerbPOS, false));
      for (String verbSuggestion : verbSuggestions) {
        suggestions.add(subject.getToken() + " " + verbSuggestion);
          // NOTE(review): the start of this statement (presumably pronounSuggestions.addAll()
          // is missing from this excerpt. The boolean tells whether the subject is capitalized.
          getPronounSuggestions(verb, Character.isUpperCase(subject.getToken().charAt(0))));
      for (String pronounSuggestion : pronounSuggestions) {
        suggestions.add(pronounSuggestion + " " + verb.getToken());
    } else {
      // Verb precedes (or starts at the same position as) the subject: the match runs from the
      // verb's start to the subject's end, and suggestions are written in "verb subject" order.
      ruleMatch =
          new RuleMatch(
              this, verb.getStartPos(), subject.getStartPos() + subject.getToken().length(), msg);
              // NOTE(review): the start of this statement (presumably
              // verbSuggestions.addAll(getVerbSuggestions() is missing from this excerpt.
              verb, expectedVerbPOS, Character.isUpperCase(verb.getToken().charAt(0))));
      for (String verbSuggestion : verbSuggestions) {
        suggestions.add(verbSuggestion + " " + subject.getToken());
      pronounSuggestions.addAll(getPronounSuggestions(verb, false));
      for (String pronounSuggestion : pronounSuggestions) {
        suggestions.add(verb.getToken() + " " + pronounSuggestion);

    return ruleMatch;
 /**
  * Asserts that the rule reports exactly {@code n} matches for the given sentence and, when
  * expected suggestions are supplied, that the suggested replacements of the first match (or of
  * the second match, if the first has none) equal that list.
  *
  * @param s the sentence to analyze and check
  * @param n the expected number of matches
  * @param expectedSuggestions the expected replacements; when empty, suggestions are not checked
  * @throws IOException declared by this method; presumably raised while analyzing the sentence
  */
 private void assertBad(String s, int n, String... expectedSuggestions) throws IOException {
   RuleMatch[] matches = rule.match(langTool.getAnalyzedSentence(s));
   assertEquals("Did not find " + n + " match(es) in sentence '" + s + "'", n, matches.length);
   if (expectedSuggestions.length > 0) {
     RuleMatch match = matches[0];
     // When two errors are reported by the rule (so TODO above), it might happen that the first
     // match does not have the suggestions, but the second one
     if (matches.length > 1 && match.getSuggestedReplacements().size() == 0) {
       match = matches[1];
     List<String> suggestions = match.getSuggestedReplacements();
     assertThat(suggestions, is(Arrays.asList(expectedSuggestions)));
 /**
  * Walks the sentence's tokens looking for a form of "spiegeln" that is later followed by
  * "wieder" without any "wider", and builds a match on the token at which that condition holds.
  *
  * <p>NOTE(review): in this excerpt the created match is never added to {@code ruleMatches};
  * that statement, like the closing braces, appears to be missing.
  *
  * @param sentence the sentence whose tokens are inspected
  * @return the collected matches, converted by {@code toRuleMatchArray}
  */
 public RuleMatch[] match(AnalyzedSentence sentence) {
   final List<RuleMatch> ruleMatches = new ArrayList<>();
   final AnalyzedTokenReadings[] tokens = sentence.getTokens();
   boolean foundSpiegelt = false;
   boolean foundWieder = false;
   boolean foundWider = false;
   for (int i = 0; i < tokens.length; i++) {
     final String token = tokens[i].getToken();
     // Tokens that are empty after trimming are skipped.
     if (!token.trim().equals("")) {
       if (token.equalsIgnoreCase("spiegelt")
           || token.equalsIgnoreCase("spiegeln")
           || token.equalsIgnoreCase("spiegelte")
           || token.equalsIgnoreCase("spiegelten")
           || token.equalsIgnoreCase("spiegelst")) {
         foundSpiegelt = true;
       // "wieder" and "wider" only count once a verb form has been seen.
       } else if (token.equalsIgnoreCase("wieder") && foundSpiegelt) {
         foundWieder = true;
       } else if (token.equalsIgnoreCase("wider") && foundSpiegelt) {
         foundWider = true;
       // Report only if neither of the next two tokens is "wider" (case-sensitive); that
       // look-ahead is applied only when at least two more tokens follow.
       if (foundSpiegelt
           && foundWieder
           && !foundWider
           && !(tokens.length > i + 2
               && (tokens[i + 1].getToken().equals("wider")
                   || tokens[i + 2].getToken().equals("wider")))) {
         final String shortMsg = "'wider' in 'widerspiegeln' wird mit 'i' geschrieben";
         final String msg =
             "'wider' in 'widerspiegeln' wird mit 'i' statt mit 'ie' "
                 + "geschrieben, z.B. 'Das spiegelt die Situation gut wider.'";
         final int pos = tokens[i].getStartPos();
         final RuleMatch ruleMatch = new RuleMatch(this, pos, pos + token.length(), msg, shortMsg);
         // Reset the state so a later occurrence in the same sentence is evaluated afresh.
         foundSpiegelt = false;
         foundWieder = false;
         foundWider = false;
   return toRuleMatchArray(ruleMatches);
  /**
   * Tokenizes the sentence text, checks each word with {@code isMisspelled} and builds a match
   * with suggestions for every misspelled word, tracking the character offset in {@code len}.
   *
   * <p>NOTE(review): several statements are missing from this excerpt: the body of the
   * {@code needsInit} branch, whatever follows the offset update for ignored words (presumably a
   * {@code continue}), most {@code RuleMatch} constructor arguments, and the code that attaches
   * the suggestions and adds the match to {@code ruleMatches}.
   *
   * @param sentence the sentence to spell-check
   * @return the collected matches, converted by {@code toRuleMatchArray}; an empty result when
   *     {@code hunspellDict} is null
   * @throws IOException declared by this method; the throwing call is not identifiable here
   */
  public RuleMatch[] match(AnalyzedSentence sentence) throws IOException {
    final List<RuleMatch> ruleMatches = new ArrayList<>();
    if (needsInit) {
    if (hunspellDict == null) {
      // some languages might not have a dictionary, be silent about it
      return toRuleMatchArray(ruleMatches);
    final String[] tokens = tokenizeText(getSentenceTextWithoutUrlsAndImmunizedTokens(sentence));

    // starting with the first token to skip the zero-length START_SENT
    int len = sentence.getTokens()[1].getStartPos();
    for (int i = 0; i < tokens.length; i++) {
      String word = tokens[i];
      if (ignoreWord(Arrays.asList(tokens), i) || ignoreWord(word)) {
        // Advance the offset past the ignored word plus one more character (presumably the
        // separator between tokens - TODO confirm).
        len += word.length() + 1;
      if (isMisspelled(word)) {
        final RuleMatch ruleMatch =
            new RuleMatch(
                len + word.length(),
        // Suggestions from getAdditionalTopSuggestions go to the front of the list, those from
        // getAdditionalSuggestions to the end.
        final List<String> suggestions = getSuggestions(word);
        suggestions.addAll(0, getAdditionalTopSuggestions(suggestions, word));
        suggestions.addAll(getAdditionalSuggestions(suggestions, word));
        if (!suggestions.isEmpty()) {
      len += word.length() + 1;

    return toRuleMatchArray(ruleMatches);
Beispiel #8
 /**
  * Looks for a token that ends with a hyphen and is followed by a token starting with an
  * uppercase letter, and builds a match whose suggested replacement is the text of the
  * hyphen-terminated token.
  *
  * <p>NOTE(review): the body of the whitespace branch, some {@code RuleMatch} constructor
  * arguments and the statement adding the match to {@code ruleMatches} are missing from this
  * excerpt.
  *
  * @param sentence the sentence whose non-whitespace tokens are inspected
  * @return the collected matches, converted by {@code toRuleMatchArray}
  */
 public RuleMatch[] match(final AnalyzedSentence sentence) {
   final List<RuleMatch> ruleMatches = new ArrayList<>();
   final AnalyzedTokenReadings[] tokens = sentence.getTokensWithoutWhitespace();
   String prevToken = null;
   for (int i = 0; i < tokens.length; i++) {
     final String token = tokens[i].getToken();
     if (tokens[i].isWhitespace()) {
       // ignore
     // The previous token must end with "-" without being a lone "-" and without containing a
     // double dash.
     if (prevToken != null
         && !prevToken.equals("-")
         && !prevToken.contains("--")
         && !prevToken.contains(
             "–-") // first char is some special kind of dash, found in Wikipedia
         && prevToken.endsWith("-")) {
       // NOTE(review): charAt(0) assumes the current token is non-empty - confirm.
       final char firstChar = token.charAt(0);
       if (Character.isUpperCase(firstChar)) {
         final String msg =
             "Möglicherweise fehlt ein 'und' oder ein Komma, oder es wurde nach dem Wort "
                 + "ein überflüssiges Leerzeichen eingefügt. Eventuell haben Sie auch versehentlich einen Bindestrich statt eines Punktes eingefügt.";
         // The visible positions start at the previous token and end one character past it.
         final RuleMatch ruleMatch =
             new RuleMatch(
                 tokens[i - 1].getStartPos(),
                 tokens[i - 1].getStartPos() + prevToken.length() + 1,
         ruleMatch.setSuggestedReplacement(tokens[i - 1].getToken());
     prevToken = token;
   return toRuleMatchArray(ruleMatches);
  /**
   * Checks whether the first relevant token of the sentence starts with a lowercase letter and,
   * unless one of the exemptions below applies, builds a match for it.
   *
   * <p>The token that is checked is the first token after the sentence start, or the token after
   * a leading quote character, or the value returned by {@code dutchSpecialCase} when that is
   * non-null. The method also stores the sentence's last token in {@code lastParagraphString}
   * (not declared here; presumably a field that carries over to the next call).
   *
   * <p>NOTE(review): the first operand of the {@code languageHasSpecialCases} expression, most
   * {@code RuleMatch} constructor arguments and the statement adding the match to
   * {@code ruleMatches} are missing from this excerpt, as are all closing braces.
   *
   * @param text the sentence whose non-whitespace tokens are inspected
   * @return the collected matches, converted by {@code toRuleMatchArray}; an empty result when
   *     there are fewer than two tokens
   */
  public final RuleMatch[] match(final AnalyzedSentence text) {
    final List<RuleMatch> ruleMatches = new ArrayList<RuleMatch>();
    final AnalyzedTokenReadings[] tokens = text.getTokensWithoutWhitespace();
    if (tokens.length < 2) {
      return toRuleMatchArray(ruleMatches);
    int matchTokenPos = 1; // 0 = SENT_START
    final String firstToken = tokens[matchTokenPos].getToken();
    String secondToken = null;
    String thirdToken = null;
    // ignore quote characters:
    if (tokens.length >= 3
        && ("'".equals(firstToken) || "\"".equals(firstToken) || "„".equals(firstToken))) {
      matchTokenPos = 2;
      secondToken = tokens[matchTokenPos].getToken();
    final String firstDutchToken = dutchSpecialCase(firstToken, secondToken, tokens);
    if (firstDutchToken != null) {
      // A non-null result replaces the token to check and moves the match to token index 3.
      thirdToken = firstDutchToken;
      matchTokenPos = 3;

    // Precedence for the token to check: Dutch special case, then post-quote token, then first.
    String checkToken = firstToken;
    if (thirdToken != null) {
      checkToken = thirdToken;
    } else if (secondToken != null) {
      checkToken = secondToken;

    String lastToken = tokens[tokens.length - 1].getToken();
    if (lastToken.matches("[ \"'„»«“]") && tokens.length >= 2) {
      // ignore trailing whitespace or quote
      lastToken = tokens[tokens.length - 2].getToken();

    boolean preventError = false;
    // TODO: why do only *these* languages have that special case?
    final String langCode = language.getShortName();
    final boolean languageHasSpecialCases =
            || langCode.equals("pl")
            || langCode.equals("uk")
            || langCode.equals("be")
            || langCode.equals(Locale.ENGLISH.getLanguage())
            || langCode.equals(Locale.ITALIAN.getLanguage())
            || langCode.equals(Locale.GERMAN.getLanguage());
    if (languageHasSpecialCases) {
      // fix for lists; note - this will not always work for the last point in OOo,
      // as OOo might serve paragraphs in any order.
      if (";".equals(lastParagraphString)
          || ";".equals(lastToken)
          || ",".equals(lastParagraphString)
          || ",".equals(lastToken)) {
        preventError = true;
      // fix for words in table (not sentences); note - this will not always work for the last point
      // in OOo,
      // as OOo might serve paragraphs in any order.
      if (!lastToken.matches("[.?!…]")) {
        preventError = true;

    // Remember this sentence's last token; it is read as lastParagraphString above.
    lastParagraphString = lastToken;

    if (checkToken.length() > 0) {
      final char firstChar = checkToken.charAt(0);
      if (!preventError && Character.isLowerCase(firstChar)) {
        final RuleMatch ruleMatch =
            new RuleMatch(
                tokens[matchTokenPos].getStartPos() + tokens[matchTokenPos].getToken().length(),
    return toRuleMatchArray(ruleMatches);
  /**
   * Checks whether the first relevant token of the sentence starts with a lowercase letter and,
   * unless one of the exemptions below applies, builds a match for it.
   *
   * <p>The token that is checked is the first token after the sentence start, or the token after
   * a leading quote character, or the value returned by {@code dutchSpecialCase} when that is
   * non-null. Exemptions visible here: the stored {@code lastParagraphString} is "," or ";", the
   * sentence-end patterns do not match, the token at the match position matches
   * {@code NUMERALS_EN} and is followed by "." or ")", or {@code isUrl} accepts the token.
   *
   * <p>NOTE(review): most {@code RuleMatch} constructor arguments, the statement adding the
   * match to {@code ruleMatches} and all closing braces are missing from this excerpt.
   * {@code lastParagraphString} is not declared here; presumably it is a field that carries
   * over to the next call.
   *
   * @param sentence the sentence whose non-whitespace tokens are inspected
   * @return the collected matches, converted by {@code toRuleMatchArray}; an empty result when
   *     there are fewer than two tokens
   */
  public final RuleMatch[] match(final AnalyzedSentence sentence) {
    final List<RuleMatch> ruleMatches = new ArrayList<>();
    final AnalyzedTokenReadings[] tokens = sentence.getTokensWithoutWhitespace();
    if (tokens.length < 2) {
      return toRuleMatchArray(ruleMatches);
    int matchTokenPos = 1; // 0 = SENT_START
    final String firstToken = tokens[matchTokenPos].getToken();
    String secondToken = null;
    String thirdToken = null;
    // ignore quote characters:
    if (tokens.length >= 3
        && ("'".equals(firstToken) || "\"".equals(firstToken) || "„".equals(firstToken))) {
      matchTokenPos = 2;
      secondToken = tokens[matchTokenPos].getToken();
    final String firstDutchToken = dutchSpecialCase(firstToken, secondToken, tokens);
    if (firstDutchToken != null) {
      // A non-null result replaces the token to check and moves the match to token index 3.
      thirdToken = firstDutchToken;
      matchTokenPos = 3;

    // Precedence for the token to check: Dutch special case, then post-quote token, then first.
    String checkToken = firstToken;
    if (thirdToken != null) {
      checkToken = thirdToken;
    } else if (secondToken != null) {
      checkToken = secondToken;

    String lastToken = tokens[tokens.length - 1].getToken();
    if (tokens.length >= 2 && WHITESPACE_OR_QUOTE.matcher(lastToken).matches()) {
      // ignore trailing whitespace or quote
      lastToken = tokens[tokens.length - 2].getToken();

    boolean preventError = false;
    // The value stored by the previous call ended in a list separator.
    if (lastParagraphString.equals(",") || lastParagraphString.equals(";")) {
      preventError = true;
    // Neither the stored value nor this sentence's last token matches its sentence-end pattern.
    if (!SENTENCE_END1.matcher(lastParagraphString).matches()
        && !SENTENCE_END2.matcher(lastToken).matches()) {
      preventError = true;

    // Remember this sentence's last token; it is read as lastParagraphString above.
    lastParagraphString = lastToken;

    // allows enumeration with lowercase letters: a), iv., etc.
    if (matchTokenPos + 1 < tokens.length
        && NUMERALS_EN.matcher(tokens[matchTokenPos].getToken()).matches()
        && (tokens[matchTokenPos + 1].getToken().equals(".")
            || tokens[matchTokenPos + 1].getToken().equals(")"))) {
      preventError = true;

    if (isUrl(checkToken)) {
      preventError = true;

    if (checkToken.length() > 0) {
      final char firstChar = checkToken.charAt(0);
      if (!preventError && Character.isLowerCase(firstChar)) {
        final RuleMatch ruleMatch =
            new RuleMatch(
                tokens[matchTokenPos].getStartPos() + tokens[matchTokenPos].getToken().length(),
    return toRuleMatchArray(ruleMatches);
  /**
   * Slides a queue of at most {@code MAX_TERMS} tokens over the sentence, builds candidate
   * strings from the queued tokens and creates a match for every candidate contained in
   * {@code incorrectCompounds}, choosing the message according to which replacements apply.
   *
   * <p>NOTE(review): large parts are missing from this excerpt: the statements that append token
   * text to {@code sb} and fill {@code stringsToCheck} / {@code origStringsToCheck}, the
   * replacement added in the "without hyphen" branch, most {@code RuleMatch} constructor
   * arguments, the handling inside the duplicate check, and the statement adding the match to
   * {@code ruleMatches}. All closing braces are missing as well.
   *
   * @param sentence the sentence whose non-whitespace tokens are inspected
   * @return the collected matches, converted by {@code toRuleMatchArray}
   */
  public RuleMatch[] match(final AnalyzedSentence sentence) {
    final List<RuleMatch> ruleMatches = new ArrayList<>();
    final AnalyzedTokenReadings[] tokens = sentence.getTokensWithoutWhitespace();

    RuleMatch prevRuleMatch = null;
    final Queue<AnalyzedTokenReadings> prevTokens = new ArrayBlockingQueue<>(MAX_TERMS);
    // The loop runs MAX_TERMS - 1 iterations past the real tokens, feeding empty padding tokens.
    for (int i = 0; i < tokens.length + MAX_TERMS - 1; i++) {
      final AnalyzedTokenReadings token;
      // we need to extend the token list so we find matches at the end of the original list:
      if (i >= tokens.length) {
        // Padding token with empty text, positioned at the start of the queue's head token.
        token =
            new AnalyzedTokenReadings(
                new AnalyzedToken("", "", null), prevTokens.peek().getStartPos());
      } else {
        token = tokens[i];
      if (i == 0) {
        addToQueue(token, prevTokens);

      final StringBuilder sb = new StringBuilder();
      int j = 0;
      AnalyzedTokenReadings firstMatchToken = null;
      final List<String> stringsToCheck = new ArrayList<>();
      final List<String> origStringsToCheck =
          new ArrayList<>(); // original upper/lowercase spelling
      final Map<String, AnalyzedTokenReadings> stringToToken = new HashMap<>();
      for (AnalyzedTokenReadings atr : prevTokens) {
        if (j == 0) {
          firstMatchToken = atr;
        sb.append(' ');
        if (j >= 1) {
          // Map each normalized candidate to the token seen when it was first produced.
          final String stringToCheck = normalize(sb.toString());
          if (!stringToToken.containsKey(stringToCheck)) {
            stringToToken.put(stringToCheck, atr);
      // iterate backwards over all potentially incorrect strings to make
      // sure we match longer strings first:
      for (int k = stringsToCheck.size() - 1; k >= 0; k--) {
        final String stringToCheck = stringsToCheck.get(k);
        final String origStringToCheck = origStringsToCheck.get(k);
        if (incorrectCompounds.contains(stringToCheck)) {
          final AnalyzedTokenReadings atr = stringToToken.get(stringToCheck);
          String msg = null;
          final List<String> replacement = new ArrayList<>();
          // Hyphenated form is offered unless the candidate is listed in noDashSuggestion.
          if (!noDashSuggestion.contains(stringToCheck)) {
            replacement.add(origStringToCheck.replace(' ', '-'));
            msg = withHyphenMessage;
          if (isNotAllUppercase(origStringToCheck) && !onlyDashSuggestion.contains(stringToCheck)) {
            msg = withoutHyphenMessage;
          final String[] parts = stringToCheck.split(" ");
          // A single-character first part adds the hyphenated form and uses the hyphen message.
          if (parts.length > 0 && parts[0].length() == 1) {
            replacement.add(origStringToCheck.replace(' ', '-'));
            msg = withHyphenMessage;
          } else if (replacement.isEmpty() || replacement.size() == 2) { // isEmpty shouldn't happen
            msg = withOrWithoutHyphenMessage;
          // The only visible constructor argument is the end offset of the token mapped to the
          // candidate.
          final RuleMatch ruleMatch =
              new RuleMatch(
                  atr.getStartPos() + atr.getToken().length(),
          // avoid duplicate matches:
          if (prevRuleMatch != null && prevRuleMatch.getFromPos() == ruleMatch.getFromPos()) {
            prevRuleMatch = ruleMatch;
          prevRuleMatch = ruleMatch;
      addToQueue(token, prevTokens);
    return toRuleMatchArray(ruleMatches);
Beispiel #12
 /**
  * Compare by start position.
  *
  * @param other the match to compare against
  * @return the result of {@code Integer.compare} applied to this match's and {@code other}'s
  *     {@code getFromPos()} values
  */
 public int compareTo(final RuleMatch other) {
   return Integer.compare(getFromPos(), other.getFromPos());
  /**
   * Slides a queue of at most {@code MAX_TERMS} tokens over the sentence, obtains candidate
   * strings from {@code getStringToTokenMap} and creates a match for every candidate contained
   * in the incorrect-compounds set of {@code getCompoundRuleData()}, choosing the message
   * according to which replacements apply.
   *
   * <p>NOTE(review): several statements are missing from this excerpt: the body of the
   * immunized-token branch, the replacement added in the "without hyphen" branch, the handling
   * inside the duplicate check, and the statement adding the match to {@code ruleMatches}. All
   * closing braces are missing as well.
   *
   * @param sentence the sentence whose non-whitespace tokens are inspected
   * @return the collected matches, converted by {@code toRuleMatchArray}
   */
  public RuleMatch[] match(AnalyzedSentence sentence) {
    List<RuleMatch> ruleMatches = new ArrayList<>();
    AnalyzedTokenReadings[] tokens = sentence.getTokensWithoutWhitespace();

    RuleMatch prevRuleMatch = null;
    Queue<AnalyzedTokenReadings> prevTokens = new ArrayBlockingQueue<>(MAX_TERMS);
    // The loop runs MAX_TERMS - 1 iterations past the real tokens, feeding empty padding tokens.
    for (int i = 0; i < tokens.length + MAX_TERMS - 1; i++) {
      AnalyzedTokenReadings token;
      // we need to extend the token list so we find matches at the end of the original list:
      if (i >= tokens.length) {
        // Padding token with empty text, positioned at the start of the queue's head token.
        token =
            new AnalyzedTokenReadings(
                new AnalyzedToken("", "", null), prevTokens.peek().getStartPos());
      } else {
        token = tokens[i];
      if (i == 0) {
        addToQueue(token, prevTokens);
      if (token.isImmunized()) {

      // The head of the queue marks where a match starts.
      AnalyzedTokenReadings firstMatchToken = prevTokens.peek();
      List<String> stringsToCheck = new ArrayList<>();
      List<String> origStringsToCheck = new ArrayList<>(); // original upper/lowercase spelling
      // The two lists are passed to the helper, presumably to be filled by it - TODO confirm.
      Map<String, AnalyzedTokenReadings> stringToToken =
          getStringToTokenMap(prevTokens, stringsToCheck, origStringsToCheck);
      // iterate backwards over all potentially incorrect strings to make
      // sure we match longer strings first:
      for (int k = stringsToCheck.size() - 1; k >= 0; k--) {
        String stringToCheck = stringsToCheck.get(k);
        String origStringToCheck = origStringsToCheck.get(k);
        if (getCompoundRuleData().getIncorrectCompounds().contains(stringToCheck)) {
          AnalyzedTokenReadings atr = stringToToken.get(stringToCheck);
          String msg = null;
          List<String> replacement = new ArrayList<>();
          // Hyphenated form is offered unless the candidate is in the no-dash-suggestion set.
          if (!getCompoundRuleData().getNoDashSuggestion().contains(stringToCheck)) {
            replacement.add(origStringToCheck.replace(' ', '-'));
            msg = withHyphenMessage;
          if (isNotAllUppercase(origStringToCheck)
              && !getCompoundRuleData().getOnlyDashSuggestion().contains(stringToCheck)) {
            msg = withoutHyphenMessage;
          String[] parts = stringToCheck.split(" ");
          // A single-character first part adds the hyphenated form and uses the hyphen message.
          if (parts.length > 0 && parts[0].length() == 1) {
            replacement.add(origStringToCheck.replace(' ', '-'));
            msg = withHyphenMessage;
          } else if (replacement.isEmpty() || replacement.size() == 2) { // isEmpty shouldn't happen
            msg = withOrWithoutHyphenMessage;
          // The match spans from the queue's head token to the end of the candidate's token.
          RuleMatch ruleMatch =
              new RuleMatch(this, firstMatchToken.getStartPos(), atr.getEndPos(), msg, shortDesc);
          // avoid duplicate matches:
          if (prevRuleMatch != null && prevRuleMatch.getFromPos() == ruleMatch.getFromPos()) {
            prevRuleMatch = ruleMatch;
          prevRuleMatch = ruleMatch;
      addToQueue(token, prevTokens);
    return toRuleMatchArray(ruleMatches);